Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 :
4 : // Copyright (c) 2006, 2011, 2012 Google Inc.
5 : // All rights reserved.
6 : //
7 : // Redistribution and use in source and binary forms, with or without
8 : // modification, are permitted provided that the following conditions are
9 : // met:
10 : //
11 : // * Redistributions of source code must retain the above copyright
12 : // notice, this list of conditions and the following disclaimer.
13 : // * Redistributions in binary form must reproduce the above
14 : // copyright notice, this list of conditions and the following disclaimer
15 : // in the documentation and/or other materials provided with the
16 : // distribution.
17 : // * Neither the name of Google Inc. nor the names of its
18 : // contributors may be used to endorse or promote products derived from
19 : // this software without specific prior written permission.
20 : //
21 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 :
33 : // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
34 :
35 : // (derived from)
36 : // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
37 : // Find all the debugging info in a file and dump it as a Breakpad symbol file.
38 : //
39 : // dump_symbols.h: Read debugging information from an ELF file, and write
40 : // it out as a Breakpad symbol file.
41 :
42 : // This file is derived from the following files in
43 : // toolkit/crashreporter/google-breakpad:
44 : // src/common/linux/dump_symbols.cc
45 : // src/common/linux/elfutils.cc
46 : // src/common/linux/file_id.cc
47 :
48 : #include <errno.h>
49 : #include <fcntl.h>
50 : #include <stdio.h>
51 : #include <string.h>
52 : #include <sys/mman.h>
53 : #include <sys/stat.h>
54 : #include <unistd.h>
55 : #include <arpa/inet.h>
56 :
57 : #include <set>
58 : #include <string>
59 : #include <vector>
60 :
61 : #include "mozilla/Assertions.h"
62 : #include "mozilla/Sprintf.h"
63 :
64 : #include "PlatformMacros.h"
65 : #include "LulCommonExt.h"
66 : #include "LulDwarfExt.h"
67 : #include "LulElfInt.h"
68 : #include "LulMainInt.h"
69 :
70 :
71 : #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
72 : // bionic and older glibc versions don't define it
73 : # define SHT_ARM_EXIDX (SHT_LOPROC + 1)
74 : #endif
75 :
76 :
77 : // This namespace contains helper functions.
78 : namespace {
79 :
80 : using lul::DwarfCFIToModule;
81 : using lul::FindElfSectionByName;
82 : using lul::GetOffset;
83 : using lul::IsValidElf;
84 : using lul::Module;
85 : using lul::UniqueStringUniverse;
86 : using lul::scoped_ptr;
87 : using lul::Summariser;
88 : using std::string;
89 : using std::vector;
90 : using std::set;
91 :
//
// FDWrapper
//
// Owns a file descriptor and close()s it on destruction, unless
// ownership has been given up through release().  A stored value of -1
// means there is nothing to close.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
    fd_(fd) {}
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Not copyable: two wrappers holding the same descriptor would both
  // close it.
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  // Returns the wrapped descriptor; ownership stays with the wrapper.
  int get() const {
    return fd_;
  }

  // Returns the wrapped descriptor and stops tracking it, so the
  // destructor will no longer close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;
};
116 :
117 : //
118 : // MmapWrapper
119 : //
120 : // Wrapper class to make sure mapped regions are unmapped.
121 : //
122 : class MmapWrapper {
123 : public:
124 0 : MmapWrapper() : is_set_(false), base_(NULL), size_(0){}
125 0 : ~MmapWrapper() {
126 0 : if (is_set_ && base_ != NULL) {
127 0 : MOZ_ASSERT(size_ > 0);
128 0 : munmap(base_, size_);
129 : }
130 0 : }
131 0 : void set(void *mapped_address, size_t mapped_size) {
132 0 : is_set_ = true;
133 0 : base_ = mapped_address;
134 0 : size_ = mapped_size;
135 0 : }
136 : void release() {
137 : MOZ_ASSERT(is_set_);
138 : is_set_ = false;
139 : base_ = NULL;
140 : size_ = 0;
141 : }
142 :
143 : private:
144 : bool is_set_;
145 : void *base_;
146 : size_t size_;
147 : };
148 :
149 :
150 : // Set NUM_DW_REGNAMES to be the number of Dwarf register names
151 : // appropriate to the machine architecture given in HEADER. Return
152 : // true on success, or false if HEADER's machine architecture is not
153 : // supported.
154 : template<typename ElfClass>
155 0 : bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
156 : unsigned int* num_dw_regnames) {
157 0 : switch (elf_header->e_machine) {
158 : case EM_386:
159 0 : *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
160 0 : return true;
161 : case EM_ARM:
162 0 : *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
163 0 : return true;
164 : case EM_X86_64:
165 0 : *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
166 0 : return true;
167 : default:
168 0 : MOZ_ASSERT(0);
169 : return false;
170 : }
171 : }
172 :
173 : template<typename ElfClass>
174 0 : bool LoadDwarfCFI(const string& dwarf_filename,
175 : const typename ElfClass::Ehdr* elf_header,
176 : const char* section_name,
177 : const typename ElfClass::Shdr* section,
178 : const bool eh_frame,
179 : const typename ElfClass::Shdr* got_section,
180 : const typename ElfClass::Shdr* text_section,
181 : const bool big_endian,
182 : SecMap* smap,
183 : uintptr_t text_bias,
184 : UniqueStringUniverse* usu,
185 : void (*log)(const char*)) {
186 : // Find the appropriate set of register names for this file's
187 : // architecture.
188 0 : unsigned int num_dw_regs = 0;
189 0 : if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
190 0 : fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
191 : " cannot convert DWARF call frame information\n",
192 0 : dwarf_filename.c_str(), elf_header->e_machine);
193 0 : return false;
194 : }
195 :
196 : const lul::Endianness endianness
197 0 : = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
198 :
199 : // Find the call frame information and its size.
200 : const char* cfi =
201 0 : GetOffset<ElfClass, char>(elf_header, section->sh_offset);
202 0 : size_t cfi_size = section->sh_size;
203 :
204 : // Plug together the parser, handler, and their entourages.
205 :
206 : // Here's a summariser, which will receive the output of the
207 : // parser, create summaries, and add them to |smap|.
208 0 : Summariser summ(smap, text_bias, log);
209 :
210 0 : lul::ByteReader reader(endianness);
211 0 : reader.SetAddressSize(ElfClass::kAddrSize);
212 :
213 0 : DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
214 0 : DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ);
215 :
216 : // Provide the base addresses for .eh_frame encoded pointers, if
217 : // possible.
218 0 : reader.SetCFIDataBase(section->sh_addr, cfi);
219 0 : if (got_section)
220 0 : reader.SetDataBase(got_section->sh_addr);
221 0 : if (text_section)
222 0 : reader.SetTextBase(text_section->sh_addr);
223 :
224 : lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
225 0 : section_name);
226 : lul::CallFrameInfo parser(cfi, cfi_size,
227 : &reader, &handler, &dwarf_reporter,
228 0 : eh_frame);
229 0 : parser.Start();
230 :
231 0 : return true;
232 : }
233 :
234 0 : bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
235 : void** elf_header) {
236 0 : int obj_fd = open(obj_file.c_str(), O_RDONLY);
237 0 : if (obj_fd < 0) {
238 0 : fprintf(stderr, "Failed to open ELF file '%s': %s\n",
239 0 : obj_file.c_str(), strerror(errno));
240 0 : return false;
241 : }
242 0 : FDWrapper obj_fd_wrapper(obj_fd);
243 : struct stat st;
244 0 : if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
245 0 : fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
246 0 : obj_file.c_str(), strerror(errno));
247 0 : return false;
248 : }
249 : // Mapping it read-only is good enough. In any case, mapping it
250 : // read-write confuses Valgrind's debuginfo acquire/discard
251 : // heuristics, making it hard to profile the profiler.
252 0 : void *obj_base = mmap(nullptr, st.st_size,
253 0 : PROT_READ, MAP_PRIVATE, obj_fd, 0);
254 0 : if (obj_base == MAP_FAILED) {
255 0 : fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
256 0 : obj_file.c_str(), strerror(errno));
257 0 : return false;
258 : }
259 0 : map_wrapper->set(obj_base, st.st_size);
260 0 : *elf_header = obj_base;
261 0 : if (!IsValidElf(*elf_header)) {
262 0 : fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
263 0 : return false;
264 : }
265 0 : return true;
266 : }
267 :
// Decode the data-encoding byte (e_ident[EI_DATA]) of ELF_HEADER.  On
// success, set *BIG_ENDIAN accordingly and return true.  For any other
// encoding value, print a diagnostic to stderr, leave *BIG_ENDIAN
// untouched and return false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
285 :
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's
// necessary to follow the .gnu_debuglink section and load debug
// information from a different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is kept by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false) {}

  // Records that SECTION has been loaded.  Recording the same name a
  // second time only prints a warning to stderr; this is how sections
  // accidentally loaded twice from two different files get noticed.
  void LoadedSection(const string& section) {
    // insert() reports through .second whether the name was new.
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // Returns a copy of the recorded path of the debug ELF file.  Nothing
  // in this class assigns that path, so it is empty unless set elsewhere.
  string debuglink_file() const {
    return debuglink_file_;
  }

 private:
  const vector<string>& debug_dirs_; // Directories in which to
                                     // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Initialised to false; not read by any
                           // member of this class.

  set<string> loaded_sections_;  // Tracks the loaded ELF sections
                                 // between calls to LoadSymbols().
};
328 :
// Return the preferred loading address of the binary: the p_vaddr of the
// first PT_LOAD entry among the NHEADER program headers starting at
// PROGRAM_HEADERS, or 0 if there is no PT_LOAD entry.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment.  (ELF requires
  // the segments to be sorted by load address.)  For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  int index = 0;
  while (index < nheader) {
    const Phdr* candidate = program_headers + index;
    if (candidate->p_type == PT_LOAD) {
      return candidate->p_vaddr;
    }
    ++index;
  }
  return 0;
}
348 :
349 : template<typename ElfClass>
350 0 : bool LoadSymbols(const string& obj_file,
351 : const bool big_endian,
352 : const typename ElfClass::Ehdr* elf_header,
353 : const bool read_gnu_debug_link,
354 : LoadSymbolsInfo<ElfClass>* info,
355 : SecMap* smap,
356 : void* rx_avma, size_t rx_size,
357 : UniqueStringUniverse* usu,
358 : void (*log)(const char*)) {
359 : typedef typename ElfClass::Phdr Phdr;
360 : typedef typename ElfClass::Shdr Shdr;
361 :
362 : char buf[500];
363 0 : SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str());
364 0 : buf[sizeof(buf)-1] = 0;
365 0 : log(buf);
366 :
367 : // This is how the text bias is calculated.
368 : // BEGIN CALCULATE BIAS
369 0 : uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
370 0 : GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
371 0 : elf_header->e_phnum);
372 0 : uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
373 0 : SprintfLiteral(buf,
374 : "LoadSymbols: rx_avma=%llx, text_bias=%llx",
375 : (unsigned long long int)(uintptr_t)rx_avma,
376 : (unsigned long long int)text_bias);
377 0 : buf[sizeof(buf)-1] = 0;
378 0 : log(buf);
379 : // END CALCULATE BIAS
380 :
381 : const Shdr* sections =
382 0 : GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
383 0 : const Shdr* section_names = sections + elf_header->e_shstrndx;
384 : const char* names =
385 0 : GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
386 0 : const char *names_end = names + section_names->sh_size;
387 0 : bool found_usable_info = false;
388 :
389 : // Dwarf Call Frame Information (CFI) is actually independent from
390 : // the other DWARF debugging information, and can be used alone.
391 : const Shdr* dwarf_cfi_section =
392 0 : FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
393 : sections, names, names_end,
394 0 : elf_header->e_shnum);
395 0 : if (dwarf_cfi_section) {
396 : // Ignore the return value of this function; even without call frame
397 : // information, the other debugging information could be perfectly
398 : // useful.
399 0 : info->LoadedSection(".debug_frame");
400 : bool result =
401 0 : LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
402 : dwarf_cfi_section, false, 0, 0, big_endian,
403 0 : smap, text_bias, usu, log);
404 0 : found_usable_info = found_usable_info || result;
405 0 : if (result)
406 0 : log("LoadSymbols: read CFI from .debug_frame");
407 : }
408 :
409 : // Linux C++ exception handling information can also provide
410 : // unwinding data.
411 : const Shdr* eh_frame_section =
412 0 : FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
413 : sections, names, names_end,
414 0 : elf_header->e_shnum);
415 0 : if (eh_frame_section) {
416 : // Pointers in .eh_frame data may be relative to the base addresses of
417 : // certain sections. Provide those sections if present.
418 : const Shdr* got_section =
419 0 : FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
420 : sections, names, names_end,
421 0 : elf_header->e_shnum);
422 : const Shdr* text_section =
423 0 : FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
424 : sections, names, names_end,
425 0 : elf_header->e_shnum);
426 0 : info->LoadedSection(".eh_frame");
427 : // As above, ignore the return value of this function.
428 : bool result =
429 0 : LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
430 : eh_frame_section, true,
431 : got_section, text_section, big_endian,
432 0 : smap, text_bias, usu, log);
433 0 : found_usable_info = found_usable_info || result;
434 0 : if (result)
435 0 : log("LoadSymbols: read CFI from .eh_frame");
436 : }
437 :
438 0 : SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str());
439 0 : buf[sizeof(buf)-1] = 0;
440 0 : log(buf);
441 :
442 0 : return found_usable_info;
443 : }
444 :
// Map the e_machine field of ELF_HEADER to the Breakpad symbol file
// architecture identifier.  Returns NULL for a machine type that is not
// in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct MachineName {
    Half machine;
    const char* name;
  };
  static const MachineName kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };

  const Half wanted = elf_header->e_machine;
  const size_t count = sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);
  for (size_t i = 0; i < count; ++i) {
    if (kKnownMachines[i].machine == wanted) {
      return kKnownMachines[i].name;
    }
  }
  return NULL;
}
464 :
465 : // Format the Elf file identifier in IDENTIFIER as a UUID with the
466 : // dashes removed.
467 0 : string FormatIdentifier(unsigned char identifier[16]) {
468 : char identifier_str[40];
469 : lul::FileID::ConvertIdentifierToString(
470 : identifier,
471 : identifier_str,
472 0 : sizeof(identifier_str));
473 0 : string id_no_dash;
474 0 : for (int i = 0; identifier_str[i] != '\0'; ++i)
475 0 : if (identifier_str[i] != '-')
476 0 : id_no_dash += identifier_str[i];
477 : // Add an extra "0" by the end. PDB files on Windows have an 'age'
478 : // number appended to the end of the file identifier; this isn't
479 : // really used or necessary on other platforms, but be consistent.
480 0 : id_no_dash += '0';
481 0 : return id_no_dash;
482 : }
483 :
// Return the non-directory portion of FILENAME, as computed by
// basename(): the portion after the last slash, or the whole filename if
// there are no slashes.
string BaseFileName(const string &filename) {
  // basename() takes a mutable C string, so hand it a scratch copy and
  // copy its answer into a std::string before the scratch copy is freed.
  char *scratch = strdup(filename.c_str());
  const char *leaf = basename(scratch);
  string result(leaf);
  free(scratch);
  return result;
}
493 :
494 : template<typename ElfClass>
495 0 : bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
496 : const string& obj_filename,
497 : const vector<string>& debug_dirs,
498 : SecMap* smap, void* rx_avma, size_t rx_size,
499 : UniqueStringUniverse* usu,
500 : void (*log)(const char*)) {
501 : typedef typename ElfClass::Ehdr Ehdr;
502 :
503 : unsigned char identifier[16];
504 0 : if (!lul
505 0 : ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
506 0 : fprintf(stderr, "%s: unable to generate file identifier\n",
507 : obj_filename.c_str());
508 0 : return false;
509 : }
510 :
511 0 : const char *architecture = ElfArchitecture<ElfClass>(elf_header);
512 0 : if (!architecture) {
513 0 : fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
514 0 : obj_filename.c_str(), elf_header->e_machine);
515 0 : return false;
516 : }
517 :
518 : // Figure out what endianness this file is.
519 : bool big_endian;
520 0 : if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
521 0 : return false;
522 :
523 0 : string name = BaseFileName(obj_filename);
524 0 : string os = "Linux";
525 0 : string id = FormatIdentifier(identifier);
526 :
527 0 : LoadSymbolsInfo<ElfClass> info(debug_dirs);
528 0 : if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
529 0 : !debug_dirs.empty(), &info,
530 : smap, rx_avma, rx_size, usu, log)) {
531 0 : const string debuglink_file = info.debuglink_file();
532 0 : if (debuglink_file.empty())
533 0 : return false;
534 :
535 : // Load debuglink ELF file.
536 0 : fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
537 0 : MmapWrapper debug_map_wrapper;
538 0 : Ehdr* debug_elf_header = NULL;
539 0 : if (!LoadELF(debuglink_file, &debug_map_wrapper,
540 : reinterpret_cast<void**>(&debug_elf_header)))
541 0 : return false;
542 : // Sanity checks to make sure everything matches up.
543 : const char *debug_architecture =
544 0 : ElfArchitecture<ElfClass>(debug_elf_header);
545 0 : if (!debug_architecture) {
546 0 : fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
547 0 : debuglink_file.c_str(), debug_elf_header->e_machine);
548 0 : return false;
549 : }
550 0 : if (strcmp(architecture, debug_architecture)) {
551 0 : fprintf(stderr, "%s with ELF machine architecture %s does not match "
552 : "%s with ELF architecture %s\n",
553 : debuglink_file.c_str(), debug_architecture,
554 : obj_filename.c_str(), architecture);
555 0 : return false;
556 : }
557 :
558 : bool debug_big_endian;
559 0 : if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
560 0 : return false;
561 0 : if (debug_big_endian != big_endian) {
562 0 : fprintf(stderr, "%s and %s does not match in endianness\n",
563 : obj_filename.c_str(), debuglink_file.c_str());
564 0 : return false;
565 : }
566 :
567 0 : if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
568 : debug_elf_header, false, &info,
569 : smap, rx_avma, rx_size, usu, log)) {
570 0 : return false;
571 : }
572 : }
573 :
574 0 : return true;
575 : }
576 :
577 : } // namespace (anon)
578 :
579 :
580 : namespace lul {
581 :
582 0 : bool ReadSymbolDataInternal(const uint8_t* obj_file,
583 : const string& obj_filename,
584 : const vector<string>& debug_dirs,
585 : SecMap* smap, void* rx_avma, size_t rx_size,
586 : UniqueStringUniverse* usu,
587 : void (*log)(const char*)) {
588 :
589 0 : if (!IsValidElf(obj_file)) {
590 0 : fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
591 0 : return false;
592 : }
593 :
594 0 : int elfclass = ElfClass(obj_file);
595 0 : if (elfclass == ELFCLASS32) {
596 : return ReadSymbolDataElfClass<ElfClass32>(
597 : reinterpret_cast<const Elf32_Ehdr*>(obj_file),
598 0 : obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log);
599 : }
600 0 : if (elfclass == ELFCLASS64) {
601 : return ReadSymbolDataElfClass<ElfClass64>(
602 : reinterpret_cast<const Elf64_Ehdr*>(obj_file),
603 0 : obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log);
604 : }
605 :
606 0 : return false;
607 : }
608 :
609 0 : bool ReadSymbolData(const string& obj_file,
610 : const vector<string>& debug_dirs,
611 : SecMap* smap, void* rx_avma, size_t rx_size,
612 : UniqueStringUniverse* usu,
613 : void (*log)(const char*)) {
614 0 : MmapWrapper map_wrapper;
615 0 : void* elf_header = NULL;
616 0 : if (!LoadELF(obj_file, &map_wrapper, &elf_header))
617 0 : return false;
618 :
619 0 : return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
620 : obj_file, debug_dirs,
621 0 : smap, rx_avma, rx_size, usu, log);
622 : }
623 :
624 :
625 : namespace {
626 :
627 : template<typename ElfClass>
628 0 : void FindElfClassSection(const char *elf_base,
629 : const char *section_name,
630 : typename ElfClass::Word section_type,
631 : const void **section_start,
632 : int *section_size) {
633 : typedef typename ElfClass::Ehdr Ehdr;
634 : typedef typename ElfClass::Shdr Shdr;
635 :
636 0 : MOZ_ASSERT(elf_base);
637 0 : MOZ_ASSERT(section_start);
638 0 : MOZ_ASSERT(section_size);
639 :
640 0 : MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
641 :
642 0 : const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
643 0 : MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
644 :
645 : const Shdr* sections =
646 0 : GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff);
647 0 : const Shdr* section_names = sections + elf_header->e_shstrndx;
648 : const char* names =
649 0 : GetOffset<ElfClass,char>(elf_header, section_names->sh_offset);
650 0 : const char *names_end = names + section_names->sh_size;
651 :
652 : const Shdr* section =
653 0 : FindElfSectionByName<ElfClass>(section_name, section_type,
654 : sections, names, names_end,
655 0 : elf_header->e_shnum);
656 :
657 0 : if (section != NULL && section->sh_size > 0) {
658 0 : *section_start = elf_base + section->sh_offset;
659 0 : *section_size = section->sh_size;
660 : }
661 0 : }
662 :
663 : template<typename ElfClass>
664 0 : void FindElfClassSegment(const char *elf_base,
665 : typename ElfClass::Word segment_type,
666 : const void **segment_start,
667 : int *segment_size) {
668 : typedef typename ElfClass::Ehdr Ehdr;
669 : typedef typename ElfClass::Phdr Phdr;
670 :
671 0 : MOZ_ASSERT(elf_base);
672 0 : MOZ_ASSERT(segment_start);
673 0 : MOZ_ASSERT(segment_size);
674 :
675 0 : MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
676 :
677 0 : const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
678 0 : MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
679 :
680 : const Phdr* phdrs =
681 0 : GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff);
682 :
683 0 : for (int i = 0; i < elf_header->e_phnum; ++i) {
684 0 : if (phdrs[i].p_type == segment_type) {
685 0 : *segment_start = elf_base + phdrs[i].p_offset;
686 0 : *segment_size = phdrs[i].p_filesz;
687 0 : return;
688 : }
689 : }
690 : }
691 :
692 : } // namespace (anon)
693 :
// Returns true if the memory at ELF_BASE starts with the ELF magic
// number.  Nothing beyond those leading bytes is inspected.
bool IsValidElf(const void* elf_base) {
  const char* leading_bytes = reinterpret_cast<const char*>(elf_base);
  const int difference = strncmp(leading_bytes, ELFMAG, SELFMAG);
  return difference == 0;
}
698 :
// Returns the EI_CLASS byte of the ELF identification array found at
// ELF_BASE.  The magic number is not checked here.
int ElfClass(const void* elf_base) {
  const ElfW(Ehdr)* header = static_cast<const ElfW(Ehdr)*>(elf_base);
  const int ident_class = header->e_ident[EI_CLASS];
  return ident_class;
}
705 :
706 0 : bool FindElfSection(const void *elf_mapped_base,
707 : const char *section_name,
708 : uint32_t section_type,
709 : const void **section_start,
710 : int *section_size,
711 : int *elfclass) {
712 0 : MOZ_ASSERT(elf_mapped_base);
713 0 : MOZ_ASSERT(section_start);
714 0 : MOZ_ASSERT(section_size);
715 :
716 0 : *section_start = NULL;
717 0 : *section_size = 0;
718 :
719 0 : if (!IsValidElf(elf_mapped_base))
720 0 : return false;
721 :
722 0 : int cls = ElfClass(elf_mapped_base);
723 0 : if (elfclass) {
724 0 : *elfclass = cls;
725 : }
726 :
727 : const char* elf_base =
728 0 : static_cast<const char*>(elf_mapped_base);
729 :
730 0 : if (cls == ELFCLASS32) {
731 : FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
732 0 : section_start, section_size);
733 0 : return *section_start != NULL;
734 0 : } else if (cls == ELFCLASS64) {
735 : FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
736 0 : section_start, section_size);
737 0 : return *section_start != NULL;
738 : }
739 :
740 0 : return false;
741 : }
742 :
743 0 : bool FindElfSegment(const void *elf_mapped_base,
744 : uint32_t segment_type,
745 : const void **segment_start,
746 : int *segment_size,
747 : int *elfclass) {
748 0 : MOZ_ASSERT(elf_mapped_base);
749 0 : MOZ_ASSERT(segment_start);
750 0 : MOZ_ASSERT(segment_size);
751 :
752 0 : *segment_start = NULL;
753 0 : *segment_size = 0;
754 :
755 0 : if (!IsValidElf(elf_mapped_base))
756 0 : return false;
757 :
758 0 : int cls = ElfClass(elf_mapped_base);
759 0 : if (elfclass) {
760 0 : *elfclass = cls;
761 : }
762 :
763 : const char* elf_base =
764 0 : static_cast<const char*>(elf_mapped_base);
765 :
766 0 : if (cls == ELFCLASS32) {
767 : FindElfClassSegment<ElfClass32>(elf_base, segment_type,
768 0 : segment_start, segment_size);
769 0 : return *segment_start != NULL;
770 0 : } else if (cls == ELFCLASS64) {
771 : FindElfClassSegment<ElfClass64>(elf_base, segment_type,
772 0 : segment_start, segment_size);
773 0 : return *segment_start != NULL;
774 : }
775 :
776 0 : return false;
777 : }
778 :
779 :
780 : // (derived from)
781 : // file_id.cc: Return a unique identifier for a file
782 : //
783 : // See file_id.h for documentation
784 : //
785 :
// ELF note name and desc fields are padded to 32-bit words: round |a| up
// to the next multiple of 4.  The argument is parenthesised so that an
// expression with low-precedence operators is rounded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
788 :
789 : // These functions are also used inside the crashed process, so be safe
790 : // and use the syscall/libc wrappers instead of direct syscalls or libc.
791 :
792 : template<typename ElfClass>
793 0 : static bool ElfClassBuildIDNoteIdentifier(const void *section, int length,
794 : uint8_t identifier[kMDGUIDSize]) {
795 : typedef typename ElfClass::Nhdr Nhdr;
796 :
797 0 : const void* section_end = reinterpret_cast<const char*>(section) + length;
798 0 : const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
799 0 : while (reinterpret_cast<const void *>(note_header) < section_end) {
800 0 : if (note_header->n_type == NT_GNU_BUILD_ID)
801 0 : break;
802 0 : note_header = reinterpret_cast<const Nhdr*>(
803 : reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
804 0 : NOTE_PADDING(note_header->n_namesz) +
805 0 : NOTE_PADDING(note_header->n_descsz));
806 : }
807 0 : if (reinterpret_cast<const void *>(note_header) >= section_end ||
808 0 : note_header->n_descsz == 0) {
809 0 : return false;
810 : }
811 :
812 : const char* build_id = reinterpret_cast<const char*>(note_header) +
813 0 : sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
814 : // Copy as many bits of the build ID as will fit
815 : // into the GUID space.
816 0 : memset(identifier, 0, kMDGUIDSize);
817 0 : memcpy(identifier, build_id,
818 0 : std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
819 :
820 0 : return true;
821 : }
822 :
823 : // Attempt to locate a .note.gnu.build-id section in an ELF binary
824 : // and copy as many bytes of it as will fit into |identifier|.
825 0 : static bool FindElfBuildIDNote(const void *elf_mapped_base,
826 : uint8_t identifier[kMDGUIDSize]) {
827 : void* note_section;
828 : int note_size, elfclass;
829 0 : if ((!FindElfSegment(elf_mapped_base, PT_NOTE,
830 0 : (const void**)¬e_section, ¬e_size, &elfclass) ||
831 0 : note_size == 0) &&
832 0 : (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
833 0 : (const void**)¬e_section, ¬e_size, &elfclass) ||
834 0 : note_size == 0)) {
835 0 : return false;
836 : }
837 :
838 0 : if (elfclass == ELFCLASS32) {
839 0 : return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
840 0 : identifier);
841 0 : } else if (elfclass == ELFCLASS64) {
842 0 : return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
843 0 : identifier);
844 : }
845 :
846 0 : return false;
847 : }
848 :
849 : // Attempt to locate the .text section of an ELF binary and generate
850 : // a simple hash by XORing the first page worth of bytes into |identifier|.
851 0 : static bool HashElfTextSection(const void *elf_mapped_base,
852 : uint8_t identifier[kMDGUIDSize]) {
853 : void* text_section;
854 : int text_size;
855 0 : if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
856 0 : (const void**)&text_section, &text_size, NULL) ||
857 0 : text_size == 0) {
858 0 : return false;
859 : }
860 :
861 0 : memset(identifier, 0, kMDGUIDSize);
862 0 : const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
863 0 : const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
864 0 : while (ptr < ptr_end) {
865 0 : for (unsigned i = 0; i < kMDGUIDSize; i++)
866 0 : identifier[i] ^= ptr[i];
867 0 : ptr += kMDGUIDSize;
868 : }
869 0 : return true;
870 : }
871 :
872 : // static
873 0 : bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
874 : uint8_t identifier[kMDGUIDSize]) {
875 : // Look for a build id note first.
876 0 : if (FindElfBuildIDNote(base, identifier))
877 0 : return true;
878 :
879 : // Fall back on hashing the first page of the text section.
880 0 : return HashElfTextSection(base, identifier);
881 : }
882 :
883 : // static
884 0 : void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
885 : char* buffer, int buffer_length) {
886 : uint8_t identifier_swapped[kMDGUIDSize];
887 :
888 : // Endian-ness swap to match dump processor expectation.
889 0 : memcpy(identifier_swapped, identifier, kMDGUIDSize);
890 0 : uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
891 0 : *data1 = htonl(*data1);
892 0 : uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
893 0 : *data2 = htons(*data2);
894 0 : uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
895 0 : *data3 = htons(*data3);
896 :
897 0 : int buffer_idx = 0;
898 0 : for (unsigned int idx = 0;
899 0 : (buffer_idx < buffer_length) && (idx < kMDGUIDSize);
900 : ++idx) {
901 0 : int hi = (identifier_swapped[idx] >> 4) & 0x0F;
902 0 : int lo = (identifier_swapped[idx]) & 0x0F;
903 :
904 0 : if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
905 0 : buffer[buffer_idx++] = '-';
906 :
907 0 : buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
908 0 : buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
909 : }
910 :
911 : // NULL terminate
912 0 : buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
913 0 : }
914 :
915 : } // namespace lul
|