Line data Source code
1 : // Copyright (c) 2006, Google Inc.
2 : // All rights reserved.
3 : //
4 : // Redistribution and use in source and binary forms, with or without
5 : // modification, are permitted provided that the following conditions are
6 : // met:
7 : //
8 : // * Redistributions of source code must retain the above copyright
9 : // notice, this list of conditions and the following disclaimer.
10 : // * Redistributions in binary form must reproduce the above
11 : // copyright notice, this list of conditions and the following disclaimer
12 : // in the documentation and/or other materials provided with the
13 : // distribution.
14 : // * Neither the name of Google Inc. nor the names of its
15 : // contributors may be used to endorse or promote products derived from
16 : // this software without specific prior written permission.
17 : //
18 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 : //
30 : // file_id.cc: Return a unique identifier for a file
31 : //
32 : // See file_id.h for documentation
33 : //
34 :
35 : #include "common/linux/file_id.h"
36 :
37 : #include <arpa/inet.h>
38 : #include <assert.h>
39 : #include <string.h>
40 :
41 : #include <algorithm>
42 : #include <string>
43 :
44 : #include "common/linux/elf_gnu_compat.h"
45 : #include "common/linux/elfutils.h"
46 : #include "common/linux/linux_libc_support.h"
47 : #include "common/linux/memory_mapped_file.h"
48 : #include "common/using_std_string.h"
49 : #include "third_party/lss/linux_syscall_support.h"
50 :
51 : namespace google_breakpad {
52 :
53 : // Used in a few places for backwards-compatibility.
54 : const size_t kMDGUIDSize = sizeof(MDGUID);
55 :
56 0 : FileID::FileID(const char* path) : path_(path) {}
57 :
// ELF note name and desc fields are padded to 32-bit (4-byte) boundaries.
// Rounds |a| up to the next multiple of 4. The argument is parenthesized so
// that expressions containing low-precedence operators (e.g. |x | y|) are
// evaluated as a whole before the padding is added.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
60 :
61 : // These functions are also used inside the crashed process, so be safe
62 : // and use the syscall/libc wrappers instead of direct syscalls or libc.
63 :
64 : template<typename ElfClass>
65 0 : static bool ElfClassBuildIDNoteIdentifier(const void *section, size_t length,
66 : wasteful_vector<uint8_t>& identifier) {
67 : typedef typename ElfClass::Nhdr Nhdr;
68 :
69 0 : const void* section_end = reinterpret_cast<const char*>(section) + length;
70 0 : const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
71 0 : while (reinterpret_cast<const void *>(note_header) < section_end) {
72 0 : if (note_header->n_type == NT_GNU_BUILD_ID)
73 0 : break;
74 0 : note_header = reinterpret_cast<const Nhdr*>(
75 : reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
76 0 : NOTE_PADDING(note_header->n_namesz) +
77 0 : NOTE_PADDING(note_header->n_descsz));
78 : }
79 0 : if (reinterpret_cast<const void *>(note_header) >= section_end ||
80 0 : note_header->n_descsz == 0) {
81 0 : return false;
82 : }
83 :
84 : const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
85 0 : sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
86 0 : identifier.insert(identifier.end(),
87 : build_id,
88 0 : build_id + note_header->n_descsz);
89 :
90 0 : return true;
91 : }
92 :
93 : // Attempt to locate a .note.gnu.build-id section in an ELF binary
94 : // and copy it into |identifier|.
95 0 : static bool FindElfBuildIDNote(const void* elf_mapped_base,
96 : wasteful_vector<uint8_t>& identifier) {
97 : void* note_section;
98 : size_t note_size;
99 : int elfclass;
100 0 : if ((!FindElfSegment(elf_mapped_base, PT_NOTE,
101 0 : (const void**)¬e_section, ¬e_size, &elfclass) ||
102 0 : note_size == 0) &&
103 0 : (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
104 0 : (const void**)¬e_section, ¬e_size, &elfclass) ||
105 0 : note_size == 0)) {
106 0 : return false;
107 : }
108 :
109 0 : if (elfclass == ELFCLASS32) {
110 0 : return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
111 0 : identifier);
112 0 : } else if (elfclass == ELFCLASS64) {
113 0 : return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
114 0 : identifier);
115 : }
116 :
117 0 : return false;
118 : }
119 :
120 : // Attempt to locate the .text section of an ELF binary and generate
121 : // a simple hash by XORing the first page worth of bytes into |identifier|.
122 0 : static bool HashElfTextSection(const void* elf_mapped_base,
123 : wasteful_vector<uint8_t>& identifier) {
124 0 : identifier.resize(kMDGUIDSize);
125 :
126 : void* text_section;
127 : size_t text_size;
128 0 : if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
129 0 : (const void**)&text_section, &text_size, NULL) ||
130 0 : text_size == 0) {
131 0 : return false;
132 : }
133 :
134 : // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
135 : // function backwards-compatible.
136 0 : my_memset(&identifier[0], 0, kMDGUIDSize);
137 0 : const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
138 0 : const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
139 0 : while (ptr < ptr_end) {
140 0 : for (unsigned i = 0; i < kMDGUIDSize; i++)
141 0 : identifier[i] ^= ptr[i];
142 0 : ptr += kMDGUIDSize;
143 : }
144 0 : return true;
145 : }
146 :
147 : // static
148 0 : bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
149 : wasteful_vector<uint8_t>& identifier) {
150 : // Look for a build id note first.
151 0 : if (FindElfBuildIDNote(base, identifier))
152 0 : return true;
153 :
154 : // Fall back on hashing the first page of the text section.
155 0 : return HashElfTextSection(base, identifier);
156 : }
157 :
158 0 : bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
159 0 : MemoryMappedFile mapped_file(path_.c_str(), 0);
160 0 : if (!mapped_file.data()) // Should probably check if size >= ElfW(Ehdr)?
161 0 : return false;
162 :
163 0 : return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
164 : }
165 :
// These three functions are never called in an unsafe context, so it's OK
// to allocate memory and use libc.

// Returns the |count| bytes starting at |bytes| as a string of uppercase
// hexadecimal digits, two digits per byte (so the result has 2 * |count|
// characters). Returns an empty string when |count| is 0.
static string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  string result;
  result.reserve(count * 2);
  // The index is a size_t so it can never be narrower than |count|.
  for (size_t idx = 0; idx < count; ++idx) {
    const uint8_t byte = bytes[idx];
    result.push_back(kHexDigits[byte >> 4]);
    result.push_back(kHexDigits[byte & 0x0F]);
  }
  return result;
}
177 :
178 : // static
179 0 : string FileID::ConvertIdentifierToUUIDString(
180 : const wasteful_vector<uint8_t>& identifier) {
181 0 : uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
182 :
183 : // Endian-ness swap to match dump processor expectation.
184 0 : memcpy(identifier_swapped, &identifier[0],
185 0 : std::min(kMDGUIDSize, identifier.size()));
186 0 : uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
187 0 : *data1 = htonl(*data1);
188 0 : uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
189 0 : *data2 = htons(*data2);
190 0 : uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
191 0 : *data3 = htons(*data3);
192 :
193 0 : return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
194 : }
195 :
196 : // static
197 0 : string FileID::ConvertIdentifierToString(
198 : const wasteful_vector<uint8_t>& identifier) {
199 0 : return bytes_to_hex_string(&identifier[0], identifier.size());
200 : }
201 :
202 : } // namespace google_breakpad
|