Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "VariableLengthPrefixSet.h"
8 : #include "nsUrlClassifierPrefixSet.h"
9 : #include "nsPrintfCString.h"
10 : #include "nsThreadUtils.h"
11 : #include "mozilla/EndianUtils.h"
12 : #include "mozilla/Telemetry.h"
13 : #include "mozilla/Unused.h"
14 : #include <algorithm>
15 :
16 : // MOZ_LOG=UrlClassifierPrefixSet:5
17 : static mozilla::LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
18 : #define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
19 : #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
20 :
21 : namespace mozilla {
22 : namespace safebrowsing {
23 :
24 : #define PREFIX_SIZE_FIXED 4
25 :
26 0 : NS_IMPL_ISUPPORTS(VariableLengthPrefixSet, nsIMemoryReporter)
27 :
28 : // Out-of-class definition required because MAX_BUFFER_SIZE is passed by reference to std::min<>()
29 : const uint32_t VariableLengthPrefixSet::MAX_BUFFER_SIZE;
30 :
31 : // This class will process prefix size between 4~32. But for 4 bytes prefixes,
32 : // they will be passed to nsUrlClassifierPrefixSet because of better optimization.
33 0 : VariableLengthPrefixSet::VariableLengthPrefixSet()
34 : : mLock("VariableLengthPrefixSet.mLock")
35 0 : , mMemoryReportPath()
36 : {
37 0 : mFixedPrefixSet = new nsUrlClassifierPrefixSet();
38 0 : }
39 :
40 : nsresult
41 0 : VariableLengthPrefixSet::Init(const nsACString& aName)
42 : {
43 : mMemoryReportPath =
44 0 : nsPrintfCString(
45 : "explicit/storage/prefix-set/%s",
46 0 : (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
47 0 : );
48 :
49 0 : RegisterWeakMemoryReporter(this);
50 :
51 0 : return NS_OK;
52 : }
53 :
54 0 : VariableLengthPrefixSet::~VariableLengthPrefixSet()
55 : {
56 0 : UnregisterWeakMemoryReporter(this);
57 0 : }
58 :
59 : nsresult
60 0 : VariableLengthPrefixSet::SetPrefixes(const PrefixStringMap& aPrefixMap)
61 : {
62 0 : MutexAutoLock lock(mLock);
63 :
64 : // Prefix size should not less than 4-bytes or greater than 32-bytes
65 0 : for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
66 0 : if (iter.Key() < PREFIX_SIZE_FIXED ||
67 0 : iter.Key() > COMPLETE_SIZE) {
68 0 : return NS_ERROR_FAILURE;
69 : }
70 : }
71 :
72 : // Clear old prefixSet before setting new one.
73 0 : mFixedPrefixSet->SetPrefixes(nullptr, 0);
74 0 : mVLPrefixSet.Clear();
75 :
76 : // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
77 0 : nsCString* prefixes = aPrefixMap.Get(PREFIX_SIZE_FIXED);
78 0 : if (prefixes) {
79 0 : NS_ENSURE_TRUE(prefixes->Length() % PREFIX_SIZE_FIXED == 0, NS_ERROR_FAILURE);
80 :
81 0 : uint32_t numPrefixes = prefixes->Length() / PREFIX_SIZE_FIXED;
82 :
83 : #if MOZ_BIG_ENDIAN
84 : const uint32_t* arrayPtr = reinterpret_cast<const uint32_t*>(prefixes->BeginReading());
85 : #else
86 0 : FallibleTArray<uint32_t> array;
87 : // Prefixes are lexicographically-sorted, so the interger array
88 : // passed to nsUrlClassifierPrefixSet should also follow the same order.
89 : // To make sure of that, we convert char array to integer with Big-Endian
90 : // instead of casting to integer directly.
91 0 : if (!array.SetCapacity(numPrefixes, fallible)) {
92 0 : return NS_ERROR_OUT_OF_MEMORY;
93 : }
94 :
95 0 : const char* begin = prefixes->BeginReading();
96 0 : const char* end = prefixes->EndReading();
97 :
98 0 : while (begin != end) {
99 0 : array.AppendElement(BigEndian::readUint32(begin), fallible);
100 0 : begin += sizeof(uint32_t);
101 : }
102 :
103 0 : const uint32_t* arrayPtr = array.Elements();
104 : #endif
105 :
106 0 : nsresult rv = mFixedPrefixSet->SetPrefixes(arrayPtr, numPrefixes);
107 0 : NS_ENSURE_SUCCESS(rv, rv);
108 : }
109 :
110 : // 5~32 bytes prefixes are stored in mVLPrefixSet.
111 0 : for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
112 : // Skip 4bytes prefixes because it is already stored in mFixedPrefixSet.
113 0 : if (iter.Key() == PREFIX_SIZE_FIXED) {
114 0 : continue;
115 : }
116 :
117 0 : mVLPrefixSet.Put(iter.Key(), new nsCString(*iter.Data()));
118 : }
119 :
120 0 : return NS_OK;
121 : }
122 :
123 : nsresult
124 0 : VariableLengthPrefixSet::GetPrefixes(PrefixStringMap& aPrefixMap)
125 : {
126 0 : MutexAutoLock lock(mLock);
127 :
128 : // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
129 0 : FallibleTArray<uint32_t> array;
130 0 : nsresult rv = mFixedPrefixSet->GetPrefixesNative(array);
131 0 : NS_ENSURE_SUCCESS(rv, rv);
132 :
133 0 : size_t count = array.Length();
134 0 : if (count) {
135 0 : nsCString* prefixes = new nsCString();
136 0 : prefixes->SetLength(PREFIX_SIZE_FIXED * count);
137 :
138 : // Writing integer array to character array
139 0 : uint32_t* begin = reinterpret_cast<uint32_t*>(prefixes->BeginWriting());
140 0 : for (uint32_t i = 0; i < count; i++) {
141 0 : begin[i] = NativeEndian::swapToBigEndian(array[i]);
142 : }
143 :
144 0 : aPrefixMap.Put(PREFIX_SIZE_FIXED, prefixes);
145 : }
146 :
147 : // Copy variable-length prefix set
148 0 : for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
149 0 : aPrefixMap.Put(iter.Key(), new nsCString(*iter.Data()));
150 : }
151 :
152 0 : return NS_OK;
153 : }
154 :
155 : nsresult
156 0 : VariableLengthPrefixSet::GetFixedLengthPrefixes(FallibleTArray<uint32_t>& aPrefixes)
157 : {
158 0 : return mFixedPrefixSet->GetPrefixesNative(aPrefixes);
159 : }
160 :
161 : // It should never be the case that more than one hash prefixes match a given
162 : // full hash. However, if that happens, this method returns any one of them.
163 : // It does not guarantee which one of those will be returned.
164 : nsresult
165 0 : VariableLengthPrefixSet::Matches(const nsACString& aFullHash, uint32_t* aLength)
166 : {
167 0 : MutexAutoLock lock(mLock);
168 :
169 : // Only allow full-length hash to check if match any of the prefix
170 0 : MOZ_ASSERT(aFullHash.Length() == COMPLETE_SIZE);
171 0 : NS_ENSURE_ARG_POINTER(aLength);
172 :
173 0 : *aLength = 0;
174 :
175 : // Check if it matches 4-bytes prefixSet first
176 0 : const uint32_t* hash = reinterpret_cast<const uint32_t*>(aFullHash.BeginReading());
177 0 : uint32_t value = BigEndian::readUint32(hash);
178 :
179 0 : bool found = false;
180 0 : nsresult rv = mFixedPrefixSet->Contains(value, &found);
181 0 : NS_ENSURE_SUCCESS(rv, rv);
182 :
183 0 : if (found) {
184 0 : *aLength = PREFIX_SIZE_FIXED;
185 0 : return NS_OK;
186 : }
187 :
188 0 : for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
189 0 : if (BinarySearch(aFullHash, *iter.Data(), iter.Key())) {
190 0 : *aLength = iter.Key();
191 0 : MOZ_ASSERT(*aLength > 4);
192 0 : return NS_OK;
193 : }
194 : }
195 :
196 0 : return NS_OK;
197 : }
198 :
199 : nsresult
200 0 : VariableLengthPrefixSet::IsEmpty(bool* aEmpty)
201 : {
202 0 : MutexAutoLock lock(mLock);
203 :
204 0 : NS_ENSURE_ARG_POINTER(aEmpty);
205 :
206 0 : mFixedPrefixSet->IsEmpty(aEmpty);
207 0 : *aEmpty = *aEmpty && mVLPrefixSet.IsEmpty();
208 :
209 0 : return NS_OK;
210 : }
211 :
212 : nsresult
213 0 : VariableLengthPrefixSet::LoadFromFile(nsIFile* aFile)
214 : {
215 0 : MutexAutoLock lock(mLock);
216 :
217 0 : NS_ENSURE_ARG_POINTER(aFile);
218 :
219 0 : Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
220 :
221 0 : nsCOMPtr<nsIInputStream> localInFile;
222 0 : nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
223 0 : PR_RDONLY | nsIFile::OS_READAHEAD);
224 0 : NS_ENSURE_SUCCESS(rv, rv);
225 :
226 : // Calculate how big the file is, make sure our read buffer isn't bigger
227 : // than the file itself which is just wasting memory.
228 : int64_t fileSize;
229 0 : rv = aFile->GetFileSize(&fileSize);
230 0 : NS_ENSURE_SUCCESS(rv, rv);
231 :
232 0 : if (fileSize < 0 || fileSize > UINT32_MAX) {
233 0 : return NS_ERROR_FAILURE;
234 : }
235 :
236 0 : uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
237 0 : MAX_BUFFER_SIZE);
238 :
239 : // Convert to buffered stream
240 0 : nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);
241 :
242 0 : rv = mFixedPrefixSet->LoadPrefixes(in);
243 0 : NS_ENSURE_SUCCESS(rv, rv);
244 :
245 0 : rv = LoadPrefixes(in);
246 0 : NS_ENSURE_SUCCESS(rv, rv);
247 :
248 0 : return NS_OK;;
249 : }
250 :
251 : nsresult
252 0 : VariableLengthPrefixSet::StoreToFile(nsIFile* aFile)
253 : {
254 0 : NS_ENSURE_ARG_POINTER(aFile);
255 :
256 0 : MutexAutoLock lock(mLock);
257 :
258 0 : nsCOMPtr<nsIOutputStream> localOutFile;
259 0 : nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
260 0 : PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
261 0 : NS_ENSURE_SUCCESS(rv, rv);
262 :
263 0 : uint32_t fileSize = 0;
264 : // Preallocate the file storage
265 : {
266 0 : nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
267 0 : Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
268 :
269 0 : fileSize += mFixedPrefixSet->CalculatePreallocateSize();
270 0 : fileSize += CalculatePreallocateSize();
271 :
272 0 : Unused << fos->Preallocate(fileSize);
273 : }
274 :
275 : // Convert to buffered stream
276 : nsCOMPtr<nsIOutputStream> out =
277 0 : NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));
278 :
279 0 : rv = mFixedPrefixSet->WritePrefixes(out);
280 0 : NS_ENSURE_SUCCESS(rv, rv);
281 :
282 0 : rv = WritePrefixes(out);
283 0 : NS_ENSURE_SUCCESS(rv, rv);
284 :
285 0 : return NS_OK;
286 : }
287 :
288 : nsresult
289 0 : VariableLengthPrefixSet::LoadPrefixes(nsIInputStream* in)
290 : {
291 : uint32_t magic;
292 : uint32_t read;
293 :
294 0 : nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
295 0 : NS_ENSURE_SUCCESS(rv, rv);
296 0 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
297 :
298 0 : if (magic != PREFIXSET_VERSION_MAGIC) {
299 0 : LOG(("Version magic mismatch, not loading"));
300 0 : return NS_ERROR_FILE_CORRUPTED;
301 : }
302 :
303 0 : mVLPrefixSet.Clear();
304 :
305 : uint32_t count;
306 0 : rv = in->Read(reinterpret_cast<char*>(&count), sizeof(uint32_t), &read);
307 0 : NS_ENSURE_SUCCESS(rv, rv);
308 0 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
309 :
310 0 : for(;count > 0; count--) {
311 : uint8_t prefixSize;
312 0 : rv = in->Read(reinterpret_cast<char*>(&prefixSize), sizeof(uint8_t), &read);
313 0 : NS_ENSURE_SUCCESS(rv, rv);
314 0 : NS_ENSURE_TRUE(read == sizeof(uint8_t), NS_ERROR_FAILURE);
315 :
316 : uint32_t stringLength;
317 0 : rv = in->Read(reinterpret_cast<char*>(&stringLength), sizeof(uint32_t), &read);
318 0 : NS_ENSURE_SUCCESS(rv, rv);
319 0 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
320 :
321 0 : nsCString* vlPrefixes = new nsCString();
322 0 : if (!vlPrefixes->SetLength(stringLength, fallible)) {
323 0 : return NS_ERROR_OUT_OF_MEMORY;
324 : }
325 :
326 0 : rv = in->Read(reinterpret_cast<char*>(vlPrefixes->BeginWriting()), stringLength, &read);
327 0 : NS_ENSURE_SUCCESS(rv, rv);
328 0 : NS_ENSURE_TRUE(read == stringLength, NS_ERROR_FAILURE);
329 :
330 0 : mVLPrefixSet.Put(prefixSize, vlPrefixes);
331 : }
332 :
333 0 : return NS_OK;
334 : }
335 :
336 : uint32_t
337 0 : VariableLengthPrefixSet::CalculatePreallocateSize()
338 : {
339 0 : uint32_t fileSize = 0;
340 :
341 : // Store how many prefix string.
342 0 : fileSize += sizeof(uint32_t);
343 :
344 0 : for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
345 : // Store prefix size, prefix string length, and prefix string.
346 0 : fileSize += sizeof(uint8_t);
347 0 : fileSize += sizeof(uint32_t);
348 0 : fileSize += iter.Data()->Length();
349 : }
350 0 : return fileSize;
351 : }
352 :
353 : nsresult
354 0 : VariableLengthPrefixSet::WritePrefixes(nsIOutputStream* out)
355 : {
356 : uint32_t written;
357 0 : uint32_t writelen = sizeof(uint32_t);
358 0 : uint32_t magic = PREFIXSET_VERSION_MAGIC;
359 0 : nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
360 0 : NS_ENSURE_SUCCESS(rv, rv);
361 0 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
362 :
363 0 : uint32_t count = mVLPrefixSet.Count();
364 0 : rv = out->Write(reinterpret_cast<char*>(&count), writelen, &written);
365 0 : NS_ENSURE_SUCCESS(rv, rv);
366 0 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
367 :
368 : // Store PrefixSize, Length of Prefix String and then Prefix String
369 0 : for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
370 0 : const nsCString& vlPrefixes = *iter.Data();
371 :
372 0 : uint8_t prefixSize = iter.Key();
373 0 : writelen = sizeof(uint8_t);
374 0 : rv = out->Write(reinterpret_cast<char*>(&prefixSize), writelen, &written);
375 0 : NS_ENSURE_SUCCESS(rv, rv);
376 0 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
377 :
378 0 : uint32_t stringLength = vlPrefixes.Length();
379 0 : writelen = sizeof(uint32_t);
380 0 : rv = out->Write(reinterpret_cast<char*>(&stringLength), writelen, &written);
381 0 : NS_ENSURE_SUCCESS(rv, rv);
382 0 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
383 :
384 0 : rv = out->Write(const_cast<char*>(vlPrefixes.BeginReading()),
385 0 : stringLength, &written);
386 0 : NS_ENSURE_SUCCESS(rv, rv);
387 0 : NS_ENSURE_TRUE(stringLength == written, NS_ERROR_FAILURE);
388 : }
389 :
390 0 : return NS_OK;
391 : }
392 :
393 : bool
394 0 : VariableLengthPrefixSet::BinarySearch(const nsACString& aFullHash,
395 : const nsACString& aPrefixes,
396 : uint32_t aPrefixSize)
397 : {
398 0 : const char* fullhash = aFullHash.BeginReading();
399 0 : const char* prefixes = aPrefixes.BeginReading();
400 0 : int32_t begin = 0, end = aPrefixes.Length() / aPrefixSize;
401 :
402 0 : while (end > begin) {
403 0 : int32_t mid = (begin + end) >> 1;
404 0 : int cmp = memcmp(fullhash, prefixes + mid*aPrefixSize, aPrefixSize);
405 0 : if (cmp < 0) {
406 0 : end = mid;
407 0 : } else if (cmp > 0) {
408 0 : begin = mid + 1;
409 : } else {
410 0 : return true;
411 : }
412 : }
413 0 : return false;
414 : }
415 :
416 0 : MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
417 :
418 : NS_IMETHODIMP
419 0 : VariableLengthPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
420 : nsISupports* aData, bool aAnonymize)
421 : {
422 0 : MOZ_ASSERT(NS_IsMainThread());
423 :
424 0 : size_t amount = SizeOfIncludingThis(UrlClassifierMallocSizeOf);
425 :
426 0 : return aHandleReport->Callback(
427 0 : EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES, amount,
428 0 : NS_LITERAL_CSTRING("Memory used by the variable-length prefix set for a URL classifier."),
429 0 : aData);
430 : }
431 :
432 : size_t
433 0 : VariableLengthPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
434 : {
435 0 : MutexAutoLock lock(mLock);
436 :
437 0 : size_t n = 0;
438 0 : n += aMallocSizeOf(this);
439 0 : n += mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of) - aMallocSizeOf(mFixedPrefixSet);
440 :
441 0 : n += mVLPrefixSet.ShallowSizeOfExcludingThis(aMallocSizeOf);
442 0 : for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
443 0 : n += iter.Data()->SizeOfExcludingThisIfUnshared(aMallocSizeOf);
444 : }
445 :
446 0 : return n;
447 : }
448 :
449 : } // namespace safebrowsing
450 : } // namespace mozilla
|