Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsUrlClassifierPrefixSet.h"
8 : #include "nsIUrlClassifierPrefixSet.h"
9 : #include "nsCOMPtr.h"
10 : #include "nsDebug.h"
11 : #include "nsPrintfCString.h"
12 : #include "nsTArray.h"
13 : #include "nsString.h"
14 : #include "nsIFile.h"
15 : #include "nsToolkitCompsCID.h"
16 : #include "nsTArray.h"
17 : #include "nsThreadUtils.h"
18 : #include "nsNetUtil.h"
19 : #include "nsISeekableStream.h"
20 : #include "nsIBufferedStreams.h"
21 : #include "nsIFileStreams.h"
22 : #include "mozilla/MemoryReporting.h"
23 : #include "mozilla/SizePrintfMacros.h"
24 : #include "mozilla/Telemetry.h"
25 : #include "mozilla/FileUtils.h"
26 : #include "mozilla/Logging.h"
27 : #include "mozilla/Unused.h"
28 : #include <algorithm>
29 :
30 : using namespace mozilla;
31 :
32 : // MOZ_LOG=UrlClassifierPrefixSet:5
33 : static LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
34 : #define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
35 : #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
36 :
37 58 : NS_IMPL_ISUPPORTS(
38 : nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSet, nsIMemoryReporter)
39 :
40 : // Definition required due to std::max<>()
41 : const uint32_t nsUrlClassifierPrefixSet::MAX_BUFFER_SIZE;
42 :
43 13 : nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet()
44 : : mLock("nsUrlClassifierPrefixSet.mLock")
45 : , mTotalPrefixes(0)
46 13 : , mMemoryReportPath()
47 : {
48 13 : }
49 :
50 : NS_IMETHODIMP
51 13 : nsUrlClassifierPrefixSet::Init(const nsACString& aName)
52 : {
53 : mMemoryReportPath =
54 39 : nsPrintfCString(
55 : "explicit/storage/prefix-set/%s",
56 52 : (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
57 13 : );
58 :
59 13 : RegisterWeakMemoryReporter(this);
60 :
61 13 : return NS_OK;
62 : }
63 :
64 18 : nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
65 : {
66 6 : UnregisterWeakMemoryReporter(this);
67 18 : }
68 :
69 : NS_IMETHODIMP
70 6 : nsUrlClassifierPrefixSet::SetPrefixes(const uint32_t* aArray, uint32_t aLength)
71 : {
72 12 : MutexAutoLock lock(mLock);
73 :
74 6 : nsresult rv = NS_OK;
75 :
76 6 : if (aLength <= 0) {
77 6 : if (mIndexPrefixes.Length() > 0) {
78 0 : LOG(("Clearing PrefixSet"));
79 0 : mIndexDeltas.Clear();
80 0 : mIndexPrefixes.Clear();
81 0 : mTotalPrefixes = 0;
82 : }
83 : } else {
84 0 : rv = MakePrefixSet(aArray, aLength);
85 : }
86 :
87 12 : return rv;
88 : }
89 :
90 : nsresult
91 0 : nsUrlClassifierPrefixSet::MakePrefixSet(const uint32_t* aPrefixes, uint32_t aLength)
92 : {
93 0 : mLock.AssertCurrentThreadOwns();
94 :
95 0 : if (aLength == 0) {
96 0 : return NS_OK;
97 : }
98 :
99 : #ifdef DEBUG
100 0 : for (uint32_t i = 1; i < aLength; i++) {
101 0 : MOZ_ASSERT(aPrefixes[i] >= aPrefixes[i-1]);
102 : }
103 : #endif
104 :
105 0 : mIndexPrefixes.Clear();
106 0 : mIndexDeltas.Clear();
107 0 : mTotalPrefixes = aLength;
108 :
109 0 : mIndexPrefixes.AppendElement(aPrefixes[0]);
110 0 : mIndexDeltas.AppendElement();
111 :
112 0 : uint32_t numOfDeltas = 0;
113 0 : uint32_t totalDeltas = 0;
114 0 : uint32_t previousItem = aPrefixes[0];
115 0 : for (uint32_t i = 1; i < aLength; i++) {
116 0 : if ((numOfDeltas >= DELTAS_LIMIT) ||
117 0 : (aPrefixes[i] - previousItem >= MAX_INDEX_DIFF)) {
118 : // Compact the previous element.
119 : // Note there is always at least one element when we get here,
120 : // because we created the first element before the loop.
121 0 : mIndexDeltas.LastElement().Compact();
122 0 : mIndexDeltas.AppendElement();
123 0 : mIndexPrefixes.AppendElement(aPrefixes[i]);
124 0 : numOfDeltas = 0;
125 : } else {
126 0 : uint16_t delta = aPrefixes[i] - previousItem;
127 0 : mIndexDeltas.LastElement().AppendElement(delta);
128 0 : numOfDeltas++;
129 0 : totalDeltas++;
130 : }
131 0 : previousItem = aPrefixes[i];
132 : }
133 :
134 0 : mIndexDeltas.LastElement().Compact();
135 0 : mIndexDeltas.Compact();
136 0 : mIndexPrefixes.Compact();
137 :
138 0 : LOG(("Total number of indices: %d", aLength));
139 0 : LOG(("Total number of deltas: %d", totalDeltas));
140 0 : LOG(("Total number of delta chunks: %" PRIuSIZE, mIndexDeltas.Length()));
141 :
142 0 : return NS_OK;
143 : }
144 :
145 : nsresult
146 6 : nsUrlClassifierPrefixSet::GetPrefixesNative(FallibleTArray<uint32_t>& outArray)
147 : {
148 12 : MutexAutoLock lock(mLock);
149 :
150 6 : if (!outArray.SetLength(mTotalPrefixes, fallible)) {
151 0 : return NS_ERROR_OUT_OF_MEMORY;
152 : }
153 :
154 6 : uint32_t prefixIdxLength = mIndexPrefixes.Length();
155 6 : uint32_t prefixCnt = 0;
156 :
157 6 : for (uint32_t i = 0; i < prefixIdxLength; i++) {
158 0 : uint32_t prefix = mIndexPrefixes[i];
159 :
160 0 : if (prefixCnt >= mTotalPrefixes) {
161 0 : return NS_ERROR_FAILURE;
162 : }
163 0 : outArray[prefixCnt++] = prefix;
164 :
165 0 : for (uint32_t j = 0; j < mIndexDeltas[i].Length(); j++) {
166 0 : prefix += mIndexDeltas[i][j];
167 0 : if (prefixCnt >= mTotalPrefixes) {
168 0 : return NS_ERROR_FAILURE;
169 : }
170 0 : outArray[prefixCnt++] = prefix;
171 : }
172 : }
173 :
174 6 : NS_ASSERTION(mTotalPrefixes == prefixCnt, "Lengths are inconsistent");
175 6 : return NS_OK;
176 : }
177 :
178 : NS_IMETHODIMP
179 0 : nsUrlClassifierPrefixSet::GetPrefixes(uint32_t* aCount,
180 : uint32_t** aPrefixes)
181 : {
182 : // No need to get mLock here because this function does not directly touch
183 : // the class's data members. (GetPrefixesNative() will get mLock, however.)
184 :
185 0 : NS_ENSURE_ARG_POINTER(aCount);
186 0 : *aCount = 0;
187 0 : NS_ENSURE_ARG_POINTER(aPrefixes);
188 0 : *aPrefixes = nullptr;
189 :
190 0 : FallibleTArray<uint32_t> prefixes;
191 0 : nsresult rv = GetPrefixesNative(prefixes);
192 0 : if (NS_FAILED(rv)) {
193 0 : return rv;
194 : }
195 :
196 0 : uint64_t itemCount = prefixes.Length();
197 0 : uint32_t* prefixArray = static_cast<uint32_t*>(moz_xmalloc(itemCount * sizeof(uint32_t)));
198 0 : NS_ENSURE_TRUE(prefixArray, NS_ERROR_OUT_OF_MEMORY);
199 :
200 0 : memcpy(prefixArray, prefixes.Elements(), sizeof(uint32_t) * itemCount);
201 :
202 0 : *aCount = itemCount;
203 0 : *aPrefixes = prefixArray;
204 :
205 0 : return NS_OK;
206 : }
207 :
208 0 : uint32_t nsUrlClassifierPrefixSet::BinSearch(uint32_t start,
209 : uint32_t end,
210 : uint32_t target)
211 : {
212 0 : mLock.AssertCurrentThreadOwns();
213 :
214 0 : while (start != end && end >= start) {
215 0 : uint32_t i = start + ((end - start) >> 1);
216 0 : uint32_t value = mIndexPrefixes[i];
217 0 : if (value < target) {
218 0 : start = i + 1;
219 0 : } else if (value > target) {
220 0 : end = i - 1;
221 : } else {
222 0 : return i;
223 : }
224 : }
225 0 : return end;
226 : }
227 :
228 : NS_IMETHODIMP
229 4 : nsUrlClassifierPrefixSet::Contains(uint32_t aPrefix, bool* aFound)
230 : {
231 8 : MutexAutoLock lock(mLock);
232 :
233 4 : *aFound = false;
234 :
235 4 : if (mIndexPrefixes.Length() == 0) {
236 4 : return NS_OK;
237 : }
238 :
239 0 : uint32_t target = aPrefix;
240 :
241 : // We want to do a "Price is Right" binary search, that is, we want to find
242 : // the index of the value either equal to the target or the closest value
243 : // that is less than the target.
244 : //
245 0 : if (target < mIndexPrefixes[0]) {
246 0 : return NS_OK;
247 : }
248 :
249 : // |binsearch| does not necessarily return the correct index (when the
250 : // target is not found) but rather it returns an index at least one away
251 : // from the correct index.
252 : // Because of this, we need to check if the target lies before the beginning
253 : // of the indices.
254 :
255 0 : uint32_t i = BinSearch(0, mIndexPrefixes.Length() - 1, target);
256 0 : if (mIndexPrefixes[i] > target && i > 0) {
257 0 : i--;
258 : }
259 :
260 : // Now search through the deltas for the target.
261 0 : uint32_t diff = target - mIndexPrefixes[i];
262 0 : uint32_t deltaSize = mIndexDeltas[i].Length();
263 0 : uint32_t deltaIndex = 0;
264 :
265 0 : while (diff > 0 && deltaIndex < deltaSize) {
266 0 : diff -= mIndexDeltas[i][deltaIndex];
267 0 : deltaIndex++;
268 : }
269 :
270 0 : if (diff == 0) {
271 0 : *aFound = true;
272 : }
273 :
274 0 : return NS_OK;
275 : }
276 :
277 0 : MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
278 :
279 : NS_IMETHODIMP
280 0 : nsUrlClassifierPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
281 : nsISupports* aData, bool aAnonymize)
282 : {
283 0 : MOZ_ASSERT(NS_IsMainThread());
284 :
285 : // No need to get mLock here because this function does not directly touch
286 : // the class's data members. (SizeOfIncludingThis() will get mLock, however.)
287 :
288 0 : aHandleReport->Callback(
289 0 : EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES,
290 0 : SizeOfIncludingThis(UrlClassifierMallocSizeOf),
291 0 : NS_LITERAL_CSTRING("Memory used by the prefix set for a URL classifier."),
292 0 : aData);
293 :
294 0 : return NS_OK;
295 : }
296 :
297 : size_t
298 18 : nsUrlClassifierPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
299 : {
300 36 : MutexAutoLock lock(mLock);
301 :
302 18 : size_t n = 0;
303 18 : n += aMallocSizeOf(this);
304 18 : n += mIndexDeltas.ShallowSizeOfExcludingThis(aMallocSizeOf);
305 18 : for (uint32_t i = 0; i < mIndexDeltas.Length(); i++) {
306 0 : n += mIndexDeltas[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
307 : }
308 18 : n += mIndexPrefixes.ShallowSizeOfExcludingThis(aMallocSizeOf);
309 36 : return n;
310 : }
311 :
312 : NS_IMETHODIMP
313 0 : nsUrlClassifierPrefixSet::IsEmpty(bool * aEmpty)
314 : {
315 0 : MutexAutoLock lock(mLock);
316 :
317 0 : *aEmpty = (mIndexPrefixes.Length() == 0);
318 0 : return NS_OK;
319 : }
320 :
321 : NS_IMETHODIMP
322 12 : nsUrlClassifierPrefixSet::LoadFromFile(nsIFile* aFile)
323 : {
324 24 : MutexAutoLock lock(mLock);
325 :
326 24 : Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FILELOAD_TIME> timer;
327 :
328 24 : nsCOMPtr<nsIInputStream> localInFile;
329 24 : nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
330 12 : PR_RDONLY | nsIFile::OS_READAHEAD);
331 12 : NS_ENSURE_SUCCESS(rv, rv);
332 :
333 : // Calculate how big the file is, make sure our read buffer isn't bigger
334 : // than the file itself which is just wasting memory.
335 : int64_t fileSize;
336 12 : rv = aFile->GetFileSize(&fileSize);
337 12 : NS_ENSURE_SUCCESS(rv, rv);
338 :
339 12 : if (fileSize < 0 || fileSize > UINT32_MAX) {
340 0 : return NS_ERROR_FAILURE;
341 : }
342 :
343 24 : uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
344 12 : MAX_BUFFER_SIZE);
345 :
346 : // Convert to buffered stream
347 24 : nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);
348 :
349 12 : rv = LoadPrefixes(in);
350 12 : NS_ENSURE_SUCCESS(rv, rv);
351 :
352 12 : return NS_OK;
353 : }
354 :
355 : NS_IMETHODIMP
356 6 : nsUrlClassifierPrefixSet::StoreToFile(nsIFile* aFile)
357 : {
358 12 : MutexAutoLock lock(mLock);
359 :
360 12 : nsCOMPtr<nsIOutputStream> localOutFile;
361 12 : nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
362 6 : PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
363 6 : NS_ENSURE_SUCCESS(rv, rv);
364 :
365 : uint32_t fileSize;
366 :
367 : // Preallocate the file storage
368 : {
369 12 : nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
370 12 : Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FALLOCATE_TIME> timer;
371 :
372 6 : fileSize = CalculatePreallocateSize();
373 :
374 : // Ignore failure, the preallocation is a hint and we write out the entire
375 : // file later on
376 6 : Unused << fos->Preallocate(fileSize);
377 : }
378 :
379 : // Convert to buffered stream
380 : nsCOMPtr<nsIOutputStream> out =
381 12 : NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));
382 :
383 6 : rv = WritePrefixes(out);
384 6 : NS_ENSURE_SUCCESS(rv, rv);
385 :
386 6 : LOG(("Saving PrefixSet successful\n"));
387 :
388 6 : return NS_OK;
389 : }
390 :
391 : nsresult
392 12 : nsUrlClassifierPrefixSet::LoadPrefixes(nsIInputStream* in)
393 : {
394 : uint32_t magic;
395 : uint32_t read;
396 :
397 12 : nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
398 12 : NS_ENSURE_SUCCESS(rv, rv);
399 12 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
400 :
401 12 : if (magic == PREFIXSET_VERSION_MAGIC) {
402 : uint32_t indexSize;
403 : uint32_t deltaSize;
404 :
405 12 : rv = in->Read(reinterpret_cast<char*>(&indexSize), sizeof(uint32_t), &read);
406 12 : NS_ENSURE_SUCCESS(rv, rv);
407 12 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
408 :
409 12 : rv = in->Read(reinterpret_cast<char*>(&deltaSize), sizeof(uint32_t), &read);
410 12 : NS_ENSURE_SUCCESS(rv, rv);
411 12 : NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
412 :
413 12 : if (indexSize == 0) {
414 12 : LOG(("stored PrefixSet is empty!"));
415 12 : return NS_OK;
416 : }
417 :
418 0 : if (deltaSize > (indexSize * DELTAS_LIMIT)) {
419 0 : return NS_ERROR_FILE_CORRUPTED;
420 : }
421 :
422 0 : nsTArray<uint32_t> indexStarts;
423 0 : indexStarts.SetLength(indexSize);
424 0 : mIndexPrefixes.SetLength(indexSize);
425 0 : mIndexDeltas.SetLength(indexSize);
426 :
427 0 : mTotalPrefixes = indexSize;
428 :
429 0 : uint32_t toRead = indexSize*sizeof(uint32_t);
430 0 : rv = in->Read(reinterpret_cast<char*>(mIndexPrefixes.Elements()), toRead, &read);
431 0 : NS_ENSURE_SUCCESS(rv, rv);
432 0 : NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
433 :
434 0 : rv = in->Read(reinterpret_cast<char*>(indexStarts.Elements()), toRead, &read);
435 0 : NS_ENSURE_SUCCESS(rv, rv);
436 0 : NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
437 :
438 0 : if (indexSize != 0 && indexStarts[0] != 0) {
439 0 : return NS_ERROR_FILE_CORRUPTED;
440 : }
441 0 : for (uint32_t i = 0; i < indexSize; i++) {
442 0 : uint32_t numInDelta = i == indexSize - 1 ? deltaSize - indexStarts[i]
443 0 : : indexStarts[i + 1] - indexStarts[i];
444 0 : if (numInDelta > DELTAS_LIMIT) {
445 0 : return NS_ERROR_FILE_CORRUPTED;
446 : }
447 0 : if (numInDelta > 0) {
448 0 : mIndexDeltas[i].SetLength(numInDelta);
449 0 : mTotalPrefixes += numInDelta;
450 0 : toRead = numInDelta * sizeof(uint16_t);
451 0 : rv = in->Read(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), toRead, &read);
452 0 : NS_ENSURE_SUCCESS(rv, rv);
453 0 : NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
454 : }
455 : }
456 : } else {
457 0 : LOG(("Version magic mismatch, not loading"));
458 0 : return NS_ERROR_FILE_CORRUPTED;
459 : }
460 :
461 0 : MOZ_ASSERT(mIndexPrefixes.Length() == mIndexDeltas.Length());
462 0 : LOG(("Loading PrefixSet successful"));
463 :
464 0 : return NS_OK;
465 : }
466 :
467 : uint32_t
468 6 : nsUrlClassifierPrefixSet::CalculatePreallocateSize()
469 : {
470 6 : uint32_t fileSize = 4 * sizeof(uint32_t);
471 6 : uint32_t deltas = mTotalPrefixes - mIndexPrefixes.Length();
472 6 : fileSize += 2 * mIndexPrefixes.Length() * sizeof(uint32_t);
473 6 : fileSize += deltas * sizeof(uint16_t);
474 6 : return fileSize;
475 : }
476 :
477 : nsresult
478 6 : nsUrlClassifierPrefixSet::WritePrefixes(nsIOutputStream* out)
479 : {
480 : uint32_t written;
481 6 : uint32_t writelen = sizeof(uint32_t);
482 6 : uint32_t magic = PREFIXSET_VERSION_MAGIC;
483 6 : nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
484 6 : NS_ENSURE_SUCCESS(rv, rv);
485 6 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
486 :
487 6 : uint32_t indexSize = mIndexPrefixes.Length();
488 6 : uint32_t indexDeltaSize = mIndexDeltas.Length();
489 6 : uint32_t totalDeltas = 0;
490 :
491 : // Store the shape of mIndexDeltas by noting at which "count" of total
492 : // indexes a new subarray starts. This is slightly cumbersome but keeps
493 : // file format compatibility.
494 : // If we ever update the format, we can gain space by storing the delta
495 : // subarray sizes, which fit in bytes.
496 12 : nsTArray<uint32_t> indexStarts;
497 6 : indexStarts.AppendElement(0);
498 :
499 6 : for (uint32_t i = 0; i < indexDeltaSize; i++) {
500 0 : uint32_t deltaLength = mIndexDeltas[i].Length();
501 0 : totalDeltas += deltaLength;
502 0 : indexStarts.AppendElement(totalDeltas);
503 : }
504 :
505 6 : rv = out->Write(reinterpret_cast<char*>(&indexSize), writelen, &written);
506 6 : NS_ENSURE_SUCCESS(rv, rv);
507 6 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
508 :
509 6 : rv = out->Write(reinterpret_cast<char*>(&totalDeltas), writelen, &written);
510 6 : NS_ENSURE_SUCCESS(rv, rv);
511 6 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
512 :
513 6 : writelen = indexSize * sizeof(uint32_t);
514 6 : rv = out->Write(reinterpret_cast<char*>(mIndexPrefixes.Elements()), writelen, &written);
515 6 : NS_ENSURE_SUCCESS(rv, rv);
516 6 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
517 :
518 6 : rv = out->Write(reinterpret_cast<char*>(indexStarts.Elements()), writelen, &written);
519 6 : NS_ENSURE_SUCCESS(rv, rv);
520 6 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
521 :
522 6 : if (totalDeltas > 0) {
523 0 : for (uint32_t i = 0; i < indexDeltaSize; i++) {
524 0 : writelen = mIndexDeltas[i].Length() * sizeof(uint16_t);
525 0 : rv = out->Write(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), writelen, &written);
526 0 : NS_ENSURE_SUCCESS(rv, rv);
527 0 : NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
528 : }
529 : }
530 :
531 6 : LOG(("Saving PrefixSet successful\n"));
532 :
533 6 : return NS_OK;
534 : }
|