Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef nsZipArchive_h_
7 : #define nsZipArchive_h_
8 :
9 : #include "mozilla/Attributes.h"
10 :
11 : #define ZIP_TABSIZE 256 /* Number of slots in the nsZipArchive::mFiles table */
12 : #define ZIP_BUFLEN (4*1024) /* Size of the output buffer used when deflating items to a file */
13 :
14 : #include "zlib.h"
15 : #include "zipstruct.h"
16 : #include "nsAutoPtr.h"
17 : #include "nsIFile.h"
18 : #include "nsISupportsImpl.h" // For mozilla::ThreadSafeAutoRefCnt
19 : #include "mozilla/ArenaAllocator.h"
20 : #include "mozilla/FileUtils.h"
21 : #include "mozilla/FileLocation.h"
22 : #include "mozilla/UniquePtr.h"
23 :
24 : #ifdef HAVE_SEH_EXCEPTIONS /* SEH build: only EXCEPTION_IN_PAGE_ERROR is handled (warn, then run cmd); other exceptions keep propagating */
25 : #define MOZ_WIN_MEM_TRY_BEGIN __try {
26 : #define MOZ_WIN_MEM_TRY_CATCH(cmd) } \
27 : __except(GetExceptionCode()==EXCEPTION_IN_PAGE_ERROR ? \
28 : EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) \
29 : { \
30 : NS_WARNING("unexpected EXCEPTION_IN_PAGE_ERROR"); \
31 : cmd; \
32 : }
33 : #else /* no SEH: the two macros reduce to a plain brace pair and cmd is discarded */
34 : #define MOZ_WIN_MEM_TRY_BEGIN {
35 : #define MOZ_WIN_MEM_TRY_CATCH(cmd) }
36 : #endif /* HAVE_SEH_EXCEPTIONS */
37 :
38 : class nsZipFind;
39 : struct PRFileDesc;
40 : #ifdef MOZ_JAR_BROTLI
41 : struct BrotliStateStruct;
42 : #endif
43 :
44 : /**
45 : * This file defines some of the basic structures used by libjar to
46 : * read Zip files. It makes use of zlib in order to do the decompression.
47 : *
48 : * A few notes on the classes/structs:
49 : * nsZipArchive represents a single Zip file, and maintains an index
50 : * of all the items in the file.
51 : * nsZipItem represents a single item (file) in the Zip archive.
52 : * nsZipFind represents the metadata involved in doing a search,
53 : * and current state of the iteration of found objects.
54 : * 'MT''safe' reading from the zipfile is performed through JARInputStream,
55 : * which maintains its own file descriptor, allowing for multiple reads
56 : * concurrently from the same zip file.
57 : */
58 :
59 : /**
60 : * nsZipItem -- a helper class for nsZipArchive
61 : *
62 : * Each nsZipItem represents one file in the archive and all the
63 : * information needed to manipulate it.
64 : */
65 : class nsZipItem final
66 : {
67 : public:
68 : nsZipItem();
69 :
70 387 : const char* Name() { return ((const char*)central) + ZIPCENTRAL_SIZE; } // name bytes start ZIPCENTRAL_SIZE past `central`; NOTE(review): presumably nameLength bytes long and not NUL-terminated -- confirm
71 :
72 : uint32_t LocalOffset();
73 : uint32_t Size();
74 : uint32_t RealSize();
75 : uint32_t CRC32();
76 : uint16_t Date();
77 : uint16_t Time();
78 : uint16_t Compression();
79 : bool IsDirectory();
80 : uint16_t Mode();
81 : const uint8_t* GetExtraField(uint16_t aTag, uint16_t *aBlockSize);
82 : PRTime LastModTime();
83 :
84 : #ifdef XP_UNIX
85 : bool IsSymlink();
86 : #endif
87 :
88 : nsZipItem* next; // link to another item; NOTE(review): presumably the chain within one nsZipArchive::mFiles slot -- confirm
89 : const ZipCentral* central; // ZipCentral record backing this item; Name() is computed from it
90 : uint16_t nameLength; // NOTE(review): presumably the length in bytes of Name() -- confirm
91 : bool isSynthetic; // NOTE(review): presumably set for entries created by nsZipArchive::BuildSynthetics() -- confirm
92 : };
93 :
94 : class nsZipHandle;
95 :
96 : /**
97 : * nsZipArchive -- a class for reading the PKZIP file format.
98 : * Reference-counted (AddRef()/Release()); the destructor is private and the class is non-copyable.
99 : */
100 : class nsZipArchive final
101 : {
102 : friend class nsZipFind;
103 :
104 : /** Destroying the object closes the archive. Private; see AddRef()/Release() below. */
105 : ~nsZipArchive();
106 :
107 : public:
108 : static const char* sFileCorruptedReason;
109 :
110 : /** Constructing does not open the archive. See OpenArchive(). */
111 : nsZipArchive();
112 :
113 : /**
114 : * OpenArchive
115 : *
116 : * It's an error to call this more than once on the same nsZipArchive
117 : * object. If we were allowed to use exceptions this would have been
118 : * part of the constructor.
119 : *
120 : * @param aZipHandle The nsZipHandle used to access the zip
121 : * @param aFd Optional PRFileDesc for Windows readahead optimization
122 : * @return status code
123 : */
124 : nsresult OpenArchive(nsZipHandle *aZipHandle, PRFileDesc *aFd = nullptr);
125 :
126 : /**
127 : * OpenArchive
128 : *
129 : * Convenience function that generates nsZipHandle
130 : *
131 : * @param aFile The file used to access the zip
132 : * @return status code
133 : */
134 : nsresult OpenArchive(nsIFile *aFile);
135 :
136 : /**
137 : * Test the integrity of items in this archive by running
138 : * a CRC check after extracting each item into a memory
139 : * buffer.
140 : *
141 : * @param aEntryName Name of the only item to test, or null to test all the items in the archive
142 : *
143 : * @return status code
144 : */
145 : nsresult Test(const char *aEntryName);
146 :
147 : /**
148 : * Closes an open archive.
149 : */
150 : nsresult CloseArchive();
151 :
152 : /**
153 : * GetItem
154 : * @param aEntryName Name of file in the archive
155 : * @return pointer to nsZipItem
156 : */
157 : nsZipItem* GetItem(const char * aEntryName);
158 :
159 : /**
160 : * ExtractFile
161 : *
162 : * @param zipEntry Item in the archive to extract
163 : * @param outname Name of file to write to
164 : * @param outFD Filedescriptor to write contents to
165 : * @return status code
166 : */
167 : nsresult ExtractFile(nsZipItem * zipEntry, const char *outname, PRFileDesc * outFD);
168 :
169 : /**
170 : * FindInit
171 : *
172 : * Initializes a search for files in the archive. FindNext() returns
173 : * the actual matches. The nsZipFind must be deleted when you're done.
174 : *
175 : * @param aPattern a string or RegExp pattern to search for
176 : * (may be nullptr to find all files in archive)
177 : * @param aFind a pointer to a pointer to a structure used
178 : * in FindNext. In the case of an error this
179 : * will be set to nullptr.
180 : * @return status code
181 : */
182 : nsresult FindInit(const char * aPattern, nsZipFind** aFind);
183 :
184 : /*
185 : * Gets an undependent handle to the mapped file.
186 : */
187 : nsZipHandle* GetFD();
188 :
189 : /**
190 : * Gets the data offset.
191 : * @param aItem Pointer to nsZipItem
192 : * @return the data offset, or 0 on failure.
193 : */
194 : uint32_t GetDataOffset(nsZipItem* aItem);
195 :
196 : /**
197 : * Get pointer to the data of the item.
198 : * @param aItem Pointer to nsZipItem
199 : * @return pointer to the item's data, or null when the zip file is corrupt.
200 : */
201 : const uint8_t* GetData(nsZipItem* aItem);
202 :
203 : bool GetComment(nsACString &aComment); // NOTE(review): presumably fills aComment from mCommentPtr/mCommentLen; meaning of the bool is not visible here -- confirm
204 :
205 : /**
206 : * Gets the amount of memory taken up by the archive's mapping.
207 : * @return the size
208 : */
209 : int64_t SizeOfMapping();
210 :
211 : /*
212 : * Refcounting
213 : */
214 : NS_METHOD_(MozExternalRefCountType) AddRef(void);
215 : NS_METHOD_(MozExternalRefCountType) Release(void);
216 :
217 : private:
218 : //--- private members ---
219 : mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */
220 : NS_DECL_OWNINGTHREAD
221 :
222 : nsZipItem* mFiles[ZIP_TABSIZE]; // item table with ZIP_TABSIZE slots; NOTE(review): presumably hash buckets chained through nsZipItem::next -- confirm
223 : mozilla::ArenaAllocator<1024, sizeof(void*)> mArena; // NOTE(review): presumably backs the items returned by CreateZipItem() -- confirm
224 :
225 : const char* mCommentPtr;
226 : uint16_t mCommentLen;
227 :
228 : // Whether we synthesized the directory entries
229 : bool mBuiltSynthetics;
230 :
231 : // file handle
232 : RefPtr<nsZipHandle> mFd;
233 :
234 : // file URI, for logging
235 : nsCString mURI;
236 :
237 : private:
238 : //--- private methods ---
239 : nsZipItem* CreateZipItem();
240 : nsresult BuildFileList(PRFileDesc *aFd = nullptr);
241 : nsresult BuildSynthetics();
242 :
243 : nsZipArchive& operator=(const nsZipArchive& rhs) = delete; // copying is disabled
244 : nsZipArchive(const nsZipArchive& rhs) = delete;
245 : };
246 :
247 : /**
248 : * nsZipFind
249 : *
250 : * A helper class for nsZipArchive, representing a search. It holds a RefPtr to the archive and is non-copyable.
251 : */
252 : class nsZipFind final
253 : {
254 : public:
255 : nsZipFind(nsZipArchive* aZip, char* aPattern, bool regExp);
256 : ~nsZipFind();
257 :
258 : nsresult FindNext(const char** aResult, uint16_t* aNameLen); // reports a match through the out-params: name pointer and name length
259 :
260 : private:
261 : RefPtr<nsZipArchive> mArchive; // strong reference to the archive being searched
262 : char* mPattern; // NOTE(review): presumably the constructor's aPattern; ownership is not visible here -- confirm in the destructor
263 : nsZipItem* mItem; // NOTE(review): iteration state, presumably the current item -- confirm
264 : uint16_t mSlot; // NOTE(review): iteration state, presumably an index into nsZipArchive::mFiles -- confirm
265 : bool mRegExp;
266 :
267 : nsZipFind& operator=(const nsZipFind& rhs) = delete; // copying is disabled
268 : nsZipFind(const nsZipFind& rhs) = delete;
269 : };
270 :
271 : /**
272 : * nsZipCursor -- a low-level class for reading the individual items in a zip.
273 : */
274 : class nsZipCursor final
275 : {
276 : public:
277 : /**
278 : * Initializes the cursor
279 : *
280 : * @param aItem Item of interest
281 : * @param aZip Archive
282 : * @param aBuf Buffer used for decompression (defaults to nullptr).
283 : * This determines the maximum Read() size in the compressed case.
284 : * @param aBufSize Buffer size (defaults to 0)
285 : * @param doCRC When set to true Read() will check crc (defaults to false)
286 : */
287 : nsZipCursor(nsZipItem *aItem, nsZipArchive *aZip, uint8_t* aBuf = nullptr, uint32_t aBufSize = 0, bool doCRC = false);
288 :
289 : ~nsZipCursor();
290 :
291 : /**
292 : * Performs reads. In the compressed case it uses aBuf (passed to the constructor); for stored files
293 : * it returns a zero-copy buffer.
294 : *
295 : * @param aBytesRead Outparam for number of bytes read.
296 : * @return data read or nullptr if item is corrupted.
297 : */
298 100 : uint8_t* Read(uint32_t *aBytesRead) {
299 100 : return ReadOrCopy(aBytesRead, false);
300 : }
301 :
302 : /**
303 : * Performs a copy. It always uses aBuf (passed to the constructor).
304 : *
305 : * @param aBytesRead Outparam for number of bytes read.
306 : * @return data read or nullptr if item is corrupted.
307 : */
308 0 : uint8_t* Copy(uint32_t *aBytesRead) {
309 0 : return ReadOrCopy(aBytesRead, true);
310 : }
311 :
312 : private:
313 : /* Actual implementation for both Read and Copy above; aCopy is false for Read() and true for Copy() */
314 : uint8_t* ReadOrCopy(uint32_t *aBytesRead, bool aCopy);
315 :
316 : nsZipItem *mItem;
317 : uint8_t *mBuf;
318 : uint32_t mBufSize;
319 : z_stream mZs; // zlib stream state
320 : #ifdef MOZ_JAR_BROTLI
321 : BrotliStateStruct* mBrotliState; // only present when built with MOZ_JAR_BROTLI
322 : #endif
323 : uint32_t mCRC;
324 : bool mDoCRC;
325 : };
326 :
327 : /**
328 : * nsZipItemPtr - a RAII convenience class for reading the individual items in a zip.
329 : * It reads whole files and does zero-copy IO for stored files. A buffer is allocated
330 : * for decompression.
331 : * Do not use when the file may be very large.
332 : */
333 166 : class nsZipItemPtr_base
334 : {
335 : public:
336 : /**
337 : * Initializes the reader
338 : *
339 : * @param aZip Archive
340 : * @param aEntryName Archive membername
341 : * @param doCRC When set to true the crc is checked while the member is read
342 : */
343 : nsZipItemPtr_base(nsZipArchive *aZip, const char *aEntryName, bool doCRC);
344 :
345 100 : uint32_t Length() const { // number of bytes available, as stored in mReadlen
346 100 : return mReadlen;
347 : }
348 :
349 : protected:
350 : RefPtr<nsZipHandle> mZipHandle; // NOTE(review): presumably keeps the mapping alive while mReturnBuf points into it -- confirm
351 : mozilla::UniquePtr<uint8_t[]> mAutoBuf; // buffer owned by this object; nsZipItemPtr::Forget() releases it when it equals mReturnBuf
352 : uint8_t *mReturnBuf; // pointer handed out by nsZipItemPtr::Buffer(); null when there is nothing to return
353 : uint32_t mReadlen; // value returned by Length()
354 : };
355 :
356 : template <class T>
357 166 : class nsZipItemPtr final : public nsZipItemPtr_base
358 : {
359 : static_assert(sizeof(T) == sizeof(char),
360 : "This class cannot be used with larger T without re-examining"
361 : " a number of assumptions.");
362 :
363 : public:
364 166 : nsZipItemPtr(nsZipArchive *aZip, const char *aEntryName, bool doCRC = false) : nsZipItemPtr_base(aZip, aEntryName, doCRC) { }
365 : /**
366 : * @return buffer containing the whole zip member or nullptr on error.
367 : * The returned buffer is owned by this nsZipItemPtr.
368 : */
369 166 : const T* Buffer() const {
370 166 : return (const T*)mReturnBuf;
371 : }
372 :
373 166 : operator const T*() const { // implicit conversion, same pointer as Buffer()
374 166 : return Buffer();
375 : }
376 :
377 : /**
378 : * Relinquish ownership of zip member if compressed.
379 : * Copy member into a new buffer if uncompressed.
380 : * @return a UniquePtr owning a buffer with the whole zip member, or nullptr if there is no buffer. Buffer() returns nullptr afterwards.
381 : */
382 100 : mozilla::UniquePtr<T[]> Forget() {
383 100 : if (!mReturnBuf)
384 0 : return nullptr;
385 : // mReturnBuf is our own mAutoBuf allocation (the compressed case described above): hand it over without copying
386 100 : if (mAutoBuf.get() == mReturnBuf) {
387 100 : mReturnBuf = nullptr;
388 100 : return mozilla::UniquePtr<T[]>(reinterpret_cast<T*>(mAutoBuf.release()));
389 : }
390 0 : auto ret = mozilla::MakeUnique<T[]>(Length()); // otherwise mReturnBuf is not mAutoBuf: copy Length() bytes into a fresh buffer
391 0 : memcpy(ret.get(), mReturnBuf, Length());
392 0 : mReturnBuf = nullptr;
393 0 : return ret;
394 : }
395 : };
396 :
397 : class nsZipHandle final
398 : {
399 : friend class nsZipArchive;
400 : friend class mozilla::FileLocation;
401 : public:
402 : static nsresult Init(nsIFile *file, nsZipHandle **ret, // NOTE(review): presumably creates a handle for a zip file on disk -- confirm
403 : PRFileDesc **aFd = nullptr);
404 : static nsresult Init(nsZipArchive *zip, const char *entry, // NOTE(review): presumably creates a handle for an entry inside another archive -- confirm
405 : nsZipHandle **ret);
406 : static nsresult Init(const uint8_t* aData, uint32_t aLen, // NOTE(review): presumably creates a handle over an in-memory buffer -- confirm
407 : nsZipHandle **aRet);
408 :
409 : NS_METHOD_(MozExternalRefCountType) AddRef(void);
410 : NS_METHOD_(MozExternalRefCountType) Release(void);
411 :
412 : int64_t SizeOfMapping();
413 :
414 : nsresult GetNSPRFileDesc(PRFileDesc** aNSPRFileDesc);
415 :
416 : protected:
417 : const uint8_t * mFileData; /* pointer to zip data */
418 : uint32_t mLen; /* length of zip data */
419 : mozilla::FileLocation mFile; /* source file if any, for logging */
420 :
421 : private:
422 : nsZipHandle(); /* private: handles are obtained through the static Init() overloads */
423 : ~nsZipHandle();
424 :
425 : nsresult findDataStart(); /* NOTE(review): presumably derives mFileData/mLen from mFileStart/mTotalLen -- confirm */
426 :
427 : PRFileMap * mMap; /* nspr datastructure for mmap */
428 : mozilla::AutoFDClose mNSPRFileDesc;
429 : nsAutoPtr<nsZipItemPtr<uint8_t> > mBuf;
430 : mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */
431 : NS_DECL_OWNINGTHREAD
432 :
433 : const uint8_t * mFileStart; /* pointer to mmapped file */
434 : uint32_t mTotalLen; /* total length of the mmapped file */
435 :
436 : /* Magic number for CRX type expressed in Big Endian since it is a literal. The literal's bytes, least-significant first, are 0x43 0x72 0x32 0x34, i.e. ASCII "Cr24". */
437 : static const uint32_t kCRXMagic = 0x34327243;
438 : };
439 :
440 : nsresult gZlibInit(z_stream *zs);
441 :
442 : #endif /* nsZipArchive_h_ */
|