// Copyright 2008 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Internals shared between the Snappy implementation and its unittest.

#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_

#include "snappy-stubs-internal.h"

namespace snappy {
namespace internal {

class WorkingMemory {
 public:
  WorkingMemory() : large_table_(NULL) { }
  ~WorkingMemory() { delete[] large_table_; }

  // Allocates and clears a hash table using memory in "*this",
  // stores the number of buckets in "*table_size" and returns a pointer to
  // the base of the hash table.
  uint16* GetHashTable(size_t input_size, int* table_size);

 private:
  uint16 small_table_[1<<10];    // 2KB
  uint16* large_table_;          // Allocated only when needed

  DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
};
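
// Illustrative sketch (not part of the original header): a WorkingMemory is
// expected to live on the caller's side for the duration of one compression
// call, e.g.
//
//   {
//     WorkingMemory wmem;
//     int table_size;
//     uint16* table = wmem.GetHashTable(input_size, &table_size);
//     ...  // "table" is cleared and has table_size buckets
//   }      // large_table_, if it was ever allocated, is freed here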

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input_length <= kBlockSize"
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input_length)" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
char* CompressFragment(const char* input,
                       size_t input_length,
                       char* op,
                       uint16* table,
                       const int table_size);
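
// Illustrative usage sketch (not part of the original header): with "table"
// and "table_size" obtained from WorkingMemory::GetHashTable() as sketched
// above, and MaxCompressedLength() taken from the public snappy.h API, a
// caller might write
//
//   char* output = new char[MaxCompressedLength(input_length)];
//   char* end = CompressFragment(input, input_length, output,
//                                table, table_size);
//   size_t compressed_size = end - output;  // "end - op" per the contract
//
// provided input_length <= kBlockSize.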

// Return the largest n such that
//
//   s1[0,n-1] == s2[0,n-1]
//   and n <= (s2_limit - s2).
//
// Does not read *s2_limit or beyond.
// Does not read *(s1 + (s2_limit - s2)) or beyond.
// Requires that s2_limit >= s2.
//
// Separate implementation for x86_64, for speed.  Uses the fact that
// x86_64 is little endian.
#if defined(ARCH_K8)
static inline int FindMatchLength(const char* s1,
                                  const char* s2,
                                  const char* s2_limit) {
  assert(s2_limit >= s2);
  int matched = 0;

  // Find out how long the match is.  We loop over the data 64 bits at a
  // time until we find a 64-bit block that doesn't match; then we find
  // the first non-matching bit and use that to calculate the total
  // length of the match.
  while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
    if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
      s2 += 8;
      matched += 8;
    } else {
      // On current (mid-2008) Opteron models there is a 3% more
      // efficient code sequence to find the first non-matching byte.
      // However, what follows is ~10% better on Intel Core 2 and newer,
      // and we expect AMD's bsf instruction to improve.
      uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
      int matching_bits = Bits::FindLSBSetNonZero64(x);
      matched += matching_bits >> 3;
      return matched;
    }
  }
  while (PREDICT_TRUE(s2 < s2_limit)) {
    if (s1[matched] == *s2) {
      ++s2;
      ++matched;
    } else {
      return matched;
    }
  }
  return matched;
}
#else
static inline int FindMatchLength(const char* s1,
                                  const char* s2,
                                  const char* s2_limit) {
  // Implementation based on the x86-64 version, above.
  assert(s2_limit >= s2);
  int matched = 0;

  while (s2 <= s2_limit - 4 &&
         UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
    s2 += 4;
    matched += 4;
  }
  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
    uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
    int matching_bits = Bits::FindLSBSetNonZero(x);
    matched += matching_bits >> 3;
  } else {
    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
      ++s2;
      ++matched;
    }
  }
  return matched;
}
#endif
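
// Illustrative example (not part of the original header): with
//   s1 = "abcdefgh", s2 = "abcdxxxx", s2_limit = s2 + 8,
// FindMatchLength(s1, s2, s2_limit) returns 4: the first four bytes match
// and the fifth differs, and the XOR / find-lowest-set-bit step locates that
// first differing byte without a byte-by-byte scan.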

// Lookup tables for decompression code.  Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};
static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};
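
// Illustrative note (not part of the original header): wordmask[i] keeps the
// low 8*i bits of a 32-bit load, e.g. wordmask[3] == 0x00ffffff, so
// (UNALIGNED_LOAD32(p) & wordmask[3]) yields just the first three bytes of a
// little-endian load; presumably this lets the decompressor pick up the 0..4
// extra tag bytes described below with a single unaligned load.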

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed,
// for efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};
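
// Illustrative example (not part of the original header): the tag byte 0x01
// has type COPY_1_BYTE_OFFSET with its three length bits and three offset
// bits all zero, and char_table[0x01] == 0x0804 decodes per the layout above
// as
//   length          = 0x0804 & 0xff         == 4
//   (offset / 256)  = (0x0804 >> 8) & 0x7   == 0
//   extra tag bytes = (0x0804 >> 11) & 0x7  == 1
// i.e. a length-4 copy whose single extra byte carries the low eight bits of
// the offset.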

}  // end namespace internal
}  // end namespace snappy

#endif  // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_