Line data Source code
1 : /* ***** BEGIN LICENSE BLOCK *****
2 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 : *
4 : * Copyright (C) 2002-2017 Németh László
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 : *
18 : * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 : * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 : * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 : * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 : * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
38 : #include <stdlib.h>
39 : #include <string.h>
40 : #include <stdio.h>
41 :
42 : #include "hunzip.hxx"
43 : #include "csutil.hxx"
44 :
45 : #define CODELEN 65536
46 : #define BASEBITREC 5000
47 :
48 : #define UNCOMPRESSED '\002'
49 : #define MAGIC "hz0"
50 : #define MAGIC_ENCRYPT "hz1"
51 : #define MAGICLEN (sizeof(MAGIC) - 1)
52 :
53 0 : int Hunzip::fail(const char* err, const char* par) {
54 0 : fprintf(stderr, err, par);
55 0 : return -1;
56 : }
57 :
58 0 : Hunzip::Hunzip(const char* file, const char* key)
59 0 : : bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0) {
60 0 : in[0] = out[0] = line[0] = '\0';
61 0 : filename = mystrdup(file);
62 0 : if (getcode(key) == -1)
63 0 : bufsiz = -1;
64 : else
65 0 : bufsiz = getbuf();
66 0 : }
67 :
68 0 : int Hunzip::getcode(const char* key) {
69 : unsigned char c[2];
70 : int i, j, n;
71 0 : int allocatedbit = BASEBITREC;
72 0 : const char* enc = key;
73 :
74 0 : if (!filename)
75 0 : return -1;
76 :
77 0 : myopen(fin, filename, std::ios_base::in | std::ios_base::binary);
78 0 : if (!fin.is_open())
79 0 : return -1;
80 :
81 : // read magic number
82 0 : if (!fin.read(in, 3) ||
83 0 : !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
84 0 : strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
85 0 : return fail(MSG_FORMAT, filename);
86 : }
87 :
88 : // check encryption
89 0 : if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
90 : unsigned char cs;
91 0 : if (!key)
92 0 : return fail(MSG_KEY, filename);
93 0 : if (!fin.read(reinterpret_cast<char*>(c), 1))
94 0 : return fail(MSG_FORMAT, filename);
95 0 : for (cs = 0; *enc; enc++)
96 0 : cs ^= *enc;
97 0 : if (cs != c[0])
98 0 : return fail(MSG_KEY, filename);
99 0 : enc = key;
100 : } else
101 0 : key = NULL;
102 :
103 : // read record count
104 0 : if (!fin.read(reinterpret_cast<char*>(c), 2))
105 0 : return fail(MSG_FORMAT, filename);
106 :
107 0 : if (key) {
108 0 : c[0] ^= *enc;
109 0 : if (*(++enc) == '\0')
110 0 : enc = key;
111 0 : c[1] ^= *enc;
112 : }
113 :
114 0 : n = ((int)c[0] << 8) + c[1];
115 0 : dec.resize(BASEBITREC);
116 0 : dec[0].v[0] = 0;
117 0 : dec[0].v[1] = 0;
118 :
119 : // read codes
120 0 : for (i = 0; i < n; i++) {
121 : unsigned char l;
122 0 : if (!fin.read(reinterpret_cast<char*>(c), 2))
123 0 : return fail(MSG_FORMAT, filename);
124 0 : if (key) {
125 0 : if (*(++enc) == '\0')
126 0 : enc = key;
127 0 : c[0] ^= *enc;
128 0 : if (*(++enc) == '\0')
129 0 : enc = key;
130 0 : c[1] ^= *enc;
131 : }
132 0 : if (!fin.read(reinterpret_cast<char*>(&l), 1))
133 0 : return fail(MSG_FORMAT, filename);
134 0 : if (key) {
135 0 : if (*(++enc) == '\0')
136 0 : enc = key;
137 0 : l ^= *enc;
138 : }
139 0 : if (!fin.read(in, l / 8 + 1))
140 0 : return fail(MSG_FORMAT, filename);
141 0 : if (key)
142 0 : for (j = 0; j <= l / 8; j++) {
143 0 : if (*(++enc) == '\0')
144 0 : enc = key;
145 0 : in[j] ^= *enc;
146 : }
147 0 : int p = 0;
148 0 : for (j = 0; j < l; j++) {
149 0 : int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
150 0 : int oldp = p;
151 0 : p = dec[p].v[b];
152 0 : if (p == 0) {
153 0 : lastbit++;
154 0 : if (lastbit == allocatedbit) {
155 0 : allocatedbit += BASEBITREC;
156 0 : dec.resize(allocatedbit);
157 : }
158 0 : dec[lastbit].v[0] = 0;
159 0 : dec[lastbit].v[1] = 0;
160 0 : dec[oldp].v[b] = lastbit;
161 0 : p = lastbit;
162 : }
163 : }
164 0 : dec[p].c[0] = c[0];
165 0 : dec[p].c[1] = c[1];
166 : }
167 0 : return 0;
168 : }
169 :
170 0 : Hunzip::~Hunzip() {
171 0 : if (filename)
172 0 : free(filename);
173 0 : }
174 :
175 0 : int Hunzip::getbuf() {
176 0 : int p = 0;
177 0 : int o = 0;
178 0 : do {
179 0 : if (inc == 0) {
180 0 : fin.read(in, BUFSIZE);
181 0 : inbits = fin.gcount() * 8;
182 : }
183 0 : for (; inc < inbits; inc++) {
184 0 : int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
185 0 : int oldp = p;
186 0 : p = dec[p].v[b];
187 0 : if (p == 0) {
188 0 : if (oldp == lastbit) {
189 0 : fin.close();
190 : // add last odd byte
191 0 : if (dec[lastbit].c[0])
192 0 : out[o++] = dec[lastbit].c[1];
193 0 : return o;
194 : }
195 0 : out[o++] = dec[oldp].c[0];
196 0 : out[o++] = dec[oldp].c[1];
197 0 : if (o == BUFSIZE)
198 0 : return o;
199 0 : p = dec[p].v[b];
200 : }
201 : }
202 0 : inc = 0;
203 0 : } while (inbits == BUFSIZE * 8);
204 0 : return fail(MSG_FORMAT, filename);
205 : }
206 :
207 0 : bool Hunzip::getline(std::string& dest) {
208 : char linebuf[BUFSIZE];
209 0 : int l = 0, eol = 0, left = 0, right = 0;
210 0 : if (bufsiz == -1)
211 0 : return false;
212 0 : while (l < bufsiz && !eol) {
213 0 : linebuf[l++] = out[outc];
214 0 : switch (out[outc]) {
215 : case '\t':
216 0 : break;
217 : case 31: { // escape
218 0 : if (++outc == bufsiz) {
219 0 : bufsiz = getbuf();
220 0 : outc = 0;
221 : }
222 0 : linebuf[l - 1] = out[outc];
223 0 : break;
224 : }
225 : case ' ':
226 0 : break;
227 : default:
228 0 : if (((unsigned char)out[outc]) < 47) {
229 0 : if (out[outc] > 32) {
230 0 : right = out[outc] - 31;
231 0 : if (++outc == bufsiz) {
232 0 : bufsiz = getbuf();
233 0 : outc = 0;
234 : }
235 : }
236 0 : if (out[outc] == 30)
237 0 : left = 9;
238 : else
239 0 : left = out[outc];
240 0 : linebuf[l - 1] = '\n';
241 0 : eol = 1;
242 : }
243 : }
244 0 : if (++outc == bufsiz) {
245 0 : outc = 0;
246 0 : bufsiz = fin.is_open() ? getbuf() : -1;
247 : }
248 : }
249 0 : if (right)
250 0 : strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
251 : else
252 0 : linebuf[l] = '\0';
253 0 : strcpy(line + left, linebuf);
254 0 : dest.assign(line);
255 0 : return true;
256 : }
|