Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsUnicharInputStream.h"
8 : #include "nsIInputStream.h"
9 : #include "nsIServiceManager.h"
10 : #include "nsString.h"
11 : #include "nsTArray.h"
12 : #include "nsAutoPtr.h"
13 : #include "nsCRT.h"
14 : #include "nsStreamUtils.h"
15 : #include "nsUTF8Utils.h"
16 : #include "mozilla/Attributes.h"
17 : #include <fcntl.h>
18 : #if defined(XP_WIN)
19 : #include <io.h>
20 : #else
21 : #include <unistd.h>
22 : #endif
23 :
24 : #define STRING_BUFFER_SIZE 8192
25 :
26 : class StringUnicharInputStream final : public nsIUnicharInputStream
27 : {
28 : public:
29 : explicit StringUnicharInputStream(const nsAString& aString) :
30 : mString(aString), mPos(0), mLen(aString.Length()) { }
31 :
32 : NS_DECL_ISUPPORTS
33 : NS_DECL_NSIUNICHARINPUTSTREAM
34 :
35 : nsString mString;
36 : uint32_t mPos;
37 : uint32_t mLen;
38 :
39 : private:
40 0 : ~StringUnicharInputStream() { }
41 : };
42 :
43 : NS_IMETHODIMP
44 0 : StringUnicharInputStream::Read(char16_t* aBuf,
45 : uint32_t aCount,
46 : uint32_t* aReadCount)
47 : {
48 0 : if (mPos >= mLen) {
49 0 : *aReadCount = 0;
50 0 : return NS_OK;
51 : }
52 0 : nsAString::const_iterator iter;
53 0 : mString.BeginReading(iter);
54 0 : const char16_t* us = iter.get();
55 0 : uint32_t amount = mLen - mPos;
56 0 : if (amount > aCount) {
57 0 : amount = aCount;
58 : }
59 0 : memcpy(aBuf, us + mPos, sizeof(char16_t) * amount);
60 0 : mPos += amount;
61 0 : *aReadCount = amount;
62 0 : return NS_OK;
63 : }
64 :
65 : NS_IMETHODIMP
66 0 : StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
67 : void* aClosure,
68 : uint32_t aCount, uint32_t* aReadCount)
69 : {
70 : uint32_t bytesWritten;
71 0 : uint32_t totalBytesWritten = 0;
72 :
73 : nsresult rv;
74 0 : aCount = XPCOM_MIN(mString.Length() - mPos, aCount);
75 :
76 0 : nsAString::const_iterator iter;
77 0 : mString.BeginReading(iter);
78 :
79 0 : while (aCount) {
80 0 : rv = aWriter(this, aClosure, iter.get() + mPos,
81 0 : totalBytesWritten, aCount, &bytesWritten);
82 :
83 0 : if (NS_FAILED(rv)) {
84 : // don't propagate errors to the caller
85 0 : break;
86 : }
87 :
88 0 : aCount -= bytesWritten;
89 0 : totalBytesWritten += bytesWritten;
90 0 : mPos += bytesWritten;
91 : }
92 :
93 0 : *aReadCount = totalBytesWritten;
94 :
95 0 : return NS_OK;
96 : }
97 :
98 : NS_IMETHODIMP
99 0 : StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString,
100 : uint32_t* aReadCount)
101 : {
102 0 : if (mPos >= mLen) {
103 0 : *aReadCount = 0;
104 0 : return NS_OK;
105 : }
106 0 : uint32_t amount = mLen - mPos;
107 0 : if (amount > aCount) {
108 0 : amount = aCount;
109 : }
110 0 : aString = Substring(mString, mPos, amount);
111 0 : mPos += amount;
112 0 : *aReadCount = amount;
113 0 : return NS_OK;
114 : }
115 :
116 : nsresult
117 0 : StringUnicharInputStream::Close()
118 : {
119 0 : mPos = mLen;
120 0 : return NS_OK;
121 : }
122 :
123 0 : NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream)
124 :
125 : //----------------------------------------------------------------------
126 :
127 : class UTF8InputStream final : public nsIUnicharInputStream
128 : {
129 : public:
130 : UTF8InputStream();
131 : nsresult Init(nsIInputStream* aStream);
132 :
133 : NS_DECL_ISUPPORTS
134 : NS_DECL_NSIUNICHARINPUTSTREAM
135 :
136 : private:
137 : ~UTF8InputStream();
138 :
139 : protected:
140 : int32_t Fill(nsresult* aErrorCode);
141 :
142 : static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes,
143 : uint32_t& aValidUTF8bytes,
144 : uint32_t& aValidUTF16CodeUnits);
145 :
146 : nsCOMPtr<nsIInputStream> mInput;
147 : FallibleTArray<char> mByteData;
148 : FallibleTArray<char16_t> mUnicharData;
149 :
150 : uint32_t mByteDataOffset;
151 : uint32_t mUnicharDataOffset;
152 : uint32_t mUnicharDataLength;
153 : };
154 :
155 21 : UTF8InputStream::UTF8InputStream() :
156 : mByteDataOffset(0),
157 : mUnicharDataOffset(0),
158 21 : mUnicharDataLength(0)
159 : {
160 21 : }
161 :
162 : nsresult
163 21 : UTF8InputStream::Init(nsIInputStream* aStream)
164 : {
165 42 : if (!mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) ||
166 21 : !mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible)) {
167 0 : return NS_ERROR_OUT_OF_MEMORY;
168 : }
169 21 : mInput = aStream;
170 :
171 21 : return NS_OK;
172 : }
173 :
174 126 : NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream)
175 :
176 42 : UTF8InputStream::~UTF8InputStream()
177 : {
178 21 : Close();
179 21 : }
180 :
181 : nsresult
182 21 : UTF8InputStream::Close()
183 : {
184 21 : mInput = nullptr;
185 21 : mByteData.Clear();
186 21 : mUnicharData.Clear();
187 21 : return NS_OK;
188 : }
189 :
190 : nsresult
191 0 : UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount, uint32_t* aReadCount)
192 : {
193 0 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
194 0 : uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
195 : nsresult errorCode;
196 0 : if (0 == readCount) {
197 : // Fill the unichar buffer
198 0 : int32_t bytesRead = Fill(&errorCode);
199 0 : if (bytesRead <= 0) {
200 0 : *aReadCount = 0;
201 0 : return errorCode;
202 : }
203 0 : readCount = bytesRead;
204 : }
205 0 : if (readCount > aCount) {
206 0 : readCount = aCount;
207 : }
208 0 : memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
209 0 : readCount * sizeof(char16_t));
210 0 : mUnicharDataOffset += readCount;
211 0 : *aReadCount = readCount;
212 0 : return NS_OK;
213 : }
214 :
215 : NS_IMETHODIMP
216 63 : UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
217 : void* aClosure,
218 : uint32_t aCount, uint32_t* aReadCount)
219 : {
220 63 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
221 63 : uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
222 63 : nsresult rv = NS_OK;
223 63 : if (0 == bytesToWrite) {
224 : // Fill the unichar buffer
225 50 : int32_t bytesRead = Fill(&rv);
226 50 : if (bytesRead <= 0) {
227 21 : *aReadCount = 0;
228 21 : return rv;
229 : }
230 29 : bytesToWrite = bytesRead;
231 : }
232 :
233 42 : if (bytesToWrite > aCount) {
234 13 : bytesToWrite = aCount;
235 : }
236 :
237 : uint32_t bytesWritten;
238 42 : uint32_t totalBytesWritten = 0;
239 :
240 126 : while (bytesToWrite) {
241 42 : rv = aWriter(this, aClosure,
242 42 : mUnicharData.Elements() + mUnicharDataOffset,
243 : totalBytesWritten, bytesToWrite, &bytesWritten);
244 :
245 42 : if (NS_FAILED(rv)) {
246 : // don't propagate errors to the caller
247 0 : break;
248 : }
249 :
250 42 : bytesToWrite -= bytesWritten;
251 42 : totalBytesWritten += bytesWritten;
252 42 : mUnicharDataOffset += bytesWritten;
253 : }
254 :
255 42 : *aReadCount = totalBytesWritten;
256 :
257 42 : return NS_OK;
258 : }
259 :
260 : NS_IMETHODIMP
261 0 : UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString,
262 : uint32_t* aReadCount)
263 : {
264 0 : NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
265 0 : uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
266 : nsresult errorCode;
267 0 : if (0 == readCount) {
268 : // Fill the unichar buffer
269 0 : int32_t bytesRead = Fill(&errorCode);
270 0 : if (bytesRead <= 0) {
271 0 : *aReadCount = 0;
272 0 : return errorCode;
273 : }
274 0 : readCount = bytesRead;
275 : }
276 0 : if (readCount > aCount) {
277 0 : readCount = aCount;
278 : }
279 0 : const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
280 0 : aString.Assign(buf, readCount);
281 :
282 0 : mUnicharDataOffset += readCount;
283 0 : *aReadCount = readCount;
284 0 : return NS_OK;
285 : }
286 :
287 : int32_t
288 50 : UTF8InputStream::Fill(nsresult* aErrorCode)
289 : {
290 50 : if (!mInput) {
291 : // We already closed the stream!
292 0 : *aErrorCode = NS_BASE_STREAM_CLOSED;
293 0 : return -1;
294 : }
295 :
296 50 : NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness");
297 50 : uint32_t remainder = mByteData.Length() - mByteDataOffset;
298 50 : mByteDataOffset = remainder;
299 : uint32_t nb;
300 50 : *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb);
301 50 : if (nb == 0) {
302 : // Because we assume a many to one conversion, the lingering data
303 : // in the byte buffer must be a partial conversion
304 : // fragment. Because we know that we have received no more new
305 : // data to add to it, we can't convert it. Therefore, we discard
306 : // it.
307 21 : return nb;
308 : }
309 29 : NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb");
310 :
311 : // Now convert as much of the byte buffer to unicode as possible
312 : uint32_t srcLen, dstLen;
313 29 : CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen);
314 :
315 : // the number of UCS2 characters should always be <= the number of
316 : // UTF8 chars
317 29 : NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer");
318 29 : NS_ASSERTION(dstLen <= mUnicharData.Capacity(),
319 : "Ouch. I would overflow my buffer if I wasn't so careful.");
320 29 : if (dstLen > mUnicharData.Capacity()) {
321 0 : return 0;
322 : }
323 :
324 29 : ConvertUTF8toUTF16 converter(mUnicharData.Elements());
325 :
326 29 : nsACString::const_char_iterator start = mByteData.Elements();
327 29 : nsACString::const_char_iterator end = mByteData.Elements() + srcLen;
328 :
329 29 : copy_string(start, end, converter);
330 29 : if (converter.Length() != dstLen) {
331 0 : *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
332 0 : return -1;
333 : }
334 :
335 29 : mUnicharDataOffset = 0;
336 29 : mUnicharDataLength = dstLen;
337 29 : mByteDataOffset = srcLen;
338 :
339 29 : return dstLen;
340 : }
341 :
342 : void
343 29 : UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes,
344 : uint32_t& aValidUTF8bytes,
345 : uint32_t& aValidUTF16CodeUnits)
346 : {
347 29 : const char* c = aBuffer;
348 29 : const char* end = aBuffer + aMaxBytes;
349 29 : const char* lastchar = c; // pre-initialize in case of 0-length buffer
350 29 : uint32_t utf16length = 0;
351 230013 : while (c < end && *c) {
352 114992 : lastchar = c;
353 114992 : utf16length++;
354 :
355 114992 : if (UTF8traits::isASCII(*c)) {
356 114200 : c++;
357 792 : } else if (UTF8traits::is2byte(*c)) {
358 0 : c += 2;
359 792 : } else if (UTF8traits::is3byte(*c)) {
360 792 : c += 3;
361 0 : } else if (UTF8traits::is4byte(*c)) {
362 0 : c += 4;
363 0 : utf16length++; // add 1 more because this will be converted to a
364 : // surrogate pair.
365 0 : } else if (UTF8traits::is5byte(*c)) {
366 0 : c += 5;
367 0 : } else if (UTF8traits::is6byte(*c)) {
368 0 : c += 6;
369 : } else {
370 0 : NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
371 0 : break; // Otherwise we go into an infinite loop. But what happens now?
372 : }
373 : }
374 29 : if (c > end) {
375 0 : c = lastchar;
376 0 : utf16length--;
377 : }
378 :
379 29 : aValidUTF8bytes = c - aBuffer;
380 29 : aValidUTF16CodeUnits = utf16length;
381 29 : }
382 :
383 : nsresult
384 21 : NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap,
385 : nsIUnicharInputStream** aResult)
386 : {
387 21 : *aResult = nullptr;
388 :
389 : // Create converter input stream
390 42 : RefPtr<UTF8InputStream> it = new UTF8InputStream();
391 21 : nsresult rv = it->Init(aStreamToWrap);
392 21 : if (NS_FAILED(rv)) {
393 0 : return rv;
394 : }
395 :
396 21 : it.forget(aResult);
397 21 : return NS_OK;
398 : }
|