Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /*
8 : * A class which represents a fragment of text (eg inside a text
9 : * node); if only codepoints below 256 are used, the text is stored as
10 : * a char*; otherwise the text is stored as a char16_t*
11 : */
12 :
13 : #include "nsTextFragment.h"
14 : #include "nsCRT.h"
15 : #include "nsReadableUtils.h"
16 : #include "nsMemory.h"
17 : #include "nsBidiUtils.h"
18 : #include "nsUnicharUtils.h"
19 : #include "nsUTF8Utils.h"
20 : #include "mozilla/CheckedInt.h"
21 : #include "mozilla/MemoryReporting.h"
22 : #include "mozilla/SSE.h"
23 : #include "nsTextFragmentImpl.h"
24 : #include <algorithm>
25 :
26 : #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
27 : #define TEXTFRAG_MAX_NEWLINES 7
28 :
29 : // Static buffers shared by common fragments: whitespace runs (newlines followed by spaces or tabs) and single 8-bit characters
30 : static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
31 : static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
32 : static char sSingleCharSharedString[256];
33 :
34 : using mozilla::CheckedUint32;
35 :
36 : // static
37 : nsresult
38 3 : nsTextFragment::Init()
39 : {
40 : // Create whitespace strings
41 : uint32_t i;
42 27 : for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
43 48 : sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
44 48 : sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
45 24 : sSpaceSharedString[i][0] = ' ';
46 24 : sTabSharedString[i][0] = ' ';
47 : uint32_t j;
48 108 : for (j = 1; j < 1 + i; ++j) {
49 84 : sSpaceSharedString[i][j] = '\n';
50 84 : sTabSharedString[i][j] = '\n';
51 : }
52 2424 : for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
53 1200 : sSpaceSharedString[i][j] = ' ';
54 1200 : sTabSharedString[i][j] = '\t';
55 : }
56 : }
57 :
58 : // Create single-char strings
59 771 : for (i = 0; i < 256; ++i) {
60 768 : sSingleCharSharedString[i] = i;
61 : }
62 :
63 3 : return NS_OK;
64 : }
65 :
66 : // static
67 : void
68 0 : nsTextFragment::Shutdown()
69 : {
70 : uint32_t i;
71 0 : for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
72 0 : delete [] sSpaceSharedString[i];
73 0 : delete [] sTabSharedString[i];
74 0 : sSpaceSharedString[i] = nullptr;
75 0 : sTabSharedString[i] = nullptr;
76 : }
77 0 : }
78 :
79 6 : nsTextFragment::~nsTextFragment()
80 : {
81 3 : ReleaseText();
82 3 : MOZ_COUNT_DTOR(nsTextFragment);
83 3 : }
84 :
85 : void
86 406 : nsTextFragment::ReleaseText()
87 : {
88 406 : if (mState.mLength && m1b && mState.mInHeap) {
89 1 : free(m2b); // m1b == m2b as far as free is concerned
90 : }
91 :
92 406 : m1b = nullptr;
93 406 : mState.mIsBidi = false;
94 :
95 : // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
96 406 : mAllBits = 0;
97 406 : }
98 :
99 : nsTextFragment&
100 23 : nsTextFragment::operator=(const nsTextFragment& aOther)
101 : {
102 23 : ReleaseText();
103 :
104 23 : if (aOther.mState.mLength) {
105 23 : if (!aOther.mState.mInHeap) {
106 5 : m1b = aOther.m1b; // This will work even if aOther is using m2b
107 : }
108 : else {
109 18 : CheckedUint32 m2bSize = aOther.mState.mLength;
110 18 : m2bSize *= (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));
111 18 : m2b = nullptr;
112 18 : if (m2bSize.isValid()) {
113 18 : m2b = static_cast<char16_t*>(malloc(m2bSize.value()));
114 : }
115 :
116 18 : if (m2b) {
117 18 : memcpy(m2b, aOther.m2b, m2bSize.value());
118 : } else {
119 : // allocate a buffer for a single REPLACEMENT CHARACTER
120 0 : m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));
121 0 : m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER
122 0 : mState.mIs2b = true;
123 0 : mState.mInHeap = true;
124 0 : mState.mLength = 1;
125 : }
126 : }
127 :
128 23 : if (m1b) {
129 23 : mAllBits = aOther.mAllBits;
130 : }
131 : }
132 :
133 23 : return *this;
134 : }
135 :
136 : static inline int32_t
137 0 : FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)
138 : {
139 : typedef Non8BitParameters<sizeof(size_t)> p;
140 0 : const size_t mask = p::mask();
141 0 : const uint32_t alignMask = p::alignMask();
142 0 : const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
143 0 : const int32_t len = end - str;
144 0 : int32_t i = 0;
145 :
146 : // Align ourselves to a word boundary.
147 : int32_t alignLen =
148 0 : std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
149 0 : for (; i < alignLen; i++) {
150 0 : if (str[i] > 255)
151 0 : return i;
152 : }
153 :
154 : // Check one word at a time.
155 0 : const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
156 0 : for (; i < wordWalkEnd; i += numUnicharsPerWord) {
157 0 : const size_t word = *reinterpret_cast<const size_t*>(str + i);
158 0 : if (word & mask)
159 0 : return i;
160 : }
161 :
162 : // Take care of the remainder one character at a time.
163 0 : for (; i < len; i++) {
164 0 : if (str[i] > 255)
165 0 : return i;
166 : }
167 :
168 0 : return -1;
169 : }
170 :
171 : #ifdef MOZILLA_MAY_SUPPORT_SSE2
172 : namespace mozilla {
173 : namespace SSE2 {
174 : int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);
175 : } // namespace SSE2
176 : } // namespace mozilla
177 : #endif
178 :
179 : /*
180 : * This function returns -1 if all characters in str are 8 bit characters.
181 : * Otherwise, it returns a value less than or equal to the index of the first
182 : * non-8bit character in str. For example, if first non-8bit character is at
183 : * position 25, it may return 25, or for example 24, or 16. But it guarantees
184 : * there is no non-8bit character before returned value.
185 : */
186 : static inline int32_t
187 151 : FirstNon8Bit(const char16_t *str, const char16_t *end)
188 : {
189 : #ifdef MOZILLA_MAY_SUPPORT_SSE2
190 151 : if (mozilla::supports_sse2()) {
191 151 : return mozilla::SSE2::FirstNon8Bit(str, end);
192 : }
193 : #endif
194 :
195 0 : return FirstNon8BitUnvectorized(str, end);
196 : }
197 :
198 : bool
199 380 : nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)
200 : {
201 380 : ReleaseText();
202 :
203 380 : if (aLength == 0) {
204 14 : return true;
205 : }
206 :
207 366 : char16_t firstChar = *aBuffer;
208 366 : if (aLength == 1 && firstChar < 256) {
209 41 : m1b = sSingleCharSharedString + firstChar;
210 41 : mState.mInHeap = false;
211 41 : mState.mIs2b = false;
212 41 : mState.mLength = 1;
213 :
214 41 : return true;
215 : }
216 :
217 325 : const char16_t *ucp = aBuffer;
218 325 : const char16_t *uend = aBuffer + aLength;
219 :
220 : // Check if we can use a shared string
221 325 : if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
222 255 : (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
223 188 : if (firstChar == ' ') {
224 5 : ++ucp;
225 : }
226 :
227 188 : const char16_t* start = ucp;
228 578 : while (ucp < uend && *ucp == '\n') {
229 195 : ++ucp;
230 : }
231 188 : const char16_t* endNewLine = ucp;
232 :
233 188 : char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
234 1764 : while (ucp < uend && *ucp == space) {
235 788 : ++ucp;
236 : }
237 :
238 363 : if (ucp == uend &&
239 350 : endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
240 175 : ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
241 175 : char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
242 175 : m1b = strings[endNewLine - start];
243 :
244 : // If we didn't find a space in the beginning, skip it now.
245 175 : if (firstChar != ' ') {
246 175 : ++m1b;
247 : }
248 :
249 175 : mState.mInHeap = false;
250 175 : mState.mIs2b = false;
251 175 : mState.mLength = aLength;
252 :
253 175 : return true;
254 : }
255 : }
256 :
257 : // See if we need to store the data in ucs2 or not
258 150 : int32_t first16bit = FirstNon8Bit(ucp, uend);
259 :
260 150 : if (first16bit != -1) { // aBuffer contains no non-8bit character
261 : // Use ucs2 storage because we have to
262 8 : CheckedUint32 m2bSize = aLength;
263 8 : m2bSize *= sizeof(char16_t);
264 8 : if (!m2bSize.isValid()) {
265 0 : return false;
266 : }
267 :
268 8 : m2b = static_cast<char16_t*>(malloc(m2bSize.value()));
269 8 : if (!m2b) {
270 0 : return false;
271 : }
272 8 : memcpy(m2b, aBuffer, m2bSize.value());
273 :
274 8 : mState.mIs2b = true;
275 8 : if (aUpdateBidi) {
276 8 : UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
277 : }
278 :
279 : } else {
280 : // Use 1 byte storage because we can
281 142 : char* buff = static_cast<char*>(malloc(aLength));
282 142 : if (!buff) {
283 0 : return false;
284 : }
285 :
286 : // Copy data
287 142 : LossyConvertEncoding16to8 converter(buff);
288 142 : copy_string(aBuffer, aBuffer+aLength, converter);
289 142 : m1b = buff;
290 142 : mState.mIs2b = false;
291 : }
292 :
293 : // Setup our fields
294 150 : mState.mInHeap = true;
295 150 : mState.mLength = aLength;
296 :
297 150 : return true;
298 : }
299 :
300 : void
301 0 : nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)
302 : {
303 0 : NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
304 0 : NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
305 :
306 0 : if (aOffset < 0) {
307 0 : aOffset = 0;
308 : }
309 :
310 0 : if (uint32_t(aOffset + aCount) > GetLength()) {
311 0 : aCount = mState.mLength - aOffset;
312 : }
313 :
314 0 : if (aCount != 0) {
315 0 : if (mState.mIs2b) {
316 0 : memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);
317 : } else {
318 0 : const char *cp = m1b + aOffset;
319 0 : const char *end = cp + aCount;
320 0 : LossyConvertEncoding8to16 converter(aDest);
321 0 : copy_string(cp, end, converter);
322 : }
323 : }
324 0 : }
325 :
326 : bool
327 1 : nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)
328 : {
329 : // This is a common case because some callsites create a textnode
330 : // with a value by creating the node and then calling AppendData.
331 1 : if (mState.mLength == 0) {
332 0 : return SetTo(aBuffer, aLength, aUpdateBidi);
333 : }
334 :
335 : // Should we optimize for aData.Length() == 0?
336 :
337 1 : CheckedUint32 length = mState.mLength;
338 1 : length += aLength;
339 :
340 1 : if (!length.isValid()) {
341 0 : return false;
342 : }
343 :
344 1 : if (mState.mIs2b) {
345 0 : length *= sizeof(char16_t);
346 0 : if (!length.isValid()) {
347 0 : return false;
348 : }
349 :
350 : // Already a 2-byte string so the result will be too
351 0 : char16_t* buff = static_cast<char16_t*>(realloc(m2b, length.value()));
352 0 : if (!buff) {
353 0 : return false;
354 : }
355 :
356 0 : memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
357 0 : mState.mLength += aLength;
358 0 : m2b = buff;
359 :
360 0 : if (aUpdateBidi) {
361 0 : UpdateBidiFlag(aBuffer, aLength);
362 : }
363 :
364 0 : return true;
365 : }
366 :
367 : // Current string is a 1-byte string, check if the new data fits in one byte too.
368 1 : int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
369 :
370 1 : if (first16bit != -1) { // aBuffer contains no non-8bit character
371 0 : length *= sizeof(char16_t);
372 0 : if (!length.isValid()) {
373 0 : return false;
374 : }
375 :
376 : // The old data was 1-byte, but the new is not so we have to expand it
377 : // all to 2-byte
378 0 : char16_t* buff = static_cast<char16_t*>(malloc(length.value()));
379 0 : if (!buff) {
380 0 : return false;
381 : }
382 :
383 : // Copy data into buff
384 0 : LossyConvertEncoding8to16 converter(buff);
385 0 : copy_string(m1b, m1b+mState.mLength, converter);
386 :
387 0 : memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
388 0 : mState.mLength += aLength;
389 0 : mState.mIs2b = true;
390 :
391 0 : if (mState.mInHeap) {
392 0 : free(m2b);
393 : }
394 0 : m2b = buff;
395 :
396 0 : mState.mInHeap = true;
397 :
398 0 : if (aUpdateBidi) {
399 0 : UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
400 : }
401 :
402 0 : return true;
403 : }
404 :
405 : // The new and the old data is all 1-byte
406 : char* buff;
407 1 : if (mState.mInHeap) {
408 0 : buff = static_cast<char*>(realloc(const_cast<char*>(m1b), length.value()));
409 0 : if (!buff) {
410 0 : return false;
411 : }
412 : }
413 : else {
414 1 : buff = static_cast<char*>(malloc(length.value()));
415 1 : if (!buff) {
416 0 : return false;
417 : }
418 :
419 1 : memcpy(buff, m1b, mState.mLength);
420 1 : mState.mInHeap = true;
421 : }
422 :
423 : // Copy aBuffer into buff.
424 1 : LossyConvertEncoding16to8 converter(buff + mState.mLength);
425 1 : copy_string(aBuffer, aBuffer + aLength, converter);
426 :
427 1 : m1b = buff;
428 1 : mState.mLength += aLength;
429 :
430 1 : return true;
431 : }
432 :
433 : /* virtual */ size_t
434 184 : nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
435 : {
436 184 : if (Is2b()) {
437 0 : return aMallocSizeOf(m2b);
438 : }
439 :
440 184 : if (mState.mInHeap) {
441 27 : return aMallocSizeOf(m1b);
442 : }
443 :
444 157 : return 0;
445 : }
446 :
447 : // To save time we only do this when we really want to know, not during
448 : // every allocation
449 : void
450 8 : nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
451 : {
452 8 : if (mState.mIs2b && !mState.mIsBidi) {
453 8 : if (HasRTLChars(aBuffer, aLength)) {
454 0 : mState.mIsBidi = true;
455 : }
456 : }
457 8 : }
|