Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : * This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #include "nsTextFrameUtils.h"
7 :
8 : #include "nsBidiUtils.h"
9 : #include "nsCharTraits.h"
10 : #include "nsIContent.h"
11 : #include "nsStyleStruct.h"
12 : #include "nsTextFragment.h"
13 : #include "nsUnicharUtils.h"
14 : #include <algorithm>
15 :
16 : using namespace mozilla;
17 :
18 : static bool
19 0 : IsDiscardable(char16_t ch, nsTextFrameUtils::Flags* aFlags)
20 : {
21 : // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun
22 : // and discarding it would force us to copy text in many cases of preformatted
23 : // text containing \r\n.
24 0 : if (ch == CH_SHY) {
25 0 : *aFlags |= nsTextFrameUtils::Flags::TEXT_HAS_SHY;
26 0 : return true;
27 : }
28 0 : return IsBidiControl(ch);
29 : }
30 :
31 : static bool
32 214 : IsDiscardable(uint8_t ch, nsTextFrameUtils::Flags* aFlags)
33 : {
34 214 : if (ch == CH_SHY) {
35 0 : *aFlags |= nsTextFrameUtils::Flags::TEXT_HAS_SHY;
36 0 : return true;
37 : }
38 214 : return false;
39 : }
40 :
// Returns true when |aCh| is a line feed or a carriage return, the two
// characters this file treats as segment breaks.
static bool
IsSegmentBreak(char16_t aCh)
{
  switch (aCh) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
46 :
// Returns true when |aCh| is a plain space (U+0020) or a horizontal tab
// (U+0009).
static bool
IsSpaceOrTab(char16_t aCh)
{
  if (aCh == '\t') {
    return true;
  }
  return aCh == ' ';
}
52 :
53 : static bool
54 104 : IsSpaceOrTabOrSegmentBreak(char16_t aCh)
55 : {
56 104 : return IsSpaceOrTab(aCh) || IsSegmentBreak(aCh);
57 : }
58 :
59 : template<typename CharT>
60 : /* static */ bool
61 0 : nsTextFrameUtils::IsSkippableCharacterForTransformText(CharT aChar)
62 : {
63 0 : return aChar == ' ' ||
64 0 : aChar == '\t' ||
65 0 : aChar == '\n' ||
66 0 : aChar == CH_SHY ||
67 0 : (aChar > 0xFF && IsBidiControl(aChar));
68 : }
69 :
#ifdef DEBUG
// Debug-only consistency check. Walks the entries of |aSkipChars| starting
// at original offset |aSkipCharsOffset| and asserts that every character
// marked as skipped is one that IsSkippableCharacterForTransformText accepts.
// |aText| is indexed relative to |aSkipCharsOffset|.
template<typename CharT>
static void AssertSkippedExpectedChars(const CharT* aText,
                                       const gfxSkipChars& aSkipChars,
                                       int32_t aSkipCharsOffset)
{
  gfxSkipCharsIterator it(aSkipChars);
  for (it.AdvanceOriginal(aSkipCharsOffset);
       it.GetOriginalOffset() < it.GetOriginalEnd();
       it.AdvanceOriginal(1)) {
    CharT ch = aText[it.GetOriginalOffset() - aSkipCharsOffset];
    MOZ_ASSERT(!it.IsOriginalCharSkipped() ||
               nsTextFrameUtils::IsSkippableCharacterForTransformText(ch),
               "skipped unexpected character; need to update "
               "IsSkippableCharacterForTransformText?");
  }
}
#endif
88 :
89 : template<class CharT>
90 : static CharT*
91 6 : TransformWhiteSpaces(const CharT* aText, uint32_t aLength,
92 : uint32_t aBegin, uint32_t aEnd,
93 : bool aHasSegmentBreak,
94 : bool& aInWhitespace,
95 : CharT* aOutput,
96 : nsTextFrameUtils::Flags& aFlags,
97 : nsTextFrameUtils::CompressionMode aCompression,
98 : gfxSkipChars* aSkipChars)
99 : {
100 6 : MOZ_ASSERT(aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE ||
101 : aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE_NEWLINE,
102 : "whitespaces should be skippable!!");
103 : // Get the context preceding/following this white space range.
104 : // For 8-bit text (sizeof CharT == 1), the checks here should get optimized
105 : // out, and isSegmentBreakSkippable should be initialized to be 'false'.
106 : bool isSegmentBreakSkippable =
107 : sizeof(CharT) > 1 &&
108 0 : ((aBegin > 0 && IS_ZERO_WIDTH_SPACE(aText[aBegin - 1])) ||
109 6 : (aEnd < aLength && IS_ZERO_WIDTH_SPACE(aText[aEnd])));
110 0 : if (sizeof(CharT) > 1 && !isSegmentBreakSkippable &&
111 0 : aBegin > 0 && aEnd < aLength) {
112 : uint32_t ucs4before;
113 : uint32_t ucs4after;
114 0 : if (aBegin > 1 &&
115 0 : NS_IS_LOW_SURROGATE(aText[aBegin - 1]) &&
116 0 : NS_IS_HIGH_SURROGATE(aText[aBegin - 2])) {
117 0 : ucs4before = SURROGATE_TO_UCS4(aText[aBegin - 2], aText[aBegin - 1]);
118 : } else {
119 0 : ucs4before = aText[aBegin - 1];
120 : }
121 0 : if (aEnd + 1 < aLength &&
122 0 : NS_IS_HIGH_SURROGATE(aText[aEnd]) &&
123 0 : NS_IS_LOW_SURROGATE(aText[aEnd + 1])) {
124 0 : ucs4after = SURROGATE_TO_UCS4(aText[aEnd], aText[aEnd + 1]);
125 : } else {
126 0 : ucs4after = aText[aEnd];
127 : }
128 : // Discard newlines between characters that have F, W, or H
129 : // EastAsianWidth property and neither side is Hangul.
130 0 : isSegmentBreakSkippable = IsSegmentBreakSkipChar(ucs4before) &&
131 0 : IsSegmentBreakSkipChar(ucs4after);
132 : }
133 :
134 12 : for (uint32_t i = aBegin; i < aEnd; ++i) {
135 6 : CharT ch = aText[i];
136 6 : bool keepChar = false;
137 6 : bool keepTransformedWhiteSpace = false;
138 6 : if (IsDiscardable(ch, &aFlags)) {
139 0 : aSkipChars->SkipChar();
140 0 : continue;
141 : }
142 6 : if (IsSpaceOrTab(ch)) {
143 6 : if (aHasSegmentBreak) {
144 : // If white-space is set to normal, nowrap, or pre-line, white space
145 : // characters are considered collapsible and all spaces and tabs
146 : // immediately preceding or following a segment break are removed.
147 0 : aSkipChars->SkipChar();
148 0 : continue;
149 : }
150 :
151 6 : if (aInWhitespace) {
152 0 : aSkipChars->SkipChar();
153 0 : continue;
154 : } else {
155 6 : keepTransformedWhiteSpace = true;
156 : }
157 : } else {
158 : // Apply Segment Break Transformation Rules (CSS Text 3 - 4.1.2) for
159 : // segment break characters.
160 0 : if (aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE ||
161 : // XXX: According to CSS Text 3, a lone CR should not always be
162 : // kept, but still go through the Segment Break Transformation
163 : // Rules. However, this is what current modern browser engines
164 : // (webkit/blink/edge) do. So, once we can get some clarity
165 : // from the specification issue, we should either remove the
166 : // lone CR condition here, or leave it here with this comment
167 : // being rephrased.
168 : // Please see https://github.com/w3c/csswg-drafts/issues/855.
169 : ch == '\r') {
170 0 : keepChar = true;
171 : } else {
172 : // aCompression == COMPRESS_WHITESPACE_NEWLINE
173 :
174 : // Any collapsible segment break immediately following another
175 : // collapsible segment break is removed. Then the remaining segment
176 : // break is either transformed into a space (U+0020) or removed
177 : // depending on the context before and after the break.
178 0 : if (isSegmentBreakSkippable || aInWhitespace) {
179 0 : aSkipChars->SkipChar();
180 0 : continue;
181 : }
182 0 : isSegmentBreakSkippable = true;
183 0 : keepTransformedWhiteSpace = true;
184 : }
185 : }
186 :
187 6 : if (keepChar) {
188 0 : *aOutput++ = ch;
189 0 : aSkipChars->KeepChar();
190 0 : aInWhitespace = IsSpaceOrTab(ch);
191 6 : } else if (keepTransformedWhiteSpace) {
192 6 : *aOutput++ = ' ';
193 6 : aSkipChars->KeepChar();
194 6 : aInWhitespace = true;
195 : } else {
196 0 : MOZ_ASSERT_UNREACHABLE("Should've skipped the character!!");
197 : }
198 : }
199 6 : return aOutput;
200 : }
201 :
202 : template<class CharT>
203 : CharT*
204 21 : nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength,
205 : CharT* aOutput,
206 : CompressionMode aCompression,
207 : uint8_t* aIncomingFlags,
208 : gfxSkipChars* aSkipChars,
209 : Flags* aAnalysisFlags)
210 : {
211 21 : Flags flags = Flags();
212 : #ifdef DEBUG
213 21 : int32_t skipCharsOffset = aSkipChars->GetOriginalCharCount();
214 : #endif
215 :
216 21 : bool lastCharArabic = false;
217 21 : if (aCompression == COMPRESS_NONE ||
218 : aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
219 : // Skip discardables.
220 : uint32_t i;
221 111 : for (i = 0; i < aLength; ++i) {
222 104 : CharT ch = aText[i];
223 104 : if (IsDiscardable(ch, &flags)) {
224 0 : aSkipChars->SkipChar();
225 : } else {
226 104 : aSkipChars->KeepChar();
227 104 : if (ch > ' ') {
228 98 : lastCharArabic = IS_ARABIC_CHAR(ch);
229 6 : } else if (aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
230 0 : if (ch == '\t' || ch == '\n') {
231 0 : ch = ' ';
232 : }
233 : } else {
234 : // aCompression == COMPRESS_NONE
235 6 : if (ch == '\t') {
236 0 : flags |= Flags::TEXT_HAS_TAB;
237 : }
238 : }
239 104 : *aOutput++ = ch;
240 : }
241 : }
242 7 : if (lastCharArabic) {
243 0 : *aIncomingFlags |= INCOMING_ARABICCHAR;
244 : } else {
245 7 : *aIncomingFlags &= ~INCOMING_ARABICCHAR;
246 : }
247 7 : *aIncomingFlags &= ~INCOMING_WHITESPACE;
248 : } else {
249 14 : bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0;
250 : uint32_t i;
251 112 : for (i = 0; i < aLength; ++i) {
252 98 : CharT ch = aText[i];
253 : // CSS Text 3 - 4.1. The White Space Processing Rules
254 : // White space processing in CSS affects only the document white space
255 : // characters: spaces (U+0020), tabs (U+0009), and segment breaks.
256 : // Since we need the context of segment breaks and their surrounding
257 : // white spaces to proceed the white space processing, a consecutive run
258 : // of spaces/tabs/segment breaks is collected in a first pass loop, then
259 : // we apply the collapsing and transformation rules to this run in a
260 : // second pass loop.
261 98 : if (IsSpaceOrTabOrSegmentBreak(ch)) {
262 6 : bool keepLastSpace = false;
263 6 : bool hasSegmentBreak = IsSegmentBreak(ch);
264 6 : uint32_t countTrailingDiscardables = 0;
265 : uint32_t j;
266 12 : for (j = i + 1; j < aLength &&
267 12 : (IsSpaceOrTabOrSegmentBreak(aText[j]) ||
268 6 : IsDiscardable(aText[j], &flags));
269 : j++) {
270 0 : if (IsSegmentBreak(aText[j])) {
271 0 : hasSegmentBreak = true;
272 : }
273 : }
274 : // Exclude trailing discardables before checking space combining
275 : // sequence tail.
276 6 : for (; IsDiscardable(aText[j - 1], &flags); j--) {
277 0 : countTrailingDiscardables++;
278 : }
279 : // If the last white space is followed by a combining sequence tail,
280 : // exclude it from the range of TransformWhiteSpaces.
281 6 : if (sizeof(CharT) > 1 && aText[j - 1] == ' ' && j < aLength &&
282 0 : IsSpaceCombiningSequenceTail(&aText[j], aLength - j)) {
283 0 : keepLastSpace = true;
284 0 : j--;
285 : }
286 6 : if (j > i) {
287 6 : aOutput = TransformWhiteSpaces(aText, aLength, i, j, hasSegmentBreak,
288 : inWhitespace, aOutput, flags,
289 : aCompression, aSkipChars);
290 : }
291 : // We need to keep KeepChar()/SkipChar() in order, so process the
292 : // last white space first, then process the trailing discardables.
293 6 : if (keepLastSpace) {
294 0 : keepLastSpace = false;
295 0 : *aOutput++ = ' ';
296 0 : aSkipChars->KeepChar();
297 0 : lastCharArabic = false;
298 0 : j++;
299 : }
300 6 : for (; countTrailingDiscardables > 0; countTrailingDiscardables--) {
301 0 : aSkipChars->SkipChar();
302 0 : j++;
303 : }
304 6 : i = j - 1;
305 6 : continue;
306 : }
307 : // Process characters other than the document white space characters.
308 92 : if (IsDiscardable(ch, &flags)) {
309 0 : aSkipChars->SkipChar();
310 : } else {
311 92 : *aOutput++ = ch;
312 92 : aSkipChars->KeepChar();
313 : }
314 92 : lastCharArabic = IS_ARABIC_CHAR(ch);
315 92 : inWhitespace = false;
316 : }
317 :
318 14 : if (lastCharArabic) {
319 0 : *aIncomingFlags |= INCOMING_ARABICCHAR;
320 : } else {
321 14 : *aIncomingFlags &= ~INCOMING_ARABICCHAR;
322 : }
323 14 : if (inWhitespace) {
324 0 : *aIncomingFlags |= INCOMING_WHITESPACE;
325 : } else {
326 14 : *aIncomingFlags &= ~INCOMING_WHITESPACE;
327 : }
328 : }
329 :
330 21 : *aAnalysisFlags = flags;
331 :
332 : #ifdef DEBUG
333 21 : AssertSkippedExpectedChars(aText, *aSkipChars, skipCharsOffset);
334 : #endif
335 21 : return aOutput;
336 : }
337 :
338 : /*
339 : * NOTE: The TransformText and IsSkippableCharacterForTransformText template
340 : * functions are part of the public API of nsTextFrameUtils, while
341 : * their function bodies are not available in the header. They may stop working
342 : * (fail to resolve symbol in link time) once their callsites are moved to a
343 : * different translation unit (e.g. a different unified source file).
344 : * Explicit instantiating this function template with `uint8_t` and `char16_t`
345 : * could prevent us from the potential risk.
346 : */
347 : template uint8_t*
348 : nsTextFrameUtils::TransformText(const uint8_t* aText, uint32_t aLength,
349 : uint8_t* aOutput,
350 : CompressionMode aCompression,
351 : uint8_t* aIncomingFlags,
352 : gfxSkipChars* aSkipChars,
353 : Flags* aAnalysisFlags);
354 : template char16_t*
355 : nsTextFrameUtils::TransformText(const char16_t* aText, uint32_t aLength,
356 : char16_t* aOutput,
357 : CompressionMode aCompression,
358 : uint8_t* aIncomingFlags,
359 : gfxSkipChars* aSkipChars,
360 : Flags* aAnalysisFlags);
361 : template bool
362 : nsTextFrameUtils::IsSkippableCharacterForTransformText(uint8_t aChar);
363 : template bool
364 : nsTextFrameUtils::IsSkippableCharacterForTransformText(char16_t aChar);
365 :
366 : uint32_t
367 0 : nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression(
368 : nsIContent *aContent, const nsStyleText *aStyleText)
369 : {
370 0 : const nsTextFragment *frag = aContent->GetText();
371 : // This is an approximation so we don't really need anything
372 : // too fancy here.
373 : uint32_t len;
374 0 : if (aStyleText->WhiteSpaceIsSignificant()) {
375 0 : len = frag->GetLength();
376 : } else {
377 0 : bool is2b = frag->Is2b();
378 : union {
379 : const char *s1b;
380 : const char16_t *s2b;
381 : } u;
382 0 : if (is2b) {
383 0 : u.s2b = frag->Get2b();
384 : } else {
385 0 : u.s1b = frag->Get1b();
386 : }
387 0 : bool prevWS = true; // more important to ignore blocks with
388 : // only whitespace than get inline boundaries
389 : // exactly right
390 0 : len = 0;
391 0 : for (uint32_t i = 0, i_end = frag->GetLength(); i < i_end; ++i) {
392 0 : char16_t c = is2b ? u.s2b[i] : u.s1b[i];
393 0 : if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
394 0 : if (!prevWS) {
395 0 : ++len;
396 : }
397 0 : prevWS = true;
398 : } else {
399 0 : ++len;
400 0 : prevWS = false;
401 : }
402 : }
403 : }
404 0 : return len;
405 : }
406 :
407 0 : bool nsSkipCharsRunIterator::NextRun() {
408 0 : do {
409 0 : if (mRunLength) {
410 0 : mIterator.AdvanceOriginal(mRunLength);
411 0 : NS_ASSERTION(mRunLength > 0, "No characters in run (initial length too large?)");
412 0 : if (!mSkipped || mLengthIncludesSkipped) {
413 0 : mRemainingLength -= mRunLength;
414 : }
415 : }
416 0 : if (!mRemainingLength)
417 0 : return false;
418 : int32_t length;
419 0 : mSkipped = mIterator.IsOriginalCharSkipped(&length);
420 0 : mRunLength = std::min(length, mRemainingLength);
421 0 : } while (!mVisitSkipped && mSkipped);
422 :
423 0 : return true;
424 : }
|