Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsLineBreaker.h"
8 : #include "nsContentUtils.h"
9 : #include "nsILineBreaker.h"
10 : #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
11 : #include "nsHyphenationManager.h"
12 : #include "nsHyphenator.h"
13 : #include "mozilla/gfx/2D.h"
14 :
15 21 : nsLineBreaker::nsLineBreaker()
16 : : mCurrentWordLanguage(nullptr),
17 : mCurrentWordContainsMixedLang(false),
18 : mCurrentWordContainsComplexChar(false),
19 : mAfterBreakableSpace(false), mBreakHere(false),
20 21 : mWordBreak(nsILineBreaker::kWordBreak_Normal)
21 : {
22 21 : }
23 :
24 42 : nsLineBreaker::~nsLineBreaker()
25 : {
26 21 : NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
27 21 : }
28 :
29 : static void
30 0 : SetupCapitalization(const char16_t* aWord, uint32_t aLength,
31 : bool* aCapitalization)
32 : {
33 : // Capitalize the first alphanumeric character after a space or start
34 : // of the word.
35 : // The only space character a word can contain is NBSP.
36 0 : bool capitalizeNextChar = true;
37 0 : for (uint32_t i = 0; i < aLength; ++i) {
38 0 : uint32_t ch = aWord[i];
39 0 : if (capitalizeNextChar) {
40 0 : if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
41 0 : NS_IS_LOW_SURROGATE(aWord[i + 1])) {
42 0 : ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
43 : }
44 0 : if (nsContentUtils::IsAlphanumeric(ch)) {
45 0 : aCapitalization[i] = true;
46 0 : capitalizeNextChar = false;
47 : }
48 0 : if (!IS_IN_BMP(ch)) {
49 0 : ++i;
50 : }
51 : }
52 0 : if (ch == 0xA0 /*NBSP*/) {
53 0 : capitalizeNextChar = true;
54 : }
55 : }
56 0 : }
57 :
58 : nsresult
59 21 : nsLineBreaker::FlushCurrentWord()
60 : {
61 21 : uint32_t length = mCurrentWord.Length();
62 42 : AutoTArray<uint8_t,4000> breakState;
63 21 : if (!breakState.AppendElements(length))
64 0 : return NS_ERROR_OUT_OF_MEMORY;
65 :
66 42 : nsTArray<bool> capitalizationState;
67 :
68 21 : if (!mCurrentWordContainsComplexChar) {
69 : // For break-strict set everything internal to "break", otherwise
70 : // to "no break"!
71 36 : memset(breakState.Elements(),
72 18 : mWordBreak == nsILineBreaker::kWordBreak_BreakAll ?
73 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
74 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
75 18 : length*sizeof(uint8_t));
76 : } else {
77 3 : nsContentUtils::LineBreaker()->
78 3 : GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak,
79 6 : breakState.Elements());
80 : }
81 :
82 21 : bool autoHyphenate = mCurrentWordLanguage &&
83 21 : !mCurrentWordContainsMixedLang;
84 : uint32_t i;
85 21 : for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
86 0 : TextItem* ti = &mTextItems[i];
87 0 : if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
88 0 : autoHyphenate = false;
89 : }
90 : }
91 21 : if (autoHyphenate) {
92 : RefPtr<nsHyphenator> hyphenator =
93 0 : nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
94 0 : if (hyphenator) {
95 0 : FindHyphenationPoints(hyphenator,
96 0 : mCurrentWord.Elements(),
97 0 : mCurrentWord.Elements() + length,
98 0 : breakState.Elements());
99 : }
100 : }
101 :
102 21 : uint32_t offset = 0;
103 30 : for (i = 0; i < mTextItems.Length(); ++i) {
104 9 : TextItem* ti = &mTextItems[i];
105 9 : NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
106 :
107 9 : if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
108 4 : breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
109 : }
110 9 : if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
111 8 : uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
112 8 : memset(breakState.Elements() + offset + exclude,
113 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
114 16 : (ti->mLength - exclude)*sizeof(uint8_t));
115 : }
116 :
117 : // Don't set the break state for the first character of the word, because
118 : // it was already set correctly earlier and we don't know what the true
119 : // value should be.
120 9 : uint32_t skipSet = i == 0 ? 1 : 0;
121 9 : if (ti->mSink) {
122 18 : ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
123 18 : breakState.Elements() + offset + skipSet);
124 :
125 9 : if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
126 0 : if (capitalizationState.Length() == 0) {
127 0 : if (!capitalizationState.AppendElements(length))
128 0 : return NS_ERROR_OUT_OF_MEMORY;
129 0 : memset(capitalizationState.Elements(), false, length*sizeof(bool));
130 0 : SetupCapitalization(mCurrentWord.Elements(), length,
131 0 : capitalizationState.Elements());
132 : }
133 0 : ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
134 0 : capitalizationState.Elements() + offset);
135 : }
136 : }
137 :
138 9 : offset += ti->mLength;
139 : }
140 :
141 21 : mCurrentWord.Clear();
142 21 : mTextItems.Clear();
143 21 : mCurrentWordContainsComplexChar = false;
144 21 : mCurrentWordContainsMixedLang = false;
145 21 : mCurrentWordLanguage = nullptr;
146 21 : return NS_OK;
147 : }
148 :
// When the aFlags argument of AppendText has every one of these bits set,
// there is no need to look for break opportunities in the appended text.
#define NO_BREAKS_NEEDED_FLAGS                                                \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE |                           \
   BREAK_SKIP_SETTING_NO_BREAKS)
155 :
156 : nsresult
157 0 : nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
158 : uint32_t aFlags, nsILineBreakSink* aSink)
159 : {
160 0 : NS_ASSERTION(aLength > 0, "Appending empty text...");
161 :
162 0 : uint32_t offset = 0;
163 :
164 : // Continue the current word
165 0 : if (mCurrentWord.Length() > 0) {
166 0 : NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
167 :
168 0 : while (offset < aLength && !IsSpace(aText[offset])) {
169 0 : mCurrentWord.AppendElement(aText[offset]);
170 0 : if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
171 0 : mCurrentWordContainsComplexChar = true;
172 : }
173 0 : UpdateCurrentWordLanguage(aHyphenationLanguage);
174 0 : ++offset;
175 : }
176 :
177 0 : if (offset > 0) {
178 0 : mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
179 : }
180 :
181 0 : if (offset == aLength)
182 0 : return NS_OK;
183 :
184 : // We encountered whitespace, so we're done with this word
185 0 : nsresult rv = FlushCurrentWord();
186 0 : if (NS_FAILED(rv))
187 0 : return rv;
188 : }
189 :
190 0 : AutoTArray<uint8_t,4000> breakState;
191 0 : if (aSink) {
192 0 : if (!breakState.AppendElements(aLength))
193 0 : return NS_ERROR_OUT_OF_MEMORY;
194 : }
195 :
196 0 : bool noCapitalizationNeeded = true;
197 0 : nsTArray<bool> capitalizationState;
198 0 : if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
199 0 : if (!capitalizationState.AppendElements(aLength))
200 0 : return NS_ERROR_OUT_OF_MEMORY;
201 0 : memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
202 0 : noCapitalizationNeeded = false;
203 : }
204 :
205 0 : uint32_t start = offset;
206 0 : bool noBreaksNeeded = !aSink ||
207 0 : ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
208 0 : !mBreakHere && !mAfterBreakableSpace);
209 0 : if (noBreaksNeeded && noCapitalizationNeeded) {
210 : // Skip to the space before the last word, since either the break data
211 : // here is not needed, or no breaks are set in the sink and there cannot
212 : // be any breaks in this chunk; and we don't need to do word-initial
213 : // capitalization. All we need is the context for the next chunk (if any).
214 0 : offset = aLength;
215 0 : while (offset > start) {
216 0 : --offset;
217 0 : if (IsSpace(aText[offset]))
218 0 : break;
219 : }
220 : }
221 0 : uint32_t wordStart = offset;
222 0 : bool wordHasComplexChar = false;
223 :
224 0 : RefPtr<nsHyphenator> hyphenator;
225 0 : if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
226 0 : !(aFlags & BREAK_SUPPRESS_INSIDE) &&
227 : aHyphenationLanguage) {
228 0 : hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
229 : }
230 :
231 : for (;;) {
232 0 : char16_t ch = aText[offset];
233 0 : bool isSpace = IsSpace(ch);
234 0 : bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
235 :
236 0 : if (aSink && !noBreaksNeeded) {
237 0 : breakState[offset] =
238 0 : mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
239 0 : (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
240 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
241 0 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
242 : }
243 0 : mBreakHere = false;
244 0 : mAfterBreakableSpace = isBreakableSpace;
245 :
246 0 : if (isSpace || ch == '\n') {
247 0 : if (offset > wordStart && aSink) {
248 0 : if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
249 0 : if (wordHasComplexChar) {
250 : // Save current start-of-word state because GetJISx4051Breaks will
251 : // set it to false
252 0 : uint8_t currentStart = breakState[wordStart];
253 0 : nsContentUtils::LineBreaker()->
254 0 : GetJISx4051Breaks(aText + wordStart, offset - wordStart,
255 0 : mWordBreak,
256 0 : breakState.Elements() + wordStart);
257 0 : breakState[wordStart] = currentStart;
258 : }
259 0 : if (hyphenator) {
260 0 : FindHyphenationPoints(hyphenator,
261 0 : aText + wordStart, aText + offset,
262 0 : breakState.Elements() + wordStart);
263 : }
264 : }
265 0 : if (!noCapitalizationNeeded) {
266 0 : SetupCapitalization(aText + wordStart, offset - wordStart,
267 0 : capitalizationState.Elements() + wordStart);
268 : }
269 : }
270 0 : wordHasComplexChar = false;
271 0 : ++offset;
272 0 : if (offset >= aLength)
273 0 : break;
274 0 : wordStart = offset;
275 : } else {
276 0 : if (!wordHasComplexChar && IsComplexChar(ch)) {
277 0 : wordHasComplexChar = true;
278 : }
279 0 : ++offset;
280 0 : if (offset >= aLength) {
281 : // Save this word
282 0 : mCurrentWordContainsComplexChar = wordHasComplexChar;
283 0 : uint32_t len = offset - wordStart;
284 0 : char16_t* elems = mCurrentWord.AppendElements(len);
285 0 : if (!elems)
286 0 : return NS_ERROR_OUT_OF_MEMORY;
287 0 : memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
288 0 : mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
289 : // Ensure that the break-before for this word is written out
290 0 : offset = wordStart + 1;
291 0 : UpdateCurrentWordLanguage(aHyphenationLanguage);
292 0 : break;
293 : }
294 : }
295 0 : }
296 :
297 0 : if (aSink) {
298 0 : if (!noBreaksNeeded) {
299 0 : aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
300 : }
301 0 : if (!noCapitalizationNeeded) {
302 0 : aSink->SetCapitalization(start, offset - start,
303 0 : capitalizationState.Elements() + start);
304 : }
305 : }
306 0 : return NS_OK;
307 : }
308 :
309 : void
310 0 : nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
311 : const char16_t *aTextStart,
312 : const char16_t *aTextLimit,
313 : uint8_t *aBreakState)
314 : {
315 0 : nsDependentSubstring string(aTextStart, aTextLimit);
316 0 : AutoTArray<bool,200> hyphens;
317 0 : if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
318 0 : for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
319 0 : if (hyphens[i]) {
320 0 : aBreakState[i + 1] =
321 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
322 : }
323 : }
324 : }
325 0 : }
326 :
327 : nsresult
328 9 : nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
329 : uint32_t aFlags, nsILineBreakSink* aSink)
330 : {
331 9 : NS_ASSERTION(aLength > 0, "Appending empty text...");
332 :
333 9 : if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
334 : // Defer to the Unicode path if capitalization or hyphenation is required
335 0 : nsAutoString str;
336 0 : const char* cp = reinterpret_cast<const char*>(aText);
337 0 : CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
338 0 : return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
339 : }
340 :
341 9 : uint32_t offset = 0;
342 :
343 : // Continue the current word
344 9 : if (mCurrentWord.Length() > 0) {
345 0 : NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
346 :
347 0 : while (offset < aLength && !IsSpace(aText[offset])) {
348 0 : mCurrentWord.AppendElement(aText[offset]);
349 0 : if (!mCurrentWordContainsComplexChar &&
350 0 : IsComplexASCIIChar(aText[offset])) {
351 0 : mCurrentWordContainsComplexChar = true;
352 : }
353 0 : ++offset;
354 : }
355 :
356 0 : if (offset > 0) {
357 0 : mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
358 : }
359 :
360 0 : if (offset == aLength) {
361 : // We did not encounter whitespace so the word hasn't finished yet.
362 0 : return NS_OK;
363 : }
364 :
365 : // We encountered whitespace, so we're done with this word
366 0 : nsresult rv = FlushCurrentWord();
367 0 : if (NS_FAILED(rv))
368 0 : return rv;
369 : }
370 :
371 18 : AutoTArray<uint8_t,4000> breakState;
372 9 : if (aSink) {
373 9 : if (!breakState.AppendElements(aLength))
374 0 : return NS_ERROR_OUT_OF_MEMORY;
375 : }
376 :
377 9 : uint32_t start = offset;
378 26 : bool noBreaksNeeded = !aSink ||
379 17 : ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
380 25 : !mBreakHere && !mAfterBreakableSpace);
381 9 : if (noBreaksNeeded) {
382 : // Skip to the space before the last word, since either the break data
383 : // here is not needed, or no breaks are set in the sink and there cannot
384 : // be any breaks in this chunk; all we need is the context for the next
385 : // chunk (if any)
386 8 : offset = aLength;
387 270 : while (offset > start) {
388 135 : --offset;
389 135 : if (IsSpace(aText[offset]))
390 4 : break;
391 : }
392 : }
393 9 : uint32_t wordStart = offset;
394 9 : bool wordHasComplexChar = false;
395 :
396 : for (;;) {
397 166 : uint8_t ch = aText[offset];
398 166 : bool isSpace = IsSpace(ch);
399 166 : bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
400 :
401 166 : if (aSink) {
402 : // Consider word-break style. Since the break position of CJK scripts
403 : // will be set by nsILineBreaker, we don't consider CJK at this point.
404 332 : breakState[offset] =
405 494 : mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
406 162 : (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
407 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
408 170 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
409 : }
410 166 : mBreakHere = false;
411 166 : mAfterBreakableSpace = isBreakableSpace;
412 :
413 166 : if (isSpace) {
414 8 : if (offset > wordStart && wordHasComplexChar) {
415 1 : if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
416 : // Save current start-of-word state because GetJISx4051Breaks will
417 : // set it to false
418 1 : uint8_t currentStart = breakState[wordStart];
419 1 : nsContentUtils::LineBreaker()->
420 2 : GetJISx4051Breaks(aText + wordStart, offset - wordStart,
421 1 : mWordBreak,
422 2 : breakState.Elements() + wordStart);
423 1 : breakState[wordStart] = currentStart;
424 : }
425 1 : wordHasComplexChar = false;
426 : }
427 :
428 8 : ++offset;
429 8 : if (offset >= aLength)
430 0 : break;
431 8 : wordStart = offset;
432 : } else {
433 158 : if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
434 4 : wordHasComplexChar = true;
435 : }
436 158 : ++offset;
437 158 : if (offset >= aLength) {
438 : // Save this word
439 9 : mCurrentWordContainsComplexChar = wordHasComplexChar;
440 9 : uint32_t len = offset - wordStart;
441 9 : char16_t* elems = mCurrentWord.AppendElements(len);
442 9 : if (!elems)
443 0 : return NS_ERROR_OUT_OF_MEMORY;
444 : uint32_t i;
445 150 : for (i = wordStart; i < offset; ++i) {
446 141 : elems[i - wordStart] = aText[i];
447 : }
448 9 : mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
449 : // Ensure that the break-before for this word is written out
450 9 : offset = wordStart + 1;
451 9 : break;
452 : }
453 : }
454 157 : }
455 :
456 9 : if (!noBreaksNeeded) {
457 1 : aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
458 : }
459 9 : return NS_OK;
460 : }
461 :
462 : void
463 0 : nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage)
464 : {
465 0 : if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
466 0 : mCurrentWordContainsMixedLang = true;
467 : } else {
468 0 : mCurrentWordLanguage = aHyphenationLanguage;
469 : }
470 0 : }
471 :
472 : nsresult
473 0 : nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
474 : {
475 0 : nsresult rv = FlushCurrentWord();
476 0 : if (NS_FAILED(rv))
477 0 : return rv;
478 :
479 0 : bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
480 0 : if (mAfterBreakableSpace && !isBreakableSpace) {
481 0 : mBreakHere = true;
482 : }
483 0 : mAfterBreakableSpace = isBreakableSpace;
484 0 : return NS_OK;
485 : }
486 :
487 : nsresult
488 21 : nsLineBreaker::Reset(bool* aTrailingBreak)
489 : {
490 21 : nsresult rv = FlushCurrentWord();
491 21 : if (NS_FAILED(rv))
492 0 : return rv;
493 :
494 21 : *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
495 21 : mBreakHere = false;
496 21 : mAfterBreakableSpace = false;
497 21 : return NS_OK;
498 : }
|