LCOV - code coverage report
Current view: top level - layout/generic - nsTextFrameUtils.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 82 180 45.6 %
Date: 2017-07-14 16:53:18 Functions: 7 15 46.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsTextFrameUtils.h"
       7             : 
       8             : #include "nsBidiUtils.h"
       9             : #include "nsCharTraits.h"
      10             : #include "nsIContent.h"
      11             : #include "nsStyleStruct.h"
      12             : #include "nsTextFragment.h"
      13             : #include "nsUnicharUtils.h"
      14             : #include <algorithm>
      15             : 
      16             : using namespace mozilla;
      17             : 
      18             : static bool
      19           0 : IsDiscardable(char16_t ch, nsTextFrameUtils::Flags* aFlags)
      20             : {
      21             :   // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun
      22             :   // and discarding it would force us to copy text in many cases of preformatted
      23             :   // text containing \r\n.
      24           0 :   if (ch == CH_SHY) {
      25           0 :     *aFlags |= nsTextFrameUtils::Flags::TEXT_HAS_SHY;
      26           0 :     return true;
      27             :   }
      28           0 :   return IsBidiControl(ch);
      29             : }
      30             : 
      31             : static bool
      32         214 : IsDiscardable(uint8_t ch, nsTextFrameUtils::Flags* aFlags)
      33             : {
      34         214 :   if (ch == CH_SHY) {
      35           0 :     *aFlags |= nsTextFrameUtils::Flags::TEXT_HAS_SHY;
      36           0 :     return true;
      37             :   }
      38         214 :   return false;
      39             : }
      40             : 
      41             : static bool
      42         104 : IsSegmentBreak(char16_t aCh)
      43             : {
      44         104 :   return aCh == '\n' || aCh == '\r';
      45             : }
      46             : 
      47             : static bool
      48         110 : IsSpaceOrTab(char16_t aCh)
      49             : {
      50         110 :   return aCh == ' ' || aCh == '\t';
      51             : }
      52             : 
      53             : static bool
      54         104 : IsSpaceOrTabOrSegmentBreak(char16_t aCh)
      55             : {
      56         104 :   return IsSpaceOrTab(aCh) || IsSegmentBreak(aCh);
      57             : }
      58             : 
      59             : template<typename CharT>
      60             : /* static */ bool
      61           0 : nsTextFrameUtils::IsSkippableCharacterForTransformText(CharT aChar)
      62             : {
      63           0 :   return aChar == ' ' ||
      64           0 :          aChar == '\t' ||
      65           0 :          aChar == '\n' ||
      66           0 :          aChar == CH_SHY ||
      67           0 :          (aChar > 0xFF && IsBidiControl(aChar));
      68             : }
      69             : 
      70             : #ifdef DEBUG
      71             : template<typename CharT>
      72          21 : static void AssertSkippedExpectedChars(const CharT* aText,
      73             :                                        const gfxSkipChars& aSkipChars,
      74             :                                        int32_t aSkipCharsOffset)
      75             : {
      76          21 :   gfxSkipCharsIterator it(aSkipChars);
      77          21 :   it.AdvanceOriginal(aSkipCharsOffset);
      78         425 :   while (it.GetOriginalOffset() < it.GetOriginalEnd()) {
      79         202 :     CharT ch = aText[it.GetOriginalOffset() - aSkipCharsOffset];
      80         202 :     MOZ_ASSERT(!it.IsOriginalCharSkipped() ||
      81             :                nsTextFrameUtils::IsSkippableCharacterForTransformText(ch),
      82             :                "skipped unexpected character; need to update "
      83             :                "IsSkippableCharacterForTransformText?");
      84         202 :     it.AdvanceOriginal(1);
      85             :   }
      86          21 : }
      87             : #endif
      88             : 
      89             : template<class CharT>
      90             : static CharT*
      91           6 : TransformWhiteSpaces(const CharT* aText, uint32_t aLength,
      92             :                      uint32_t aBegin, uint32_t aEnd,
      93             :                      bool aHasSegmentBreak,
      94             :                      bool& aInWhitespace,
      95             :                      CharT* aOutput,
      96             :                      nsTextFrameUtils::Flags& aFlags,
      97             :                      nsTextFrameUtils::CompressionMode aCompression,
      98             :                      gfxSkipChars* aSkipChars)
      99             : {
     100           6 :   MOZ_ASSERT(aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE ||
     101             :              aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE_NEWLINE,
     102             :              "whitespaces should be skippable!!");
     103             :   // Get the context preceding/following this white space range.
     104             :   // For 8-bit text (sizeof CharT == 1), the checks here should get optimized
     105             :   // out, and isSegmentBreakSkippable should be initialized to be 'false'.
     106             :   bool isSegmentBreakSkippable =
     107             :     sizeof(CharT) > 1 &&
     108           0 :     ((aBegin > 0 && IS_ZERO_WIDTH_SPACE(aText[aBegin - 1])) ||
     109           6 :      (aEnd < aLength && IS_ZERO_WIDTH_SPACE(aText[aEnd])));
     110           0 :   if (sizeof(CharT) > 1 && !isSegmentBreakSkippable &&
     111           0 :       aBegin > 0 && aEnd < aLength) {
     112             :     uint32_t ucs4before;
     113             :     uint32_t ucs4after;
     114           0 :     if (aBegin > 1 &&
     115           0 :         NS_IS_LOW_SURROGATE(aText[aBegin - 1]) &&
     116           0 :         NS_IS_HIGH_SURROGATE(aText[aBegin - 2])) {
     117           0 :       ucs4before = SURROGATE_TO_UCS4(aText[aBegin - 2], aText[aBegin - 1]);
     118             :     } else {
     119           0 :       ucs4before = aText[aBegin - 1];
     120             :     }
     121           0 :     if (aEnd + 1 < aLength &&
     122           0 :         NS_IS_HIGH_SURROGATE(aText[aEnd]) &&
     123           0 :         NS_IS_LOW_SURROGATE(aText[aEnd + 1])) {
     124           0 :       ucs4after = SURROGATE_TO_UCS4(aText[aEnd], aText[aEnd + 1]);
     125             :     } else {
     126           0 :       ucs4after = aText[aEnd];
     127             :     }
     128             :     // Discard newlines between characters that have F, W, or H
     129             :     // EastAsianWidth property and neither side is Hangul.
     130           0 :     isSegmentBreakSkippable = IsSegmentBreakSkipChar(ucs4before) &&
     131           0 :                               IsSegmentBreakSkipChar(ucs4after);
     132             :   }
     133             : 
     134          12 :   for (uint32_t i = aBegin; i < aEnd; ++i) {
     135           6 :     CharT ch = aText[i];
     136           6 :     bool keepChar = false;
     137           6 :     bool keepTransformedWhiteSpace = false;
     138           6 :     if (IsDiscardable(ch, &aFlags)) {
     139           0 :       aSkipChars->SkipChar();
     140           0 :       continue;
     141             :     }
     142           6 :     if (IsSpaceOrTab(ch)) {
     143           6 :       if (aHasSegmentBreak) {
     144             :         // If white-space is set to normal, nowrap, or pre-line, white space
     145             :         // characters are considered collapsible and all spaces and tabs
     146             :         // immediately preceding or following a segment break are removed.
     147           0 :         aSkipChars->SkipChar();
     148           0 :         continue;
     149             :       }
     150             : 
     151           6 :       if (aInWhitespace) {
     152           0 :         aSkipChars->SkipChar();
     153           0 :         continue;
     154             :       } else {
     155           6 :         keepTransformedWhiteSpace = true;
     156             :       }
     157             :     } else {
     158             :       // Apply Segment Break Transformation Rules (CSS Text 3 - 4.1.2) for
     159             :       // segment break characters.
     160           0 :       if (aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE ||
     161             :           // XXX: According to CSS Text 3, a lone CR should not always be
     162             :           //      kept, but still go through the Segment Break Transformation
     163             :           //      Rules. However, this is what current modern browser engines
     164             :           //      (webkit/blink/edge) do. So, once we can get some clarity
     165             :           //      from the specification issue, we should either remove the
     166             :           //      lone CR condition here, or leave it here with this comment
     167             :           //      being rephrased.
     168             :           //      Please see https://github.com/w3c/csswg-drafts/issues/855.
     169             :           ch == '\r') {
     170           0 :         keepChar = true;
     171             :       } else {
     172             :         // aCompression == COMPRESS_WHITESPACE_NEWLINE
     173             : 
     174             :         // Any collapsible segment break immediately following another
     175             :         // collapsible segment break is removed.  Then the remaining segment
     176             :         // break is either transformed into a space (U+0020) or removed
     177             :         // depending on the context before and after the break.
     178           0 :         if (isSegmentBreakSkippable || aInWhitespace) {
     179           0 :           aSkipChars->SkipChar();
     180           0 :           continue;
     181             :         }
     182           0 :         isSegmentBreakSkippable = true;
     183           0 :         keepTransformedWhiteSpace = true;
     184             :       }
     185             :     }
     186             : 
     187           6 :     if (keepChar) {
     188           0 :       *aOutput++ = ch;
     189           0 :       aSkipChars->KeepChar();
     190           0 :       aInWhitespace = IsSpaceOrTab(ch);
     191           6 :     } else if (keepTransformedWhiteSpace) {
     192           6 :       *aOutput++ = ' ';
     193           6 :       aSkipChars->KeepChar();
     194           6 :       aInWhitespace = true;
     195             :     } else {
     196           0 :       MOZ_ASSERT_UNREACHABLE("Should've skipped the character!!");
     197             :     }
     198             :   }
     199           6 :   return aOutput;
     200             : }
     201             : 
// Copies |aText| (|aLength| characters) into |aOutput|, dropping discardable
// characters and, depending on |aCompression|, collapsing or transforming
// white space.
//
// For every input character exactly one of aSkipChars->KeepChar() or
// aSkipChars->SkipChar() is called, in text order, and at most one output
// character is written per input character.
//
// @param aIncomingFlags  in/out; INCOMING_WHITESPACE is read as the initial
//                        "in white space" state for the compressing modes,
//                        and both INCOMING_WHITESPACE and INCOMING_ARABICCHAR
//                        are rewritten on exit to describe the end of this
//                        text.
// @param aAnalysisFlags  out; receives the flags gathered while scanning
//                        (e.g. TEXT_HAS_SHY, TEXT_HAS_TAB).
// @return the output pointer advanced past the last character written.
template<class CharT>
CharT*
nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength,
                                CharT* aOutput,
                                CompressionMode aCompression,
                                uint8_t* aIncomingFlags,
                                gfxSkipChars* aSkipChars,
                                Flags* aAnalysisFlags)
{
  Flags flags = Flags();
#ifdef DEBUG
  // Remember where this call's entries start in aSkipChars so the debug
  // check at the end only inspects the characters appended here.
  int32_t skipCharsOffset = aSkipChars->GetOriginalCharCount();
#endif

  bool lastCharArabic = false;
  if (aCompression == COMPRESS_NONE ||
      aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
    // No white space collapsing in these modes: skip discardables and copy
    // everything else.
    uint32_t i;
    for (i = 0; i < aLength; ++i) {
      CharT ch = aText[i];
      if (IsDiscardable(ch, &flags)) {
        aSkipChars->SkipChar();
      } else {
        aSkipChars->KeepChar();
        if (ch > ' ') {
          lastCharArabic = IS_ARABIC_CHAR(ch);
        } else if (aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
          // Tabs and newlines are emitted as plain spaces in this mode.
          if (ch == '\t' || ch == '\n') {
            ch = ' ';
          }
        } else {
          // aCompression == COMPRESS_NONE
          if (ch == '\t') {
            flags |= Flags::TEXT_HAS_TAB;
          }
        }
        *aOutput++ = ch;
      }
    }
    if (lastCharArabic) {
      *aIncomingFlags |= INCOMING_ARABICCHAR;
    } else {
      *aIncomingFlags &= ~INCOMING_ARABICCHAR;
    }
    // The non-compressing modes always clear the incoming white space state.
    *aIncomingFlags &= ~INCOMING_WHITESPACE;
  } else {
    bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0;
    uint32_t i;
    for (i = 0; i < aLength; ++i) {
      CharT ch = aText[i];
      // CSS Text 3 - 4.1. The White Space Processing Rules
      // White space processing in CSS affects only the document white space
      // characters: spaces (U+0020), tabs (U+0009), and segment breaks.
      // Since we need the context of segment breaks and their surrounding
      // white spaces to perform the white space processing, a consecutive
      // run of spaces/tabs/segment breaks is collected in a first pass loop,
      // then we apply the collapsing and transformation rules to this run in
      // a second pass loop.
      if (IsSpaceOrTabOrSegmentBreak(ch)) {
        bool keepLastSpace = false;
        bool hasSegmentBreak = IsSegmentBreak(ch);
        uint32_t countTrailingDiscardables = 0;
        uint32_t j;
        // First pass: extend the run over following white space and
        // discardables; j ends one past the last character of the run.
        for (j = i + 1; j < aLength &&
                        (IsSpaceOrTabOrSegmentBreak(aText[j]) ||
                         IsDiscardable(aText[j], &flags));
             j++) {
          if (IsSegmentBreak(aText[j])) {
            hasSegmentBreak = true;
          }
        }
        // Exclude trailing discardables before checking space combining
        // sequence tail. This walk stops at aText[i] at the latest, because
        // aText[i] is white space and therefore not discardable.
        for (; IsDiscardable(aText[j - 1], &flags); j--) {
          countTrailingDiscardables++;
        }
        // If the last white space is followed by a combining sequence tail,
        // exclude it from the range of TransformWhiteSpaces.
        if (sizeof(CharT) > 1 && aText[j - 1] == ' ' && j < aLength &&
            IsSpaceCombiningSequenceTail(&aText[j], aLength - j)) {
          keepLastSpace = true;
          j--;
        }
        // Second pass: collapse/transform the run [i, j). The range can be
        // empty when the run was a single space that is kept for a combining
        // sequence tail.
        if (j > i) {
          aOutput = TransformWhiteSpaces(aText, aLength, i, j, hasSegmentBreak,
                                         inWhitespace, aOutput, flags,
                                         aCompression, aSkipChars);
        }
        // We need to keep KeepChar()/SkipChar() in order, so process the
        // last white space first, then process the trailing discardables.
        if (keepLastSpace) {
          keepLastSpace = false;
          *aOutput++ = ' ';
          aSkipChars->KeepChar();
          lastCharArabic = false;
          j++;
        }
        for (; countTrailingDiscardables > 0; countTrailingDiscardables--) {
          aSkipChars->SkipChar();
          j++;
        }
        // Resume the outer loop right after the run (the loop's ++i brings
        // i to j).
        i = j - 1;
        continue;
      }
      // Process characters other than the document white space characters.
      if (IsDiscardable(ch, &flags)) {
        aSkipChars->SkipChar();
      } else {
        *aOutput++ = ch;
        aSkipChars->KeepChar();
      }
      // NOTE(review): these two updates also run when ch was discardable and
      // skipped, so a discardable character resets inWhitespace — confirm
      // this is intended.
      lastCharArabic = IS_ARABIC_CHAR(ch);
      inWhitespace = false;
    }

    if (lastCharArabic) {
      *aIncomingFlags |= INCOMING_ARABICCHAR;
    } else {
      *aIncomingFlags &= ~INCOMING_ARABICCHAR;
    }
    if (inWhitespace) {
      *aIncomingFlags |= INCOMING_WHITESPACE;
    } else {
      *aIncomingFlags &= ~INCOMING_WHITESPACE;
    }
  }

  *aAnalysisFlags = flags;

#ifdef DEBUG
  AssertSkippedExpectedChars(aText, *aSkipChars, skipCharsOffset);
#endif
  return aOutput;
}
     337             : 
/*
 * NOTE: The TransformText and IsSkippableCharacterForTransformText template
 * functions are part of the public API of nsTextFrameUtils, but their
 * function bodies are not available in the header. Calls to them could
 * therefore fail to link (unresolved symbol) once a call site is moved to a
 * different translation unit (e.g. a different unified source file).
 * Explicitly instantiating these function templates for `uint8_t` and
 * `char16_t` here guards against that risk.
 */
template uint8_t*
nsTextFrameUtils::TransformText(const uint8_t* aText, uint32_t aLength,
                                uint8_t* aOutput,
                                CompressionMode aCompression,
                                uint8_t* aIncomingFlags,
                                gfxSkipChars* aSkipChars,
                                Flags* aAnalysisFlags);
template char16_t*
nsTextFrameUtils::TransformText(const char16_t* aText, uint32_t aLength,
                                char16_t* aOutput,
                                CompressionMode aCompression,
                                uint8_t* aIncomingFlags,
                                gfxSkipChars* aSkipChars,
                                Flags* aAnalysisFlags);
template bool
nsTextFrameUtils::IsSkippableCharacterForTransformText(uint8_t aChar);
template bool
nsTextFrameUtils::IsSkippableCharacterForTransformText(char16_t aChar);
     365             : 
     366             : uint32_t
     367           0 : nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression(
     368             :                     nsIContent *aContent, const nsStyleText *aStyleText)
     369             : {
     370           0 :   const nsTextFragment *frag = aContent->GetText();
     371             :   // This is an approximation so we don't really need anything
     372             :   // too fancy here.
     373             :   uint32_t len;
     374           0 :   if (aStyleText->WhiteSpaceIsSignificant()) {
     375           0 :     len = frag->GetLength();
     376             :   } else {
     377           0 :     bool is2b = frag->Is2b();
     378             :     union {
     379             :       const char *s1b;
     380             :       const char16_t *s2b;
     381             :     } u;
     382           0 :     if (is2b) {
     383           0 :       u.s2b = frag->Get2b();
     384             :     } else {
     385           0 :       u.s1b = frag->Get1b();
     386             :     }
     387           0 :     bool prevWS = true; // more important to ignore blocks with
     388             :                         // only whitespace than get inline boundaries
     389             :                         // exactly right
     390           0 :     len = 0;
     391           0 :     for (uint32_t i = 0, i_end = frag->GetLength(); i < i_end; ++i) {
     392           0 :       char16_t c = is2b ? u.s2b[i] : u.s1b[i];
     393           0 :       if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
     394           0 :         if (!prevWS) {
     395           0 :           ++len;
     396             :         }
     397           0 :         prevWS = true;
     398             :       } else {
     399           0 :         ++len;
     400           0 :         prevWS = false;
     401             :       }
     402             :     }
     403             :   }
     404           0 :   return len;
     405             : }
     406             : 
     407           0 : bool nsSkipCharsRunIterator::NextRun() {
     408           0 :   do {
     409           0 :     if (mRunLength) {
     410           0 :       mIterator.AdvanceOriginal(mRunLength);
     411           0 :       NS_ASSERTION(mRunLength > 0, "No characters in run (initial length too large?)");
     412           0 :       if (!mSkipped || mLengthIncludesSkipped) {
     413           0 :         mRemainingLength -= mRunLength;
     414             :       }
     415             :     }
     416           0 :     if (!mRemainingLength)
     417           0 :       return false;
     418             :     int32_t length;
     419           0 :     mSkipped = mIterator.IsOriginalCharSkipped(&length);
     420           0 :     mRunLength = std::min(length, mRemainingLength);
     421           0 :   } while (!mVisitSkipped && mSkipped);
     422             : 
     423           0 :   return true;
     424             : }

Generated by: LCOV version 1.13