LCOV - output.info - intl/hyphenation/glue/nsHyphenator.cpp

LCOV - code coverage report

Current view:	top level - intl/hyphenation/glue - nsHyphenator.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	87	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	4	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsHyphenator.h"
       7             : #include "nsIFile.h"
       8             : #include "nsUTF8Utils.h"
       9             : #include "nsUnicodeProperties.h"
      10             : #include "nsIURI.h"
      11             : 
      12             : #include "hyphen.h"
      13             : 
      14           0 : nsHyphenator::nsHyphenator(nsIURI *aURI)
      15           0 :   : mDict(nullptr)
      16             : {
                         // Loads a hyphenation dictionary from the location named by aURI's spec.
                         // aURI is dereferenced unconditionally, so it must be non-null.
                         // mDict keeps its initial nullptr if GetSpec fails; otherwise it holds
                         // whatever hnj_hyphen_load returns. IsValid() reports whether it is non-null.
      17           0 :   nsCString uriSpec;
      18           0 :   nsresult rv = aURI->GetSpec(uriSpec);
      19           0 :   if (NS_FAILED(rv)) {
      20           0 :     return;
      21             :   }
      22           0 :   mDict = hnj_hyphen_load(uriSpec.get());
      23             : #ifdef DEBUG
                         // Debug builds only: report a successful load on stdout.
      24           0 :   if (mDict) {
      25           0 :     printf("loaded hyphenation patterns from %s\n", uriSpec.get());
      26             :   }
      27             : #endif
      28             : }
      29             : 
      30           0 : nsHyphenator::~nsHyphenator()
      31             : {
                         // Releases the dictionary through hnj_hyphen_free if one was loaded.
                         // NOTE(review): mDict is cast to HyphenDict* here; its declared type
                         // lives in the header and is not visible in this file.
      32           0 :   if (mDict != nullptr) {
      33           0 :     hnj_hyphen_free((HyphenDict*)mDict);
      34           0 :     mDict = nullptr;
      35             :   }
      36           0 : }
      37             : 
      38             : bool
      39           0 : nsHyphenator::IsValid()
      40             : {
                         // Returns true when mDict is non-null, i.e. the constructor obtained a
                         // dictionary from hnj_hyphen_load.
      41           0 :   return (mDict != nullptr);
      42             : }
      43             : 
      44             : nsresult
      45           0 : nsHyphenator::Hyphenate(const nsAString& aString, nsTArray<bool>& aHyphens)
      46             : {
                         // Resizes aHyphens to aString.Length() and clears it, then scans aString
                         // for runs of letter/mark characters ("words"). Each word is lowercased,
                         // converted to UTF-8 and passed to hnj_hyphen_hyphenate2. On success,
                         // aHyphens[k] is set to true for every UTF-16 offset k at which a
                         // character starts whose entry in libhyphen's result has its low bit set.
                         //
                         // Returns NS_ERROR_OUT_OF_MEMORY if aHyphens cannot be resized, and NS_OK
                         // otherwise. A nonzero result from hnj_hyphen_hyphenate2 is not reported
                         // to the caller; the entries for that word simply stay false.
      47           0 :   if (!aHyphens.SetLength(aString.Length(), mozilla::fallible)) {
      48           0 :     return NS_ERROR_OUT_OF_MEMORY;
      49             :   }
      50           0 :   memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool));
      51             : 
                         // [wordStart, wordLimit) is the UTF-16 code unit range of the word
                         // currently being collected; it is only meaningful while inWord is true.
      52           0 :   bool inWord = false;
      53           0 :   uint32_t wordStart = 0, wordLimit = 0;
      54             :   uint32_t chLen; // UTF-16 code units occupied by the current character (1 or 2)
      55           0 :   for (uint32_t i = 0; i < aString.Length(); i += chLen) {
      56           0 :     uint32_t ch = aString[i];
      57           0 :     chLen = 1;
      58             : 
                           // Combine a valid surrogate pair into a single code point. An unpaired
                           // high surrogate is left as-is and classified on its own below.
      59           0 :     if (NS_IS_HIGH_SURROGATE(ch)) {
      60           0 :       if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
      61           0 :         ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
      62           0 :         chLen = 2;
      63             :       } else {
      64           0 :         NS_WARNING("unpaired surrogate found during hyphenation");
      65             :       }
      66             :     }
      67             : 
      68           0 :     nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
      69           0 :     if (cat == nsUGenCategory::kLetter || cat == nsUGenCategory::kMark) {
      70           0 :       if (!inWord) {
      71           0 :         inWord = true;
      72           0 :         wordStart = i;
      73             :       }
      74           0 :       wordLimit = i + chLen;
                             // Keep extending the word while input remains. At the end of the
                             // string, fall through so the final word is still processed.
      75           0 :       if (i + chLen < aString.Length()) {
      76           0 :         continue;
      77             :       }
      78             :     }
      79             : 
                           // Reached on a non-letter/mark character or at the end of the string:
                           // hyphenate the word collected so far, if any.
      80           0 :     if (inWord) {
      81             :       // Convert the word to utf-8 for libhyphen, lowercasing it as we go
      82             :       // so that it will match the (lowercased) patterns (bug 1105644).
      83           0 :       nsAutoCString utf8;
      84           0 :       const char16_t* const begin = aString.BeginReading();
      85           0 :       const char16_t *cur = begin + wordStart;
      86           0 :       const char16_t *end = begin + wordLimit;
      87           0 :       while (cur < end) {
      88           0 :         uint32_t ch = *cur++;
      89             : 
      90           0 :         if (NS_IS_HIGH_SURROGATE(ch)) {
      91           0 :           if (cur < end && NS_IS_LOW_SURROGATE(*cur)) {
      92           0 :             ch = SURROGATE_TO_UCS4(ch, *cur++);
      93             :           } else {
      94           0 :             ch = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR
      95             :           }
      96           0 :         } else if (NS_IS_LOW_SURROGATE(ch)) {
      97           0 :           ch = 0xfffd; // unpaired surrogate
      98             :         }
      99             : 
     100             :         // XXX What about language-specific casing? Consider Turkish I/i...
     101             :         // In practice, it looks like the current patterns will not be
     102             :         // affected by this, as they treat dotted and undotted i similarly.
     103           0 :         ch = ToLowerCase(ch);
     104             : 
                               // Hand-rolled UTF-8 encoding: 1 to 4 bytes depending on the code point.
     105           0 :         if (ch < 0x80) { // U+0000 - U+007F
     106           0 :           utf8.Append(ch);
     107           0 :         } else if (ch < 0x0800) { // U+0080 - U+07FF
     108           0 :           utf8.Append(0xC0 | (ch >> 6));
     109           0 :           utf8.Append(0x80 | (0x003F & ch));
     110           0 :         } else if (ch < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF
     111           0 :           utf8.Append(0xE0 | (ch >> 12));
     112           0 :           utf8.Append(0x80 | (0x003F & (ch >> 6)));
     113           0 :           utf8.Append(0x80 | (0x003F & ch));
     114             :         } else {
     115           0 :           utf8.Append(0xF0 | (ch >> 18));
     116           0 :           utf8.Append(0x80 | (0x003F & (ch >> 12)));
     117           0 :           utf8.Append(0x80 | (0x003F & (ch >> 6)));
     118           0 :           utf8.Append(0x80 | (0x003F & ch));
     119             :         }
     120             :       }
     121             : 
                             // Output buffer for libhyphen, sized five bytes beyond the UTF-8 word.
                             // NOTE(review): the +5 presumably matches libhyphen's documented
                             // buffer requirement - confirm against hyphen.h.
     122           0 :       AutoTArray<char,200> utf8hyphens;
     123           0 :       utf8hyphens.SetLength(utf8.Length() + 5);
                             // NOTE(review): rep/pos/cut are passed by address and never read or
                             // freed in this function; confirm whether hnj_hyphen_hyphenate2 can
                             // allocate into them.
     124           0 :       char **rep = nullptr;
     125           0 :       int *pos = nullptr;
     126           0 :       int *cut = nullptr;
     127           0 :       int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
     128           0 :                                       utf8.BeginReading(), utf8.Length(),
     129             :                                       utf8hyphens.Elements(), nullptr,
     130           0 :                                       &rep, &pos, &cut);
     131           0 :       if (!err) {
     132             :         // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
     133             :         // from utf8 code unit indexing (which would match the utf8 input
     134             :         // string directly) to Unicode character indexing.
     135             :         // We then need to convert this to utf16 code unit offsets for Gecko.
     136           0 :         const char *hyphPtr = utf8hyphens.Elements();
     137           0 :         const char16_t *cur = begin + wordStart;
     138           0 :         const char16_t *end = begin + wordLimit;
     139           0 :         while (cur < end) {
                                 // A set low bit in the per-character entry marks this character;
                                 // record it at the UTF-16 offset where the character starts.
     140           0 :           if (*hyphPtr & 0x01) {
     141           0 :             aHyphens[cur - begin] = true;
     142             :           }
     143           0 :           cur++;
                                 // Skip the trailing half of a surrogate pair so that one entry in
                                 // the hyphens buffer is consumed per Unicode character.
     144           0 :           if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
     145           0 :               NS_IS_HIGH_SURROGATE(*(cur-1)))
     146             :           {
     147           0 :             cur++;
     148             :           }
     149           0 :           hyphPtr++;
     150             :         }
     151             :       }
     152             :     }
     153             : 
                           // Whether or not a word was processed, the next letter/mark starts anew.
     154           0 :     inWord = false;
     155             :   }
     156             : 
     157           0 :   return NS_OK;
     158             : }

Generated by: LCOV version 1.13