LCOV - code coverage report
Current view: top level - netwerk/streamconv/converters - mozTXTToHTMLConv.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 649 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 35 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "mozTXTToHTMLConv.h"
       7             : #include "nsNetUtil.h"
       8             : #include "nsUnicharUtils.h"
       9             : #include "nsCRT.h"
      10             : #include "nsIExternalProtocolHandler.h"
      11             : #include "nsIIOService.h"
      12             : #include "nsIURI.h"
      13             : 
      14             : #include <algorithm>
      15             : 
      16             : #ifdef DEBUG_BenB_Perf
      17             : #include "prtime.h"
      18             : #include "prinrval.h"
      19             : #endif
      20             : 
      21             : const double growthRate = 1.2;
      22             : 
      23             : // Bug 183111, editor now replaces multiple spaces with leading
      24             : // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
      25             : // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
      26             : // Also recognize the Japanese ideographic space 0x3000 as a space.
      27           0 : static inline bool IsSpace(const char16_t aChar)
      28             : {
      29           0 :   return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
      30             : }
      31             : 
      32             : // Escape Char will take ch, escape it and append the result to
      33             : // aStringToAppendTo
      34             : void
      35           0 : mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
      36             :                              bool inAttribute)
      37             : {
      38           0 :     switch (ch)
      39             :     {
      40             :     case '<':
      41           0 :       aStringToAppendTo.AppendLiteral("&lt;");
      42           0 :       break;
      43             :     case '>':
      44           0 :       aStringToAppendTo.AppendLiteral("&gt;");
      45           0 :       break;
      46             :     case '&':
      47           0 :       aStringToAppendTo.AppendLiteral("&amp;");
      48           0 :       break;
      49             :     case '"':
      50           0 :       if (inAttribute)
      51             :       {
      52           0 :         aStringToAppendTo.AppendLiteral("&quot;");
      53           0 :         break;
      54             :       }
      55             :       // else fall through
      56             :       MOZ_FALLTHROUGH;
      57             :     default:
      58           0 :       aStringToAppendTo += ch;
      59             :     }
      60             : 
      61           0 :     return;
      62             : }
      63             : 
      64             : // EscapeStr takes the passed in string and
      65             : // escapes it IN PLACE.
      66             : void
      67           0 : mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
      68             : {
      69             :   // the replace substring routines
      70             :   // don't seem to work if you have a character
      71             :   // in the in string that is also in the replacement
      72             :   // string! =(
      73             :   //aInString.ReplaceSubstring("&", "&amp;");
      74             :   //aInString.ReplaceSubstring("<", "&lt;");
      75             :   //aInString.ReplaceSubstring(">", "&gt;");
      76           0 :   for (uint32_t i = 0; i < aInString.Length();)
      77             :   {
      78           0 :     switch (aInString[i])
      79             :     {
      80             :     case '<':
      81           0 :       aInString.Cut(i, 1);
      82           0 :       aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
      83           0 :       i += 4; // skip past the integers we just added
      84           0 :       break;
      85             :     case '>':
      86           0 :       aInString.Cut(i, 1);
      87           0 :       aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
      88           0 :       i += 4; // skip past the integers we just added
      89           0 :       break;
      90             :     case '&':
      91           0 :       aInString.Cut(i, 1);
      92           0 :       aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
      93           0 :       i += 5; // skip past the integers we just added
      94           0 :       break;
      95             :     case '"':
      96           0 :       if (inAttribute)
      97             :       {
      98           0 :         aInString.Cut(i, 1);
      99           0 :         aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
     100           0 :         i += 6;
     101           0 :         break;
     102             :       }
     103             :       // else fall through
     104             :       MOZ_FALLTHROUGH;
     105             :     default:
     106           0 :       i++;
     107             :     }
     108             :   }
     109           0 : }
     110             : 
     111             : void
     112           0 : mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
     113             : {
     114           0 :   const char16_t * subString = nullptr;
     115           0 :   for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
     116             :   {
     117           0 :     int32_t remainingChars = i - aStartPos;
     118           0 :     if (aInString[i] == '&')
     119             :     {
     120           0 :       subString = &aInString[i];
     121           0 :       if (!nsCRT::strncmp(subString, u"&lt;", std::min(4, aLength - remainingChars)))
     122             :       {
     123           0 :         aOutString.Append(char16_t('<'));
     124           0 :         i += 4;
     125             :       }
     126           0 :       else if (!nsCRT::strncmp(subString, u"&gt;", std::min(4, aLength - remainingChars)))
     127             :       {
     128           0 :         aOutString.Append(char16_t('>'));
     129           0 :         i += 4;
     130             :       }
     131           0 :       else if (!nsCRT::strncmp(subString, u"&amp;", std::min(5, aLength - remainingChars)))
     132             :       {
     133           0 :         aOutString.Append(char16_t('&'));
     134           0 :         i += 5;
     135             :       }
     136           0 :       else if (!nsCRT::strncmp(subString, u"&quot;", std::min(6, aLength - remainingChars)))
     137             :       {
     138           0 :         aOutString.Append(char16_t('"'));
     139           0 :         i += 6;
     140             :       }
     141             :       else
     142             :       {
     143           0 :         aOutString += aInString[i];
     144           0 :         i++;
     145             :       }
     146             :     }
     147             :     else
     148             :     {
     149           0 :       aOutString += aInString[i];
     150           0 :       i++;
     151             :     }
     152             :   }
     153           0 : }
     154             : 
     155             : void
     156           0 : mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
     157             :                                          const uint32_t pos, nsString& aOutString)
     158             : {
     159           0 :   NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
     160           0 :   if (int32_t(pos) >= aInLength)
     161           0 :     return;
     162             : 
     163           0 :   if (aInString[pos] == '@')
     164             :   {
     165             :     // only pre-pend a mailto url if the string contains a .domain in it..
     166             :     //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
     167           0 :     nsDependentString inString(aInString, aInLength);
     168           0 :     if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
     169             :     {
     170           0 :       aOutString.AssignLiteral("mailto:");
     171           0 :       aOutString += aInString;
     172             :     }
     173             :   }
     174           0 :   else if (aInString[pos] == '.')
     175             :   {
     176           0 :     if (ItMatchesDelimited(aInString, aInLength,
     177             :                            u"www.", 4, LT_IGNORE, LT_IGNORE))
     178             :     {
     179           0 :       aOutString.AssignLiteral("http://");
     180           0 :       aOutString += aInString;
     181             :     }
     182           0 :     else if (ItMatchesDelimited(aInString,aInLength, u"ftp.", 4, LT_IGNORE, LT_IGNORE))
     183             :     {
     184           0 :       aOutString.AssignLiteral("ftp://");
     185           0 :       aOutString += aInString;
     186             :     }
     187             :   }
     188             : }
     189             : 
     190             : bool
     191           0 : mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
     192             :                                const uint32_t pos, const modetype check,
     193             :                                uint32_t& start)
     194             : {
     195           0 :   switch(check)
     196             :   { // no breaks, because end of blocks is never reached
     197             :   case RFC1738:
     198             :   {
     199           0 :     if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5))
     200             :     {
     201           0 :       start = pos + 1;
     202           0 :       return true;
     203             :     }
     204             :     else
     205           0 :       return false;
     206             :   }
     207             :   case RFC2396E:
     208             :   {
     209           0 :     nsString temp(aInString, aInLength);
     210           0 :     int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
     211           0 :     if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
     212           0 :                            temp[uint32_t(i)] == '"'))
     213             :     {
     214           0 :       start = uint32_t(++i);
     215           0 :       return start < pos;
     216             :     }
     217             :     else
     218           0 :       return false;
     219             :   }
     220             :   case freetext:
     221             :   {
     222           0 :     int32_t i = pos - 1;
     223           0 :     for (; i >= 0 && (
     224           0 :          nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
     225           0 :          nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) ||
     226           0 :          aInString[uint32_t(i)] == '+' ||
     227           0 :          aInString[uint32_t(i)] == '-' ||
     228           0 :          aInString[uint32_t(i)] == '.'
     229             :          ); i--)
     230             :       ;
     231           0 :     if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]))
     232             :     {
     233           0 :       start = uint32_t(i);
     234           0 :       return true;
     235             :     }
     236             :     else
     237           0 :       return false;
     238             :   }
     239             :   case abbreviated:
     240             :   {
     241           0 :     int32_t i = pos - 1;
     242             :     // This disallows non-ascii-characters for email.
     243             :     // Currently correct, but revisit later after standards changed.
     244           0 :     bool isEmail = aInString[pos] == (char16_t)'@';
     245             :     // These chars mark the start of the URL
     246           0 :     for (; i >= 0
     247           0 :              && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
     248           0 :              && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
     249           0 :              && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
     250           0 :              && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
     251           0 :              && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
     252           0 :              && aInString[uint32_t(i)] != '\\'
     253           0 :              && !IsSpace(aInString[uint32_t(i)])
     254           0 :              && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
     255             :          ; i--)
     256             :       ;
     257           0 :     if
     258             :       (
     259           0 :         ++i >= 0 && uint32_t(i) < pos
     260           0 :           &&
     261             :           (
     262           0 :             nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
     263           0 :             nsCRT::IsAsciiDigit(aInString[uint32_t(i)])
     264             :           )
     265             :       )
     266             :     {
     267           0 :       start = uint32_t(i);
     268           0 :       return true;
     269             :     }
     270             :     else
     271           0 :       return false;
     272             :   }
     273             :   default:
     274           0 :     return false;
     275             :   } //switch
     276             : }
     277             : 
     278             : bool
     279           0 : mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
     280             :            const modetype check, const uint32_t start, uint32_t& end)
     281             : {
     282           0 :   switch(check)
     283             :   { // no breaks, because end of blocks is never reached
     284             :   case RFC1738:
     285             :   case RFC2396E:
     286             :   {
     287           0 :     nsString temp(aInString, aInStringLength);
     288             : 
     289           0 :     int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
     290           0 :     if (i != kNotFound && temp[uint32_t(i--)] ==
     291           0 :         (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
     292             :     {
     293           0 :       end = uint32_t(i);
     294           0 :       return end > pos;
     295             :     }
     296           0 :     return false;
     297             :   }
     298             :   case freetext:
     299             :   case abbreviated:
     300             :   {
     301           0 :     uint32_t i = pos + 1;
     302           0 :     bool isEmail = aInString[pos] == (char16_t)'@';
     303           0 :     bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
     304           0 :     bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
     305           0 :     for (; int32_t(i) < aInStringLength; i++)
     306             :     {
     307             :       // These chars mark the end of the URL
     308           0 :       if (aInString[i] == '>' || aInString[i] == '<' ||
     309           0 :           aInString[i] == '"' || aInString[i] == '`' ||
     310           0 :           aInString[i] == '}' || aInString[i] == '{' ||
     311           0 :           (aInString[i] == ')' && !seenOpeningParenthesis) ||
     312           0 :           (aInString[i] == ']' && !seenOpeningSquareBracket) ||
     313             :           // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
     314           0 :           (aInString[i] == '[' && i > 2 &&
     315           0 :            (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
     316           0 :           IsSpace(aInString[i]))
     317           0 :           break;
     318             :       // Disallow non-ascii-characters for email.
     319             :       // Currently correct, but revisit later after standards changed.
     320           0 :       if (isEmail && (
     321           0 :             aInString[i] == '(' || aInString[i] == '\'' ||
     322           0 :             !nsCRT::IsAscii(aInString[i])))
     323           0 :           break;
     324           0 :       if (aInString[i] == '(')
     325           0 :         seenOpeningParenthesis = true;
     326           0 :       if (aInString[i] == '[')
     327           0 :         seenOpeningSquareBracket = true;
     328             :     }
     329             :     // These chars are allowed in the middle of the URL, but not at end.
     330             :     // Technically they are, but are used in normal text after the URL.
     331           0 :     while (--i > pos && (
     332           0 :              aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
     333           0 :              aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
     334           0 :              aInString[i] == ':' || aInString[i] == '\''
     335             :              ))
     336             :         ;
     337           0 :     if (i > pos)
     338             :     {
     339           0 :       end = i;
     340           0 :       return true;
     341             :     }
     342           0 :     return false;
     343             :   }
     344             :   default:
     345           0 :     return false;
     346             :   } //switch
     347             : }
     348             : 
     349             : void
     350           0 : mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
     351             :      const uint32_t pos, const uint32_t whathasbeendone,
     352             :      const modetype check, const uint32_t start, const uint32_t end,
     353             :      nsString& txtURL, nsString& desc,
     354             :      int32_t& replaceBefore, int32_t& replaceAfter)
     355             : {
     356           0 :   uint32_t descstart = start;
     357           0 :   switch(check)
     358             :   {
     359             :   case RFC1738:
     360             :   {
     361           0 :     descstart = start - 5;
     362           0 :     desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
     363           0 :     replaceAfter = end - pos + 1;
     364           0 :   } break;
     365             :   case RFC2396E:
     366             :   {
     367           0 :     descstart = start - 1;
     368           0 :     desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
     369           0 :     replaceAfter = end - pos + 1;
     370           0 :   } break;
     371             :   case freetext:
     372             :   case abbreviated:
     373             :   {
     374           0 :     descstart = start;
     375           0 :     desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
     376           0 :     replaceAfter = end - pos;
     377           0 :   } break;
     378           0 :   default: break;
     379             :   } //switch
     380             : 
     381           0 :   EscapeStr(desc, false);
     382             : 
     383           0 :   txtURL.Append(&aInString[start], end - start + 1);
     384           0 :   txtURL.StripWhitespace();
     385             : 
     386             :   // FIX ME
     387           0 :   nsAutoString temp2;
     388           0 :   ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
     389           0 :   replaceBefore = temp2.Length();
     390           0 :   return;
     391             : }
     392             : 
     393           0 : bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
     394             : {
     395           0 :   if (!mIOService)
     396           0 :     return false;
     397             : 
     398           0 :   nsAutoCString scheme;
     399           0 :   nsresult rv = mIOService->ExtractScheme(aURL, scheme);
     400           0 :   if(NS_FAILED(rv))
     401           0 :     return false;
     402             : 
     403             :   // Get the handler for this scheme.
     404           0 :   nsCOMPtr<nsIProtocolHandler> handler;
     405           0 :   rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
     406           0 :   if(NS_FAILED(rv))
     407           0 :     return false;
     408             : 
     409             :   // Is it an external protocol handler? If not, linkify it.
     410           0 :   nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
     411           0 :   if (!externalHandler)
     412           0 :    return true; // handler is built-in, linkify it!
     413             : 
     414             :   // If external app exists for the scheme then linkify it.
     415             :   bool exists;
     416           0 :   rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
     417           0 :   return(NS_SUCCEEDED(rv) && exists);
     418             : }
     419             : 
     420             : bool
     421           0 : mozTXTToHTMLConv::CheckURLAndCreateHTML(
     422             :      const nsString& txtURL, const nsString& desc, const modetype mode,
     423             :      nsString& outputHTML)
     424             : {
     425             :   // Create *uri from txtURL
     426           0 :   nsCOMPtr<nsIURI> uri;
     427             :   nsresult rv;
     428             :   // Lazily initialize mIOService
     429           0 :   if (!mIOService)
     430             :   {
     431           0 :     mIOService = do_GetIOService();
     432             : 
     433           0 :     if (!mIOService)
     434           0 :       return false;
     435             :   }
     436             : 
     437             :   // See if the url should be linkified.
     438           0 :   NS_ConvertUTF16toUTF8 utf8URL(txtURL);
     439           0 :   if (!ShouldLinkify(utf8URL))
     440           0 :     return false;
     441             : 
     442             :   // it would be faster if we could just check to see if there is a protocol
     443             :   // handler for the url and return instead of actually trying to create a url...
     444           0 :   rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
     445             : 
     446             :   // Real work
     447           0 :   if (NS_SUCCEEDED(rv) && uri)
     448             :   {
     449           0 :     outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
     450           0 :     switch(mode)
     451             :     {
     452             :     case RFC1738:
     453           0 :       outputHTML.AppendLiteral("rfc1738");
     454           0 :       break;
     455             :     case RFC2396E:
     456           0 :       outputHTML.AppendLiteral("rfc2396E");
     457           0 :       break;
     458             :     case freetext:
     459           0 :       outputHTML.AppendLiteral("freetext");
     460           0 :       break;
     461             :     case abbreviated:
     462           0 :       outputHTML.AppendLiteral("abbreviated");
     463           0 :       break;
     464           0 :     default: break;
     465             :     }
     466           0 :     nsAutoString escapedURL(txtURL);
     467           0 :     EscapeStr(escapedURL, true);
     468             : 
     469           0 :     outputHTML.AppendLiteral("\" href=\"");
     470           0 :     outputHTML += escapedURL;
     471           0 :     outputHTML.AppendLiteral("\">");
     472           0 :     outputHTML += desc;
     473           0 :     outputHTML.AppendLiteral("</a>");
     474           0 :     return true;
     475             :   }
     476             :   else
     477           0 :     return false;
     478             : }
     479             : 
     480           0 : NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
     481             : {
     482             :   // call FindURL on the passed in string
     483           0 :   nsAutoString outputHTML; // we'll ignore the generated output HTML
     484             : 
     485           0 :   *aStartPos = -1;
     486           0 :   *aEndPos = -1;
     487             : 
     488           0 :   FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
     489             : 
     490           0 :   return NS_OK;
     491             : }
     492             : 
     493             : bool
     494           0 : mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
     495             :      const uint32_t whathasbeendone,
     496             :      nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
     497             : {
     498             :   enum statetype {unchecked, invalid, startok, endok, success};
     499             :   static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
     500             : 
     501             :   statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
     502             :   /* I don't like this abuse of enums as index for the array,
     503             :      but I don't know a better method */
     504             : 
     505             :   // Define, which modes to check
     506             :   /* all modes but abbreviated are checked for text[pos] == ':',
     507             :      only abbreviated for '.', RFC2396E and abbreviated for '@' */
     508           0 :   for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
     509           0 :        iState = modetype(iState + 1))
     510           0 :     state[iState] = aInString[pos] == ':' ? unchecked : invalid;
     511           0 :   switch (aInString[pos])
     512             :   {
     513             :   case '@':
     514           0 :     state[RFC2396E] = unchecked;
     515             :     MOZ_FALLTHROUGH;
     516             :   case '.':
     517           0 :     state[abbreviated] = unchecked;
     518           0 :     break;
     519             :   case ':':
     520           0 :     state[abbreviated] = invalid;
     521           0 :     break;
     522             :   default:
     523           0 :     break;
     524             :   }
     525             : 
     526             :   // Test, first successful mode wins, sequence defined by |ranking|
     527           0 :   int32_t iCheck = 0;  // the currently tested modetype
     528           0 :   modetype check = ranking[iCheck];
     529           0 :   for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
     530             :        iCheck++)
     531             :     /* check state from last run.
     532             :        If this is the first, check this one, which isn't = success yet */
     533             :   {
     534           0 :     check = ranking[iCheck];
     535             : 
     536             :     uint32_t start, end;
     537             : 
     538           0 :     if (state[check] == unchecked)
     539           0 :       if (FindURLStart(aInString, aInLength, pos, check, start))
     540           0 :         state[check] = startok;
     541             : 
     542           0 :     if (state[check] == startok)
     543           0 :       if (FindURLEnd(aInString, aInLength, pos, check, start, end))
     544           0 :         state[check] = endok;
     545             : 
     546           0 :     if (state[check] == endok)
     547             :     {
     548           0 :       nsAutoString txtURL, desc;
     549             :       int32_t resultReplaceBefore, resultReplaceAfter;
     550             : 
     551             :       CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
     552             :                              txtURL, desc,
     553           0 :                              resultReplaceBefore, resultReplaceAfter);
     554             : 
     555           0 :       if (aInString[pos] != ':')
     556             :       {
     557           0 :         nsAutoString temp = txtURL;
     558           0 :         txtURL.SetLength(0);
     559           0 :         CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
     560             :       }
     561             : 
     562           0 :       if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
     563             :                                                      outputHTML))
     564             :       {
     565           0 :         replaceBefore = resultReplaceBefore;
     566           0 :         replaceAfter = resultReplaceAfter;
     567           0 :         state[check] = success;
     568             :       }
     569             :     } // if
     570             :   } // for
     571           0 :   return state[check] == success;
     572             : }
     573             : 
     574             : bool
     575           0 : mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
     576             :     int32_t aInLength, const char16_t* rep, int32_t aRepLen,
     577             :     LIMTYPE before, LIMTYPE after)
     578             : {
     579             : 
     580             :   // this little method gets called a LOT. I found we were spending a
     581             :   // lot of time just calculating the length of the variable "rep"
     582             :   // over and over again every time we called it. So we're now passing
     583             :   // an integer in here.
     584           0 :   int32_t textLen = aInLength;
     585             : 
     586           0 :   if
     587             :     (
     588           0 :       ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
     589           0 :         && textLen < aRepLen) ||
     590           0 :       ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
     591           0 :         && textLen < aRepLen + 1) ||
     592           0 :       (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
     593           0 :         && textLen < aRepLen + 2)
     594             :     )
     595           0 :     return false;
     596             : 
     597           0 :   char16_t text0 = aInString[0];
     598           0 :   char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
     599             : 
     600           0 :   if
     601             :     (
     602             :       (before == LT_ALPHA
     603           0 :         && !nsCRT::IsAsciiAlpha(text0)) ||
     604             :       (before == LT_DIGIT
     605           0 :         && !nsCRT::IsAsciiDigit(text0)) ||
     606             :       (before == LT_DELIMITER
     607           0 :         &&
     608             :         (
     609           0 :           nsCRT::IsAsciiAlpha(text0) ||
     610           0 :           nsCRT::IsAsciiDigit(text0) ||
     611           0 :           text0 == *rep
     612           0 :         )) ||
     613             :       (after == LT_ALPHA
     614           0 :         && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
     615             :       (after == LT_DIGIT
     616           0 :         && !nsCRT::IsAsciiDigit(textAfterPos)) ||
     617             :       (after == LT_DELIMITER
     618           0 :         &&
     619             :         (
     620           0 :           nsCRT::IsAsciiAlpha(textAfterPos) ||
     621           0 :           nsCRT::IsAsciiDigit(textAfterPos) ||
     622           0 :           textAfterPos == *rep
     623           0 :         )) ||
     624           0 :         !Substring(Substring(aInString, aInString+aInLength),
     625           0 :                    (before == LT_IGNORE ? 0 : 1),
     626           0 :                    aRepLen).Equals(Substring(rep, rep+aRepLen),
     627           0 :                                    nsCaseInsensitiveStringComparator())
     628             :     )
     629           0 :     return false;
     630             : 
     631           0 :   return true;
     632             : }
     633             : 
     634             : uint32_t
     635           0 : mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
     636             :      const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
     637             : {
     638           0 :   uint32_t result = 0;
     639             : 
     640           0 :   for (int32_t i = 0; i < aInStringLength; i++)
     641             :   {
     642           0 :     const char16_t * indexIntoString = &aInString[i];
     643           0 :     if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
     644           0 :       result++;
     645             :   }
     646           0 :   return result;
     647             : }
     648             : 
     649             : 
     650             : // NOTE: the converted html for the phrase is appended to aOutString
     651             : // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
     652             : bool
     653           0 : mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
     654             :      const char16_t* tagTXT, int32_t aTagTXTLen,
     655             :      const char* tagHTML, const char* attributeHTML,
     656             :      nsString& aOutString, uint32_t& openTags)
     657             : {
     658             :   /* We're searching for the following pattern:
     659             :      LT_DELIMITER - "*" - ALPHA -
     660             :      [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
     661             :      <strong> is only inserted, if existence of a pair could be verified
     662             :      We use the first opening/closing tag, if we can choose */
     663             : 
     664           0 :   const char16_t * newOffset = aInString;
     665           0 :   int32_t newLength = aInStringLength;
     666           0 :   if (!col0) // skip the first element?
     667             :   {
     668           0 :     newOffset = &aInString[1];
     669           0 :     newLength = aInStringLength - 1;
     670             :   }
     671             : 
     672             :   // opening tag
     673           0 :   if
     674             :     (
     675           0 :       ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
     676             :            (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
     677           0 :         && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
     678             :               LT_ALPHA, LT_DELIMITER)  // remaining closing tags
     679           0 :               > openTags
     680             :     )
     681             :   {
     682           0 :     openTags++;
     683           0 :     aOutString.Append('<');
     684           0 :     aOutString.AppendASCII(tagHTML);
     685           0 :     aOutString.Append(char16_t(' '));
     686           0 :     aOutString.AppendASCII(attributeHTML);
     687           0 :     aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
     688           0 :     aOutString.Append(tagTXT);
     689           0 :     aOutString.AppendLiteral("</span>");
     690           0 :     return true;
     691             :   }
     692             : 
     693             :   // closing tag
     694           0 :   else if (openTags > 0
     695           0 :        && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
     696             :   {
     697           0 :     openTags--;
     698           0 :     aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
     699           0 :     aOutString.Append(tagTXT);
     700           0 :     aOutString.AppendLiteral("</span></");
     701           0 :     aOutString.AppendASCII(tagHTML);
     702           0 :     aOutString.Append(char16_t('>'));
     703           0 :     return true;
     704             :   }
     705             : 
     706           0 :   return false;
     707             : }
     708             : 
     709             : 
     710             : bool
     711           0 : mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
     712             :          const char* tagTXT, const char* imageName,
     713             :          nsString& outputHTML, int32_t& glyphTextLen)
     714             : {
     715           0 :   if ( !aInString || !tagTXT || !imageName )
     716           0 :       return false;
     717             : 
     718           0 :   int32_t tagLen = strlen(tagTXT);
     719             : 
     720           0 :   uint32_t delim = (col0 ? 0 : 1) + tagLen;
     721             : 
     722           0 :   if
     723             :     (
     724           0 :       (col0 || IsSpace(aInString[0]))
     725           0 :         &&
     726             :         (
     727           0 :           aLength <= int32_t(delim) ||
     728           0 :           IsSpace(aInString[delim]) ||
     729           0 :           (aLength > int32_t(delim + 1)
     730           0 :             &&
     731             :             (
     732           0 :               aInString[delim] == '.' ||
     733           0 :               aInString[delim] == ',' ||
     734           0 :               aInString[delim] == ';' ||
     735           0 :               aInString[delim] == '8' ||
     736           0 :               aInString[delim] == '>' ||
     737           0 :               aInString[delim] == '!' ||
     738           0 :               aInString[delim] == '?'
     739             :             )
     740           0 :             && IsSpace(aInString[delim + 1]))
     741             :         )
     742           0 :         && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
     743             :                               col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
     744             :                 // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
     745             :     )
     746             :   {
     747           0 :     if (!col0)
     748             :     {
     749           0 :       outputHTML.Truncate();
     750           0 :       outputHTML.Append(char16_t(' '));
     751             :     }
     752             : 
     753           0 :     outputHTML.AppendLiteral("<span class=\""); // <span class="
     754           0 :     AppendASCIItoUTF16(imageName, outputHTML);  // e.g. smiley-frown
     755           0 :     outputHTML.AppendLiteral("\" title=\"");    // " title="
     756           0 :     AppendASCIItoUTF16(tagTXT, outputHTML);     // smiley tooltip
     757           0 :     outputHTML.AppendLiteral("\"><span>");      // "><span>
     758           0 :     AppendASCIItoUTF16(tagTXT, outputHTML);     // original text
     759           0 :     outputHTML.AppendLiteral("</span></span>"); // </span></span>
     760           0 :     glyphTextLen = (col0 ? 0 : 1) + tagLen;
     761           0 :     return true;
     762             :   }
     763             : 
     764           0 :   return false;
     765             : }
     766             : 
     767             : // the glyph is appended to aOutputString instead of the original string...
     768             : bool
     769           0 : mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
     770             :          nsString& aOutputString, int32_t& glyphTextLen)
     771             : {
     772           0 :   char16_t text0 = aInString[0];
     773           0 :   char16_t text1 = aInString[1];
     774           0 :   char16_t firstChar = (col0 ? text0 : text1);
     775             : 
     776             :   // temporary variable used to store the glyph html text
     777           0 :   nsAutoString outputHTML;
     778             :   bool bTestSmilie;
     779           0 :   bool bArg = false;
     780             :   int i;
     781             : 
     782             :   // refactor some of this mess to avoid code duplication and speed execution a bit
     783             :   // there are two cases that need to be tried one after another. To avoid a lot of
     784             :   // duplicate code, rolling into a loop
     785             : 
     786           0 :   i = 0;
     787           0 :   while ( i < 2 )
     788             :   {
     789           0 :     bTestSmilie = false;
     790           0 :     if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
     791             :     {
     792             :         // first test passed
     793             : 
     794           0 :         bTestSmilie = true;
     795           0 :         bArg = col0;
     796             :     }
     797           0 :     if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
     798             :     {
     799             :         // second test passed
     800             : 
     801           0 :         bTestSmilie = true;
     802           0 :         bArg = false;
     803             :     }
     804           0 :     if ( bTestSmilie && (
     805           0 :           SmilyHit(aInString, aInLength, bArg,
     806             :                    ":-)",
     807             :                    "moz-smiley-s1", // smile
     808           0 :                    outputHTML, glyphTextLen) ||
     809             : 
     810           0 :           SmilyHit(aInString, aInLength, bArg,
     811             :                    ":)",
     812             :                    "moz-smiley-s1", // smile
     813           0 :                    outputHTML, glyphTextLen) ||
     814             : 
     815           0 :           SmilyHit(aInString, aInLength, bArg,
     816             :                    ":-D",
     817             :                    "moz-smiley-s5", // laughing
     818           0 :                    outputHTML, glyphTextLen) ||
     819             : 
     820           0 :           SmilyHit(aInString, aInLength, bArg,
     821             :                    ":-(",
     822             :                    "moz-smiley-s2", // frown
     823           0 :                    outputHTML, glyphTextLen) ||
     824             : 
     825           0 :           SmilyHit(aInString, aInLength, bArg,
     826             :                    ":(",
     827             :                    "moz-smiley-s2", // frown
     828           0 :                    outputHTML, glyphTextLen) ||
     829             : 
     830           0 :           SmilyHit(aInString, aInLength, bArg,
     831             :                    ":-[",
     832             :                    "moz-smiley-s6", // embarassed
     833           0 :                    outputHTML, glyphTextLen) ||
     834             : 
     835           0 :           SmilyHit(aInString, aInLength, bArg,
     836             :                    ";-)",
     837             :                    "moz-smiley-s3", // wink
     838           0 :                    outputHTML, glyphTextLen) ||
     839             : 
     840           0 :           SmilyHit(aInString, aInLength, col0,
     841             :                    ";)",
     842             :                    "moz-smiley-s3", // wink
     843           0 :                    outputHTML, glyphTextLen) ||
     844             : 
     845           0 :           SmilyHit(aInString, aInLength, bArg,
     846             :                    ":-\\",
     847             :                    "moz-smiley-s7", // undecided
     848           0 :                    outputHTML, glyphTextLen) ||
     849             : 
     850           0 :           SmilyHit(aInString, aInLength, bArg,
     851             :                    ":-P",
     852             :                    "moz-smiley-s4", // tongue
     853           0 :                    outputHTML, glyphTextLen) ||
     854             : 
     855           0 :           SmilyHit(aInString, aInLength, bArg,
     856             :                    ";-P",
     857             :                    "moz-smiley-s4", // tongue
     858           0 :                    outputHTML, glyphTextLen) ||
     859             : 
     860           0 :           SmilyHit(aInString, aInLength, bArg,
     861             :                    "=-O",
     862             :                    "moz-smiley-s8", // surprise
     863           0 :                    outputHTML, glyphTextLen) ||
     864             : 
     865           0 :           SmilyHit(aInString, aInLength, bArg,
     866             :                    ":-*",
     867             :                    "moz-smiley-s9", // kiss
     868           0 :                    outputHTML, glyphTextLen) ||
     869             : 
     870           0 :           SmilyHit(aInString, aInLength, bArg,
     871             :                    ">:o",
     872             :                    "moz-smiley-s10", // yell
     873           0 :                    outputHTML, glyphTextLen) ||
     874             : 
     875           0 :           SmilyHit(aInString, aInLength, bArg,
     876             :                    ">:-o",
     877             :                    "moz-smiley-s10", // yell
     878           0 :                    outputHTML, glyphTextLen) ||
     879             : 
     880           0 :           SmilyHit(aInString, aInLength, bArg,
     881             :                    "8-)",
     882             :                    "moz-smiley-s11", // cool
     883           0 :                    outputHTML, glyphTextLen) ||
     884             : 
     885           0 :           SmilyHit(aInString, aInLength, bArg,
     886             :                    ":-$",
     887             :                    "moz-smiley-s12", // money
     888           0 :                    outputHTML, glyphTextLen) ||
     889             : 
     890           0 :           SmilyHit(aInString, aInLength, bArg,
     891             :                    ":-!",
     892             :                    "moz-smiley-s13", // foot
     893           0 :                    outputHTML, glyphTextLen) ||
     894             : 
     895           0 :           SmilyHit(aInString, aInLength, bArg,
     896             :                    "O:-)",
     897             :                    "moz-smiley-s14", // innocent
     898           0 :                    outputHTML, glyphTextLen) ||
     899             : 
     900           0 :           SmilyHit(aInString, aInLength, bArg,
     901             :                    ":'(",
     902             :                    "moz-smiley-s15", // cry
     903           0 :                    outputHTML, glyphTextLen) ||
     904             : 
     905           0 :           SmilyHit(aInString, aInLength, bArg,
     906             :                    ":-X",
     907             :                    "moz-smiley-s16", // sealed
     908             :                    outputHTML, glyphTextLen)
     909             :         )
     910             :     )
     911             :     {
     912           0 :         aOutputString.Append(outputHTML);
     913           0 :         return true;
     914             :     }
     915           0 :     i++;
     916             :   }
     917           0 :   if (text0 == '\f')
     918             :   {
     919           0 :       aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
     920           0 :       glyphTextLen = 1;
     921           0 :       return true;
     922             :   }
     923           0 :   if (text0 == '+' || text1 == '+')
     924             :   {
     925           0 :     if (ItMatchesDelimited(aInString, aInLength,
     926             :                            u" +/-", 4,
     927             :                            LT_IGNORE, LT_IGNORE))
     928             :     {
     929           0 :       aOutputString.AppendLiteral(" &plusmn;");
     930           0 :       glyphTextLen = 4;
     931           0 :       return true;
     932             :     }
     933           0 :     if (col0 && ItMatchesDelimited(aInString, aInLength,
     934             :                                    u"+/-", 3,
     935             :                                    LT_IGNORE, LT_IGNORE))
     936             :     {
     937           0 :       aOutputString.AppendLiteral("&plusmn;");
     938           0 :       glyphTextLen = 3;
     939           0 :       return true;
     940             :     }
     941             :   }
     942             : 
     943             :   // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
     944             :   // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
     945           0 :   if
     946             :     (
     947             :       text1 == '^'
     948           0 :       &&
     949             :       (
     950           0 :         nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
     951           0 :         text0 == ')' || text0 == ']' || text0 == '}'
     952             :       )
     953           0 :       &&
     954             :       (
     955           0 :         (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
     956           0 :         (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
     957             :       )
     958             :     )
     959             :   {
     960             :     // Find first non-digit
     961           0 :     int32_t delimPos = 3;  // skip "^" and first digit (or '-')
     962           0 :     for (; delimPos < aInLength
     963           0 :            &&
     964             :            (
     965           0 :              nsCRT::IsAsciiDigit(aInString[delimPos]) ||
     966           0 :              (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
     967           0 :                nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
     968             :            );
     969             :          delimPos++)
     970             :       ;
     971             : 
     972           0 :     if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
     973             :     {
     974           0 :       return false;
     975             :     }
     976             : 
     977           0 :     outputHTML.Truncate();
     978           0 :     outputHTML += text0;
     979             :     outputHTML.AppendLiteral(
     980             :       "<sup class=\"moz-txt-sup\">"
     981             :       "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
     982           0 :       "^</span>");
     983             : 
     984           0 :     aOutputString.Append(outputHTML);
     985           0 :     aOutputString.Append(&aInString[2], delimPos - 2);
     986           0 :     aOutputString.AppendLiteral("</sup>");
     987             : 
     988           0 :     glyphTextLen = delimPos /* - 1 + 1 */ ;
     989           0 :     return true;
     990             :   }
     991             :   /*
     992             :    The following strings are not substituted:
     993             :    |TXT   |HTML     |Reason
     994             :    +------+---------+----------
     995             :     ->     &larr;    Bug #454
     996             :     =>     &lArr;    dito
     997             :     <-     &rarr;    dito
     998             :     <=     &rArr;    dito
     999             :     (tm)   &trade;   dito
    1000             :     1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
    1001             :     3/4    &frac34;  dito
    1002             :     1/2    &frac12;  similar
    1003             :   */
    1004           0 :   return false;
    1005             : }
    1006             : 
    1007             : /***************************************************************************
    1008             :   Library-internal Interface
    1009             : ****************************************************************************/
    1010             : 
    1011           0 : mozTXTToHTMLConv::mozTXTToHTMLConv()  // Constructor: the body is empty, no explicit set-up is done here.
    1012             : {
    1013           0 : }
    1014             : 
    1015           0 : mozTXTToHTMLConv::~mozTXTToHTMLConv()  // Destructor: the body is empty, no explicit clean-up is done here.
    1016             : {
    1017           0 : }
    1018             : 
    1019           0 : NS_IMPL_ISUPPORTS(mozTXTToHTMLConv,  // the class, followed by the four interfaces handed to the macro
    1020             :                   mozITXTToHTMLConv,
    1021             :                   nsIStreamConverter,
    1022             :                   nsIStreamListener,
    1023             :                   nsIRequestObserver)  // NOTE(review): presumably expands to the XPCOM AddRef/Release/QueryInterface boilerplate - confirm in nsISupportsImpl.h
    1024             : 
    1025             : int32_t
    1026           0 : mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
    1027             :                                     uint32_t& logLineStart)
    1028             : {
    1029           0 :   int32_t result = 0;
    1030           0 :   int32_t lineLength = NS_strlen(line);
    1031             : 
    1032           0 :   bool moreCites = true;
    1033           0 :   while (moreCites)
    1034             :   {
    1035             :     /* E.g. the following lines count as quote:
    1036             : 
    1037             :        > text
    1038             :        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1039             :        >text
    1040             :        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1041             :            > text
    1042             :        ] text
    1043             :        USER> text
    1044             :        USER] text
    1045             :        //#endif
    1046             : 
    1047             :        logLineStart is the position of "t" in this example
    1048             :     */
    1049           0 :     uint32_t i = logLineStart;
    1050             : 
    1051             : #ifdef QUOTE_RECOGNITION_AGGRESSIVE
    1052             :     for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
    1053             :       ;
    1054             :     for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
    1055             :                                    && nsCRT::IsUpper(line[i])   ; i++)
    1056             :       ;
    1057             :     if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
    1058             : #else
    1059           0 :     if (int32_t(i) < lineLength && line[i] == '>')
    1060             : #endif
    1061             :     {
    1062           0 :       i++;
    1063           0 :       if (int32_t(i) < lineLength && line[i] == ' ')
    1064           0 :         i++;
    1065             :       // sendmail/mbox
    1066             :       // Placed here for performance increase
    1067           0 :       const char16_t * indexString = &line[logLineStart];
    1068             :            // here, |logLineStart < lineLength| is always true
    1069           0 :       uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
    1070           0 :       if (Substring(indexString,
    1071           0 :                     indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
    1072           0 :                                                   nsCaseInsensitiveStringComparator()))
    1073             :         //XXX RFC2646
    1074           0 :         moreCites = false;
    1075             :       else
    1076             :       {
    1077           0 :         result++;
    1078           0 :         logLineStart = i;
    1079           0 :       }
    1080             :     }
    1081             :     else
    1082           0 :       moreCites = false;
    1083             :   }
    1084             : 
    1085           0 :   return result;
    1086             : }
    1087             : 
    1088             : void
    1089           0 : mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
    1090             : {
    1091           0 :   bool doURLs = 0 != (whattodo & kURLs);
    1092           0 :   bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
    1093           0 :   bool doStructPhrase = 0 != (whattodo & kStructPhrase);
    1094             : 
    1095           0 :   uint32_t structPhrase_strong = 0;  // Number of currently open tags
    1096           0 :   uint32_t structPhrase_underline = 0;
    1097           0 :   uint32_t structPhrase_italic = 0;
    1098           0 :   uint32_t structPhrase_code = 0;
    1099             : 
    1100           0 :   nsAutoString outputHTML;  // moved here for performance increase
    1101             : 
    1102           0 :   for(uint32_t i = 0; int32_t(i) < aInStringLength;)
    1103             :   {
    1104           0 :     if (doGlyphSubstitution)
    1105             :     {
    1106             :       int32_t glyphTextLen;
    1107           0 :       if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
    1108             :       {
    1109           0 :         i += glyphTextLen;
    1110           0 :         continue;
    1111             :       }
    1112             :     }
    1113             : 
    1114           0 :     if (doStructPhrase)
    1115             :     {
    1116           0 :       const char16_t * newOffset = aInString;
    1117           0 :       int32_t newLength = aInStringLength;
    1118           0 :       if (i > 0 ) // skip the first element?
    1119             :       {
    1120           0 :         newOffset = &aInString[i-1];
    1121           0 :         newLength = aInStringLength - i + 1;
    1122             :       }
    1123             : 
    1124           0 :       switch (aInString[i]) // Performance increase
    1125             :       {
    1126             :       case '*':
    1127           0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1128             :                             u"*", 1,
    1129             :                             "b", "class=\"moz-txt-star\"",
    1130             :                             aOutString, structPhrase_strong))
    1131             :         {
    1132           0 :           i++;
    1133           0 :           continue;
    1134             :         }
    1135           0 :         break;
    1136             :       case '/':
    1137           0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1138             :                             u"/", 1,
    1139             :                             "i", "class=\"moz-txt-slash\"",
    1140             :                             aOutString, structPhrase_italic))
    1141             :         {
    1142           0 :           i++;
    1143           0 :           continue;
    1144             :         }
    1145           0 :         break;
    1146             :       case '_':
    1147           0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1148             :                             u"_", 1,
    1149             :                             "span" /* <u> is deprecated */,
    1150             :                             "class=\"moz-txt-underscore\"",
    1151             :                             aOutString, structPhrase_underline))
    1152             :         {
    1153           0 :           i++;
    1154           0 :           continue;
    1155             :         }
    1156           0 :         break;
    1157             :       case '|':
    1158           0 :         if (StructPhraseHit(newOffset, newLength, i == 0,
    1159             :                             u"|", 1,
    1160             :                             "code", "class=\"moz-txt-verticalline\"",
    1161             :                             aOutString, structPhrase_code))
    1162             :         {
    1163           0 :           i++;
    1164           0 :           continue;
    1165             :         }
    1166           0 :         break;
    1167             :       }
    1168             :     }
    1169             : 
    1170           0 :     if (doURLs)
    1171             :     {
    1172           0 :       switch (aInString[i])
    1173             :       {
    1174             :       case ':':
    1175             :       case '@':
    1176             :       case '.':
    1177           0 :         if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
    1178             :         {
    1179             :           int32_t replaceBefore;
    1180             :           int32_t replaceAfter;
    1181           0 :           if (FindURL(aInString, aInStringLength, i, whattodo,
    1182             :                       outputHTML, replaceBefore, replaceAfter)
    1183           0 :                   && structPhrase_strong + structPhrase_italic +
    1184           0 :                        structPhrase_underline + structPhrase_code == 0
    1185             :                        /* workaround for bug #19445 */ )
    1186             :           {
    1187           0 :             aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
    1188           0 :             aOutString += outputHTML;
    1189           0 :             i += replaceAfter + 1;
    1190           0 :             continue;
    1191             :           }
    1192             :         }
    1193           0 :         break;
    1194             :       } //switch
    1195             :     }
    1196             : 
    1197           0 :     switch (aInString[i])
    1198             :     {
    1199             :     // Special symbols
    1200             :     case '<':
    1201             :     case '>':
    1202             :     case '&':
    1203           0 :       EscapeChar(aInString[i], aOutString, false);
    1204           0 :       i++;
    1205           0 :       break;
    1206             :     // Normal characters
    1207             :     default:
    1208           0 :       aOutString += aInString[i];
    1209           0 :       i++;
    1210           0 :       break;
    1211             :     }
    1212             :   }
    1213           0 : }
    1214             : 
    1215             : void
    1216           0 : mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
    1217             : {
    1218             :   // Appends to aOutString: tags are copied verbatim, text between tags goes through ScanTXT.
    1219             :   // The input length and raw buffer are fetched once here rather than inside the loop.
    1220           0 :   int32_t lengthOfInString = aInString.Length();
    1221           0 :   const char16_t * uniBuffer = aInString.get();
    1222             : 
    1223             : #ifdef DEBUG_BenB_Perf
    1224             :   PRTime parsing_start = PR_IntervalNow();
    1225             : #endif
    1226             : 
    1227             :   // Walk the input, alternating between tags and the text runs between them.
    1228             :   // Tags ("<[...]>") are skipped, as is the whole content of a link tag ("<a [...]</a>"),
    1229             :   // comment ("<!--[...]-->"), style tag, script tag or head tag.
    1230             :   // Everything else (text between tags) is unescaped and passed to ScanTXT.
    1231           0 :   nsAutoCString canFollow(" \f\n\r\t>"); // characters accepted directly after a tag name
    1232           0 :   for (int32_t i = 0; i < lengthOfInString;)
    1233             :   {
    1234           0 :     if (aInString[i] == '<')  // html tag
    1235             :     {
    1236           0 :       int32_t start = i; // first character of the markup that is copied through below
    1237           0 :       if (i + 2 < lengthOfInString &&
    1238           0 :           nsCRT::ToLower(aInString[i + 1]) == 'a' &&
    1239           0 :           canFollow.FindChar(aInString[i + 2]) != kNotFound)
    1240             :            // Link tag: skip to just past the closing </a>.
    1241             :            // The character after the 'a' must be in canFollow, so that e.g. "abbr" is not matched.
    1242             :       {
    1243           0 :         i = aInString.Find("</a>", true, i); // NOTE(review): 'true' is presumably ignore-case - confirm
    1244           0 :         if (i == kNotFound)
    1245           0 :           i = lengthOfInString; // unterminated: skip the rest of the input
    1246             :         else
    1247           0 :           i += 4; // length of "</a>"
    1248             :       }
    1249           0 :       else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
    1250             :           // Commented-out code: skip to just past the closing "-->".
    1251             :       {
    1252           0 :         i = aInString.Find("-->", false, i); // passes 'false' where the other tag searches pass 'true'
    1253           0 :         if (i == kNotFound)
    1254           0 :           i = lengthOfInString;
    1255             :         else
    1256           0 :           i += 3; // length of "-->"
    1257             :       }
    1258           0 :       else if (i + 6 < lengthOfInString &&
    1259           0 :       Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
    1260           0 :                canFollow.FindChar(aInString[i + 6]) != kNotFound)
    1261             :            // Style tag: skip to just past the closing </style>.
    1262             :       {
    1263           0 :         i = aInString.Find("</style>", true, i);
    1264           0 :         if (i == kNotFound)
    1265           0 :           i = lengthOfInString;
    1266             :         else
    1267           0 :           i += 8; // length of "</style>"
    1268             :       }
    1269           0 :       else if (i + 7 < lengthOfInString &&
    1270           0 :                Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
    1271           0 :                canFollow.FindChar(aInString[i + 7]) != kNotFound)
    1272             :            // Script tag: skip to just past the closing </script>.
    1273             :       {
    1274           0 :         i = aInString.Find("</script>", true, i);
    1275           0 :         if (i == kNotFound)
    1276           0 :           i = lengthOfInString;
    1277             :         else
    1278           0 :           i += 9; // length of "</script>"
    1279             :       }
    1280           0 :       else if (i + 5 < lengthOfInString &&
    1281           0 :                Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
    1282           0 :                canFollow.FindChar(aInString[i + 5]) != kNotFound)
    1283             :            // Head tag: skip to just past the closing </head>.
    1284             :            // The canFollow check keeps <header> from being matched.
    1285             :       {
    1286           0 :         i = aInString.Find("</head>", true, i);
    1287           0 :         if (i == kNotFound)
    1288           0 :           i = lengthOfInString;
    1289             :         else
    1290           0 :           i += 7; // length of "</head>"
    1291             :       }
    1292             :       else  // Any other tag: skip only the tag itself (name, attributes etc.)
    1293             :       {
    1294           0 :         i = aInString.FindChar('>', i);
    1295           0 :         if (i == kNotFound)
    1296           0 :           i = lengthOfInString;
    1297             :         else
    1298           0 :           i++; // step past the '>'
    1299             :       }
    1300           0 :       aOutString.Append(&uniBuffer[start], i - start); // copy the skipped markup through unchanged
    1301             :     }
    1302             :     else
    1303             :     {
    1304           0 :       uint32_t start = uint32_t(i); // start of the text run
    1305           0 :       i = aInString.FindChar('<', i);
    1306           0 :       if (i == kNotFound)
    1307           0 :         i = lengthOfInString; // no further tag: the text run extends to the end of the input
    1308             : 
    1309           0 :       nsString tempString;
    1310           0 :       tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); // reserve run length scaled by growthRate
    1311           0 :       UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
    1312           0 :       ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
    1313             :     }
    1314             :   }
    1315             : 
    1316             : #ifdef DEBUG_BenB_Perf
    1317             :   printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
    1318             : #endif
    1319           0 : }
    1320             : 
    1321             : /****************************************************************************
    1322             :   XPCOM Interface
    1323             : *****************************************************************************/
    1324             : 
    1325             : NS_IMETHODIMP
    1326           0 : mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
    1327             :                           const char *aFromType,
    1328             :                           const char *aToType,
    1329             :                           nsISupports *aCtxt, nsIInputStream **_retval)
    1330             : {
    1331           0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: ignores all arguments and leaves *_retval untouched
    1332             : }
    1333             : 
    1334             : NS_IMETHODIMP
    1335           0 : mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
    1336             :                                    const char *aToType,
    1337             :                                    nsIStreamListener *aListener, nsISupports *aCtxt) {
    1338           0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: ignores all arguments
    1339             : }
    1340             : 
    1341             : NS_IMETHODIMP
    1342           0 : mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
    1343             :                                  nsIInputStream *inStr, uint64_t sourceOffset,
    1344             :                                  uint32_t count)
    1345             : {
    1346           0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: no data is read from inStr
    1347             : }
    1348             : 
    1349             : NS_IMETHODIMP
    1350           0 : mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
    1351             : {
    1352           0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: ignores both arguments
    1353             : }
    1354             : 
    1355             : NS_IMETHODIMP
    1356           0 : mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
    1357             :                                 nsresult aStatus)
    1358             : {
    1359           0 :   return NS_ERROR_NOT_IMPLEMENTED; // stub: aStatus is not inspected
    1360             : }
    1361             : 
    1362             : NS_IMETHODIMP
    1363           0 : mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
    1364             :                                 uint32_t *_retval)
    1365             : {
    1366           0 :    if (!logLineStart || !_retval || !line) // all three pointers are required
    1367           0 :      return NS_ERROR_NULL_POINTER;
    1368           0 :    *_retval = CiteLevelTXT(line, *logLineStart); // forwards to the two-argument overload; its result becomes *_retval
    1369           0 :    return NS_OK;
    1370             : }
    1371             : 
    1372             : NS_IMETHODIMP
    1373           0 : mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
    1374             :                            char16_t **_retval)
    1375             : {
    1376           0 :   NS_ENSURE_ARG(text); // NOTE(review): _retval is dereferenced below without a matching check - confirm callers
    1377             : 
    1378             :   // FIX ME!!! (original marker; it does not say what needs fixing)
    1379           0 :   nsString outString;
    1380           0 :   int32_t inLength = NS_strlen(text);
    1381             :   // Applies to the SetCapacity call further down: setting a large capacity
    1382             :   // up front saves time when appending characters to the output string, because
    1383             :   // the characters already in it need not be reallocated and re-copied.
    1384           0 :   NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
    1385           0 :   if (inLength == 0) { // empty input: hand back a duplicate of it and skip the scan
    1386           0 :     *_retval = NS_strdup(text); // NOTE(review): result is not checked before returning NS_OK
    1387           0 :     return NS_OK;
    1388             :   }
    1389             : 
    1390           0 :   outString.SetCapacity(uint32_t(inLength * growthRate));
    1391           0 :   ScanTXT(text, inLength, whattodo, outString); // length-taking overload appends the result to outString
    1392             : 
    1393           0 :   *_retval = ToNewUnicode(outString);
    1394           0 :   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; // a null copy is reported as out-of-memory
    1395             : }
    1396             : 
    1397             : NS_IMETHODIMP
    1398           0 : mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
    1399             :                             char16_t **_retval)
    1400             : {
    1401           0 :   NS_ENSURE_ARG(text); // NOTE(review): _retval is dereferenced below without a matching check - confirm callers
    1402             : 
    1403             :   // FIX ME!!! (original marker; see the extra-copy remark on inString below)
    1404           0 :   nsString outString;
    1405           0 :   nsString inString (text); // look at this nasty extra copy of the entire input buffer!
    1406           0 :   outString.SetCapacity(uint32_t(inString.Length() * growthRate)); // reserve input length scaled by growthRate
    1407             : 
    1408           0 :   ScanHTML(inString, whattodo, outString); // nsString overload does the actual scan
    1409           0 :   *_retval = ToNewUnicode(outString);
    1410           0 :   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; // a null copy is reported as out-of-memory
    1411             : }
    1412             : 
    1413             : nsresult
    1414           0 : MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
    1415             : {
    1416           0 :     NS_PRECONDITION(aConv != nullptr, "null ptr");
    1417           0 :     if (!aConv) // explicit runtime check for a null out-pointer, in addition to the precondition above
    1418           0 :       return NS_ERROR_NULL_POINTER;
    1419             : 
    1420           0 :     *aConv = new mozTXTToHTMLConv();
    1421           0 :     if (!*aConv) // NOTE(review): never taken if operator new cannot return null in this build - confirm
    1422           0 :       return NS_ERROR_OUT_OF_MEMORY;
    1423             : 
    1424           0 :     NS_ADDREF(*aConv); // presumably the reference handed to the caller - confirm ownership convention
    1425             :     //    return (*aConv)->Init();  (disabled: no Init() call is made)
    1426           0 :     return NS_OK;
    1427             : }

Generated by: LCOV version 1.13