Line data Source code
1 : /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #include "mozTXTToHTMLConv.h"
7 : #include "nsNetUtil.h"
8 : #include "nsUnicharUtils.h"
9 : #include "nsCRT.h"
10 : #include "nsIExternalProtocolHandler.h"
11 : #include "nsIIOService.h"
12 : #include "nsIURI.h"
13 :
14 : #include <algorithm>
15 :
16 : #ifdef DEBUG_BenB_Perf
17 : #include "prtime.h"
18 : #include "prinrval.h"
19 : #endif
20 :
// NOTE(review): growthRate is not referenced in the part of this file that is
// visible here; presumably a buffer growth factor used further down -- confirm.
21 : const double growthRate = 1.2;
22 :
23 : // Bug 183111, editor now replaces multiple spaces with leading
24 : // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
25 : // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
26 : // Also recognize the Japanese ideographic space 0x3000 as a space.
27 0 : static inline bool IsSpace(const char16_t aChar)
28 : {
29 0 : return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
30 : }
31 :
32 : // Escape Char will take ch, escape it and append the result to
33 : // aStringToAppendTo
34 : void
35 0 : mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
36 : bool inAttribute)
37 : {
38 0 : switch (ch)
39 : {
40 : case '<':
41 0 : aStringToAppendTo.AppendLiteral("<");
42 0 : break;
43 : case '>':
44 0 : aStringToAppendTo.AppendLiteral(">");
45 0 : break;
46 : case '&':
47 0 : aStringToAppendTo.AppendLiteral("&");
48 0 : break;
49 : case '"':
50 0 : if (inAttribute)
51 : {
52 0 : aStringToAppendTo.AppendLiteral(""");
53 0 : break;
54 : }
55 : // else fall through
56 : MOZ_FALLTHROUGH;
57 : default:
58 0 : aStringToAppendTo += ch;
59 : }
60 :
61 0 : return;
62 : }
63 :
64 : // EscapeStr takes the passed in string and
65 : // escapes it IN PLACE.
66 : void
67 0 : mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
68 : {
69 : // the replace substring routines
70 : // don't seem to work if you have a character
71 : // in the in string that is also in the replacement
72 : // string! =(
73 : //aInString.ReplaceSubstring("&", "&");
74 : //aInString.ReplaceSubstring("<", "<");
75 : //aInString.ReplaceSubstring(">", ">");
76 0 : for (uint32_t i = 0; i < aInString.Length();)
77 : {
78 0 : switch (aInString[i])
79 : {
80 : case '<':
81 0 : aInString.Cut(i, 1);
82 0 : aInString.Insert(NS_LITERAL_STRING("<"), i);
83 0 : i += 4; // skip past the integers we just added
84 0 : break;
85 : case '>':
86 0 : aInString.Cut(i, 1);
87 0 : aInString.Insert(NS_LITERAL_STRING(">"), i);
88 0 : i += 4; // skip past the integers we just added
89 0 : break;
90 : case '&':
91 0 : aInString.Cut(i, 1);
92 0 : aInString.Insert(NS_LITERAL_STRING("&"), i);
93 0 : i += 5; // skip past the integers we just added
94 0 : break;
95 : case '"':
96 0 : if (inAttribute)
97 : {
98 0 : aInString.Cut(i, 1);
99 0 : aInString.Insert(NS_LITERAL_STRING("""), i);
100 0 : i += 6;
101 0 : break;
102 : }
103 : // else fall through
104 : MOZ_FALLTHROUGH;
105 : default:
106 0 : i++;
107 : }
108 : }
109 0 : }
110 :
111 : void
112 0 : mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
113 : {
114 0 : const char16_t * subString = nullptr;
115 0 : for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
116 : {
117 0 : int32_t remainingChars = i - aStartPos;
118 0 : if (aInString[i] == '&')
119 : {
120 0 : subString = &aInString[i];
121 0 : if (!nsCRT::strncmp(subString, u"<", std::min(4, aLength - remainingChars)))
122 : {
123 0 : aOutString.Append(char16_t('<'));
124 0 : i += 4;
125 : }
126 0 : else if (!nsCRT::strncmp(subString, u">", std::min(4, aLength - remainingChars)))
127 : {
128 0 : aOutString.Append(char16_t('>'));
129 0 : i += 4;
130 : }
131 0 : else if (!nsCRT::strncmp(subString, u"&", std::min(5, aLength - remainingChars)))
132 : {
133 0 : aOutString.Append(char16_t('&'));
134 0 : i += 5;
135 : }
136 0 : else if (!nsCRT::strncmp(subString, u""", std::min(6, aLength - remainingChars)))
137 : {
138 0 : aOutString.Append(char16_t('"'));
139 0 : i += 6;
140 : }
141 : else
142 : {
143 0 : aOutString += aInString[i];
144 0 : i++;
145 : }
146 : }
147 : else
148 : {
149 0 : aOutString += aInString[i];
150 0 : i++;
151 : }
152 : }
153 0 : }
154 :
155 : void
156 0 : mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
157 : const uint32_t pos, nsString& aOutString)
158 : {
159 0 : NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
160 0 : if (int32_t(pos) >= aInLength)
161 0 : return;
162 :
163 0 : if (aInString[pos] == '@')
164 : {
165 : // only pre-pend a mailto url if the string contains a .domain in it..
166 : //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
167 0 : nsDependentString inString(aInString, aInLength);
168 0 : if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
169 : {
170 0 : aOutString.AssignLiteral("mailto:");
171 0 : aOutString += aInString;
172 : }
173 : }
174 0 : else if (aInString[pos] == '.')
175 : {
176 0 : if (ItMatchesDelimited(aInString, aInLength,
177 : u"www.", 4, LT_IGNORE, LT_IGNORE))
178 : {
179 0 : aOutString.AssignLiteral("http://");
180 0 : aOutString += aInString;
181 : }
182 0 : else if (ItMatchesDelimited(aInString,aInLength, u"ftp.", 4, LT_IGNORE, LT_IGNORE))
183 : {
184 0 : aOutString.AssignLiteral("ftp://");
185 0 : aOutString += aInString;
186 : }
187 : }
188 : }
189 :
190 : bool
191 0 : mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
192 : const uint32_t pos, const modetype check,
193 : uint32_t& start)
194 : {
195 0 : switch(check)
196 : { // no breaks, because end of blocks is never reached
197 : case RFC1738:
198 : {
199 0 : if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5))
200 : {
201 0 : start = pos + 1;
202 0 : return true;
203 : }
204 : else
205 0 : return false;
206 : }
207 : case RFC2396E:
208 : {
209 0 : nsString temp(aInString, aInLength);
210 0 : int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
211 0 : if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
212 0 : temp[uint32_t(i)] == '"'))
213 : {
214 0 : start = uint32_t(++i);
215 0 : return start < pos;
216 : }
217 : else
218 0 : return false;
219 : }
220 : case freetext:
221 : {
222 0 : int32_t i = pos - 1;
223 0 : for (; i >= 0 && (
224 0 : nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
225 0 : nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) ||
226 0 : aInString[uint32_t(i)] == '+' ||
227 0 : aInString[uint32_t(i)] == '-' ||
228 0 : aInString[uint32_t(i)] == '.'
229 : ); i--)
230 : ;
231 0 : if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]))
232 : {
233 0 : start = uint32_t(i);
234 0 : return true;
235 : }
236 : else
237 0 : return false;
238 : }
239 : case abbreviated:
240 : {
241 0 : int32_t i = pos - 1;
242 : // This disallows non-ascii-characters for email.
243 : // Currently correct, but revisit later after standards changed.
244 0 : bool isEmail = aInString[pos] == (char16_t)'@';
245 : // These chars mark the start of the URL
246 0 : for (; i >= 0
247 0 : && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
248 0 : && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
249 0 : && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
250 0 : && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
251 0 : && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
252 0 : && aInString[uint32_t(i)] != '\\'
253 0 : && !IsSpace(aInString[uint32_t(i)])
254 0 : && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
255 : ; i--)
256 : ;
257 0 : if
258 : (
259 0 : ++i >= 0 && uint32_t(i) < pos
260 0 : &&
261 : (
262 0 : nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
263 0 : nsCRT::IsAsciiDigit(aInString[uint32_t(i)])
264 : )
265 : )
266 : {
267 0 : start = uint32_t(i);
268 0 : return true;
269 : }
270 : else
271 0 : return false;
272 : }
273 : default:
274 0 : return false;
275 : } //switch
276 : }
277 :
278 : bool
279 0 : mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
280 : const modetype check, const uint32_t start, uint32_t& end)
281 : {
282 0 : switch(check)
283 : { // no breaks, because end of blocks is never reached
284 : case RFC1738:
285 : case RFC2396E:
286 : {
287 0 : nsString temp(aInString, aInStringLength);
288 :
289 0 : int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
290 0 : if (i != kNotFound && temp[uint32_t(i--)] ==
291 0 : (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
292 : {
293 0 : end = uint32_t(i);
294 0 : return end > pos;
295 : }
296 0 : return false;
297 : }
298 : case freetext:
299 : case abbreviated:
300 : {
301 0 : uint32_t i = pos + 1;
302 0 : bool isEmail = aInString[pos] == (char16_t)'@';
303 0 : bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
304 0 : bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
305 0 : for (; int32_t(i) < aInStringLength; i++)
306 : {
307 : // These chars mark the end of the URL
308 0 : if (aInString[i] == '>' || aInString[i] == '<' ||
309 0 : aInString[i] == '"' || aInString[i] == '`' ||
310 0 : aInString[i] == '}' || aInString[i] == '{' ||
311 0 : (aInString[i] == ')' && !seenOpeningParenthesis) ||
312 0 : (aInString[i] == ']' && !seenOpeningSquareBracket) ||
313 : // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
314 0 : (aInString[i] == '[' && i > 2 &&
315 0 : (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
316 0 : IsSpace(aInString[i]))
317 0 : break;
318 : // Disallow non-ascii-characters for email.
319 : // Currently correct, but revisit later after standards changed.
320 0 : if (isEmail && (
321 0 : aInString[i] == '(' || aInString[i] == '\'' ||
322 0 : !nsCRT::IsAscii(aInString[i])))
323 0 : break;
324 0 : if (aInString[i] == '(')
325 0 : seenOpeningParenthesis = true;
326 0 : if (aInString[i] == '[')
327 0 : seenOpeningSquareBracket = true;
328 : }
329 : // These chars are allowed in the middle of the URL, but not at end.
330 : // Technically they are, but are used in normal text after the URL.
331 0 : while (--i > pos && (
332 0 : aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
333 0 : aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
334 0 : aInString[i] == ':' || aInString[i] == '\''
335 : ))
336 : ;
337 0 : if (i > pos)
338 : {
339 0 : end = i;
340 0 : return true;
341 : }
342 0 : return false;
343 : }
344 : default:
345 0 : return false;
346 : } //switch
347 : }
348 :
349 : void
350 0 : mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
351 : const uint32_t pos, const uint32_t whathasbeendone,
352 : const modetype check, const uint32_t start, const uint32_t end,
353 : nsString& txtURL, nsString& desc,
354 : int32_t& replaceBefore, int32_t& replaceAfter)
355 : {
356 0 : uint32_t descstart = start;
357 0 : switch(check)
358 : {
359 : case RFC1738:
360 : {
361 0 : descstart = start - 5;
362 0 : desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">"
363 0 : replaceAfter = end - pos + 1;
364 0 : } break;
365 : case RFC2396E:
366 : {
367 0 : descstart = start - 1;
368 0 : desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
369 0 : replaceAfter = end - pos + 1;
370 0 : } break;
371 : case freetext:
372 : case abbreviated:
373 : {
374 0 : descstart = start;
375 0 : desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
376 0 : replaceAfter = end - pos;
377 0 : } break;
378 0 : default: break;
379 : } //switch
380 :
381 0 : EscapeStr(desc, false);
382 :
383 0 : txtURL.Append(&aInString[start], end - start + 1);
384 0 : txtURL.StripWhitespace();
385 :
386 : // FIX ME
387 0 : nsAutoString temp2;
388 0 : ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
389 0 : replaceBefore = temp2.Length();
390 0 : return;
391 : }
392 :
393 0 : bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
394 : {
395 0 : if (!mIOService)
396 0 : return false;
397 :
398 0 : nsAutoCString scheme;
399 0 : nsresult rv = mIOService->ExtractScheme(aURL, scheme);
400 0 : if(NS_FAILED(rv))
401 0 : return false;
402 :
403 : // Get the handler for this scheme.
404 0 : nsCOMPtr<nsIProtocolHandler> handler;
405 0 : rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
406 0 : if(NS_FAILED(rv))
407 0 : return false;
408 :
409 : // Is it an external protocol handler? If not, linkify it.
410 0 : nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
411 0 : if (!externalHandler)
412 0 : return true; // handler is built-in, linkify it!
413 :
414 : // If external app exists for the scheme then linkify it.
415 : bool exists;
416 0 : rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
417 0 : return(NS_SUCCEEDED(rv) && exists);
418 : }
419 :
420 : bool
421 0 : mozTXTToHTMLConv::CheckURLAndCreateHTML(
422 : const nsString& txtURL, const nsString& desc, const modetype mode,
423 : nsString& outputHTML)
424 : {
425 : // Create *uri from txtURL
426 0 : nsCOMPtr<nsIURI> uri;
427 : nsresult rv;
428 : // Lazily initialize mIOService
429 0 : if (!mIOService)
430 : {
431 0 : mIOService = do_GetIOService();
432 :
433 0 : if (!mIOService)
434 0 : return false;
435 : }
436 :
437 : // See if the url should be linkified.
438 0 : NS_ConvertUTF16toUTF8 utf8URL(txtURL);
439 0 : if (!ShouldLinkify(utf8URL))
440 0 : return false;
441 :
442 : // it would be faster if we could just check to see if there is a protocol
443 : // handler for the url and return instead of actually trying to create a url...
444 0 : rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
445 :
446 : // Real work
447 0 : if (NS_SUCCEEDED(rv) && uri)
448 : {
449 0 : outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
450 0 : switch(mode)
451 : {
452 : case RFC1738:
453 0 : outputHTML.AppendLiteral("rfc1738");
454 0 : break;
455 : case RFC2396E:
456 0 : outputHTML.AppendLiteral("rfc2396E");
457 0 : break;
458 : case freetext:
459 0 : outputHTML.AppendLiteral("freetext");
460 0 : break;
461 : case abbreviated:
462 0 : outputHTML.AppendLiteral("abbreviated");
463 0 : break;
464 0 : default: break;
465 : }
466 0 : nsAutoString escapedURL(txtURL);
467 0 : EscapeStr(escapedURL, true);
468 :
469 0 : outputHTML.AppendLiteral("\" href=\"");
470 0 : outputHTML += escapedURL;
471 0 : outputHTML.AppendLiteral("\">");
472 0 : outputHTML += desc;
473 0 : outputHTML.AppendLiteral("</a>");
474 0 : return true;
475 : }
476 : else
477 0 : return false;
478 : }
479 :
480 0 : NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
481 : {
482 : // call FindURL on the passed in string
483 0 : nsAutoString outputHTML; // we'll ignore the generated output HTML
484 :
485 0 : *aStartPos = -1;
486 0 : *aEndPos = -1;
487 :
488 0 : FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
489 :
490 0 : return NS_OK;
491 : }
492 :
493 : bool
494 0 : mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
495 : const uint32_t whathasbeendone,
496 : nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
497 : {
498 : enum statetype {unchecked, invalid, startok, endok, success};
499 : static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
500 :
501 : statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
502 : /* I don't like this abuse of enums as index for the array,
503 : but I don't know a better method */
504 :
505 : // Define, which modes to check
506 : /* all modes but abbreviated are checked for text[pos] == ':',
507 : only abbreviated for '.', RFC2396E and abbreviated for '@' */
508 0 : for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
509 0 : iState = modetype(iState + 1))
510 0 : state[iState] = aInString[pos] == ':' ? unchecked : invalid;
511 0 : switch (aInString[pos])
512 : {
513 : case '@':
514 0 : state[RFC2396E] = unchecked;
515 : MOZ_FALLTHROUGH;
516 : case '.':
517 0 : state[abbreviated] = unchecked;
518 0 : break;
519 : case ':':
520 0 : state[abbreviated] = invalid;
521 0 : break;
522 : default:
523 0 : break;
524 : }
525 :
526 : // Test, first successful mode wins, sequence defined by |ranking|
527 0 : int32_t iCheck = 0; // the currently tested modetype
528 0 : modetype check = ranking[iCheck];
529 0 : for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
530 : iCheck++)
531 : /* check state from last run.
532 : If this is the first, check this one, which isn't = success yet */
533 : {
534 0 : check = ranking[iCheck];
535 :
536 : uint32_t start, end;
537 :
538 0 : if (state[check] == unchecked)
539 0 : if (FindURLStart(aInString, aInLength, pos, check, start))
540 0 : state[check] = startok;
541 :
542 0 : if (state[check] == startok)
543 0 : if (FindURLEnd(aInString, aInLength, pos, check, start, end))
544 0 : state[check] = endok;
545 :
546 0 : if (state[check] == endok)
547 : {
548 0 : nsAutoString txtURL, desc;
549 : int32_t resultReplaceBefore, resultReplaceAfter;
550 :
551 : CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
552 : txtURL, desc,
553 0 : resultReplaceBefore, resultReplaceAfter);
554 :
555 0 : if (aInString[pos] != ':')
556 : {
557 0 : nsAutoString temp = txtURL;
558 0 : txtURL.SetLength(0);
559 0 : CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
560 : }
561 :
562 0 : if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
563 : outputHTML))
564 : {
565 0 : replaceBefore = resultReplaceBefore;
566 0 : replaceAfter = resultReplaceAfter;
567 0 : state[check] = success;
568 : }
569 : } // if
570 : } // for
571 0 : return state[check] == success;
572 : }
573 :
574 : bool
575 0 : mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
576 : int32_t aInLength, const char16_t* rep, int32_t aRepLen,
577 : LIMTYPE before, LIMTYPE after)
578 : {
579 :
580 : // this little method gets called a LOT. I found we were spending a
581 : // lot of time just calculating the length of the variable "rep"
582 : // over and over again every time we called it. So we're now passing
583 : // an integer in here.
584 0 : int32_t textLen = aInLength;
585 :
586 0 : if
587 : (
588 0 : ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
589 0 : && textLen < aRepLen) ||
590 0 : ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
591 0 : && textLen < aRepLen + 1) ||
592 0 : (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
593 0 : && textLen < aRepLen + 2)
594 : )
595 0 : return false;
596 :
597 0 : char16_t text0 = aInString[0];
598 0 : char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
599 :
600 0 : if
601 : (
602 : (before == LT_ALPHA
603 0 : && !nsCRT::IsAsciiAlpha(text0)) ||
604 : (before == LT_DIGIT
605 0 : && !nsCRT::IsAsciiDigit(text0)) ||
606 : (before == LT_DELIMITER
607 0 : &&
608 : (
609 0 : nsCRT::IsAsciiAlpha(text0) ||
610 0 : nsCRT::IsAsciiDigit(text0) ||
611 0 : text0 == *rep
612 0 : )) ||
613 : (after == LT_ALPHA
614 0 : && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
615 : (after == LT_DIGIT
616 0 : && !nsCRT::IsAsciiDigit(textAfterPos)) ||
617 : (after == LT_DELIMITER
618 0 : &&
619 : (
620 0 : nsCRT::IsAsciiAlpha(textAfterPos) ||
621 0 : nsCRT::IsAsciiDigit(textAfterPos) ||
622 0 : textAfterPos == *rep
623 0 : )) ||
624 0 : !Substring(Substring(aInString, aInString+aInLength),
625 0 : (before == LT_IGNORE ? 0 : 1),
626 0 : aRepLen).Equals(Substring(rep, rep+aRepLen),
627 0 : nsCaseInsensitiveStringComparator())
628 : )
629 0 : return false;
630 :
631 0 : return true;
632 : }
633 :
634 : uint32_t
635 0 : mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
636 : const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
637 : {
638 0 : uint32_t result = 0;
639 :
640 0 : for (int32_t i = 0; i < aInStringLength; i++)
641 : {
642 0 : const char16_t * indexIntoString = &aInString[i];
643 0 : if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
644 0 : result++;
645 : }
646 0 : return result;
647 : }
648 :
649 :
650 : // NOTE: the converted html for the phrase is appended to aOutString
651 : // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
652 : bool
653 0 : mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
654 : const char16_t* tagTXT, int32_t aTagTXTLen,
655 : const char* tagHTML, const char* attributeHTML,
656 : nsString& aOutString, uint32_t& openTags)
657 : {
658 : /* We're searching for the following pattern:
659 : LT_DELIMITER - "*" - ALPHA -
660 : [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
661 : <strong> is only inserted, if existence of a pair could be verified
662 : We use the first opening/closing tag, if we can choose */
663 :
664 0 : const char16_t * newOffset = aInString;
665 0 : int32_t newLength = aInStringLength;
666 0 : if (!col0) // skip the first element?
667 : {
668 0 : newOffset = &aInString[1];
669 0 : newLength = aInStringLength - 1;
670 : }
671 :
672 : // opening tag
673 0 : if
674 : (
675 0 : ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
676 : (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
677 0 : && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
678 : LT_ALPHA, LT_DELIMITER) // remaining closing tags
679 0 : > openTags
680 : )
681 : {
682 0 : openTags++;
683 0 : aOutString.Append('<');
684 0 : aOutString.AppendASCII(tagHTML);
685 0 : aOutString.Append(char16_t(' '));
686 0 : aOutString.AppendASCII(attributeHTML);
687 0 : aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
688 0 : aOutString.Append(tagTXT);
689 0 : aOutString.AppendLiteral("</span>");
690 0 : return true;
691 : }
692 :
693 : // closing tag
694 0 : else if (openTags > 0
695 0 : && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
696 : {
697 0 : openTags--;
698 0 : aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
699 0 : aOutString.Append(tagTXT);
700 0 : aOutString.AppendLiteral("</span></");
701 0 : aOutString.AppendASCII(tagHTML);
702 0 : aOutString.Append(char16_t('>'));
703 0 : return true;
704 : }
705 :
706 0 : return false;
707 : }
708 :
709 :
710 : bool
711 0 : mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
712 : const char* tagTXT, const char* imageName,
713 : nsString& outputHTML, int32_t& glyphTextLen)
714 : {
715 0 : if ( !aInString || !tagTXT || !imageName )
716 0 : return false;
717 :
718 0 : int32_t tagLen = strlen(tagTXT);
719 :
720 0 : uint32_t delim = (col0 ? 0 : 1) + tagLen;
721 :
722 0 : if
723 : (
724 0 : (col0 || IsSpace(aInString[0]))
725 0 : &&
726 : (
727 0 : aLength <= int32_t(delim) ||
728 0 : IsSpace(aInString[delim]) ||
729 0 : (aLength > int32_t(delim + 1)
730 0 : &&
731 : (
732 0 : aInString[delim] == '.' ||
733 0 : aInString[delim] == ',' ||
734 0 : aInString[delim] == ';' ||
735 0 : aInString[delim] == '8' ||
736 0 : aInString[delim] == '>' ||
737 0 : aInString[delim] == '!' ||
738 0 : aInString[delim] == '?'
739 : )
740 0 : && IsSpace(aInString[delim + 1]))
741 : )
742 0 : && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
743 : col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
744 : // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
745 : )
746 : {
747 0 : if (!col0)
748 : {
749 0 : outputHTML.Truncate();
750 0 : outputHTML.Append(char16_t(' '));
751 : }
752 :
753 0 : outputHTML.AppendLiteral("<span class=\""); // <span class="
754 0 : AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown
755 0 : outputHTML.AppendLiteral("\" title=\""); // " title="
756 0 : AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip
757 0 : outputHTML.AppendLiteral("\"><span>"); // "><span>
758 0 : AppendASCIItoUTF16(tagTXT, outputHTML); // original text
759 0 : outputHTML.AppendLiteral("</span></span>"); // </span></span>
760 0 : glyphTextLen = (col0 ? 0 : 1) + tagLen;
761 0 : return true;
762 : }
763 :
764 0 : return false;
765 : }
766 :
767 : // the glyph is appended to aOutputString instead of the original string...
768 : bool
769 0 : mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
770 : nsString& aOutputString, int32_t& glyphTextLen)
771 : {
772 0 : char16_t text0 = aInString[0];
773 0 : char16_t text1 = aInString[1];
774 0 : char16_t firstChar = (col0 ? text0 : text1);
775 :
776 : // temporary variable used to store the glyph html text
777 0 : nsAutoString outputHTML;
778 : bool bTestSmilie;
779 0 : bool bArg = false;
780 : int i;
781 :
782 : // refactor some of this mess to avoid code duplication and speed execution a bit
783 : // there are two cases that need to be tried one after another. To avoid a lot of
784 : // duplicate code, rolling into a loop
785 :
786 0 : i = 0;
787 0 : while ( i < 2 )
788 : {
789 0 : bTestSmilie = false;
790 0 : if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
791 : {
792 : // first test passed
793 :
794 0 : bTestSmilie = true;
795 0 : bArg = col0;
796 : }
797 0 : if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
798 : {
799 : // second test passed
800 :
801 0 : bTestSmilie = true;
802 0 : bArg = false;
803 : }
804 0 : if ( bTestSmilie && (
805 0 : SmilyHit(aInString, aInLength, bArg,
806 : ":-)",
807 : "moz-smiley-s1", // smile
808 0 : outputHTML, glyphTextLen) ||
809 :
810 0 : SmilyHit(aInString, aInLength, bArg,
811 : ":)",
812 : "moz-smiley-s1", // smile
813 0 : outputHTML, glyphTextLen) ||
814 :
815 0 : SmilyHit(aInString, aInLength, bArg,
816 : ":-D",
817 : "moz-smiley-s5", // laughing
818 0 : outputHTML, glyphTextLen) ||
819 :
820 0 : SmilyHit(aInString, aInLength, bArg,
821 : ":-(",
822 : "moz-smiley-s2", // frown
823 0 : outputHTML, glyphTextLen) ||
824 :
825 0 : SmilyHit(aInString, aInLength, bArg,
826 : ":(",
827 : "moz-smiley-s2", // frown
828 0 : outputHTML, glyphTextLen) ||
829 :
830 0 : SmilyHit(aInString, aInLength, bArg,
831 : ":-[",
832 : "moz-smiley-s6", // embarassed
833 0 : outputHTML, glyphTextLen) ||
834 :
835 0 : SmilyHit(aInString, aInLength, bArg,
836 : ";-)",
837 : "moz-smiley-s3", // wink
838 0 : outputHTML, glyphTextLen) ||
839 :
840 0 : SmilyHit(aInString, aInLength, col0,
841 : ";)",
842 : "moz-smiley-s3", // wink
843 0 : outputHTML, glyphTextLen) ||
844 :
845 0 : SmilyHit(aInString, aInLength, bArg,
846 : ":-\\",
847 : "moz-smiley-s7", // undecided
848 0 : outputHTML, glyphTextLen) ||
849 :
850 0 : SmilyHit(aInString, aInLength, bArg,
851 : ":-P",
852 : "moz-smiley-s4", // tongue
853 0 : outputHTML, glyphTextLen) ||
854 :
855 0 : SmilyHit(aInString, aInLength, bArg,
856 : ";-P",
857 : "moz-smiley-s4", // tongue
858 0 : outputHTML, glyphTextLen) ||
859 :
860 0 : SmilyHit(aInString, aInLength, bArg,
861 : "=-O",
862 : "moz-smiley-s8", // surprise
863 0 : outputHTML, glyphTextLen) ||
864 :
865 0 : SmilyHit(aInString, aInLength, bArg,
866 : ":-*",
867 : "moz-smiley-s9", // kiss
868 0 : outputHTML, glyphTextLen) ||
869 :
870 0 : SmilyHit(aInString, aInLength, bArg,
871 : ">:o",
872 : "moz-smiley-s10", // yell
873 0 : outputHTML, glyphTextLen) ||
874 :
875 0 : SmilyHit(aInString, aInLength, bArg,
876 : ">:-o",
877 : "moz-smiley-s10", // yell
878 0 : outputHTML, glyphTextLen) ||
879 :
880 0 : SmilyHit(aInString, aInLength, bArg,
881 : "8-)",
882 : "moz-smiley-s11", // cool
883 0 : outputHTML, glyphTextLen) ||
884 :
885 0 : SmilyHit(aInString, aInLength, bArg,
886 : ":-$",
887 : "moz-smiley-s12", // money
888 0 : outputHTML, glyphTextLen) ||
889 :
890 0 : SmilyHit(aInString, aInLength, bArg,
891 : ":-!",
892 : "moz-smiley-s13", // foot
893 0 : outputHTML, glyphTextLen) ||
894 :
895 0 : SmilyHit(aInString, aInLength, bArg,
896 : "O:-)",
897 : "moz-smiley-s14", // innocent
898 0 : outputHTML, glyphTextLen) ||
899 :
900 0 : SmilyHit(aInString, aInLength, bArg,
901 : ":'(",
902 : "moz-smiley-s15", // cry
903 0 : outputHTML, glyphTextLen) ||
904 :
905 0 : SmilyHit(aInString, aInLength, bArg,
906 : ":-X",
907 : "moz-smiley-s16", // sealed
908 : outputHTML, glyphTextLen)
909 : )
910 : )
911 : {
912 0 : aOutputString.Append(outputHTML);
913 0 : return true;
914 : }
915 0 : i++;
916 : }
917 0 : if (text0 == '\f')
918 : {
919 0 : aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
920 0 : glyphTextLen = 1;
921 0 : return true;
922 : }
923 0 : if (text0 == '+' || text1 == '+')
924 : {
925 0 : if (ItMatchesDelimited(aInString, aInLength,
926 : u" +/-", 4,
927 : LT_IGNORE, LT_IGNORE))
928 : {
929 0 : aOutputString.AppendLiteral(" ±");
930 0 : glyphTextLen = 4;
931 0 : return true;
932 : }
933 0 : if (col0 && ItMatchesDelimited(aInString, aInLength,
934 : u"+/-", 3,
935 : LT_IGNORE, LT_IGNORE))
936 : {
937 0 : aOutputString.AppendLiteral("±");
938 0 : glyphTextLen = 3;
939 0 : return true;
940 : }
941 : }
942 :
943 : // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5
944 : // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
945 0 : if
946 : (
947 : text1 == '^'
948 0 : &&
949 : (
950 0 : nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
951 0 : text0 == ')' || text0 == ']' || text0 == '}'
952 : )
953 0 : &&
954 : (
955 0 : (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
956 0 : (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
957 : )
958 : )
959 : {
960 : // Find first non-digit
961 0 : int32_t delimPos = 3; // skip "^" and first digit (or '-')
962 0 : for (; delimPos < aInLength
963 0 : &&
964 : (
965 0 : nsCRT::IsAsciiDigit(aInString[delimPos]) ||
966 0 : (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
967 0 : nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
968 : );
969 : delimPos++)
970 : ;
971 :
972 0 : if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
973 : {
974 0 : return false;
975 : }
976 :
977 0 : outputHTML.Truncate();
978 0 : outputHTML += text0;
979 : outputHTML.AppendLiteral(
980 : "<sup class=\"moz-txt-sup\">"
981 : "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
982 0 : "^</span>");
983 :
984 0 : aOutputString.Append(outputHTML);
985 0 : aOutputString.Append(&aInString[2], delimPos - 2);
986 0 : aOutputString.AppendLiteral("</sup>");
987 :
988 0 : glyphTextLen = delimPos /* - 1 + 1 */ ;
989 0 : return true;
990 : }
991 : /*
992 : The following strings are not substituted:
993 : |TXT |HTML |Reason
994 : +------+---------+----------
995 : -> ← Bug #454
996 : => ⇐ dito
997 : <- → dito
998 : <= ⇒ dito
999 : (tm) ™ dito
1000 : 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ...
1001 : 3/4 ¾ dito
1002 : 1/2 ½ similar
1003 : */
1004 0 : return false;
1005 : }
1006 :
1007 : /***************************************************************************
1008 : Library-internal Interface
1009 : ****************************************************************************/
1010 :
1011 0 : mozTXTToHTMLConv::mozTXTToHTMLConv()
1012 : {
1013 0 : }
1014 :
1015 0 : mozTXTToHTMLConv::~mozTXTToHTMLConv()
1016 : {
1017 0 : }
1018 :
1019 0 : NS_IMPL_ISUPPORTS(mozTXTToHTMLConv,
1020 : mozITXTToHTMLConv,
1021 : nsIStreamConverter,
1022 : nsIStreamListener,
1023 : nsIRequestObserver)
1024 :
1025 : int32_t
1026 0 : mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
1027 : uint32_t& logLineStart)
1028 : {
1029 0 : int32_t result = 0;
1030 0 : int32_t lineLength = NS_strlen(line);
1031 :
1032 0 : bool moreCites = true;
1033 0 : while (moreCites)
1034 : {
1035 : /* E.g. the following lines count as quote:
1036 :
1037 : > text
1038 : //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1039 : >text
1040 : //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1041 : > text
1042 : ] text
1043 : USER> text
1044 : USER] text
1045 : //#endif
1046 :
1047 : logLineStart is the position of "t" in this example
1048 : */
1049 0 : uint32_t i = logLineStart;
1050 :
1051 : #ifdef QUOTE_RECOGNITION_AGGRESSIVE
1052 : for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
1053 : ;
1054 : for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
1055 : && nsCRT::IsUpper(line[i]) ; i++)
1056 : ;
1057 : if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
1058 : #else
1059 0 : if (int32_t(i) < lineLength && line[i] == '>')
1060 : #endif
1061 : {
1062 0 : i++;
1063 0 : if (int32_t(i) < lineLength && line[i] == ' ')
1064 0 : i++;
1065 : // sendmail/mbox
1066 : // Placed here for performance increase
1067 0 : const char16_t * indexString = &line[logLineStart];
1068 : // here, |logLineStart < lineLength| is always true
1069 0 : uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
1070 0 : if (Substring(indexString,
1071 0 : indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
1072 0 : nsCaseInsensitiveStringComparator()))
1073 : //XXX RFC2646
1074 0 : moreCites = false;
1075 : else
1076 : {
1077 0 : result++;
1078 0 : logLineStart = i;
1079 0 : }
1080 : }
1081 : else
1082 0 : moreCites = false;
1083 : }
1084 :
1085 0 : return result;
1086 : }
1087 :
1088 : void
1089 0 : mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
1090 : {
1091 0 : bool doURLs = 0 != (whattodo & kURLs);
1092 0 : bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
1093 0 : bool doStructPhrase = 0 != (whattodo & kStructPhrase);
1094 :
1095 0 : uint32_t structPhrase_strong = 0; // Number of currently open tags
1096 0 : uint32_t structPhrase_underline = 0;
1097 0 : uint32_t structPhrase_italic = 0;
1098 0 : uint32_t structPhrase_code = 0;
1099 :
1100 0 : nsAutoString outputHTML; // moved here for performance increase
1101 :
1102 0 : for(uint32_t i = 0; int32_t(i) < aInStringLength;)
1103 : {
1104 0 : if (doGlyphSubstitution)
1105 : {
1106 : int32_t glyphTextLen;
1107 0 : if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
1108 : {
1109 0 : i += glyphTextLen;
1110 0 : continue;
1111 : }
1112 : }
1113 :
1114 0 : if (doStructPhrase)
1115 : {
1116 0 : const char16_t * newOffset = aInString;
1117 0 : int32_t newLength = aInStringLength;
1118 0 : if (i > 0 ) // skip the first element?
1119 : {
1120 0 : newOffset = &aInString[i-1];
1121 0 : newLength = aInStringLength - i + 1;
1122 : }
1123 :
1124 0 : switch (aInString[i]) // Performance increase
1125 : {
1126 : case '*':
1127 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1128 : u"*", 1,
1129 : "b", "class=\"moz-txt-star\"",
1130 : aOutString, structPhrase_strong))
1131 : {
1132 0 : i++;
1133 0 : continue;
1134 : }
1135 0 : break;
1136 : case '/':
1137 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1138 : u"/", 1,
1139 : "i", "class=\"moz-txt-slash\"",
1140 : aOutString, structPhrase_italic))
1141 : {
1142 0 : i++;
1143 0 : continue;
1144 : }
1145 0 : break;
1146 : case '_':
1147 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1148 : u"_", 1,
1149 : "span" /* <u> is deprecated */,
1150 : "class=\"moz-txt-underscore\"",
1151 : aOutString, structPhrase_underline))
1152 : {
1153 0 : i++;
1154 0 : continue;
1155 : }
1156 0 : break;
1157 : case '|':
1158 0 : if (StructPhraseHit(newOffset, newLength, i == 0,
1159 : u"|", 1,
1160 : "code", "class=\"moz-txt-verticalline\"",
1161 : aOutString, structPhrase_code))
1162 : {
1163 0 : i++;
1164 0 : continue;
1165 : }
1166 0 : break;
1167 : }
1168 : }
1169 :
1170 0 : if (doURLs)
1171 : {
1172 0 : switch (aInString[i])
1173 : {
1174 : case ':':
1175 : case '@':
1176 : case '.':
1177 0 : if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
1178 : {
1179 : int32_t replaceBefore;
1180 : int32_t replaceAfter;
1181 0 : if (FindURL(aInString, aInStringLength, i, whattodo,
1182 : outputHTML, replaceBefore, replaceAfter)
1183 0 : && structPhrase_strong + structPhrase_italic +
1184 0 : structPhrase_underline + structPhrase_code == 0
1185 : /* workaround for bug #19445 */ )
1186 : {
1187 0 : aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
1188 0 : aOutString += outputHTML;
1189 0 : i += replaceAfter + 1;
1190 0 : continue;
1191 : }
1192 : }
1193 0 : break;
1194 : } //switch
1195 : }
1196 :
1197 0 : switch (aInString[i])
1198 : {
1199 : // Special symbols
1200 : case '<':
1201 : case '>':
1202 : case '&':
1203 0 : EscapeChar(aInString[i], aOutString, false);
1204 0 : i++;
1205 0 : break;
1206 : // Normal characters
1207 : default:
1208 0 : aOutString += aInString[i];
1209 0 : i++;
1210 0 : break;
1211 : }
1212 : }
1213 0 : }
1214 :
1215 : void
1216 0 : mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
1217 : {
1218 : // some common variables we were recalculating
1219 : // every time inside the for loop...
1220 0 : int32_t lengthOfInString = aInString.Length();
1221 0 : const char16_t * uniBuffer = aInString.get();
1222 :
1223 : #ifdef DEBUG_BenB_Perf
1224 : PRTime parsing_start = PR_IntervalNow();
1225 : #endif
1226 :
1227 : // Look for simple entities not included in a tags and scan them.
1228 : // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
1229 : // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
1230 : // Unescape the rest (text between tags) and pass it to ScanTXT.
1231 0 : nsAutoCString canFollow(" \f\n\r\t>");
1232 0 : for (int32_t i = 0; i < lengthOfInString;)
1233 : {
1234 0 : if (aInString[i] == '<') // html tag
1235 : {
1236 0 : int32_t start = i;
1237 0 : if (i + 2 < lengthOfInString &&
1238 0 : nsCRT::ToLower(aInString[i + 1]) == 'a' &&
1239 0 : canFollow.FindChar(aInString[i + 2]) != kNotFound)
1240 : // if a tag, skip until </a>.
1241 : // Make sure there's a white-space character after, not to match "abbr".
1242 : {
1243 0 : i = aInString.Find("</a>", true, i);
1244 0 : if (i == kNotFound)
1245 0 : i = lengthOfInString;
1246 : else
1247 0 : i += 4;
1248 : }
1249 0 : else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
1250 : // if out-commended code, skip until -->
1251 : {
1252 0 : i = aInString.Find("-->", false, i);
1253 0 : if (i == kNotFound)
1254 0 : i = lengthOfInString;
1255 : else
1256 0 : i += 3;
1257 : }
1258 0 : else if (i + 6 < lengthOfInString &&
1259 0 : Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
1260 0 : canFollow.FindChar(aInString[i + 6]) != kNotFound)
1261 : // if style tag, skip until </style>
1262 : {
1263 0 : i = aInString.Find("</style>", true, i);
1264 0 : if (i == kNotFound)
1265 0 : i = lengthOfInString;
1266 : else
1267 0 : i += 8;
1268 : }
1269 0 : else if (i + 7 < lengthOfInString &&
1270 0 : Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
1271 0 : canFollow.FindChar(aInString[i + 7]) != kNotFound)
1272 : // if script tag, skip until </script>
1273 : {
1274 0 : i = aInString.Find("</script>", true, i);
1275 0 : if (i == kNotFound)
1276 0 : i = lengthOfInString;
1277 : else
1278 0 : i += 9;
1279 : }
1280 0 : else if (i + 5 < lengthOfInString &&
1281 0 : Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
1282 0 : canFollow.FindChar(aInString[i + 5]) != kNotFound)
1283 : // if head tag, skip until </head>
1284 : // Make sure not to match <header>.
1285 : {
1286 0 : i = aInString.Find("</head>", true, i);
1287 0 : if (i == kNotFound)
1288 0 : i = lengthOfInString;
1289 : else
1290 0 : i += 7;
1291 : }
1292 : else // just skip tag (attributes etc.)
1293 : {
1294 0 : i = aInString.FindChar('>', i);
1295 0 : if (i == kNotFound)
1296 0 : i = lengthOfInString;
1297 : else
1298 0 : i++;
1299 : }
1300 0 : aOutString.Append(&uniBuffer[start], i - start);
1301 : }
1302 : else
1303 : {
1304 0 : uint32_t start = uint32_t(i);
1305 0 : i = aInString.FindChar('<', i);
1306 0 : if (i == kNotFound)
1307 0 : i = lengthOfInString;
1308 :
1309 0 : nsString tempString;
1310 0 : tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
1311 0 : UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
1312 0 : ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
1313 : }
1314 : }
1315 :
1316 : #ifdef DEBUG_BenB_Perf
1317 : printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
1318 : #endif
1319 0 : }
1320 :
1321 : /****************************************************************************
1322 : XPCOM Interface
1323 : *****************************************************************************/
1324 :
1325 : NS_IMETHODIMP
1326 0 : mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
1327 : const char *aFromType,
1328 : const char *aToType,
1329 : nsISupports *aCtxt, nsIInputStream **_retval)
1330 : {
1331 0 : return NS_ERROR_NOT_IMPLEMENTED;
1332 : }
1333 :
1334 : NS_IMETHODIMP
1335 0 : mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
1336 : const char *aToType,
1337 : nsIStreamListener *aListener, nsISupports *aCtxt) {
1338 0 : return NS_ERROR_NOT_IMPLEMENTED;
1339 : }
1340 :
1341 : NS_IMETHODIMP
1342 0 : mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
1343 : nsIInputStream *inStr, uint64_t sourceOffset,
1344 : uint32_t count)
1345 : {
1346 0 : return NS_ERROR_NOT_IMPLEMENTED;
1347 : }
1348 :
1349 : NS_IMETHODIMP
1350 0 : mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
1351 : {
1352 0 : return NS_ERROR_NOT_IMPLEMENTED;
1353 : }
1354 :
1355 : NS_IMETHODIMP
1356 0 : mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
1357 : nsresult aStatus)
1358 : {
1359 0 : return NS_ERROR_NOT_IMPLEMENTED;
1360 : }
1361 :
1362 : NS_IMETHODIMP
1363 0 : mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
1364 : uint32_t *_retval)
1365 : {
1366 0 : if (!logLineStart || !_retval || !line)
1367 0 : return NS_ERROR_NULL_POINTER;
1368 0 : *_retval = CiteLevelTXT(line, *logLineStart);
1369 0 : return NS_OK;
1370 : }
1371 :
1372 : NS_IMETHODIMP
1373 0 : mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
1374 : char16_t **_retval)
1375 : {
1376 0 : NS_ENSURE_ARG(text);
1377 :
1378 : // FIX ME!!!
1379 0 : nsString outString;
1380 0 : int32_t inLength = NS_strlen(text);
1381 : // by setting a large capacity up front, we save time
1382 : // when appending characters to the output string because we don't
1383 : // need to reallocate and re-copy the characters already in the out String.
1384 0 : NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
1385 0 : if (inLength == 0) {
1386 0 : *_retval = NS_strdup(text);
1387 0 : return NS_OK;
1388 : }
1389 :
1390 0 : outString.SetCapacity(uint32_t(inLength * growthRate));
1391 0 : ScanTXT(text, inLength, whattodo, outString);
1392 :
1393 0 : *_retval = ToNewUnicode(outString);
1394 0 : return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1395 : }
1396 :
1397 : NS_IMETHODIMP
1398 0 : mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
1399 : char16_t **_retval)
1400 : {
1401 0 : NS_ENSURE_ARG(text);
1402 :
1403 : // FIX ME!!!
1404 0 : nsString outString;
1405 0 : nsString inString (text); // look at this nasty extra copy of the entire input buffer!
1406 0 : outString.SetCapacity(uint32_t(inString.Length() * growthRate));
1407 :
1408 0 : ScanHTML(inString, whattodo, outString);
1409 0 : *_retval = ToNewUnicode(outString);
1410 0 : return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1411 : }
1412 :
1413 : nsresult
1414 0 : MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
1415 : {
1416 0 : NS_PRECONDITION(aConv != nullptr, "null ptr");
1417 0 : if (!aConv)
1418 0 : return NS_ERROR_NULL_POINTER;
1419 :
1420 0 : *aConv = new mozTXTToHTMLConv();
1421 0 : if (!*aConv)
1422 0 : return NS_ERROR_OUT_OF_MEMORY;
1423 :
1424 0 : NS_ADDREF(*aConv);
1425 : // return (*aConv)->Init();
1426 0 : return NS_OK;
1427 : }
|