Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "mozilla/TextUtils.h"
7
#include "mozTXTToHTMLConv.h"
8
#include "nsNetUtil.h"
9
#include "nsUnicharUtils.h"
10
#include "nsCRT.h"
11
#include "nsIExternalProtocolHandler.h"
12
#include "nsIIOService.h"
13
#include "nsIURI.h"
14
15
#include <algorithm>
16
17
#ifdef DEBUG_BenB_Perf
18
#include "prtime.h"
19
#include "prinrval.h"
20
#endif
21
22
using mozilla::IsAsciiAlpha;
23
using mozilla::IsAsciiDigit;
24
25
// Multiplicative growth factor. No use of this constant is visible in this
// part of the file.
// NOTE(review): presumably the ratio by which output buffers are sized
// relative to their input -- confirm against the users of growthRate.
const double growthRate = 1.2;
26
27
// Bug 183111, editor now replaces multiple spaces with leading
28
// 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
29
// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
30
// Also recognize the Japanese ideographic space 0x3000 as a space.
31
static inline bool IsSpace(const char16_t aChar)
32
0
{
33
0
  return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
34
0
}
35
36
// Escape Char will take ch, escape it and append the result to
37
// aStringToAppendTo
38
void
39
mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
40
                             bool inAttribute)
41
0
{
42
0
    switch (ch)
43
0
    {
44
0
    case '<':
45
0
      aStringToAppendTo.AppendLiteral("&lt;");
46
0
      break;
47
0
    case '>':
48
0
      aStringToAppendTo.AppendLiteral("&gt;");
49
0
      break;
50
0
    case '&':
51
0
      aStringToAppendTo.AppendLiteral("&amp;");
52
0
      break;
53
0
    case '"':
54
0
      if (inAttribute)
55
0
      {
56
0
        aStringToAppendTo.AppendLiteral("&quot;");
57
0
        break;
58
0
      }
59
0
      // else fall through
60
0
      MOZ_FALLTHROUGH;
61
0
    default:
62
0
      aStringToAppendTo += ch;
63
0
    }
64
0
}
65
66
// EscapeStr takes the passed in string and
67
// escapes it IN PLACE.
68
void
69
mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
70
0
{
71
0
  // the replace substring routines
72
0
  // don't seem to work if you have a character
73
0
  // in the in string that is also in the replacement
74
0
  // string! =(
75
0
  //aInString.ReplaceSubstring("&", "&amp;");
76
0
  //aInString.ReplaceSubstring("<", "&lt;");
77
0
  //aInString.ReplaceSubstring(">", "&gt;");
78
0
  for (uint32_t i = 0; i < aInString.Length();)
79
0
  {
80
0
    switch (aInString[i])
81
0
    {
82
0
    case '<':
83
0
      aInString.Cut(i, 1);
84
0
      aInString.InsertLiteral(u"&lt;", i);
85
0
      i += 4; // skip past the integers we just added
86
0
      break;
87
0
    case '>':
88
0
      aInString.Cut(i, 1);
89
0
      aInString.InsertLiteral(u"&gt;", i);
90
0
      i += 4; // skip past the integers we just added
91
0
      break;
92
0
    case '&':
93
0
      aInString.Cut(i, 1);
94
0
      aInString.InsertLiteral(u"&amp;", i);
95
0
      i += 5; // skip past the integers we just added
96
0
      break;
97
0
    case '"':
98
0
      if (inAttribute)
99
0
      {
100
0
        aInString.Cut(i, 1);
101
0
        aInString.InsertLiteral(u"&quot;", i);
102
0
        i += 6;
103
0
        break;
104
0
      }
105
0
      // else fall through
106
0
      MOZ_FALLTHROUGH;
107
0
    default:
108
0
      i++;
109
0
    }
110
0
  }
111
0
}
112
113
void
114
mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
115
0
{
116
0
  const char16_t * subString = nullptr;
117
0
  for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
118
0
  {
119
0
    int32_t remainingChars = i - aStartPos;
120
0
    if (aInString[i] == '&')
121
0
    {
122
0
      subString = &aInString[i];
123
0
      if (!NS_strncmp(subString, u"&lt;", std::min(4, aLength - remainingChars)))
124
0
      {
125
0
        aOutString.Append(char16_t('<'));
126
0
        i += 4;
127
0
      }
128
0
      else if (!NS_strncmp(subString, u"&gt;", std::min(4, aLength - remainingChars)))
129
0
      {
130
0
        aOutString.Append(char16_t('>'));
131
0
        i += 4;
132
0
      }
133
0
      else if (!NS_strncmp(subString, u"&amp;", std::min(5, aLength - remainingChars)))
134
0
      {
135
0
        aOutString.Append(char16_t('&'));
136
0
        i += 5;
137
0
      }
138
0
      else if (!NS_strncmp(subString, u"&quot;", std::min(6, aLength - remainingChars)))
139
0
      {
140
0
        aOutString.Append(char16_t('"'));
141
0
        i += 6;
142
0
      }
143
0
      else
144
0
      {
145
0
        aOutString += aInString[i];
146
0
        i++;
147
0
      }
148
0
    }
149
0
    else
150
0
    {
151
0
      aOutString += aInString[i];
152
0
      i++;
153
0
    }
154
0
  }
155
0
}
156
157
void
158
mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
159
                                         const uint32_t pos, nsString& aOutString)
160
0
{
161
0
  NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
162
0
  if (int32_t(pos) >= aInLength)
163
0
    return;
164
0
165
0
  if (aInString[pos] == '@')
166
0
  {
167
0
    // only pre-pend a mailto url if the string contains a .domain in it..
168
0
    //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
169
0
    nsDependentString inString(aInString, aInLength);
170
0
    if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
171
0
    {
172
0
      aOutString.AssignLiteral("mailto:");
173
0
      aOutString += aInString;
174
0
    }
175
0
  }
176
0
  else if (aInString[pos] == '.')
177
0
  {
178
0
    if (ItMatchesDelimited(aInString, aInLength,
179
0
                           u"www.", 4, LT_IGNORE, LT_IGNORE))
180
0
    {
181
0
      aOutString.AssignLiteral("http://");
182
0
      aOutString += aInString;
183
0
    }
184
0
    else if (ItMatchesDelimited(aInString,aInLength, u"ftp.", 4, LT_IGNORE, LT_IGNORE))
185
0
    {
186
0
      aOutString.AssignLiteral("ftp://");
187
0
      aOutString += aInString;
188
0
    }
189
0
  }
190
0
}
191
192
bool
193
mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
194
                               const uint32_t pos, const modetype check,
195
                               uint32_t& start)
196
0
{
197
0
  switch(check)
198
0
  { // no breaks, because end of blocks is never reached
199
0
  case RFC1738:
200
0
  {
201
0
    if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5))
202
0
    {
203
0
      start = pos + 1;
204
0
      return true;
205
0
    }
206
0
    return false;
207
0
  }
208
0
  case RFC2396E:
209
0
  {
210
0
    nsString temp(aInString, aInLength);
211
0
    int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
212
0
    if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
213
0
                           temp[uint32_t(i)] == '"'))
214
0
    {
215
0
      start = uint32_t(++i);
216
0
      return start < pos;
217
0
    }
218
0
    return false;
219
0
  }
220
0
  case freetext:
221
0
  {
222
0
    int32_t i = pos - 1;
223
0
    for (; i >= 0 && (
224
0
         IsAsciiAlpha(aInString[uint32_t(i)]) ||
225
0
         IsAsciiDigit(aInString[uint32_t(i)]) ||
226
0
         aInString[uint32_t(i)] == '+' ||
227
0
         aInString[uint32_t(i)] == '-' ||
228
0
         aInString[uint32_t(i)] == '.'
229
0
         ); i--)
230
0
      ;
231
0
    if (++i >= 0 && uint32_t(i) < pos && IsAsciiAlpha(aInString[uint32_t(i)]))
232
0
    {
233
0
      start = uint32_t(i);
234
0
      return true;
235
0
    }
236
0
    return false;
237
0
  }
238
0
  case abbreviated:
239
0
  {
240
0
    int32_t i = pos - 1;
241
0
    // This disallows non-ascii-characters for email.
242
0
    // Currently correct, but revisit later after standards changed.
243
0
    bool isEmail = aInString[pos] == (char16_t)'@';
244
0
    // These chars mark the start of the URL
245
0
    for (; i >= 0
246
0
             && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
247
0
             && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
248
0
             && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
249
0
             && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
250
0
             && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
251
0
             && aInString[uint32_t(i)] != '\\'
252
0
             && !IsSpace(aInString[uint32_t(i)])
253
0
             && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
254
0
         ; i--)
255
0
      ;
256
0
    if
257
0
      (
258
0
        ++i >= 0 && uint32_t(i) < pos
259
0
          &&
260
0
          (
261
0
            IsAsciiAlpha(aInString[uint32_t(i)]) ||
262
0
            IsAsciiDigit(aInString[uint32_t(i)])
263
0
          )
264
0
      )
265
0
    {
266
0
      start = uint32_t(i);
267
0
      return true;
268
0
    }
269
0
    return false;
270
0
  }
271
0
  default:
272
0
    return false;
273
0
  } //switch
274
0
}
275
276
bool
277
mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
278
           const modetype check, const uint32_t start, uint32_t& end)
279
0
{
280
0
  switch(check)
281
0
  { // no breaks, because end of blocks is never reached
282
0
  case RFC1738:
283
0
  case RFC2396E:
284
0
  {
285
0
    nsString temp(aInString, aInStringLength);
286
0
287
0
    int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
288
0
    if (i != kNotFound && temp[uint32_t(i--)] ==
289
0
        (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
290
0
    {
291
0
      end = uint32_t(i);
292
0
      return end > pos;
293
0
    }
294
0
    return false;
295
0
  }
296
0
  case freetext:
297
0
  case abbreviated:
298
0
  {
299
0
    uint32_t i = pos + 1;
300
0
    bool isEmail = aInString[pos] == (char16_t)'@';
301
0
    bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
302
0
    bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
303
0
    for (; int32_t(i) < aInStringLength; i++)
304
0
    {
305
0
      // These chars mark the end of the URL
306
0
      if (aInString[i] == '>' || aInString[i] == '<' ||
307
0
          aInString[i] == '"' || aInString[i] == '`' ||
308
0
          aInString[i] == '}' || aInString[i] == '{' ||
309
0
          (aInString[i] == ')' && !seenOpeningParenthesis) ||
310
0
          (aInString[i] == ']' && !seenOpeningSquareBracket) ||
311
0
          // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
312
0
          (aInString[i] == '[' && i > 2 &&
313
0
           (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
314
0
          IsSpace(aInString[i]))
315
0
          break;
316
0
      // Disallow non-ascii-characters for email.
317
0
      // Currently correct, but revisit later after standards changed.
318
0
      if (isEmail && (
319
0
            aInString[i] == '(' || aInString[i] == '\'' ||
320
0
            !nsCRT::IsAscii(aInString[i])))
321
0
          break;
322
0
      if (aInString[i] == '(')
323
0
        seenOpeningParenthesis = true;
324
0
      if (aInString[i] == '[')
325
0
        seenOpeningSquareBracket = true;
326
0
    }
327
0
    // These chars are allowed in the middle of the URL, but not at end.
328
0
    // Technically they are, but are used in normal text after the URL.
329
0
    while (--i > pos && (
330
0
             aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
331
0
             aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
332
0
             aInString[i] == ':' || aInString[i] == '\''
333
0
             ))
334
0
        ;
335
0
    if (i > pos)
336
0
    {
337
0
      end = i;
338
0
      return true;
339
0
    }
340
0
    return false;
341
0
  }
342
0
  default:
343
0
    return false;
344
0
  } //switch
345
0
}
346
347
void
348
mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
349
     const uint32_t pos, const uint32_t whathasbeendone,
350
     const modetype check, const uint32_t start, const uint32_t end,
351
     nsString& txtURL, nsString& desc,
352
     int32_t& replaceBefore, int32_t& replaceAfter)
353
0
{
354
0
  uint32_t descstart = start;
355
0
  switch(check)
356
0
  {
357
0
  case RFC1738:
358
0
  {
359
0
    descstart = start - 5;
360
0
    desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
361
0
    replaceAfter = end - pos + 1;
362
0
  } break;
363
0
  case RFC2396E:
364
0
  {
365
0
    descstart = start - 1;
366
0
    desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
367
0
    replaceAfter = end - pos + 1;
368
0
  } break;
369
0
  case freetext:
370
0
  case abbreviated:
371
0
  {
372
0
    descstart = start;
373
0
    desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
374
0
    replaceAfter = end - pos;
375
0
  } break;
376
0
  default: break;
377
0
  } //switch
378
0
379
0
  EscapeStr(desc, false);
380
0
381
0
  txtURL.Append(&aInString[start], end - start + 1);
382
0
  txtURL.StripWhitespace();
383
0
384
0
  // FIX ME
385
0
  nsAutoString temp2;
386
0
  ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
387
0
  replaceBefore = temp2.Length();
388
0
}
389
390
bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
391
0
{
392
0
  if (!mIOService)
393
0
    return false;
394
0
395
0
  nsAutoCString scheme;
396
0
  nsresult rv = mIOService->ExtractScheme(aURL, scheme);
397
0
  if(NS_FAILED(rv))
398
0
    return false;
399
0
400
0
  // Get the handler for this scheme.
401
0
  nsCOMPtr<nsIProtocolHandler> handler;
402
0
  rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
403
0
  if(NS_FAILED(rv))
404
0
    return false;
405
0
406
0
  // Is it an external protocol handler? If not, linkify it.
407
0
  nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
408
0
  if (!externalHandler)
409
0
   return true; // handler is built-in, linkify it!
410
0
411
0
  // If external app exists for the scheme then linkify it.
412
0
  bool exists;
413
0
  rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
414
0
  return(NS_SUCCEEDED(rv) && exists);
415
0
}
416
417
bool
418
mozTXTToHTMLConv::CheckURLAndCreateHTML(
419
     const nsString& txtURL, const nsString& desc, const modetype mode,
420
     nsString& outputHTML)
421
0
{
422
0
  // Create *uri from txtURL
423
0
  nsCOMPtr<nsIURI> uri;
424
0
  nsresult rv;
425
0
  // Lazily initialize mIOService
426
0
  if (!mIOService)
427
0
  {
428
0
    mIOService = do_GetIOService();
429
0
430
0
    if (!mIOService)
431
0
      return false;
432
0
  }
433
0
434
0
  // See if the url should be linkified.
435
0
  NS_ConvertUTF16toUTF8 utf8URL(txtURL);
436
0
  if (!ShouldLinkify(utf8URL))
437
0
    return false;
438
0
439
0
  // it would be faster if we could just check to see if there is a protocol
440
0
  // handler for the url and return instead of actually trying to create a url...
441
0
  rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
442
0
443
0
  // Real work
444
0
  if (NS_SUCCEEDED(rv) && uri)
445
0
  {
446
0
    outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
447
0
    switch(mode)
448
0
    {
449
0
    case RFC1738:
450
0
      outputHTML.AppendLiteral("rfc1738");
451
0
      break;
452
0
    case RFC2396E:
453
0
      outputHTML.AppendLiteral("rfc2396E");
454
0
      break;
455
0
    case freetext:
456
0
      outputHTML.AppendLiteral("freetext");
457
0
      break;
458
0
    case abbreviated:
459
0
      outputHTML.AppendLiteral("abbreviated");
460
0
      break;
461
0
    default: break;
462
0
    }
463
0
    nsAutoString escapedURL(txtURL);
464
0
    EscapeStr(escapedURL, true);
465
0
466
0
    outputHTML.AppendLiteral("\" href=\"");
467
0
    outputHTML += escapedURL;
468
0
    outputHTML.AppendLiteral("\">");
469
0
    outputHTML += desc;
470
0
    outputHTML.AppendLiteral("</a>");
471
0
    return true;
472
0
  }
473
0
  return false;
474
0
}
475
476
// FindURLInPlaintext runs FindURL() (with only URL recognition enabled) at
// position |aPos| of |aInString| and reports its replaceBefore/replaceAfter
// results through |aStartPos| / |aEndPos|. Both are set to -1 first and stay
// -1 when FindURL() does not report a URL. The generated HTML is discarded.
//
// Returns NS_ERROR_INVALID_POINTER if any pointer argument is null, NS_OK
// otherwise. A position outside [0, aInLength) cannot be the trigger
// character of a URL, so it is answered with "not found" instead of letting
// FindURL() read outside the buffer.
NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
{
  if (!aInString || !aStartPos || !aEndPos)
  {
    return NS_ERROR_INVALID_POINTER;
  }

  *aStartPos = -1;
  *aEndPos = -1;

  if (aPos < 0 || aPos >= aInLength)
  {
    return NS_OK;
  }

  // call FindURL on the passed in string
  nsAutoString outputHTML; // we'll ignore the generated output HTML
  FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);

  return NS_OK;
}
488
489
bool
490
mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
491
     const uint32_t whathasbeendone,
492
     nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
493
0
{
494
0
  enum statetype {unchecked, invalid, startok, endok, success};
495
0
  static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
496
0
497
0
  statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
498
0
  /* I don't like this abuse of enums as index for the array,
499
0
     but I don't know a better method */
500
0
501
0
  // Define, which modes to check
502
0
  /* all modes but abbreviated are checked for text[pos] == ':',
503
0
     only abbreviated for '.', RFC2396E and abbreviated for '@' */
504
0
  for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
505
0
       iState = modetype(iState + 1))
506
0
    state[iState] = aInString[pos] == ':' ? unchecked : invalid;
507
0
  switch (aInString[pos])
508
0
  {
509
0
  case '@':
510
0
    state[RFC2396E] = unchecked;
511
0
    MOZ_FALLTHROUGH;
512
0
  case '.':
513
0
    state[abbreviated] = unchecked;
514
0
    break;
515
0
  case ':':
516
0
    state[abbreviated] = invalid;
517
0
    break;
518
0
  default:
519
0
    break;
520
0
  }
521
0
522
0
  // Test, first successful mode wins, sequence defined by |ranking|
523
0
  int32_t iCheck = 0;  // the currently tested modetype
524
0
  modetype check = ranking[iCheck];
525
0
  for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
526
0
       iCheck++)
527
0
    /* check state from last run.
528
0
       If this is the first, check this one, which isn't = success yet */
529
0
  {
530
0
    check = ranking[iCheck];
531
0
532
0
    uint32_t start, end;
533
0
534
0
    if (state[check] == unchecked)
535
0
      if (FindURLStart(aInString, aInLength, pos, check, start))
536
0
        state[check] = startok;
537
0
538
0
    if (state[check] == startok)
539
0
      if (FindURLEnd(aInString, aInLength, pos, check, start, end))
540
0
        state[check] = endok;
541
0
542
0
    if (state[check] == endok)
543
0
    {
544
0
      nsAutoString txtURL, desc;
545
0
      int32_t resultReplaceBefore, resultReplaceAfter;
546
0
547
0
      CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
548
0
                             txtURL, desc,
549
0
                             resultReplaceBefore, resultReplaceAfter);
550
0
551
0
      if (aInString[pos] != ':')
552
0
      {
553
0
        nsAutoString temp = txtURL;
554
0
        txtURL.SetLength(0);
555
0
        CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
556
0
      }
557
0
558
0
      if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
559
0
                                                     outputHTML))
560
0
      {
561
0
        replaceBefore = resultReplaceBefore;
562
0
        replaceAfter = resultReplaceAfter;
563
0
        state[check] = success;
564
0
      }
565
0
    } // if
566
0
  } // for
567
0
  return state[check] == success;
568
0
}
569
570
bool
571
mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
572
    int32_t aInLength, const char16_t* rep, int32_t aRepLen,
573
    LIMTYPE before, LIMTYPE after)
574
0
{
575
0
576
0
  // this little method gets called a LOT. I found we were spending a
577
0
  // lot of time just calculating the length of the variable "rep"
578
0
  // over and over again every time we called it. So we're now passing
579
0
  // an integer in here.
580
0
  int32_t textLen = aInLength;
581
0
582
0
  if
583
0
    (
584
0
      ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
585
0
        && textLen < aRepLen) ||
586
0
      ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
587
0
        && textLen < aRepLen + 1) ||
588
0
      (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
589
0
        && textLen < aRepLen + 2)
590
0
    )
591
0
    return false;
592
0
593
0
  char16_t text0 = aInString[0];
594
0
  char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
595
0
596
0
  if
597
0
    (
598
0
      (before == LT_ALPHA
599
0
        && !IsAsciiAlpha(text0)) ||
600
0
      (before == LT_DIGIT
601
0
        && !IsAsciiDigit(text0)) ||
602
0
      (before == LT_DELIMITER
603
0
        &&
604
0
        (
605
0
          IsAsciiAlpha(text0) ||
606
0
          IsAsciiDigit(text0) ||
607
0
          text0 == *rep
608
0
        )) ||
609
0
      (after == LT_ALPHA
610
0
        && !IsAsciiAlpha(textAfterPos)) ||
611
0
      (after == LT_DIGIT
612
0
        && !IsAsciiDigit(textAfterPos)) ||
613
0
      (after == LT_DELIMITER
614
0
        &&
615
0
        (
616
0
          IsAsciiAlpha(textAfterPos) ||
617
0
          IsAsciiDigit(textAfterPos) ||
618
0
          textAfterPos == *rep
619
0
        )) ||
620
0
        !Substring(Substring(aInString, aInString+aInLength),
621
0
                   (before == LT_IGNORE ? 0 : 1),
622
0
                   aRepLen).Equals(Substring(rep, rep+aRepLen),
623
0
                                   nsCaseInsensitiveStringComparator())
624
0
    )
625
0
    return false;
626
0
627
0
  return true;
628
0
}
629
630
// NumberOfMatches counts at how many start offsets within |aInString|
// ItMatchesDelimited() reports a match of |rep| with the given |before| /
// |after| constraints. Every offset is tested, so matches may overlap.
uint32_t
mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
     const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
{
  uint32_t matches = 0;

  const char16_t * cursor = aInString;
  for (int32_t remaining = aInStringLength; remaining > 0;
       --remaining, ++cursor)
  {
    if (ItMatchesDelimited(cursor, remaining, rep, aRepLen, before, after))
    {
      ++matches;
    }
  }

  return matches;
}
644
645
646
// NOTE: the converted html for the phrase is appended to aOutString
647
// tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
648
bool
649
mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
650
     const char16_t* tagTXT, int32_t aTagTXTLen,
651
     const char* tagHTML, const char* attributeHTML,
652
     nsString& aOutString, uint32_t& openTags)
653
0
{
654
0
  /* We're searching for the following pattern:
655
0
     LT_DELIMITER - "*" - ALPHA -
656
0
     [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
657
0
     <strong> is only inserted, if existence of a pair could be verified
658
0
     We use the first opening/closing tag, if we can choose */
659
0
660
0
  const char16_t * newOffset = aInString;
661
0
  int32_t newLength = aInStringLength;
662
0
  if (!col0) // skip the first element?
663
0
  {
664
0
    newOffset = &aInString[1];
665
0
    newLength = aInStringLength - 1;
666
0
  }
667
0
668
0
  // opening tag
669
0
  if
670
0
    (
671
0
      ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
672
0
           (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
673
0
        && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
674
0
              LT_ALPHA, LT_DELIMITER)  // remaining closing tags
675
0
              > openTags
676
0
    )
677
0
  {
678
0
    openTags++;
679
0
    aOutString.Append('<');
680
0
    aOutString.AppendASCII(tagHTML);
681
0
    aOutString.Append(char16_t(' '));
682
0
    aOutString.AppendASCII(attributeHTML);
683
0
    aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
684
0
    aOutString.Append(tagTXT);
685
0
    aOutString.AppendLiteral("</span>");
686
0
    return true;
687
0
  }
688
0
689
0
  // closing tag
690
0
  else if (openTags > 0
691
0
       && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
692
0
  {
693
0
    openTags--;
694
0
    aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
695
0
    aOutString.Append(tagTXT);
696
0
    aOutString.AppendLiteral("</span></");
697
0
    aOutString.AppendASCII(tagHTML);
698
0
    aOutString.Append(char16_t('>'));
699
0
    return true;
700
0
  }
701
0
702
0
  return false;
703
0
}
704
705
706
bool
707
mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
708
         const char* tagTXT, const char* imageName,
709
         nsString& outputHTML, int32_t& glyphTextLen)
710
0
{
711
0
  if ( !aInString || !tagTXT || !imageName )
712
0
      return false;
713
0
714
0
  int32_t tagLen = strlen(tagTXT);
715
0
716
0
  uint32_t delim = (col0 ? 0 : 1) + tagLen;
717
0
718
0
  if
719
0
    (
720
0
      (col0 || IsSpace(aInString[0]))
721
0
        &&
722
0
        (
723
0
          aLength <= int32_t(delim) ||
724
0
          IsSpace(aInString[delim]) ||
725
0
          (aLength > int32_t(delim + 1)
726
0
            &&
727
0
            (
728
0
              aInString[delim] == '.' ||
729
0
              aInString[delim] == ',' ||
730
0
              aInString[delim] == ';' ||
731
0
              aInString[delim] == '8' ||
732
0
              aInString[delim] == '>' ||
733
0
              aInString[delim] == '!' ||
734
0
              aInString[delim] == '?'
735
0
            )
736
0
            && IsSpace(aInString[delim + 1]))
737
0
        )
738
0
        && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
739
0
                              col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
740
0
          // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
741
0
    )
742
0
  {
743
0
    if (!col0)
744
0
    {
745
0
      outputHTML.Truncate();
746
0
      outputHTML.Append(char16_t(' '));
747
0
    }
748
0
749
0
    outputHTML.AppendLiteral("<span class=\""); // <span class="
750
0
    outputHTML.AppendASCII(imageName);          // e.g. smiley-frown
751
0
    outputHTML.AppendLiteral("\" title=\"");    // " title="
752
0
    outputHTML.AppendASCII(tagTXT);             // smiley tooltip
753
0
    outputHTML.AppendLiteral("\"><span>");      // "><span>
754
0
    outputHTML.AppendASCII(tagTXT);             // original text
755
0
    outputHTML.AppendLiteral("</span></span>"); // </span></span>
756
0
    glyphTextLen = (col0 ? 0 : 1) + tagLen;
757
0
    return true;
758
0
  }
759
0
760
0
  return false;
761
0
}
762
763
// the glyph is appended to aOutputString instead of the original string...
764
bool
765
mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
766
         nsString& aOutputString, int32_t& glyphTextLen)
767
0
{
768
0
  char16_t text0 = aInString[0];
769
0
  char16_t text1 = aInString[1];
770
0
  char16_t firstChar = (col0 ? text0 : text1);
771
0
772
0
  // temporary variable used to store the glyph html text
773
0
  nsAutoString outputHTML;
774
0
  bool bTestSmilie;
775
0
  bool bArg = false;
776
0
  int i;
777
0
778
0
  // refactor some of this mess to avoid code duplication and speed execution a bit
779
0
  // there are two cases that need to be tried one after another. To avoid a lot of
780
0
  // duplicate code, rolling into a loop
781
0
782
0
  i = 0;
783
0
  while ( i < 2 )
784
0
  {
785
0
    bTestSmilie = false;
786
0
    if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
787
0
    {
788
0
        // first test passed
789
0
790
0
        bTestSmilie = true;
791
0
        bArg = col0;
792
0
    }
793
0
    if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
794
0
    {
795
0
        // second test passed
796
0
797
0
        bTestSmilie = true;
798
0
        bArg = false;
799
0
    }
800
0
    if ( bTestSmilie && (
801
0
          SmilyHit(aInString, aInLength, bArg,
802
0
                   ":-)",
803
0
                   "moz-smiley-s1", // smile
804
0
                   outputHTML, glyphTextLen) ||
805
0
806
0
          SmilyHit(aInString, aInLength, bArg,
807
0
                   ":)",
808
0
                   "moz-smiley-s1", // smile
809
0
                   outputHTML, glyphTextLen) ||
810
0
811
0
          SmilyHit(aInString, aInLength, bArg,
812
0
                   ":-D",
813
0
                   "moz-smiley-s5", // laughing
814
0
                   outputHTML, glyphTextLen) ||
815
0
816
0
          SmilyHit(aInString, aInLength, bArg,
817
0
                   ":-(",
818
0
                   "moz-smiley-s2", // frown
819
0
                   outputHTML, glyphTextLen) ||
820
0
821
0
          SmilyHit(aInString, aInLength, bArg,
822
0
                   ":(",
823
0
                   "moz-smiley-s2", // frown
824
0
                   outputHTML, glyphTextLen) ||
825
0
826
0
          SmilyHit(aInString, aInLength, bArg,
827
0
                   ":-[",
828
0
                   "moz-smiley-s6", // embarassed
829
0
                   outputHTML, glyphTextLen) ||
830
0
831
0
          SmilyHit(aInString, aInLength, bArg,
832
0
                   ";-)",
833
0
                   "moz-smiley-s3", // wink
834
0
                   outputHTML, glyphTextLen) ||
835
0
836
0
          SmilyHit(aInString, aInLength, col0,
837
0
                   ";)",
838
0
                   "moz-smiley-s3", // wink
839
0
                   outputHTML, glyphTextLen) ||
840
0
841
0
          SmilyHit(aInString, aInLength, bArg,
842
0
                   ":-\\",
843
0
                   "moz-smiley-s7", // undecided
844
0
                   outputHTML, glyphTextLen) ||
845
0
846
0
          SmilyHit(aInString, aInLength, bArg,
847
0
                   ":-P",
848
0
                   "moz-smiley-s4", // tongue
849
0
                   outputHTML, glyphTextLen) ||
850
0
851
0
          SmilyHit(aInString, aInLength, bArg,
852
0
                   ";-P",
853
0
                   "moz-smiley-s4", // tongue
854
0
                   outputHTML, glyphTextLen) ||
855
0
856
0
          SmilyHit(aInString, aInLength, bArg,
857
0
                   "=-O",
858
0
                   "moz-smiley-s8", // surprise
859
0
                   outputHTML, glyphTextLen) ||
860
0
861
0
          SmilyHit(aInString, aInLength, bArg,
862
0
                   ":-*",
863
0
                   "moz-smiley-s9", // kiss
864
0
                   outputHTML, glyphTextLen) ||
865
0
866
0
          SmilyHit(aInString, aInLength, bArg,
867
0
                   ">:o",
868
0
                   "moz-smiley-s10", // yell
869
0
                   outputHTML, glyphTextLen) ||
870
0
871
0
          SmilyHit(aInString, aInLength, bArg,
872
0
                   ">:-o",
873
0
                   "moz-smiley-s10", // yell
874
0
                   outputHTML, glyphTextLen) ||
875
0
876
0
          SmilyHit(aInString, aInLength, bArg,
877
0
                   "8-)",
878
0
                   "moz-smiley-s11", // cool
879
0
                   outputHTML, glyphTextLen) ||
880
0
881
0
          SmilyHit(aInString, aInLength, bArg,
882
0
                   ":-$",
883
0
                   "moz-smiley-s12", // money
884
0
                   outputHTML, glyphTextLen) ||
885
0
886
0
          SmilyHit(aInString, aInLength, bArg,
887
0
                   ":-!",
888
0
                   "moz-smiley-s13", // foot
889
0
                   outputHTML, glyphTextLen) ||
890
0
891
0
          SmilyHit(aInString, aInLength, bArg,
892
0
                   "O:-)",
893
0
                   "moz-smiley-s14", // innocent
894
0
                   outputHTML, glyphTextLen) ||
895
0
896
0
          SmilyHit(aInString, aInLength, bArg,
897
0
                   ":'(",
898
0
                   "moz-smiley-s15", // cry
899
0
                   outputHTML, glyphTextLen) ||
900
0
901
0
          SmilyHit(aInString, aInLength, bArg,
902
0
                   ":-X",
903
0
                   "moz-smiley-s16", // sealed
904
0
                   outputHTML, glyphTextLen)
905
0
        )
906
0
    )
907
0
    {
908
0
        aOutputString.Append(outputHTML);
909
0
        return true;
910
0
    }
911
0
    i++;
912
0
  }
913
0
  if (text0 == '\f')
914
0
  {
915
0
      aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
916
0
      glyphTextLen = 1;
917
0
      return true;
918
0
  }
919
0
  if (text0 == '+' || text1 == '+')
920
0
  {
921
0
    if (ItMatchesDelimited(aInString, aInLength,
922
0
                           u" +/-", 4,
923
0
                           LT_IGNORE, LT_IGNORE))
924
0
    {
925
0
      aOutputString.AppendLiteral(" &plusmn;");
926
0
      glyphTextLen = 4;
927
0
      return true;
928
0
    }
929
0
    if (col0 && ItMatchesDelimited(aInString, aInLength,
930
0
                                   u"+/-", 3,
931
0
                                   LT_IGNORE, LT_IGNORE))
932
0
    {
933
0
      aOutputString.AppendLiteral("&plusmn;");
934
0
      glyphTextLen = 3;
935
0
      return true;
936
0
    }
937
0
  }
938
0
939
0
  // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
940
0
  // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
941
0
  if
942
0
    (
943
0
      text1 == '^'
944
0
      &&
945
0
      (
946
0
        IsAsciiDigit(text0) || IsAsciiAlpha(text0) ||
947
0
        text0 == ')' || text0 == ']' || text0 == '}'
948
0
      )
949
0
      &&
950
0
      (
951
0
        (2 < aInLength && IsAsciiDigit(aInString[2])) ||
952
0
        (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3]))
953
0
      )
954
0
    )
955
0
  {
956
0
    // Find first non-digit
957
0
    int32_t delimPos = 3;  // skip "^" and first digit (or '-')
958
0
    for (; delimPos < aInLength
959
0
           &&
960
0
           (
961
0
             IsAsciiDigit(aInString[delimPos]) ||
962
0
             (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
963
0
               IsAsciiDigit(aInString[delimPos + 1]))
964
0
           );
965
0
         delimPos++)
966
0
      ;
967
0
968
0
    if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos]))
969
0
    {
970
0
      return false;
971
0
    }
972
0
973
0
    outputHTML.Truncate();
974
0
    outputHTML += text0;
975
0
    outputHTML.AppendLiteral(
976
0
      "<sup class=\"moz-txt-sup\">"
977
0
      "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
978
0
      "^</span>");
979
0
980
0
    aOutputString.Append(outputHTML);
981
0
    aOutputString.Append(&aInString[2], delimPos - 2);
982
0
    aOutputString.AppendLiteral("</sup>");
983
0
984
0
    glyphTextLen = delimPos /* - 1 + 1 */ ;
985
0
    return true;
986
0
  }
987
0
  /*
988
0
   The following strings are not substituted:
989
0
   |TXT   |HTML     |Reason
990
0
   +------+---------+----------
991
0
    ->     &larr;    Bug #454
992
0
    =>     &lArr;    dito
993
0
    <-     &rarr;    dito
994
0
    <=     &rArr;    dito
995
0
    (tm)   &trade;   dito
996
0
    1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
997
0
    3/4    &frac34;  dito
998
0
    1/2    &frac12;  similar
999
0
  */
1000
0
  return false;
1001
0
}
1002
1003
/***************************************************************************
1004
  Library-internal Interface
1005
****************************************************************************/
1006
1007
// nsISupports implementation for mozTXTToHTMLConv, listing the four
// interfaces handed to the macro.
NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, mozITXTToHTMLConv, nsIStreamConverter,
                  nsIStreamListener, nsIRequestObserver)
1012
1013
int32_t
1014
mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
1015
            uint32_t& logLineStart)
1016
0
{
1017
0
  int32_t result = 0;
1018
0
  int32_t lineLength = NS_strlen(line);
1019
0
1020
0
  bool moreCites = true;
1021
0
  while (moreCites)
1022
0
  {
1023
0
    /* E.g. the following lines count as quote:
1024
0
1025
0
       > text
1026
0
       //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1027
0
       >text
1028
0
       //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1029
0
           > text
1030
0
       ] text
1031
0
       USER> text
1032
0
       USER] text
1033
0
       //#endif
1034
0
1035
0
       logLineStart is the position of "t" in this example
1036
0
    */
1037
0
    uint32_t i = logLineStart;
1038
0
1039
#ifdef QUOTE_RECOGNITION_AGGRESSIVE
1040
    for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
1041
      ;
1042
    for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i])
1043
                                   && nsCRT::IsUpper(line[i])   ; i++)
1044
      ;
1045
    if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
1046
#else
1047
0
    if (int32_t(i) < lineLength && line[i] == '>')
1048
0
#endif
1049
0
    {
1050
0
      i++;
1051
0
      if (int32_t(i) < lineLength && line[i] == ' ')
1052
0
        i++;
1053
0
      // sendmail/mbox
1054
0
      // Placed here for performance increase
1055
0
      const char16_t * indexString = &line[logLineStart];
1056
0
           // here, |logLineStart < lineLength| is always true
1057
0
      uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
1058
0
      if (Substring(indexString,
1059
0
                    indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
1060
0
                                                  nsCaseInsensitiveStringComparator()))
1061
0
        //XXX RFC2646
1062
0
        moreCites = false;
1063
0
      else
1064
0
      {
1065
0
        result++;
1066
0
        logLineStart = i;
1067
0
      }
1068
0
    }
1069
0
    else
1070
0
      moreCites = false;
1071
0
  }
1072
0
1073
0
  return result;
1074
0
}
1075
1076
void
1077
mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
1078
0
{
1079
0
  bool doURLs = 0 != (whattodo & kURLs);
1080
0
  bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
1081
0
  bool doStructPhrase = 0 != (whattodo & kStructPhrase);
1082
0
1083
0
  uint32_t structPhrase_strong = 0;  // Number of currently open tags
1084
0
  uint32_t structPhrase_underline = 0;
1085
0
  uint32_t structPhrase_italic = 0;
1086
0
  uint32_t structPhrase_code = 0;
1087
0
1088
0
  nsAutoString outputHTML;  // moved here for performance increase
1089
0
1090
0
  for(uint32_t i = 0; int32_t(i) < aInStringLength;)
1091
0
  {
1092
0
    if (doGlyphSubstitution)
1093
0
    {
1094
0
      int32_t glyphTextLen;
1095
0
      if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
1096
0
      {
1097
0
        i += glyphTextLen;
1098
0
        continue;
1099
0
      }
1100
0
    }
1101
0
1102
0
    if (doStructPhrase)
1103
0
    {
1104
0
      const char16_t * newOffset = aInString;
1105
0
      int32_t newLength = aInStringLength;
1106
0
      if (i > 0 ) // skip the first element?
1107
0
      {
1108
0
        newOffset = &aInString[i-1];
1109
0
        newLength = aInStringLength - i + 1;
1110
0
      }
1111
0
1112
0
      switch (aInString[i]) // Performance increase
1113
0
      {
1114
0
      case '*':
1115
0
        if (StructPhraseHit(newOffset, newLength, i == 0,
1116
0
                            u"*", 1,
1117
0
                            "b", "class=\"moz-txt-star\"",
1118
0
                            aOutString, structPhrase_strong))
1119
0
        {
1120
0
          i++;
1121
0
          continue;
1122
0
        }
1123
0
        break;
1124
0
      case '/':
1125
0
        if (StructPhraseHit(newOffset, newLength, i == 0,
1126
0
                            u"/", 1,
1127
0
                            "i", "class=\"moz-txt-slash\"",
1128
0
                            aOutString, structPhrase_italic))
1129
0
        {
1130
0
          i++;
1131
0
          continue;
1132
0
        }
1133
0
        break;
1134
0
      case '_':
1135
0
        if (StructPhraseHit(newOffset, newLength, i == 0,
1136
0
                            u"_", 1,
1137
0
                            "span" /* <u> is deprecated */,
1138
0
                            "class=\"moz-txt-underscore\"",
1139
0
                            aOutString, structPhrase_underline))
1140
0
        {
1141
0
          i++;
1142
0
          continue;
1143
0
        }
1144
0
        break;
1145
0
      case '|':
1146
0
        if (StructPhraseHit(newOffset, newLength, i == 0,
1147
0
                            u"|", 1,
1148
0
                            "code", "class=\"moz-txt-verticalline\"",
1149
0
                            aOutString, structPhrase_code))
1150
0
        {
1151
0
          i++;
1152
0
          continue;
1153
0
        }
1154
0
        break;
1155
0
      }
1156
0
    }
1157
0
1158
0
    if (doURLs)
1159
0
    {
1160
0
      switch (aInString[i])
1161
0
      {
1162
0
      case ':':
1163
0
      case '@':
1164
0
      case '.':
1165
0
        if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
1166
0
        {
1167
0
          int32_t replaceBefore;
1168
0
          int32_t replaceAfter;
1169
0
          if (FindURL(aInString, aInStringLength, i, whattodo,
1170
0
                      outputHTML, replaceBefore, replaceAfter)
1171
0
                  && structPhrase_strong + structPhrase_italic +
1172
0
                       structPhrase_underline + structPhrase_code == 0
1173
0
                       /* workaround for bug #19445 */ )
1174
0
          {
1175
0
            aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
1176
0
            aOutString += outputHTML;
1177
0
            i += replaceAfter + 1;
1178
0
            continue;
1179
0
          }
1180
0
        }
1181
0
        break;
1182
0
      } //switch
1183
0
    }
1184
0
1185
0
    switch (aInString[i])
1186
0
    {
1187
0
    // Special symbols
1188
0
    case '<':
1189
0
    case '>':
1190
0
    case '&':
1191
0
      EscapeChar(aInString[i], aOutString, false);
1192
0
      i++;
1193
0
      break;
1194
0
    // Normal characters
1195
0
    default:
1196
0
      aOutString += aInString[i];
1197
0
      i++;
1198
0
      break;
1199
0
    }
1200
0
  }
1201
0
}
1202
1203
void
1204
mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
1205
0
{
1206
0
  // some common variables we were recalculating
1207
0
  // every time inside the for loop...
1208
0
  int32_t lengthOfInString = aInString.Length();
1209
0
  const char16_t * uniBuffer = aInString.get();
1210
0
1211
#ifdef DEBUG_BenB_Perf
1212
  PRTime parsing_start = PR_IntervalNow();
1213
#endif
1214
1215
0
  // Look for simple entities not included in a tags and scan them.
1216
0
  // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
1217
0
  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
1218
0
  // Unescape the rest (text between tags) and pass it to ScanTXT.
1219
0
  nsAutoCString canFollow(" \f\n\r\t>");
1220
0
  for (int32_t i = 0; i < lengthOfInString;)
1221
0
  {
1222
0
    if (aInString[i] == '<')  // html tag
1223
0
    {
1224
0
      int32_t start = i;
1225
0
      if (i + 2 < lengthOfInString &&
1226
0
          nsCRT::ToLower(aInString[i + 1]) == 'a' &&
1227
0
          canFollow.FindChar(aInString[i + 2]) != kNotFound)
1228
0
           // if a tag, skip until </a>.
1229
0
           // Make sure there's a white-space character after, not to match "abbr".
1230
0
      {
1231
0
        i = aInString.Find("</a>", true, i);
1232
0
        if (i == kNotFound)
1233
0
          i = lengthOfInString;
1234
0
        else
1235
0
          i += 4;
1236
0
      }
1237
0
      else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
1238
0
          // if out-commended code, skip until -->
1239
0
      {
1240
0
        i = aInString.Find("-->", false, i);
1241
0
        if (i == kNotFound)
1242
0
          i = lengthOfInString;
1243
0
        else
1244
0
          i += 3;
1245
0
      }
1246
0
      else if (i + 6 < lengthOfInString &&
1247
0
      Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
1248
0
               canFollow.FindChar(aInString[i + 6]) != kNotFound)
1249
0
           // if style tag, skip until </style>
1250
0
      {
1251
0
        i = aInString.Find("</style>", true, i);
1252
0
        if (i == kNotFound)
1253
0
          i = lengthOfInString;
1254
0
        else
1255
0
          i += 8;
1256
0
      }
1257
0
      else if (i + 7 < lengthOfInString &&
1258
0
               Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
1259
0
               canFollow.FindChar(aInString[i + 7]) != kNotFound)
1260
0
           // if script tag, skip until </script>
1261
0
      {
1262
0
        i = aInString.Find("</script>", true, i);
1263
0
        if (i == kNotFound)
1264
0
          i = lengthOfInString;
1265
0
        else
1266
0
          i += 9;
1267
0
      }
1268
0
      else if (i + 5 < lengthOfInString &&
1269
0
               Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
1270
0
               canFollow.FindChar(aInString[i + 5]) != kNotFound)
1271
0
           // if head tag, skip until </head>
1272
0
           // Make sure not to match <header>.
1273
0
      {
1274
0
        i = aInString.Find("</head>", true, i);
1275
0
        if (i == kNotFound)
1276
0
          i = lengthOfInString;
1277
0
        else
1278
0
          i += 7;
1279
0
      }
1280
0
      else  // just skip tag (attributes etc.)
1281
0
      {
1282
0
        i = aInString.FindChar('>', i);
1283
0
        if (i == kNotFound)
1284
0
          i = lengthOfInString;
1285
0
        else
1286
0
          i++;
1287
0
      }
1288
0
      aOutString.Append(&uniBuffer[start], i - start);
1289
0
    }
1290
0
    else
1291
0
    {
1292
0
      uint32_t start = uint32_t(i);
1293
0
      i = aInString.FindChar('<', i);
1294
0
      if (i == kNotFound)
1295
0
        i = lengthOfInString;
1296
0
1297
0
      nsString tempString;
1298
0
      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
1299
0
      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
1300
0
      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
1301
0
    }
1302
0
  }
1303
0
1304
#ifdef DEBUG_BenB_Perf
1305
  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
1306
#endif
1307
}
1308
1309
/****************************************************************************
1310
  XPCOM Interface
1311
*****************************************************************************/
1312
1313
// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED. None of the
// arguments is looked at, and *_retval is not set.
NS_IMETHODIMP
mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream, const char *aFromType,
                          const char *aToType, nsISupports *aCtxt,
                          nsIInputStream **_retval)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
1321
1322
// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED. None of the
// arguments is looked at.
NS_IMETHODIMP
mozTXTToHTMLConv::AsyncConvertData(const char *aFromType, const char *aToType,
                                   nsIStreamListener *aListener,
                                   nsISupports *aCtxt)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
1328
1329
// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED. Nothing is read
// from inStr.
NS_IMETHODIMP
mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
                                  nsIInputStream *inStr,
                                  uint64_t sourceOffset, uint32_t count)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
1336
1337
// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED. None of the
// arguments is looked at.
NS_IMETHODIMP
mozTXTToHTMLConv::OnStartRequest(nsIRequest* request,
                                 nsISupports *ctxt)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
1342
1343
// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED. None of the
// arguments is looked at.
NS_IMETHODIMP
mozTXTToHTMLConv::OnStopRequest(nsIRequest* request,
                                nsISupports *ctxt,
                                nsresult aStatus)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
1349
1350
// Pointer-based variant of CiteLevelTXT(const char16_t*, uint32_t&).
//
// Returns NS_ERROR_NULL_POINTER if any of the three pointers is null.
// Otherwise the result of the other CiteLevelTXT is stored in *_retval,
// *logLineStart is updated by it, and NS_OK is returned.
NS_IMETHODIMP
mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
        uint32_t *_retval)
{
   const bool allArgsGiven = line && logLineStart && _retval;
   if (!allArgsGiven)
   {
     return NS_ERROR_NULL_POINTER;
   }

   uint32_t& lineStart = *logLineStart;
   *_retval = CiteLevelTXT(line, lineStart);
   return NS_OK;
}
1359
1360
// Converts the null-terminated plain text |text| to HTML, see
// ScanTXT(const char16_t*, int32_t, uint32_t, nsString&), and returns the
// result in *_retval as a newly allocated string.
//
// For an empty |text|, a copy of it is returned (after asserting in debug
// builds).
//
// Returns an error if |text| or |_retval| is null, and
// NS_ERROR_OUT_OF_MEMORY if the result string could not be allocated.
NS_IMETHODIMP
mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
         char16_t **_retval)
{
  NS_ENSURE_ARG(text);
  // _retval is written to below, so it must not be null either
  // (CiteLevelTXT checks its out parameter, too).
  NS_ENSURE_ARG_POINTER(_retval);

  // FIX ME!!!
  nsString outString;
  int32_t inLength = NS_strlen(text);
  NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
  if (inLength == 0) {
    *_retval = NS_xstrdup(text);
    return NS_OK;
  }

  // by setting a large capacity up front, we save time
  // when appending characters to the output string because we don't
  // need to reallocate and re-copy the characters already in the out String.
  outString.SetCapacity(uint32_t(inLength * growthRate));
  ScanTXT(text, inLength, whattodo, outString);

  *_retval = ToNewUnicode(outString);
  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
1384
1385
// Scans the null-terminated HTML source |text|, see
// ScanHTML(nsString&, uint32_t, nsString&), and returns the result in
// *_retval as a newly allocated string.
//
// Returns an error if |text| or |_retval| is null, and
// NS_ERROR_OUT_OF_MEMORY if the result string could not be allocated.
NS_IMETHODIMP
mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
          char16_t **_retval)
{
  NS_ENSURE_ARG(text);
  // _retval is written to below, so it must not be null either
  // (CiteLevelTXT checks its out parameter, too).
  NS_ENSURE_ARG_POINTER(_retval);

  // FIX ME!!!
  nsString outString;
  nsString inString (text); // look at this nasty extra copy of the entire input buffer!
  // Reserve some more than the input needs, so that appending to the output
  // does not have to reallocate so often.
  outString.SetCapacity(uint32_t(inString.Length() * growthRate));

  ScanHTML(inString, whattodo, outString);
  *_retval = ToNewUnicode(outString);
  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
1400
1401
// Creates a new mozTXTToHTMLConv and stores it, with one reference added,
// in *aConv.
//
// Returns NS_ERROR_NULL_POINTER if aConv is null (asserting in debug builds),
// NS_ERROR_OUT_OF_MEMORY if no object was created, and NS_OK otherwise.
nsresult
MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
{
    MOZ_ASSERT(aConv != nullptr, "null ptr");
    if (aConv == nullptr)
    {
      return NS_ERROR_NULL_POINTER;
    }

    mozTXTToHTMLConv* converter = new mozTXTToHTMLConv();
    *aConv = converter;
    if (converter == nullptr)
    {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    NS_ADDREF(converter);
    // There is no Init() call here:
    //    return (*aConv)->Init();
    return NS_OK;
}