/src/mozilla-central/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #include "mozilla/TextUtils.h" |
7 | | #include "mozTXTToHTMLConv.h" |
8 | | #include "nsNetUtil.h" |
9 | | #include "nsUnicharUtils.h" |
10 | | #include "nsCRT.h" |
11 | | #include "nsIExternalProtocolHandler.h" |
12 | | #include "nsIIOService.h" |
13 | | #include "nsIURI.h" |
14 | | |
15 | | #include <algorithm> |
16 | | |
17 | | #ifdef DEBUG_BenB_Perf |
18 | | #include "prtime.h" |
19 | | #include "prinrval.h" |
20 | | #endif |
21 | | |
22 | | using mozilla::IsAsciiAlpha; |
23 | | using mozilla::IsAsciiDigit; |
24 | | |
25 | | const double growthRate = 1.2; |
26 | | |
27 | | // Bug 183111, editor now replaces multiple spaces with leading |
28 | | // 0xA0's and a single ending space, so need to treat 0xA0's as spaces. |
29 | | // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)" |
30 | | // Also recognize the Japanese ideographic space 0x3000 as a space. |
31 | | static inline bool IsSpace(const char16_t aChar) |
32 | 0 | { |
33 | 0 | return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); |
34 | 0 | } |
35 | | |
36 | | // Escape Char will take ch, escape it and append the result to |
37 | | // aStringToAppendTo |
38 | | void |
39 | | mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo, |
40 | | bool inAttribute) |
41 | 0 | { |
42 | 0 | switch (ch) |
43 | 0 | { |
44 | 0 | case '<': |
45 | 0 | aStringToAppendTo.AppendLiteral("<"); |
46 | 0 | break; |
47 | 0 | case '>': |
48 | 0 | aStringToAppendTo.AppendLiteral(">"); |
49 | 0 | break; |
50 | 0 | case '&': |
51 | 0 | aStringToAppendTo.AppendLiteral("&"); |
52 | 0 | break; |
53 | 0 | case '"': |
54 | 0 | if (inAttribute) |
55 | 0 | { |
56 | 0 | aStringToAppendTo.AppendLiteral("""); |
57 | 0 | break; |
58 | 0 | } |
59 | 0 | // else fall through |
60 | 0 | MOZ_FALLTHROUGH; |
61 | 0 | default: |
62 | 0 | aStringToAppendTo += ch; |
63 | 0 | } |
64 | 0 | } |
65 | | |
66 | | // EscapeStr takes the passed in string and |
67 | | // escapes it IN PLACE. |
68 | | void |
69 | | mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) |
70 | 0 | { |
71 | 0 | // the replace substring routines |
72 | 0 | // don't seem to work if you have a character |
73 | 0 | // in the in string that is also in the replacement |
74 | 0 | // string! =( |
75 | 0 | //aInString.ReplaceSubstring("&", "&"); |
76 | 0 | //aInString.ReplaceSubstring("<", "<"); |
77 | 0 | //aInString.ReplaceSubstring(">", ">"); |
78 | 0 | for (uint32_t i = 0; i < aInString.Length();) |
79 | 0 | { |
80 | 0 | switch (aInString[i]) |
81 | 0 | { |
82 | 0 | case '<': |
83 | 0 | aInString.Cut(i, 1); |
84 | 0 | aInString.InsertLiteral(u"<", i); |
85 | 0 | i += 4; // skip past the integers we just added |
86 | 0 | break; |
87 | 0 | case '>': |
88 | 0 | aInString.Cut(i, 1); |
89 | 0 | aInString.InsertLiteral(u">", i); |
90 | 0 | i += 4; // skip past the integers we just added |
91 | 0 | break; |
92 | 0 | case '&': |
93 | 0 | aInString.Cut(i, 1); |
94 | 0 | aInString.InsertLiteral(u"&", i); |
95 | 0 | i += 5; // skip past the integers we just added |
96 | 0 | break; |
97 | 0 | case '"': |
98 | 0 | if (inAttribute) |
99 | 0 | { |
100 | 0 | aInString.Cut(i, 1); |
101 | 0 | aInString.InsertLiteral(u""", i); |
102 | 0 | i += 6; |
103 | 0 | break; |
104 | 0 | } |
105 | 0 | // else fall through |
106 | 0 | MOZ_FALLTHROUGH; |
107 | 0 | default: |
108 | 0 | i++; |
109 | 0 | } |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | | void |
114 | | mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString) |
115 | 0 | { |
116 | 0 | const char16_t * subString = nullptr; |
117 | 0 | for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) |
118 | 0 | { |
119 | 0 | int32_t remainingChars = i - aStartPos; |
120 | 0 | if (aInString[i] == '&') |
121 | 0 | { |
122 | 0 | subString = &aInString[i]; |
123 | 0 | if (!NS_strncmp(subString, u"<", std::min(4, aLength - remainingChars))) |
124 | 0 | { |
125 | 0 | aOutString.Append(char16_t('<')); |
126 | 0 | i += 4; |
127 | 0 | } |
128 | 0 | else if (!NS_strncmp(subString, u">", std::min(4, aLength - remainingChars))) |
129 | 0 | { |
130 | 0 | aOutString.Append(char16_t('>')); |
131 | 0 | i += 4; |
132 | 0 | } |
133 | 0 | else if (!NS_strncmp(subString, u"&", std::min(5, aLength - remainingChars))) |
134 | 0 | { |
135 | 0 | aOutString.Append(char16_t('&')); |
136 | 0 | i += 5; |
137 | 0 | } |
138 | 0 | else if (!NS_strncmp(subString, u""", std::min(6, aLength - remainingChars))) |
139 | 0 | { |
140 | 0 | aOutString.Append(char16_t('"')); |
141 | 0 | i += 6; |
142 | 0 | } |
143 | 0 | else |
144 | 0 | { |
145 | 0 | aOutString += aInString[i]; |
146 | 0 | i++; |
147 | 0 | } |
148 | 0 | } |
149 | 0 | else |
150 | 0 | { |
151 | 0 | aOutString += aInString[i]; |
152 | 0 | i++; |
153 | 0 | } |
154 | 0 | } |
155 | 0 | } |
156 | | |
157 | | void |
158 | | mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, |
159 | | const uint32_t pos, nsString& aOutString) |
160 | 0 | { |
161 | 0 | NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851"); |
162 | 0 | if (int32_t(pos) >= aInLength) |
163 | 0 | return; |
164 | 0 | |
165 | 0 | if (aInString[pos] == '@') |
166 | 0 | { |
167 | 0 | // only pre-pend a mailto url if the string contains a .domain in it.. |
168 | 0 | //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm" |
169 | 0 | nsDependentString inString(aInString, aInLength); |
170 | 0 | if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign.... |
171 | 0 | { |
172 | 0 | aOutString.AssignLiteral("mailto:"); |
173 | 0 | aOutString += aInString; |
174 | 0 | } |
175 | 0 | } |
176 | 0 | else if (aInString[pos] == '.') |
177 | 0 | { |
178 | 0 | if (ItMatchesDelimited(aInString, aInLength, |
179 | 0 | u"www.", 4, LT_IGNORE, LT_IGNORE)) |
180 | 0 | { |
181 | 0 | aOutString.AssignLiteral("http://"); |
182 | 0 | aOutString += aInString; |
183 | 0 | } |
184 | 0 | else if (ItMatchesDelimited(aInString,aInLength, u"ftp.", 4, LT_IGNORE, LT_IGNORE)) |
185 | 0 | { |
186 | 0 | aOutString.AssignLiteral("ftp://"); |
187 | 0 | aOutString += aInString; |
188 | 0 | } |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | bool |
193 | | mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength, |
194 | | const uint32_t pos, const modetype check, |
195 | | uint32_t& start) |
196 | 0 | { |
197 | 0 | switch(check) |
198 | 0 | { // no breaks, because end of blocks is never reached |
199 | 0 | case RFC1738: |
200 | 0 | { |
201 | 0 | if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5)) |
202 | 0 | { |
203 | 0 | start = pos + 1; |
204 | 0 | return true; |
205 | 0 | } |
206 | 0 | return false; |
207 | 0 | } |
208 | 0 | case RFC2396E: |
209 | 0 | { |
210 | 0 | nsString temp(aInString, aInLength); |
211 | 0 | int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1); |
212 | 0 | if (i != kNotFound && (temp[uint32_t(i)] == '<' || |
213 | 0 | temp[uint32_t(i)] == '"')) |
214 | 0 | { |
215 | 0 | start = uint32_t(++i); |
216 | 0 | return start < pos; |
217 | 0 | } |
218 | 0 | return false; |
219 | 0 | } |
220 | 0 | case freetext: |
221 | 0 | { |
222 | 0 | int32_t i = pos - 1; |
223 | 0 | for (; i >= 0 && ( |
224 | 0 | IsAsciiAlpha(aInString[uint32_t(i)]) || |
225 | 0 | IsAsciiDigit(aInString[uint32_t(i)]) || |
226 | 0 | aInString[uint32_t(i)] == '+' || |
227 | 0 | aInString[uint32_t(i)] == '-' || |
228 | 0 | aInString[uint32_t(i)] == '.' |
229 | 0 | ); i--) |
230 | 0 | ; |
231 | 0 | if (++i >= 0 && uint32_t(i) < pos && IsAsciiAlpha(aInString[uint32_t(i)])) |
232 | 0 | { |
233 | 0 | start = uint32_t(i); |
234 | 0 | return true; |
235 | 0 | } |
236 | 0 | return false; |
237 | 0 | } |
238 | 0 | case abbreviated: |
239 | 0 | { |
240 | 0 | int32_t i = pos - 1; |
241 | 0 | // This disallows non-ascii-characters for email. |
242 | 0 | // Currently correct, but revisit later after standards changed. |
243 | 0 | bool isEmail = aInString[pos] == (char16_t)'@'; |
244 | 0 | // These chars mark the start of the URL |
245 | 0 | for (; i >= 0 |
246 | 0 | && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<' |
247 | 0 | && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\'' |
248 | 0 | && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ',' |
249 | 0 | && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '[' |
250 | 0 | && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|' |
251 | 0 | && aInString[uint32_t(i)] != '\\' |
252 | 0 | && !IsSpace(aInString[uint32_t(i)]) |
253 | 0 | && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)])) |
254 | 0 | ; i--) |
255 | 0 | ; |
256 | 0 | if |
257 | 0 | ( |
258 | 0 | ++i >= 0 && uint32_t(i) < pos |
259 | 0 | && |
260 | 0 | ( |
261 | 0 | IsAsciiAlpha(aInString[uint32_t(i)]) || |
262 | 0 | IsAsciiDigit(aInString[uint32_t(i)]) |
263 | 0 | ) |
264 | 0 | ) |
265 | 0 | { |
266 | 0 | start = uint32_t(i); |
267 | 0 | return true; |
268 | 0 | } |
269 | 0 | return false; |
270 | 0 | } |
271 | 0 | default: |
272 | 0 | return false; |
273 | 0 | } //switch |
274 | 0 | } |
275 | | |
276 | | bool |
277 | | mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, |
278 | | const modetype check, const uint32_t start, uint32_t& end) |
279 | 0 | { |
280 | 0 | switch(check) |
281 | 0 | { // no breaks, because end of blocks is never reached |
282 | 0 | case RFC1738: |
283 | 0 | case RFC2396E: |
284 | 0 | { |
285 | 0 | nsString temp(aInString, aInStringLength); |
286 | 0 |
|
287 | 0 | int32_t i = temp.FindCharInSet(u"<>\"", pos + 1); |
288 | 0 | if (i != kNotFound && temp[uint32_t(i--)] == |
289 | 0 | (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) |
290 | 0 | { |
291 | 0 | end = uint32_t(i); |
292 | 0 | return end > pos; |
293 | 0 | } |
294 | 0 | return false; |
295 | 0 | } |
296 | 0 | case freetext: |
297 | 0 | case abbreviated: |
298 | 0 | { |
299 | 0 | uint32_t i = pos + 1; |
300 | 0 | bool isEmail = aInString[pos] == (char16_t)'@'; |
301 | 0 | bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL |
302 | 0 | bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL |
303 | 0 | for (; int32_t(i) < aInStringLength; i++) |
304 | 0 | { |
305 | 0 | // These chars mark the end of the URL |
306 | 0 | if (aInString[i] == '>' || aInString[i] == '<' || |
307 | 0 | aInString[i] == '"' || aInString[i] == '`' || |
308 | 0 | aInString[i] == '}' || aInString[i] == '{' || |
309 | 0 | (aInString[i] == ')' && !seenOpeningParenthesis) || |
310 | 0 | (aInString[i] == ']' && !seenOpeningSquareBracket) || |
311 | 0 | // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo. |
312 | 0 | (aInString[i] == '[' && i > 2 && |
313 | 0 | (aInString[i - 1] != '/' || aInString[i - 2] != '/')) || |
314 | 0 | IsSpace(aInString[i])) |
315 | 0 | break; |
316 | 0 | // Disallow non-ascii-characters for email. |
317 | 0 | // Currently correct, but revisit later after standards changed. |
318 | 0 | if (isEmail && ( |
319 | 0 | aInString[i] == '(' || aInString[i] == '\'' || |
320 | 0 | !nsCRT::IsAscii(aInString[i]))) |
321 | 0 | break; |
322 | 0 | if (aInString[i] == '(') |
323 | 0 | seenOpeningParenthesis = true; |
324 | 0 | if (aInString[i] == '[') |
325 | 0 | seenOpeningSquareBracket = true; |
326 | 0 | } |
327 | 0 | // These chars are allowed in the middle of the URL, but not at end. |
328 | 0 | // Technically they are, but are used in normal text after the URL. |
329 | 0 | while (--i > pos && ( |
330 | 0 | aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' || |
331 | 0 | aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' || |
332 | 0 | aInString[i] == ':' || aInString[i] == '\'' |
333 | 0 | )) |
334 | 0 | ; |
335 | 0 | if (i > pos) |
336 | 0 | { |
337 | 0 | end = i; |
338 | 0 | return true; |
339 | 0 | } |
340 | 0 | return false; |
341 | 0 | } |
342 | 0 | default: |
343 | 0 | return false; |
344 | 0 | } //switch |
345 | 0 | } |
346 | | |
347 | | void |
348 | | mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, |
349 | | const uint32_t pos, const uint32_t whathasbeendone, |
350 | | const modetype check, const uint32_t start, const uint32_t end, |
351 | | nsString& txtURL, nsString& desc, |
352 | | int32_t& replaceBefore, int32_t& replaceAfter) |
353 | 0 | { |
354 | 0 | uint32_t descstart = start; |
355 | 0 | switch(check) |
356 | 0 | { |
357 | 0 | case RFC1738: |
358 | 0 | { |
359 | 0 | descstart = start - 5; |
360 | 0 | desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">" |
361 | 0 | replaceAfter = end - pos + 1; |
362 | 0 | } break; |
363 | 0 | case RFC2396E: |
364 | 0 | { |
365 | 0 | descstart = start - 1; |
366 | 0 | desc.Append(&aInString[descstart], end - descstart + 2); // include brackets |
367 | 0 | replaceAfter = end - pos + 1; |
368 | 0 | } break; |
369 | 0 | case freetext: |
370 | 0 | case abbreviated: |
371 | 0 | { |
372 | 0 | descstart = start; |
373 | 0 | desc.Append(&aInString[descstart], end - start + 1); // don't include brackets |
374 | 0 | replaceAfter = end - pos; |
375 | 0 | } break; |
376 | 0 | default: break; |
377 | 0 | } //switch |
378 | 0 | |
379 | 0 | EscapeStr(desc, false); |
380 | 0 |
|
381 | 0 | txtURL.Append(&aInString[start], end - start + 1); |
382 | 0 | txtURL.StripWhitespace(); |
383 | 0 |
|
384 | 0 | // FIX ME |
385 | 0 | nsAutoString temp2; |
386 | 0 | ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2); |
387 | 0 | replaceBefore = temp2.Length(); |
388 | 0 | } |
389 | | |
390 | | bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) |
391 | 0 | { |
392 | 0 | if (!mIOService) |
393 | 0 | return false; |
394 | 0 | |
395 | 0 | nsAutoCString scheme; |
396 | 0 | nsresult rv = mIOService->ExtractScheme(aURL, scheme); |
397 | 0 | if(NS_FAILED(rv)) |
398 | 0 | return false; |
399 | 0 | |
400 | 0 | // Get the handler for this scheme. |
401 | 0 | nsCOMPtr<nsIProtocolHandler> handler; |
402 | 0 | rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler)); |
403 | 0 | if(NS_FAILED(rv)) |
404 | 0 | return false; |
405 | 0 | |
406 | 0 | // Is it an external protocol handler? If not, linkify it. |
407 | 0 | nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler); |
408 | 0 | if (!externalHandler) |
409 | 0 | return true; // handler is built-in, linkify it! |
410 | 0 | |
411 | 0 | // If external app exists for the scheme then linkify it. |
412 | 0 | bool exists; |
413 | 0 | rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); |
414 | 0 | return(NS_SUCCEEDED(rv) && exists); |
415 | 0 | } |
416 | | |
417 | | bool |
418 | | mozTXTToHTMLConv::CheckURLAndCreateHTML( |
419 | | const nsString& txtURL, const nsString& desc, const modetype mode, |
420 | | nsString& outputHTML) |
421 | 0 | { |
422 | 0 | // Create *uri from txtURL |
423 | 0 | nsCOMPtr<nsIURI> uri; |
424 | 0 | nsresult rv; |
425 | 0 | // Lazily initialize mIOService |
426 | 0 | if (!mIOService) |
427 | 0 | { |
428 | 0 | mIOService = do_GetIOService(); |
429 | 0 |
|
430 | 0 | if (!mIOService) |
431 | 0 | return false; |
432 | 0 | } |
433 | 0 | |
434 | 0 | // See if the url should be linkified. |
435 | 0 | NS_ConvertUTF16toUTF8 utf8URL(txtURL); |
436 | 0 | if (!ShouldLinkify(utf8URL)) |
437 | 0 | return false; |
438 | 0 | |
439 | 0 | // it would be faster if we could just check to see if there is a protocol |
440 | 0 | // handler for the url and return instead of actually trying to create a url... |
441 | 0 | rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); |
442 | 0 |
|
443 | 0 | // Real work |
444 | 0 | if (NS_SUCCEEDED(rv) && uri) |
445 | 0 | { |
446 | 0 | outputHTML.AssignLiteral("<a class=\"moz-txt-link-"); |
447 | 0 | switch(mode) |
448 | 0 | { |
449 | 0 | case RFC1738: |
450 | 0 | outputHTML.AppendLiteral("rfc1738"); |
451 | 0 | break; |
452 | 0 | case RFC2396E: |
453 | 0 | outputHTML.AppendLiteral("rfc2396E"); |
454 | 0 | break; |
455 | 0 | case freetext: |
456 | 0 | outputHTML.AppendLiteral("freetext"); |
457 | 0 | break; |
458 | 0 | case abbreviated: |
459 | 0 | outputHTML.AppendLiteral("abbreviated"); |
460 | 0 | break; |
461 | 0 | default: break; |
462 | 0 | } |
463 | 0 | nsAutoString escapedURL(txtURL); |
464 | 0 | EscapeStr(escapedURL, true); |
465 | 0 |
|
466 | 0 | outputHTML.AppendLiteral("\" href=\""); |
467 | 0 | outputHTML += escapedURL; |
468 | 0 | outputHTML.AppendLiteral("\">"); |
469 | 0 | outputHTML += desc; |
470 | 0 | outputHTML.AppendLiteral("</a>"); |
471 | 0 | return true; |
472 | 0 | } |
473 | 0 | return false; |
474 | 0 | } |
475 | | |
476 | | NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos) |
477 | 0 | { |
478 | 0 | // call FindURL on the passed in string |
479 | 0 | nsAutoString outputHTML; // we'll ignore the generated output HTML |
480 | 0 |
|
481 | 0 | *aStartPos = -1; |
482 | 0 | *aEndPos = -1; |
483 | 0 |
|
484 | 0 | FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); |
485 | 0 |
|
486 | 0 | return NS_OK; |
487 | 0 | } |
488 | | |
489 | | bool |
490 | | mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
491 | | const uint32_t whathasbeendone, |
492 | | nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter) |
493 | 0 | { |
494 | 0 | enum statetype {unchecked, invalid, startok, endok, success}; |
495 | 0 | static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; |
496 | 0 |
|
497 | 0 | statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode |
498 | 0 | /* I don't like this abuse of enums as index for the array, |
499 | 0 | but I don't know a better method */ |
500 | 0 |
|
501 | 0 | // Define, which modes to check |
502 | 0 | /* all modes but abbreviated are checked for text[pos] == ':', |
503 | 0 | only abbreviated for '.', RFC2396E and abbreviated for '@' */ |
504 | 0 | for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; |
505 | 0 | iState = modetype(iState + 1)) |
506 | 0 | state[iState] = aInString[pos] == ':' ? unchecked : invalid; |
507 | 0 | switch (aInString[pos]) |
508 | 0 | { |
509 | 0 | case '@': |
510 | 0 | state[RFC2396E] = unchecked; |
511 | 0 | MOZ_FALLTHROUGH; |
512 | 0 | case '.': |
513 | 0 | state[abbreviated] = unchecked; |
514 | 0 | break; |
515 | 0 | case ':': |
516 | 0 | state[abbreviated] = invalid; |
517 | 0 | break; |
518 | 0 | default: |
519 | 0 | break; |
520 | 0 | } |
521 | 0 | |
522 | 0 | // Test, first successful mode wins, sequence defined by |ranking| |
523 | 0 | int32_t iCheck = 0; // the currently tested modetype |
524 | 0 | modetype check = ranking[iCheck]; |
525 | 0 | for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; |
526 | 0 | iCheck++) |
527 | 0 | /* check state from last run. |
528 | 0 | If this is the first, check this one, which isn't = success yet */ |
529 | 0 | { |
530 | 0 | check = ranking[iCheck]; |
531 | 0 |
|
532 | 0 | uint32_t start, end; |
533 | 0 |
|
534 | 0 | if (state[check] == unchecked) |
535 | 0 | if (FindURLStart(aInString, aInLength, pos, check, start)) |
536 | 0 | state[check] = startok; |
537 | 0 |
|
538 | 0 | if (state[check] == startok) |
539 | 0 | if (FindURLEnd(aInString, aInLength, pos, check, start, end)) |
540 | 0 | state[check] = endok; |
541 | 0 |
|
542 | 0 | if (state[check] == endok) |
543 | 0 | { |
544 | 0 | nsAutoString txtURL, desc; |
545 | 0 | int32_t resultReplaceBefore, resultReplaceAfter; |
546 | 0 |
|
547 | 0 | CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end, |
548 | 0 | txtURL, desc, |
549 | 0 | resultReplaceBefore, resultReplaceAfter); |
550 | 0 |
|
551 | 0 | if (aInString[pos] != ':') |
552 | 0 | { |
553 | 0 | nsAutoString temp = txtURL; |
554 | 0 | txtURL.SetLength(0); |
555 | 0 | CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL); |
556 | 0 | } |
557 | 0 |
|
558 | 0 | if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check, |
559 | 0 | outputHTML)) |
560 | 0 | { |
561 | 0 | replaceBefore = resultReplaceBefore; |
562 | 0 | replaceAfter = resultReplaceAfter; |
563 | 0 | state[check] = success; |
564 | 0 | } |
565 | 0 | } // if |
566 | 0 | } // for |
567 | 0 | return state[check] == success; |
568 | 0 | } |
569 | | |
570 | | bool |
571 | | mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString, |
572 | | int32_t aInLength, const char16_t* rep, int32_t aRepLen, |
573 | | LIMTYPE before, LIMTYPE after) |
574 | 0 | { |
575 | 0 |
|
576 | 0 | // this little method gets called a LOT. I found we were spending a |
577 | 0 | // lot of time just calculating the length of the variable "rep" |
578 | 0 | // over and over again every time we called it. So we're now passing |
579 | 0 | // an integer in here. |
580 | 0 | int32_t textLen = aInLength; |
581 | 0 |
|
582 | 0 | if |
583 | 0 | ( |
584 | 0 | ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) |
585 | 0 | && textLen < aRepLen) || |
586 | 0 | ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) |
587 | 0 | && textLen < aRepLen + 1) || |
588 | 0 | (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER |
589 | 0 | && textLen < aRepLen + 2) |
590 | 0 | ) |
591 | 0 | return false; |
592 | 0 | |
593 | 0 | char16_t text0 = aInString[0]; |
594 | 0 | char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)]; |
595 | 0 |
|
596 | 0 | if |
597 | 0 | ( |
598 | 0 | (before == LT_ALPHA |
599 | 0 | && !IsAsciiAlpha(text0)) || |
600 | 0 | (before == LT_DIGIT |
601 | 0 | && !IsAsciiDigit(text0)) || |
602 | 0 | (before == LT_DELIMITER |
603 | 0 | && |
604 | 0 | ( |
605 | 0 | IsAsciiAlpha(text0) || |
606 | 0 | IsAsciiDigit(text0) || |
607 | 0 | text0 == *rep |
608 | 0 | )) || |
609 | 0 | (after == LT_ALPHA |
610 | 0 | && !IsAsciiAlpha(textAfterPos)) || |
611 | 0 | (after == LT_DIGIT |
612 | 0 | && !IsAsciiDigit(textAfterPos)) || |
613 | 0 | (after == LT_DELIMITER |
614 | 0 | && |
615 | 0 | ( |
616 | 0 | IsAsciiAlpha(textAfterPos) || |
617 | 0 | IsAsciiDigit(textAfterPos) || |
618 | 0 | textAfterPos == *rep |
619 | 0 | )) || |
620 | 0 | !Substring(Substring(aInString, aInString+aInLength), |
621 | 0 | (before == LT_IGNORE ? 0 : 1), |
622 | 0 | aRepLen).Equals(Substring(rep, rep+aRepLen), |
623 | 0 | nsCaseInsensitiveStringComparator()) |
624 | 0 | ) |
625 | 0 | return false; |
626 | 0 | |
627 | 0 | return true; |
628 | 0 | } |
629 | | |
630 | | uint32_t |
631 | | mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, |
632 | | const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after) |
633 | 0 | { |
634 | 0 | uint32_t result = 0; |
635 | 0 |
|
636 | 0 | for (int32_t i = 0; i < aInStringLength; i++) |
637 | 0 | { |
638 | 0 | const char16_t * indexIntoString = &aInString[i]; |
639 | 0 | if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after)) |
640 | 0 | result++; |
641 | 0 | } |
642 | 0 | return result; |
643 | 0 | } |
644 | | |
645 | | |
646 | | // NOTE: the converted html for the phrase is appended to aOutString |
647 | | // tagHTML and attributeHTML are plain ASCII (literal strings, in fact) |
648 | | bool |
649 | | mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, |
650 | | const char16_t* tagTXT, int32_t aTagTXTLen, |
651 | | const char* tagHTML, const char* attributeHTML, |
652 | | nsString& aOutString, uint32_t& openTags) |
653 | 0 | { |
654 | 0 | /* We're searching for the following pattern: |
655 | 0 | LT_DELIMITER - "*" - ALPHA - |
656 | 0 | [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. |
657 | 0 | <strong> is only inserted, if existence of a pair could be verified |
658 | 0 | We use the first opening/closing tag, if we can choose */ |
659 | 0 |
|
660 | 0 | const char16_t * newOffset = aInString; |
661 | 0 | int32_t newLength = aInStringLength; |
662 | 0 | if (!col0) // skip the first element? |
663 | 0 | { |
664 | 0 | newOffset = &aInString[1]; |
665 | 0 | newLength = aInStringLength - 1; |
666 | 0 | } |
667 | 0 |
|
668 | 0 | // opening tag |
669 | 0 | if |
670 | 0 | ( |
671 | 0 | ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, |
672 | 0 | (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag |
673 | 0 | && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, |
674 | 0 | LT_ALPHA, LT_DELIMITER) // remaining closing tags |
675 | 0 | > openTags |
676 | 0 | ) |
677 | 0 | { |
678 | 0 | openTags++; |
679 | 0 | aOutString.Append('<'); |
680 | 0 | aOutString.AppendASCII(tagHTML); |
681 | 0 | aOutString.Append(char16_t(' ')); |
682 | 0 | aOutString.AppendASCII(attributeHTML); |
683 | 0 | aOutString.AppendLiteral("><span class=\"moz-txt-tag\">"); |
684 | 0 | aOutString.Append(tagTXT); |
685 | 0 | aOutString.AppendLiteral("</span>"); |
686 | 0 | return true; |
687 | 0 | } |
688 | 0 | |
689 | 0 | // closing tag |
690 | 0 | else if (openTags > 0 |
691 | 0 | && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER)) |
692 | 0 | { |
693 | 0 | openTags--; |
694 | 0 | aOutString.AppendLiteral("<span class=\"moz-txt-tag\">"); |
695 | 0 | aOutString.Append(tagTXT); |
696 | 0 | aOutString.AppendLiteral("</span></"); |
697 | 0 | aOutString.AppendASCII(tagHTML); |
698 | 0 | aOutString.Append(char16_t('>')); |
699 | 0 | return true; |
700 | 0 | } |
701 | 0 | |
702 | 0 | return false; |
703 | 0 | } |
704 | | |
705 | | |
706 | | bool |
707 | | mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, |
708 | | const char* tagTXT, const char* imageName, |
709 | | nsString& outputHTML, int32_t& glyphTextLen) |
710 | 0 | { |
711 | 0 | if ( !aInString || !tagTXT || !imageName ) |
712 | 0 | return false; |
713 | 0 | |
714 | 0 | int32_t tagLen = strlen(tagTXT); |
715 | 0 |
|
716 | 0 | uint32_t delim = (col0 ? 0 : 1) + tagLen; |
717 | 0 |
|
718 | 0 | if |
719 | 0 | ( |
720 | 0 | (col0 || IsSpace(aInString[0])) |
721 | 0 | && |
722 | 0 | ( |
723 | 0 | aLength <= int32_t(delim) || |
724 | 0 | IsSpace(aInString[delim]) || |
725 | 0 | (aLength > int32_t(delim + 1) |
726 | 0 | && |
727 | 0 | ( |
728 | 0 | aInString[delim] == '.' || |
729 | 0 | aInString[delim] == ',' || |
730 | 0 | aInString[delim] == ';' || |
731 | 0 | aInString[delim] == '8' || |
732 | 0 | aInString[delim] == '>' || |
733 | 0 | aInString[delim] == '!' || |
734 | 0 | aInString[delim] == '?' |
735 | 0 | ) |
736 | 0 | && IsSpace(aInString[delim + 1])) |
737 | 0 | ) |
738 | 0 | && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, |
739 | 0 | col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) |
740 | 0 | // Note: tests at different pos for LT_IGNORE and LT_DELIMITER |
741 | 0 | ) |
742 | 0 | { |
743 | 0 | if (!col0) |
744 | 0 | { |
745 | 0 | outputHTML.Truncate(); |
746 | 0 | outputHTML.Append(char16_t(' ')); |
747 | 0 | } |
748 | 0 |
|
749 | 0 | outputHTML.AppendLiteral("<span class=\""); // <span class=" |
750 | 0 | outputHTML.AppendASCII(imageName); // e.g. smiley-frown |
751 | 0 | outputHTML.AppendLiteral("\" title=\""); // " title=" |
752 | 0 | outputHTML.AppendASCII(tagTXT); // smiley tooltip |
753 | 0 | outputHTML.AppendLiteral("\"><span>"); // "><span> |
754 | 0 | outputHTML.AppendASCII(tagTXT); // original text |
755 | 0 | outputHTML.AppendLiteral("</span></span>"); // </span></span> |
756 | 0 | glyphTextLen = (col0 ? 0 : 1) + tagLen; |
757 | 0 | return true; |
758 | 0 | } |
759 | 0 |
|
760 | 0 | return false; |
761 | 0 | } |
762 | | |
763 | | // the glyph is appended to aOutputString instead of the original string... |
764 | | bool |
765 | | mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, |
766 | | nsString& aOutputString, int32_t& glyphTextLen) |
767 | 0 | { |
768 | 0 | char16_t text0 = aInString[0]; |
769 | 0 | char16_t text1 = aInString[1]; |
770 | 0 | char16_t firstChar = (col0 ? text0 : text1); |
771 | 0 |
|
772 | 0 | // temporary variable used to store the glyph html text |
773 | 0 | nsAutoString outputHTML; |
774 | 0 | bool bTestSmilie; |
775 | 0 | bool bArg = false; |
776 | 0 | int i; |
777 | 0 |
|
778 | 0 | // refactor some of this mess to avoid code duplication and speed execution a bit |
779 | 0 | // there are two cases that need to be tried one after another. To avoid a lot of |
780 | 0 | // duplicate code, rolling into a loop |
781 | 0 |
|
782 | 0 | i = 0; |
783 | 0 | while ( i < 2 ) |
784 | 0 | { |
785 | 0 | bTestSmilie = false; |
786 | 0 | if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O')) |
787 | 0 | { |
788 | 0 | // first test passed |
789 | 0 |
|
790 | 0 | bTestSmilie = true; |
791 | 0 | bArg = col0; |
792 | 0 | } |
793 | 0 | if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) ) |
794 | 0 | { |
795 | 0 | // second test passed |
796 | 0 |
|
797 | 0 | bTestSmilie = true; |
798 | 0 | bArg = false; |
799 | 0 | } |
800 | 0 | if ( bTestSmilie && ( |
801 | 0 | SmilyHit(aInString, aInLength, bArg, |
802 | 0 | ":-)", |
803 | 0 | "moz-smiley-s1", // smile |
804 | 0 | outputHTML, glyphTextLen) || |
805 | 0 |
|
806 | 0 | SmilyHit(aInString, aInLength, bArg, |
807 | 0 | ":)", |
808 | 0 | "moz-smiley-s1", // smile |
809 | 0 | outputHTML, glyphTextLen) || |
810 | 0 |
|
811 | 0 | SmilyHit(aInString, aInLength, bArg, |
812 | 0 | ":-D", |
813 | 0 | "moz-smiley-s5", // laughing |
814 | 0 | outputHTML, glyphTextLen) || |
815 | 0 |
|
816 | 0 | SmilyHit(aInString, aInLength, bArg, |
817 | 0 | ":-(", |
818 | 0 | "moz-smiley-s2", // frown |
819 | 0 | outputHTML, glyphTextLen) || |
820 | 0 |
|
821 | 0 | SmilyHit(aInString, aInLength, bArg, |
822 | 0 | ":(", |
823 | 0 | "moz-smiley-s2", // frown |
824 | 0 | outputHTML, glyphTextLen) || |
825 | 0 |
|
826 | 0 | SmilyHit(aInString, aInLength, bArg, |
827 | 0 | ":-[", |
828 | 0 | "moz-smiley-s6", // embarassed |
829 | 0 | outputHTML, glyphTextLen) || |
830 | 0 |
|
831 | 0 | SmilyHit(aInString, aInLength, bArg, |
832 | 0 | ";-)", |
833 | 0 | "moz-smiley-s3", // wink |
834 | 0 | outputHTML, glyphTextLen) || |
835 | 0 |
|
836 | 0 | SmilyHit(aInString, aInLength, col0, |
837 | 0 | ";)", |
838 | 0 | "moz-smiley-s3", // wink |
839 | 0 | outputHTML, glyphTextLen) || |
840 | 0 |
|
841 | 0 | SmilyHit(aInString, aInLength, bArg, |
842 | 0 | ":-\\", |
843 | 0 | "moz-smiley-s7", // undecided |
844 | 0 | outputHTML, glyphTextLen) || |
845 | 0 |
|
846 | 0 | SmilyHit(aInString, aInLength, bArg, |
847 | 0 | ":-P", |
848 | 0 | "moz-smiley-s4", // tongue |
849 | 0 | outputHTML, glyphTextLen) || |
850 | 0 |
|
851 | 0 | SmilyHit(aInString, aInLength, bArg, |
852 | 0 | ";-P", |
853 | 0 | "moz-smiley-s4", // tongue |
854 | 0 | outputHTML, glyphTextLen) || |
855 | 0 |
|
856 | 0 | SmilyHit(aInString, aInLength, bArg, |
857 | 0 | "=-O", |
858 | 0 | "moz-smiley-s8", // surprise |
859 | 0 | outputHTML, glyphTextLen) || |
860 | 0 |
|
861 | 0 | SmilyHit(aInString, aInLength, bArg, |
862 | 0 | ":-*", |
863 | 0 | "moz-smiley-s9", // kiss |
864 | 0 | outputHTML, glyphTextLen) || |
865 | 0 |
|
866 | 0 | SmilyHit(aInString, aInLength, bArg, |
867 | 0 | ">:o", |
868 | 0 | "moz-smiley-s10", // yell |
869 | 0 | outputHTML, glyphTextLen) || |
870 | 0 |
|
871 | 0 | SmilyHit(aInString, aInLength, bArg, |
872 | 0 | ">:-o", |
873 | 0 | "moz-smiley-s10", // yell |
874 | 0 | outputHTML, glyphTextLen) || |
875 | 0 |
|
876 | 0 | SmilyHit(aInString, aInLength, bArg, |
877 | 0 | "8-)", |
878 | 0 | "moz-smiley-s11", // cool |
879 | 0 | outputHTML, glyphTextLen) || |
880 | 0 |
|
881 | 0 | SmilyHit(aInString, aInLength, bArg, |
882 | 0 | ":-$", |
883 | 0 | "moz-smiley-s12", // money |
884 | 0 | outputHTML, glyphTextLen) || |
885 | 0 |
|
886 | 0 | SmilyHit(aInString, aInLength, bArg, |
887 | 0 | ":-!", |
888 | 0 | "moz-smiley-s13", // foot |
889 | 0 | outputHTML, glyphTextLen) || |
890 | 0 |
|
891 | 0 | SmilyHit(aInString, aInLength, bArg, |
892 | 0 | "O:-)", |
893 | 0 | "moz-smiley-s14", // innocent |
894 | 0 | outputHTML, glyphTextLen) || |
895 | 0 |
|
896 | 0 | SmilyHit(aInString, aInLength, bArg, |
897 | 0 | ":'(", |
898 | 0 | "moz-smiley-s15", // cry |
899 | 0 | outputHTML, glyphTextLen) || |
900 | 0 |
|
901 | 0 | SmilyHit(aInString, aInLength, bArg, |
902 | 0 | ":-X", |
903 | 0 | "moz-smiley-s16", // sealed |
904 | 0 | outputHTML, glyphTextLen) |
905 | 0 | ) |
906 | 0 | ) |
907 | 0 | { |
908 | 0 | aOutputString.Append(outputHTML); |
909 | 0 | return true; |
910 | 0 | } |
911 | 0 | i++; |
912 | 0 | } |
913 | 0 | if (text0 == '\f') |
914 | 0 | { |
915 | 0 | aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>"); |
916 | 0 | glyphTextLen = 1; |
917 | 0 | return true; |
918 | 0 | } |
919 | 0 | if (text0 == '+' || text1 == '+') |
920 | 0 | { |
921 | 0 | if (ItMatchesDelimited(aInString, aInLength, |
922 | 0 | u" +/-", 4, |
923 | 0 | LT_IGNORE, LT_IGNORE)) |
924 | 0 | { |
925 | 0 | aOutputString.AppendLiteral(" ±"); |
926 | 0 | glyphTextLen = 4; |
927 | 0 | return true; |
928 | 0 | } |
929 | 0 | if (col0 && ItMatchesDelimited(aInString, aInLength, |
930 | 0 | u"+/-", 3, |
931 | 0 | LT_IGNORE, LT_IGNORE)) |
932 | 0 | { |
933 | 0 | aOutputString.AppendLiteral("±"); |
934 | 0 | glyphTextLen = 3; |
935 | 0 | return true; |
936 | 0 | } |
937 | 0 | } |
938 | 0 | |
939 | 0 | // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5 |
940 | 0 | // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ |
941 | 0 | if |
942 | 0 | ( |
943 | 0 | text1 == '^' |
944 | 0 | && |
945 | 0 | ( |
946 | 0 | IsAsciiDigit(text0) || IsAsciiAlpha(text0) || |
947 | 0 | text0 == ')' || text0 == ']' || text0 == '}' |
948 | 0 | ) |
949 | 0 | && |
950 | 0 | ( |
951 | 0 | (2 < aInLength && IsAsciiDigit(aInString[2])) || |
952 | 0 | (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3])) |
953 | 0 | ) |
954 | 0 | ) |
955 | 0 | { |
956 | 0 | // Find first non-digit |
957 | 0 | int32_t delimPos = 3; // skip "^" and first digit (or '-') |
958 | 0 | for (; delimPos < aInLength |
959 | 0 | && |
960 | 0 | ( |
961 | 0 | IsAsciiDigit(aInString[delimPos]) || |
962 | 0 | (aInString[delimPos] == '.' && delimPos + 1 < aInLength && |
963 | 0 | IsAsciiDigit(aInString[delimPos + 1])) |
964 | 0 | ); |
965 | 0 | delimPos++) |
966 | 0 | ; |
967 | 0 |
|
968 | 0 | if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos])) |
969 | 0 | { |
970 | 0 | return false; |
971 | 0 | } |
972 | 0 | |
973 | 0 | outputHTML.Truncate(); |
974 | 0 | outputHTML += text0; |
975 | 0 | outputHTML.AppendLiteral( |
976 | 0 | "<sup class=\"moz-txt-sup\">" |
977 | 0 | "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">" |
978 | 0 | "^</span>"); |
979 | 0 |
|
980 | 0 | aOutputString.Append(outputHTML); |
981 | 0 | aOutputString.Append(&aInString[2], delimPos - 2); |
982 | 0 | aOutputString.AppendLiteral("</sup>"); |
983 | 0 |
|
984 | 0 | glyphTextLen = delimPos /* - 1 + 1 */ ; |
985 | 0 | return true; |
986 | 0 | } |
987 | 0 | /* |
988 | 0 | The following strings are not substituted: |
989 | 0 | |TXT |HTML |Reason |
990 | 0 | +------+---------+---------- |
991 | 0 | -> ← Bug #454 |
992 | 0 | => ⇐ ditto |
993 | 0 | <- → ditto |
994 | 0 | <= ⇒ ditto |
995 | 0 | (tm) ™ ditto |
996 | 0 | 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ... |
997 | 0 | 3/4 ¾ ditto |
998 | 0 | 1/2 ½ similar |
999 | 0 | */ |
1000 | 0 | return false; |
1001 | 0 | } |
1002 | | |
1003 | | /*************************************************************************** |
1004 | | Library-internal Interface |
1005 | | ****************************************************************************/ |
1006 | | |
// Passes mozTXTToHTMLConv and the four interfaces it is registered with
// (mozITXTToHTMLConv, nsIStreamConverter, nsIStreamListener,
// nsIRequestObserver) to NS_IMPL_ISUPPORTS.
// NOTE(review): presumably this is the XPCOM macro that generates the
// AddRef/Release/QueryInterface implementations -- confirm against the
// macro's definition.
1007 | | NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, |
1008 | | mozITXTToHTMLConv, |
1009 | | nsIStreamConverter, |
1010 | | nsIStreamListener, |
1011 | | nsIRequestObserver) |
1012 | | |
1013 | | int32_t |
1014 | | mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, |
1015 | | uint32_t& logLineStart) |
1016 | 0 | { |
1017 | 0 | int32_t result = 0; |
1018 | 0 | int32_t lineLength = NS_strlen(line); |
1019 | 0 |
|
1020 | 0 | bool moreCites = true; |
1021 | 0 | while (moreCites) |
1022 | 0 | { |
1023 | 0 | /* E.g. the following lines count as quote: |
1024 | 0 |
|
1025 | 0 | > text |
1026 | 0 | //#ifdef QUOTE_RECOGNITION_AGGRESSIVE |
1027 | 0 | >text |
1028 | 0 | //#ifdef QUOTE_RECOGNITION_AGGRESSIVE |
1029 | 0 | > text |
1030 | 0 | ] text |
1031 | 0 | USER> text |
1032 | 0 | USER] text |
1033 | 0 | //#endif |
1034 | 0 |
|
1035 | 0 | logLineStart is the position of "t" in this example |
1036 | 0 | */ |
1037 | 0 | uint32_t i = logLineStart; |
1038 | 0 |
|
1039 | | #ifdef QUOTE_RECOGNITION_AGGRESSIVE |
1040 | | for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) |
1041 | | ; |
1042 | | for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i]) |
1043 | | && nsCRT::IsUpper(line[i]) ; i++) |
1044 | | ; |
1045 | | if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) |
1046 | | #else |
1047 | 0 | if (int32_t(i) < lineLength && line[i] == '>') |
1048 | 0 | #endif |
1049 | 0 | { |
1050 | 0 | i++; |
1051 | 0 | if (int32_t(i) < lineLength && line[i] == ' ') |
1052 | 0 | i++; |
1053 | 0 | // sendmail/mbox |
1054 | 0 | // Placed here for performance increase |
1055 | 0 | const char16_t * indexString = &line[logLineStart]; |
1056 | 0 | // here, |logLineStart < lineLength| is always true |
1057 | 0 | uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); |
1058 | 0 | if (Substring(indexString, |
1059 | 0 | indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength), |
1060 | 0 | nsCaseInsensitiveStringComparator())) |
1061 | 0 | //XXX RFC2646 |
1062 | 0 | moreCites = false; |
1063 | 0 | else |
1064 | 0 | { |
1065 | 0 | result++; |
1066 | 0 | logLineStart = i; |
1067 | 0 | } |
1068 | 0 | } |
1069 | 0 | else |
1070 | 0 | moreCites = false; |
1071 | 0 | } |
1072 | 0 |
|
1073 | 0 | return result; |
1074 | 0 | } |
1075 | | |
1076 | | void |
1077 | | mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString) |
1078 | 0 | { |
1079 | 0 | bool doURLs = 0 != (whattodo & kURLs); |
1080 | 0 | bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); |
1081 | 0 | bool doStructPhrase = 0 != (whattodo & kStructPhrase); |
1082 | 0 |
|
1083 | 0 | uint32_t structPhrase_strong = 0; // Number of currently open tags |
1084 | 0 | uint32_t structPhrase_underline = 0; |
1085 | 0 | uint32_t structPhrase_italic = 0; |
1086 | 0 | uint32_t structPhrase_code = 0; |
1087 | 0 |
|
1088 | 0 | nsAutoString outputHTML; // moved here for performance increase |
1089 | 0 |
|
1090 | 0 | for(uint32_t i = 0; int32_t(i) < aInStringLength;) |
1091 | 0 | { |
1092 | 0 | if (doGlyphSubstitution) |
1093 | 0 | { |
1094 | 0 | int32_t glyphTextLen; |
1095 | 0 | if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen)) |
1096 | 0 | { |
1097 | 0 | i += glyphTextLen; |
1098 | 0 | continue; |
1099 | 0 | } |
1100 | 0 | } |
1101 | 0 | |
1102 | 0 | if (doStructPhrase) |
1103 | 0 | { |
1104 | 0 | const char16_t * newOffset = aInString; |
1105 | 0 | int32_t newLength = aInStringLength; |
1106 | 0 | if (i > 0 ) // skip the first element? |
1107 | 0 | { |
1108 | 0 | newOffset = &aInString[i-1]; |
1109 | 0 | newLength = aInStringLength - i + 1; |
1110 | 0 | } |
1111 | 0 |
|
1112 | 0 | switch (aInString[i]) // Performance increase |
1113 | 0 | { |
1114 | 0 | case '*': |
1115 | 0 | if (StructPhraseHit(newOffset, newLength, i == 0, |
1116 | 0 | u"*", 1, |
1117 | 0 | "b", "class=\"moz-txt-star\"", |
1118 | 0 | aOutString, structPhrase_strong)) |
1119 | 0 | { |
1120 | 0 | i++; |
1121 | 0 | continue; |
1122 | 0 | } |
1123 | 0 | break; |
1124 | 0 | case '/': |
1125 | 0 | if (StructPhraseHit(newOffset, newLength, i == 0, |
1126 | 0 | u"/", 1, |
1127 | 0 | "i", "class=\"moz-txt-slash\"", |
1128 | 0 | aOutString, structPhrase_italic)) |
1129 | 0 | { |
1130 | 0 | i++; |
1131 | 0 | continue; |
1132 | 0 | } |
1133 | 0 | break; |
1134 | 0 | case '_': |
1135 | 0 | if (StructPhraseHit(newOffset, newLength, i == 0, |
1136 | 0 | u"_", 1, |
1137 | 0 | "span" /* <u> is deprecated */, |
1138 | 0 | "class=\"moz-txt-underscore\"", |
1139 | 0 | aOutString, structPhrase_underline)) |
1140 | 0 | { |
1141 | 0 | i++; |
1142 | 0 | continue; |
1143 | 0 | } |
1144 | 0 | break; |
1145 | 0 | case '|': |
1146 | 0 | if (StructPhraseHit(newOffset, newLength, i == 0, |
1147 | 0 | u"|", 1, |
1148 | 0 | "code", "class=\"moz-txt-verticalline\"", |
1149 | 0 | aOutString, structPhrase_code)) |
1150 | 0 | { |
1151 | 0 | i++; |
1152 | 0 | continue; |
1153 | 0 | } |
1154 | 0 | break; |
1155 | 0 | } |
1156 | 0 | } |
1157 | 0 |
|
1158 | 0 | if (doURLs) |
1159 | 0 | { |
1160 | 0 | switch (aInString[i]) |
1161 | 0 | { |
1162 | 0 | case ':': |
1163 | 0 | case '@': |
1164 | 0 | case '.': |
1165 | 0 | if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase |
1166 | 0 | { |
1167 | 0 | int32_t replaceBefore; |
1168 | 0 | int32_t replaceAfter; |
1169 | 0 | if (FindURL(aInString, aInStringLength, i, whattodo, |
1170 | 0 | outputHTML, replaceBefore, replaceAfter) |
1171 | 0 | && structPhrase_strong + structPhrase_italic + |
1172 | 0 | structPhrase_underline + structPhrase_code == 0 |
1173 | 0 | /* workaround for bug #19445 */ ) |
1174 | 0 | { |
1175 | 0 | aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore); |
1176 | 0 | aOutString += outputHTML; |
1177 | 0 | i += replaceAfter + 1; |
1178 | 0 | continue; |
1179 | 0 | } |
1180 | 0 | } |
1181 | 0 | break; |
1182 | 0 | } //switch |
1183 | 0 | } |
1184 | 0 |
|
1185 | 0 | switch (aInString[i]) |
1186 | 0 | { |
1187 | 0 | // Special symbols |
1188 | 0 | case '<': |
1189 | 0 | case '>': |
1190 | 0 | case '&': |
1191 | 0 | EscapeChar(aInString[i], aOutString, false); |
1192 | 0 | i++; |
1193 | 0 | break; |
1194 | 0 | // Normal characters |
1195 | 0 | default: |
1196 | 0 | aOutString += aInString[i]; |
1197 | 0 | i++; |
1198 | 0 | break; |
1199 | 0 | } |
1200 | 0 | } |
1201 | 0 | } |
1202 | | |
1203 | | void |
1204 | | mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString) |
1205 | 0 | { |
1206 | 0 | // some common variables we were recalculating |
1207 | 0 | // every time inside the for loop... |
1208 | 0 | int32_t lengthOfInString = aInString.Length(); |
1209 | 0 | const char16_t * uniBuffer = aInString.get(); |
1210 | 0 |
|
1211 | | #ifdef DEBUG_BenB_Perf |
1212 | | PRTime parsing_start = PR_IntervalNow(); |
1213 | | #endif |
1214 | |
|
1215 | 0 | // Look for simple entities not included in a tags and scan them. |
1216 | 0 | // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"), |
1217 | 0 | // comment tag ("<!--[...]-->"), style tag, script tag or head tag. |
1218 | 0 | // Unescape the rest (text between tags) and pass it to ScanTXT. |
1219 | 0 | nsAutoCString canFollow(" \f\n\r\t>"); |
1220 | 0 | for (int32_t i = 0; i < lengthOfInString;) |
1221 | 0 | { |
1222 | 0 | if (aInString[i] == '<') // html tag |
1223 | 0 | { |
1224 | 0 | int32_t start = i; |
1225 | 0 | if (i + 2 < lengthOfInString && |
1226 | 0 | nsCRT::ToLower(aInString[i + 1]) == 'a' && |
1227 | 0 | canFollow.FindChar(aInString[i + 2]) != kNotFound) |
1228 | 0 | // if a tag, skip until </a>. |
1229 | 0 | // Make sure there's a white-space character after, not to match "abbr". |
1230 | 0 | { |
1231 | 0 | i = aInString.Find("</a>", true, i); |
1232 | 0 | if (i == kNotFound) |
1233 | 0 | i = lengthOfInString; |
1234 | 0 | else |
1235 | 0 | i += 4; |
1236 | 0 | } |
1237 | 0 | else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--")) |
1238 | 0 | // if out-commended code, skip until --> |
1239 | 0 | { |
1240 | 0 | i = aInString.Find("-->", false, i); |
1241 | 0 | if (i == kNotFound) |
1242 | 0 | i = lengthOfInString; |
1243 | 0 | else |
1244 | 0 | i += 3; |
1245 | 0 | } |
1246 | 0 | else if (i + 6 < lengthOfInString && |
1247 | 0 | Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") && |
1248 | 0 | canFollow.FindChar(aInString[i + 6]) != kNotFound) |
1249 | 0 | // if style tag, skip until </style> |
1250 | 0 | { |
1251 | 0 | i = aInString.Find("</style>", true, i); |
1252 | 0 | if (i == kNotFound) |
1253 | 0 | i = lengthOfInString; |
1254 | 0 | else |
1255 | 0 | i += 8; |
1256 | 0 | } |
1257 | 0 | else if (i + 7 < lengthOfInString && |
1258 | 0 | Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") && |
1259 | 0 | canFollow.FindChar(aInString[i + 7]) != kNotFound) |
1260 | 0 | // if script tag, skip until </script> |
1261 | 0 | { |
1262 | 0 | i = aInString.Find("</script>", true, i); |
1263 | 0 | if (i == kNotFound) |
1264 | 0 | i = lengthOfInString; |
1265 | 0 | else |
1266 | 0 | i += 9; |
1267 | 0 | } |
1268 | 0 | else if (i + 5 < lengthOfInString && |
1269 | 0 | Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") && |
1270 | 0 | canFollow.FindChar(aInString[i + 5]) != kNotFound) |
1271 | 0 | // if head tag, skip until </head> |
1272 | 0 | // Make sure not to match <header>. |
1273 | 0 | { |
1274 | 0 | i = aInString.Find("</head>", true, i); |
1275 | 0 | if (i == kNotFound) |
1276 | 0 | i = lengthOfInString; |
1277 | 0 | else |
1278 | 0 | i += 7; |
1279 | 0 | } |
1280 | 0 | else // just skip tag (attributes etc.) |
1281 | 0 | { |
1282 | 0 | i = aInString.FindChar('>', i); |
1283 | 0 | if (i == kNotFound) |
1284 | 0 | i = lengthOfInString; |
1285 | 0 | else |
1286 | 0 | i++; |
1287 | 0 | } |
1288 | 0 | aOutString.Append(&uniBuffer[start], i - start); |
1289 | 0 | } |
1290 | 0 | else |
1291 | 0 | { |
1292 | 0 | uint32_t start = uint32_t(i); |
1293 | 0 | i = aInString.FindChar('<', i); |
1294 | 0 | if (i == kNotFound) |
1295 | 0 | i = lengthOfInString; |
1296 | 0 |
|
1297 | 0 | nsString tempString; |
1298 | 0 | tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); |
1299 | 0 | UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); |
1300 | 0 | ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); |
1301 | 0 | } |
1302 | 0 | } |
1303 | 0 |
|
1304 | | #ifdef DEBUG_BenB_Perf |
1305 | | printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); |
1306 | | #endif |
1307 | | } |
1308 | | |
1309 | | /**************************************************************************** |
1310 | | XPCOM Interface |
1311 | | *****************************************************************************/ |
1312 | | |
1313 | | NS_IMETHODIMP |
1314 | | mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream, |
1315 | | const char *aFromType, |
1316 | | const char *aToType, |
1317 | | nsISupports *aCtxt, nsIInputStream **_retval) |
1318 | 0 | { |
1319 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
1320 | 0 | } |
1321 | | |
1322 | | NS_IMETHODIMP |
1323 | | mozTXTToHTMLConv::AsyncConvertData(const char *aFromType, |
1324 | | const char *aToType, |
1325 | 0 | nsIStreamListener *aListener, nsISupports *aCtxt) { |
1326 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
1327 | 0 | } |
1328 | | |
1329 | | NS_IMETHODIMP |
1330 | | mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt, |
1331 | | nsIInputStream *inStr, uint64_t sourceOffset, |
1332 | | uint32_t count) |
1333 | 0 | { |
1334 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
1335 | 0 | } |
1336 | | |
1337 | | NS_IMETHODIMP |
1338 | | mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt) |
1339 | 0 | { |
1340 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
1341 | 0 | } |
1342 | | |
1343 | | NS_IMETHODIMP |
1344 | | mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt, |
1345 | | nsresult aStatus) |
1346 | 0 | { |
1347 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
1348 | 0 | } |
1349 | | |
1350 | | NS_IMETHODIMP |
1351 | | mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart, |
1352 | | uint32_t *_retval) |
1353 | 0 | { |
1354 | 0 | if (!logLineStart || !_retval || !line) |
1355 | 0 | return NS_ERROR_NULL_POINTER; |
1356 | 0 | *_retval = CiteLevelTXT(line, *logLineStart); |
1357 | 0 | return NS_OK; |
1358 | 0 | } |
1359 | | |
1360 | | NS_IMETHODIMP |
1361 | | mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo, |
1362 | | char16_t **_retval) |
1363 | 0 | { |
1364 | 0 | NS_ENSURE_ARG(text); |
1365 | 0 |
|
1366 | 0 | // FIX ME!!! |
1367 | 0 | nsString outString; |
1368 | 0 | int32_t inLength = NS_strlen(text); |
1369 | 0 | // by setting a large capacity up front, we save time |
1370 | 0 | // when appending characters to the output string because we don't |
1371 | 0 | // need to reallocate and re-copy the characters already in the out String. |
1372 | 0 | NS_ASSERTION(inLength, "ScanTXT passed 0 length string"); |
1373 | 0 | if (inLength == 0) { |
1374 | 0 | *_retval = NS_xstrdup(text); |
1375 | 0 | return NS_OK; |
1376 | 0 | } |
1377 | 0 | |
1378 | 0 | outString.SetCapacity(uint32_t(inLength * growthRate)); |
1379 | 0 | ScanTXT(text, inLength, whattodo, outString); |
1380 | 0 |
|
1381 | 0 | *_retval = ToNewUnicode(outString); |
1382 | 0 | return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
1383 | 0 | } |
1384 | | |
1385 | | NS_IMETHODIMP |
1386 | | mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo, |
1387 | | char16_t **_retval) |
1388 | 0 | { |
1389 | 0 | NS_ENSURE_ARG(text); |
1390 | 0 |
|
1391 | 0 | // FIX ME!!! |
1392 | 0 | nsString outString; |
1393 | 0 | nsString inString (text); // look at this nasty extra copy of the entire input buffer! |
1394 | 0 | outString.SetCapacity(uint32_t(inString.Length() * growthRate)); |
1395 | 0 |
|
1396 | 0 | ScanHTML(inString, whattodo, outString); |
1397 | 0 | *_retval = ToNewUnicode(outString); |
1398 | 0 | return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
1399 | 0 | } |
1400 | | |
1401 | | nsresult |
1402 | | MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) |
1403 | 0 | { |
1404 | 0 | MOZ_ASSERT(aConv != nullptr, "null ptr"); |
1405 | 0 | if (!aConv) |
1406 | 0 | return NS_ERROR_NULL_POINTER; |
1407 | 0 | |
1408 | 0 | *aConv = new mozTXTToHTMLConv(); |
1409 | 0 | if (!*aConv) |
1410 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
1411 | 0 | |
1412 | 0 | NS_ADDREF(*aConv); |
1413 | 0 | // return (*aConv)->Init(); |
1414 | 0 | return NS_OK; |
1415 | 0 | } |