/src/mozilla-central/netwerk/streamconv/converters/mozTXTToHTMLConv.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | /** |
7 | | Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct. |
8 | | */ |
9 | | |
10 | | #ifndef _mozTXTToHTMLConv_h__ |
11 | | #define _mozTXTToHTMLConv_h__ |
12 | | |
13 | | #include "mozITXTToHTMLConv.h" |
14 | | #include "nsString.h" |
15 | | #include "nsCOMPtr.h" |
16 | | |
17 | | class nsIIOService; |
18 | | |
19 | | class mozTXTToHTMLConv : public mozITXTToHTMLConv |
20 | | { |
21 | | /* Enhances plain text with HTML tags; see mozITXTToHTMLConv. The destructor below is private (declared ahead of the public section). */ |
22 | 0 | virtual ~mozTXTToHTMLConv() = default; |
23 | | |
24 | | ////////////////////////////////////////////////////////// |
25 | | public: |
26 | | ////////////////////////////////////////////////////////// |
27 | | |
28 | 0 | mozTXTToHTMLConv() = default; |
29 | | NS_DECL_ISUPPORTS |
30 | | |
31 | | NS_DECL_MOZITXTTOHTMLCONV |
32 | | NS_DECL_NSIREQUESTOBSERVER |
33 | | NS_DECL_NSISTREAMLISTENER |
34 | | NS_DECL_NSISTREAMCONVERTER |
35 | | |
36 | | /** |
37 | | see mozITXTToHTMLConv::ScanTXT |
38 | | */ |
39 | | void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString); |
40 | | |
41 | | /** |
42 | | see mozITXTToHTMLConv::ScanHTML. Note: aInString may be modified by this call. |
43 | | */ |
44 | | void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString); |
45 | | |
46 | | /** |
47 | | see mozITXTToHTMLConv::CiteLevelTXT |
48 | | */ |
49 | | int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart); |
50 | | |
51 | | |
52 | | ////////////////////////////////////////////////////////// |
53 | | protected: |
54 | | ////////////////////////////////////////////////////////// |
55 | | nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service... |
56 | | /** |
57 | | Completes<ul> |
58 | | <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org" |
59 | | <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org" |
60 | | <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org" |
61 | | </ul> |
62 | | It does not check whether the resulting URL is valid. |
63 | | @param aInString (in): abbreviated URL |
64 | | @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) |
65 | | @param aOutString (out): Completed URL at success and empty string at failure |
66 | | */ |
67 | | void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, |
68 | | const uint32_t pos, nsString& aOutString); |
69 | | |
70 | | |
71 | | ////////////////////////////////////////////////////////// |
72 | | private: |
73 | | ////////////////////////////////////////////////////////// |
74 | | |
75 | | enum LIMTYPE |
76 | | { |
77 | | LT_IGNORE, // limitation not checked |
78 | | LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. |
79 | | LT_ALPHA, // alpha char |
80 | | LT_DIGIT |
81 | | }; |
82 | | |
83 | | /** |
84 | | @param aInString (in): the string to search through.<p> |
85 | | If before = LT_IGNORE,<br> |
86 | | rep is compared starting at the 1st char of aInString (aInString[0]),<br> |
87 | | else starting at the 2nd char of aInString (aInString[1]). |
88 | | Chars after "after"-delimiter are ignored. |
89 | | @param rep (in): the string to look for |
90 | | @param aRepLen (in): the length of rep (NOTE(review): was documented as bytes, but rep points to char16_t - confirm the unit) |
91 | | @param before (in): limitation before rep |
92 | | @param after (in): limitation after rep |
93 | | @return true, if rep is found and limitation spec is met or rep is empty |
94 | | */ |
95 | | bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength, |
96 | | const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); |
97 | | |
98 | | /** |
99 | | @param see ItMatchesDelimited |
100 | | @return Number of ItMatchesDelimited matches in aInString |
101 | | */ |
102 | | uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, |
103 | | const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); |
104 | | |
105 | | /** |
106 | | Currently only changes "<", ">" and "&". All others stay as they are.<p> |
107 | | "Char" in function name to avoid side effects with nsString(ch) |
108 | | constructors. |
109 | | @param ch (in) |
110 | | @param aStringToAppendto (out) - the string to append the escaped |
111 | | string to. |
112 | | @param inAttribute (in) - will escape quotes, too (which is |
113 | | only needed for attribute values) |
114 | | */ |
115 | | void EscapeChar(const char16_t ch, nsString& aStringToAppendto, |
116 | | bool inAttribute); |
117 | | |
118 | | /** |
119 | | See EscapeChar. Escapes the string in place. |
120 | | */ |
121 | | void EscapeStr(nsString& aInString, bool inAttribute); |
122 | | |
123 | | /** |
124 | | Currently only reverts "<", ">" and "&". All others stay as they are.<p> |
125 | | @param aInString (in) HTML string |
126 | | @param aStartPos (in) start index into the buffer |
127 | | @param aLength (in) length of the buffer |
128 | | @param aOutString (out) unescaped buffer |
129 | | */ |
130 | | void UnescapeStr(const char16_t * aInString, int32_t aStartPos, |
131 | | int32_t aLength, nsString& aOutString); |
132 | | |
133 | | /** |
134 | | <em>Note</em>: I use different strategies to pass context between the |
135 | | functions (full text and pos vs. cut text and col0, glyphTextLen vs. |
136 | | replaceBefore/-After). It makes some sense, but is hard to understand |
137 | | (maintain) :-(. |
138 | | */ |
139 | | |
140 | | /** |
141 | | <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars |
142 | | in aInString should be replaced by outputHTML.</p> |
143 | | <p><em>Note:</em> This function should be able to process a URL on multiple |
144 | | lines, but currently, ScanForURLs is called for every line, so it can't.</p> |
145 | | @param aInString (in): possibly includes a URL |
146 | | @param pos (in): position in aInString, where either ":", "." or "@" are found |
147 | | @param whathasbeendone (in): What the calling ScanTXT did/has to do with the |
148 | | (not-linkified) text, i.e. usually the "whattodo" parameter. |
149 | | (Needed to calculate replaceBefore.) NOT what will be done with |
150 | | the content of the link. |
151 | | @param outputHTML (out): URL with HTML-a tag |
152 | | @param replaceBefore (out): Number of chars of URL before pos |
153 | | @param replaceAfter (out): Number of chars of URL after pos |
154 | | @return URL found |
155 | | */ |
156 | | bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
157 | | const uint32_t whathasbeendone, |
158 | | nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter); |
159 | | |
160 | | enum modetype { |
161 | | unknown, |
162 | | RFC1738, /* Check, if RFC1738, APPENDIX compliant, |
163 | | like "<URL:http://www.mozilla.org>". */ |
164 | | RFC2396E, /* RFC2396, APPENDIX E allows angle brackets (like |
165 | | "<http://www.mozilla.org>") (without "URL:") or |
166 | | quotation marks(like ""http://www.mozilla.org""). |
167 | | Also allow email addresses without scheme, |
168 | | e.g. "<mozilla@bucksch.org>" */ |
169 | | freetext, /* assume heading scheme |
170 | | with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" |
171 | | (see RFC2396, Section 3.1). |
172 | | Certain characters (see code) or any whitespace |
173 | | (including linebreaks) end the URL. |
174 | | Certain other (punctuation) characters (see code) |
175 | | at the end are stripped off. */ |
176 | | abbreviated /* Similar to freetext, but without scheme, e.g. |
177 | | "www.mozilla.org", "ftp.mozilla.org" and |
178 | | "mozilla@bucksch.org". */ |
179 | | /* RFC1738 and RFC2396E type URLs may use multiple lines, |
180 | | whitespace is stripped. Special characters like ")" stay intact.*/ |
181 | | }; |
182 | | |
183 | | /** |
184 | | * @param aInString (in), pos (in): see FindURL |
185 | | * @param check (in): Start must conform to this mode |
186 | | * @param start (out): Position in aInString, where URL (including brackets or |
187 | | * similar) starts |
188 | | * @return |check|-conform start has been found |
189 | | */ |
190 | | bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
191 | | const modetype check, uint32_t& start); |
192 | | |
193 | | /** |
194 | | * @param aInString (in), pos (in): see FindURL |
195 | | * @param check (in): End must conform to this mode |
196 | | * @param start (in): see FindURLStart |
197 | | * @param end (out): Similar to |start| param of FindURLStart |
198 | | * @return |check|-conform end has been found |
199 | | */ |
200 | | bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, |
201 | | const modetype check, const uint32_t start, uint32_t& end); |
202 | | |
203 | | /** |
204 | | * @param aInString (in), pos (in), whathasbeendone (in): see FindURL |
205 | | * @param check (in): Current mode |
206 | | * @param start (in), end (in): see FindURLEnd |
207 | | * @param txtURL (out): Guessed (raw) URL. |
208 | | * Without whitespace, but not completed. |
209 | | * @param desc (out): Link as shown to the user, but already escaped. |
210 | | * Should be placed between the <a> and </a> tags. |
211 | | * @param replaceBefore(out), replaceAfter (out): see FindURL |
212 | | */ |
213 | | void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, |
214 | | const uint32_t pos, const uint32_t whathasbeendone, |
215 | | const modetype check, const uint32_t start, const uint32_t end, |
216 | | nsString& txtURL, nsString& desc, |
217 | | int32_t& replaceBefore, int32_t& replaceAfter); |
218 | | |
219 | | /** |
220 | | * @param txtURL (in), desc (in): see CalculateURLBoundaries |
221 | | * @param outputHTML (out): see FindURL |
222 | | * @return A valid URL could be found (and creation of HTML successful) |
223 | | */ |
224 | | bool CheckURLAndCreateHTML( |
225 | | const nsString& txtURL, const nsString& desc, const modetype mode, |
226 | | nsString& outputHTML); |
227 | | |
228 | | /** |
229 | | @param aInString (in): line of text possibly with tagTXT.<p> |
230 | | if col0 is true, |
231 | | starting with tagTXT<br> |
232 | | else |
233 | | starting one char before tagTXT |
234 | | @param col0 (in): tagTXT is on the beginning of the line (or paragraph). |
235 | | openTags must be 0 then. |
236 | | @param tagTXT (in): Tag in plaintext to search for, e.g. "*" |
237 | | @param aTagTxtLen (in): length of tagTXT. |
238 | | @param tagHTML (in): HTML-Tag to replace tagTXT with, |
239 | | without "<" and ">", e.g. "strong" |
240 | | @param attributeHTML (in): HTML-attribute to add to opening tagHTML, |
241 | | e.g. "class=txt_star" |
242 | | @param aOutputString (out): string to APPEND the converted html into |
243 | | @param openTags (in/out): Number of currently open tags of type tagHTML |
244 | | @return Conversion succeeded |
245 | | */ |
246 | | bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, |
247 | | const char16_t* tagTXT, |
248 | | int32_t aTagTxtLen, |
249 | | const char* tagHTML, const char* attributeHTML, |
250 | | nsString& aOutputString, uint32_t& openTags); |
251 | | |
252 | | /** |
253 | | @param aInString (in), col0 (in): see GlyphHit |
254 | | @param tagTXT (in): Smiley, see also StructPhraseHit |
255 | | @param imageName (in): the basename of the file that contains the image for this smiley |
256 | | @param outputHTML (out): new string containing the html for the smiley |
257 | | @param glyphTextLen (out): see GlyphHit |
258 | | */ |
259 | | bool |
260 | | SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, |
261 | | const char* tagTXT, const char* imageName, |
262 | | nsString& outputHTML, int32_t& glyphTextLen); |
263 | | |
264 | | /** |
265 | | Checks, if we can replace some chars at the start of line with prettier HTML |
266 | | code.<p> |
267 | | If success is reported, replace the first glyphTextLen chars with the html appended to aOutString |
268 | | |
269 | | @param aInString (in): line of text possibly with Glyph.<p> |
270 | | If col0 is true, |
271 | | starting with Glyph <br><!-- (br not part of text) --> |
272 | | else |
273 | | starting one char before Glyph |
274 | | @param col0 (in): aInString starts at the beginning of the line (or paragraph) |
275 | | @param aOutString (out): APPENDS html for the glyph to this string |
276 | | @param glyphTextLen (out): Length of original text to replace |
277 | | @return see StructPhraseHit |
278 | | */ |
279 | | bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, |
280 | | nsString& aOutString, int32_t& glyphTextLen); |
281 | | |
282 | | /** |
283 | | Check if a given url should be linkified. |
284 | | @param aURL (in): url to be checked on. |
285 | | */ |
286 | | bool ShouldLinkify(const nsCString& aURL); |
287 | | }; |
288 | | |
289 | | // It's said that Win32 and Mac don't like static const members, so these constants live outside the class. |
290 | | const int32_t mozTXTToHTMLConv_lastMode = 4; |
291 | | // Needed (only) by mozTXTToHTMLConv::FindURL |
292 | | const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted (NOTE(review): presumably tracks the modetype enumerators, whose last value, abbreviated, is 4 - confirm) |
293 | | |
294 | | #endif |