/src/mozilla-central/netwerk/streamconv/converters/nsDirIndexParser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ |
7 | | |
8 | | #include "nsDirIndexParser.h" |
9 | | |
10 | | #include "mozilla/ArrayUtils.h" |
11 | | #include "mozilla/dom/FallbackEncoding.h" |
12 | | #include "mozilla/Encoding.h" |
13 | | #include "prprf.h" |
14 | | #include "nsCRT.h" |
15 | | #include "nsDirIndex.h" |
16 | | #include "nsEscape.h" |
17 | | #include "nsIDirIndex.h" |
18 | | #include "nsIInputStream.h" |
19 | | #include "nsITextToSubURI.h" |
20 | | #include "nsServiceManagerUtils.h" |
21 | | |
22 | | using namespace mozilla; |
23 | | |
24 | | NS_IMPL_ISUPPORTS(nsDirIndexParser, |
25 | | nsIRequestObserver, |
26 | | nsIStreamListener, |
27 | | nsIDirIndexParser) |
28 | | |
29 | | nsDirIndexParser::nsDirIndexParser() |
30 | | : mLineStart(0) |
31 | 0 | , mHasDescription(false) { |
32 | 0 | } |
33 | | |
34 | | nsresult |
35 | 0 | nsDirIndexParser::Init() { |
36 | 0 | mLineStart = 0; |
37 | 0 | mHasDescription = false; |
38 | 0 | mFormat[0] = -1; |
39 | 0 | auto encoding = mozilla::dom::FallbackEncoding::FromLocale(); |
40 | 0 | encoding->Name(mEncoding); |
41 | 0 |
|
42 | 0 | nsresult rv; |
43 | 0 | // XXX not threadsafe |
44 | 0 | if (gRefCntParser++ == 0) |
45 | 0 | rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI); |
46 | 0 | else |
47 | 0 | rv = NS_OK; |
48 | 0 |
|
49 | 0 | return rv; |
50 | 0 | } |
51 | | |
52 | 0 | nsDirIndexParser::~nsDirIndexParser() { |
53 | 0 | // XXX not threadsafe |
54 | 0 | if (--gRefCntParser == 0) { |
55 | 0 | NS_IF_RELEASE(gTextToSubURI); |
56 | 0 | } |
57 | 0 | } |
58 | | |
59 | | NS_IMETHODIMP |
60 | 0 | nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) { |
61 | 0 | mListener = aListener; |
62 | 0 | return NS_OK; |
63 | 0 | } |
64 | | |
65 | | NS_IMETHODIMP |
66 | 0 | nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) { |
67 | 0 | NS_IF_ADDREF(*aListener = mListener.get()); |
68 | 0 | return NS_OK; |
69 | 0 | } |
70 | | |
71 | | NS_IMETHODIMP |
72 | 0 | nsDirIndexParser::GetComment(char** aComment) { |
73 | 0 | *aComment = ToNewCString(mComment); |
74 | 0 |
|
75 | 0 | if (!*aComment) |
76 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
77 | 0 | |
78 | 0 | return NS_OK; |
79 | 0 | } |
80 | | |
81 | | NS_IMETHODIMP |
82 | 0 | nsDirIndexParser::SetEncoding(const char* aEncoding) { |
83 | 0 | mEncoding.Assign(aEncoding); |
84 | 0 | return NS_OK; |
85 | 0 | } |
86 | | |
87 | | NS_IMETHODIMP |
88 | 0 | nsDirIndexParser::GetEncoding(char** aEncoding) { |
89 | 0 | *aEncoding = ToNewCString(mEncoding); |
90 | 0 |
|
91 | 0 | if (!*aEncoding) |
92 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
93 | 0 | |
94 | 0 | return NS_OK; |
95 | 0 | } |
96 | | |
97 | | NS_IMETHODIMP |
98 | 0 | nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) { |
99 | 0 | return NS_OK; |
100 | 0 | } |
101 | | |
102 | | NS_IMETHODIMP |
103 | | nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt, |
104 | 0 | nsresult aStatusCode) { |
105 | 0 | // Finish up |
106 | 0 | if (mBuf.Length() > (uint32_t) mLineStart) { |
107 | 0 | ProcessData(aRequest, aCtxt); |
108 | 0 | } |
109 | 0 |
|
110 | 0 | return NS_OK; |
111 | 0 | } |
112 | | |
113 | | nsDirIndexParser::Field |
114 | | nsDirIndexParser::gFieldTable[] = { |
115 | | { "Filename", FIELD_FILENAME }, |
116 | | { "Description", FIELD_DESCRIPTION }, |
117 | | { "Content-Length", FIELD_CONTENTLENGTH }, |
118 | | { "Last-Modified", FIELD_LASTMODIFIED }, |
119 | | { "Content-Type", FIELD_CONTENTTYPE }, |
120 | | { "File-Type", FIELD_FILETYPE }, |
121 | | { nullptr, FIELD_UNKNOWN } |
122 | | }; |
123 | | |
124 | | nsrefcnt nsDirIndexParser::gRefCntParser = 0; |
125 | | nsITextToSubURI *nsDirIndexParser::gTextToSubURI; |
126 | | |
127 | | nsresult |
128 | | nsDirIndexParser::ParseFormat(const char* aFormatStr) |
129 | 0 | { |
130 | 0 | // Parse a "200" format line, and remember the fields and their |
131 | 0 | // ordering in mFormat. Multiple 200 lines stomp on each other. |
132 | 0 | unsigned int formatNum = 0; |
133 | 0 | mFormat[0] = -1; |
134 | 0 |
|
135 | 0 | do { |
136 | 0 | while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr))) |
137 | 0 | ++aFormatStr; |
138 | 0 |
|
139 | 0 | if (! *aFormatStr) |
140 | 0 | break; |
141 | 0 | |
142 | 0 | nsAutoCString name; |
143 | 0 | int32_t len = 0; |
144 | 0 | while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len]))) |
145 | 0 | ++len; |
146 | 0 | name.SetCapacity(len + 1); |
147 | 0 | name.Append(aFormatStr, len); |
148 | 0 | aFormatStr += len; |
149 | 0 |
|
150 | 0 | // Okay, we're gonna monkey with the nsStr. Bold! |
151 | 0 | name.SetLength(nsUnescapeCount(name.BeginWriting())); |
152 | 0 |
|
153 | 0 | // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html |
154 | 0 | if (name.LowerCaseEqualsLiteral("description")) |
155 | 0 | mHasDescription = true; |
156 | 0 |
|
157 | 0 | for (Field* i = gFieldTable; i->mName; ++i) { |
158 | 0 | if (name.EqualsIgnoreCase(i->mName)) { |
159 | 0 | mFormat[formatNum] = i->mType; |
160 | 0 | mFormat[++formatNum] = -1; |
161 | 0 | break; |
162 | 0 | } |
163 | 0 | } |
164 | 0 |
|
165 | 0 | } while (*aFormatStr && (formatNum < (ArrayLength(mFormat)-1))); |
166 | 0 |
|
167 | 0 | return NS_OK; |
168 | 0 | } |
169 | | |
170 | | nsresult |
171 | | nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr, int32_t aLineLen) |
172 | 0 | { |
173 | 0 | // Parse a "201" data line, using the field ordering specified in |
174 | 0 | // mFormat. |
175 | 0 |
|
176 | 0 | if(mFormat[0] == -1) { |
177 | 0 | // Ignore if we haven't seen a format yet. |
178 | 0 | return NS_OK; |
179 | 0 | } |
180 | 0 | |
181 | 0 | nsresult rv = NS_OK; |
182 | 0 | nsAutoCString filename; |
183 | 0 | int32_t lineLen = aLineLen; |
184 | 0 |
|
185 | 0 | for (int32_t i = 0; mFormat[i] != -1; ++i) { |
186 | 0 | // If we've exhausted the data before we run out of fields, just bail. |
187 | 0 | if (!*aDataStr || (lineLen < 1)) { |
188 | 0 | return NS_OK; |
189 | 0 | } |
190 | 0 | |
191 | 0 | while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) { |
192 | 0 | ++aDataStr; |
193 | 0 | --lineLen; |
194 | 0 | } |
195 | 0 |
|
196 | 0 | if (lineLen < 1) { |
197 | 0 | // invalid format, bail |
198 | 0 | return NS_OK; |
199 | 0 | } |
200 | 0 | |
201 | 0 | char *value = aDataStr; |
202 | 0 | if (*aDataStr == '"' || *aDataStr == '\'') { |
203 | 0 | // it's a quoted string. snarf everything up to the next quote character |
204 | 0 | const char quotechar = *(aDataStr++); |
205 | 0 | lineLen--; |
206 | 0 | ++value; |
207 | 0 | while ((lineLen > 0) && *aDataStr != quotechar) { |
208 | 0 | ++aDataStr; |
209 | 0 | --lineLen; |
210 | 0 | } |
211 | 0 | if (lineLen > 0) { |
212 | 0 | *aDataStr++ = '\0'; |
213 | 0 | --lineLen; |
214 | 0 | } |
215 | 0 |
|
216 | 0 | if (!lineLen) { |
217 | 0 | // invalid format, bail |
218 | 0 | return NS_OK; |
219 | 0 | } |
220 | 0 | } else { |
221 | 0 | // it's unquoted. snarf until we see whitespace. |
222 | 0 | value = aDataStr; |
223 | 0 | while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) { |
224 | 0 | ++aDataStr; |
225 | 0 | --lineLen; |
226 | 0 | } |
227 | 0 | if (lineLen > 0) { |
228 | 0 | *aDataStr++ = '\0'; |
229 | 0 | --lineLen; |
230 | 0 | } |
231 | 0 | // even if we ran out of line length here, there's still a trailing zero |
232 | 0 | // byte afterwards |
233 | 0 | } |
234 | 0 |
|
235 | 0 | fieldType t = fieldType(mFormat[i]); |
236 | 0 | switch (t) { |
237 | 0 | case FIELD_FILENAME: { |
238 | 0 | // don't unescape at this point, so that UnEscapeAndConvert() can |
239 | 0 | filename = value; |
240 | 0 |
|
241 | 0 | bool success = false; |
242 | 0 |
|
243 | 0 | nsAutoString entryuri; |
244 | 0 |
|
245 | 0 | if (gTextToSubURI) { |
246 | 0 | nsAutoString result; |
247 | 0 | if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert( |
248 | 0 | mEncoding, filename, result))) { |
249 | 0 | if (!result.IsEmpty()) { |
250 | 0 | aIdx->SetLocation(filename); |
251 | 0 | if (!mHasDescription) |
252 | 0 | aIdx->SetDescription(result); |
253 | 0 | success = true; |
254 | 0 | } |
255 | 0 | } else { |
256 | 0 | NS_WARNING("UnEscapeAndConvert error"); |
257 | 0 | } |
258 | 0 | } |
259 | 0 |
|
260 | 0 | if (!success) { |
261 | 0 | // if unsuccessfully at charset conversion, then |
262 | 0 | // just fallback to unescape'ing in-place |
263 | 0 | // XXX - this shouldn't be using UTF8, should it? |
264 | 0 | // when can we fail to get the service, anyway? - bbaetz |
265 | 0 | aIdx->SetLocation(filename); |
266 | 0 | if (!mHasDescription) { |
267 | 0 | aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); |
268 | 0 | } |
269 | 0 | } |
270 | 0 | } |
271 | 0 | break; |
272 | 0 | case FIELD_DESCRIPTION: |
273 | 0 | nsUnescape(value); |
274 | 0 | aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); |
275 | 0 | break; |
276 | 0 | case FIELD_CONTENTLENGTH: |
277 | 0 | { |
278 | 0 | int64_t len; |
279 | 0 | int32_t status = PR_sscanf(value, "%lld", &len); |
280 | 0 | if (status == 1) |
281 | 0 | aIdx->SetSize(len); |
282 | 0 | else |
283 | 0 | aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown |
284 | 0 | } |
285 | 0 | break; |
286 | 0 | case FIELD_LASTMODIFIED: |
287 | 0 | { |
288 | 0 | PRTime tm; |
289 | 0 | nsUnescape(value); |
290 | 0 | if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) { |
291 | 0 | aIdx->SetLastModified(tm); |
292 | 0 | } |
293 | 0 | } |
294 | 0 | break; |
295 | 0 | case FIELD_CONTENTTYPE: |
296 | 0 | aIdx->SetContentType(nsDependentCString(value)); |
297 | 0 | break; |
298 | 0 | case FIELD_FILETYPE: |
299 | 0 | // unescape in-place |
300 | 0 | nsUnescape(value); |
301 | 0 | if (!nsCRT::strcasecmp(value, "directory")) { |
302 | 0 | aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY); |
303 | 0 | } else if (!nsCRT::strcasecmp(value, "file")) { |
304 | 0 | aIdx->SetType(nsIDirIndex::TYPE_FILE); |
305 | 0 | } else if (!nsCRT::strcasecmp(value, "symbolic-link")) { |
306 | 0 | aIdx->SetType(nsIDirIndex::TYPE_SYMLINK); |
307 | 0 | } else { |
308 | 0 | aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN); |
309 | 0 | } |
310 | 0 | break; |
311 | 0 | case FIELD_UNKNOWN: |
312 | 0 | // ignore |
313 | 0 | break; |
314 | 0 | } |
315 | 0 | } |
316 | 0 |
|
317 | 0 | return NS_OK; |
318 | 0 | } |
319 | | |
320 | | NS_IMETHODIMP |
321 | | nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt, |
322 | | nsIInputStream *aStream, |
323 | | uint64_t aSourceOffset, |
324 | 0 | uint32_t aCount) { |
325 | 0 | if (aCount < 1) |
326 | 0 | return NS_OK; |
327 | 0 | |
328 | 0 | int32_t len = mBuf.Length(); |
329 | 0 |
|
330 | 0 | // Ensure that our mBuf has capacity to hold the data we're about to |
331 | 0 | // read. |
332 | 0 | if (!mBuf.SetLength(len + aCount, fallible)) |
333 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
334 | 0 | |
335 | 0 | // Now read the data into our buffer. |
336 | 0 | nsresult rv; |
337 | 0 | uint32_t count; |
338 | 0 | rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count); |
339 | 0 | if (NS_FAILED(rv)) return rv; |
340 | 0 | |
341 | 0 | // Set the string's length according to the amount of data we've read. |
342 | 0 | // Note: we know this to work on nsCString. This isn't guaranteed to |
343 | 0 | // work on other strings. |
344 | 0 | mBuf.SetLength(len + count); |
345 | 0 |
|
346 | 0 | return ProcessData(aRequest, aCtxt); |
347 | 0 | } |
348 | | |
349 | | nsresult |
350 | 0 | nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) { |
351 | 0 | if (!mListener) |
352 | 0 | return NS_ERROR_FAILURE; |
353 | 0 | |
354 | 0 | int32_t numItems = 0; |
355 | 0 |
|
356 | 0 | while(true) { |
357 | 0 | ++numItems; |
358 | 0 |
|
359 | 0 | int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart); |
360 | 0 | if (eol < 0) break; |
361 | 0 | mBuf.SetCharAt(char16_t('\0'), eol); |
362 | 0 |
|
363 | 0 | const char *line = mBuf.get() + mLineStart; |
364 | 0 |
|
365 | 0 | int32_t lineLen = eol - mLineStart; |
366 | 0 | mLineStart = eol + 1; |
367 | 0 |
|
368 | 0 | if (lineLen >= 4) { |
369 | 0 | nsresult rv; |
370 | 0 | const char *buf = line; |
371 | 0 |
|
372 | 0 | if (buf[0] == '1') { |
373 | 0 | if (buf[1] == '0') { |
374 | 0 | if (buf[2] == '0' && buf[3] == ':') { |
375 | 0 | // 100. Human-readable comment line. Ignore |
376 | 0 | } else if (buf[2] == '1' && buf[3] == ':') { |
377 | 0 | // 101. Human-readable information line. |
378 | 0 | mComment.Append(buf + 4); |
379 | 0 |
|
380 | 0 | char *value = ((char *)buf) + 4; |
381 | 0 | nsUnescape(value); |
382 | 0 | mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value)); |
383 | 0 |
|
384 | 0 | } else if (buf[2] == '2' && buf[3] == ':') { |
385 | 0 | // 102. Human-readable information line, HTML. |
386 | 0 | mComment.Append(buf + 4); |
387 | 0 | } |
388 | 0 | } |
389 | 0 | } else if (buf[0] == '2') { |
390 | 0 | if (buf[1] == '0') { |
391 | 0 | if (buf[2] == '0' && buf[3] == ':') { |
392 | 0 | // 200. Define field names |
393 | 0 | rv = ParseFormat(buf + 4); |
394 | 0 | if (NS_FAILED(rv)) { |
395 | 0 | return rv; |
396 | 0 | } |
397 | 0 | } else if (buf[2] == '1' && buf[3] == ':') { |
398 | 0 | // 201. Field data |
399 | 0 | nsCOMPtr<nsIDirIndex> idx = new nsDirIndex(); |
400 | 0 |
|
401 | 0 | rv = ParseData(idx, ((char *)buf) + 4, lineLen - 4); |
402 | 0 | if (NS_FAILED(rv)) { |
403 | 0 | return rv; |
404 | 0 | } |
405 | 0 | |
406 | 0 | mListener->OnIndexAvailable(aRequest, aCtxt, idx); |
407 | 0 | } |
408 | 0 | } |
409 | 0 | } else if (buf[0] == '3') { |
410 | 0 | if (buf[1] == '0') { |
411 | 0 | if (buf[2] == '0' && buf[3] == ':') { |
412 | 0 | // 300. Self-referring URL |
413 | 0 | } else if (buf[2] == '1' && buf[3] == ':') { |
414 | 0 | // 301. OUR EXTENSION - encoding |
415 | 0 | int i = 4; |
416 | 0 | while (buf[i] && nsCRT::IsAsciiSpace(buf[i])) |
417 | 0 | ++i; |
418 | 0 |
|
419 | 0 | if (buf[i]) |
420 | 0 | SetEncoding(buf+i); |
421 | 0 | } |
422 | 0 | } |
423 | 0 | } |
424 | 0 | } |
425 | 0 | } |
426 | 0 |
|
427 | 0 | return NS_OK; |
428 | 0 | } |