Coverage Report

Created: 2024-06-21 07:24

/src/wget/src/ftp-ls.c
Line
Count
Source (jump to first uncovered line)
1
/* Parsing FTP `ls' output.
2
   Copyright (C) 1996-2011, 2015, 2018-2024 Free Software Foundation,
3
   Inc.
4
5
This file is part of GNU Wget.
6
7
GNU Wget is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3 of the License, or
10
(at your option) any later version.
11
12
GNU Wget is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
GNU General Public License for more details.
16
17
You should have received a copy of the GNU General Public License
18
along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20
Additional permission under GNU GPL version 3 section 7
21
22
If you modify this program, or any covered work, by linking or
23
combining it with the OpenSSL project's OpenSSL library (or a
24
modified version of that library), containing parts covered by the
25
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26
grants you additional permission to convey the resulting work.
27
Corresponding Source for a non-source form of such a combination
28
shall include the source code for the parts of OpenSSL used as well
29
as that of the covered work.  */
30
31
#include "wget.h"
32
33
#include <stdio.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <unistd.h>
37
#include <errno.h>
38
#include <time.h>
39
#include "utils.h"
40
#include "ftp.h"
41
#include "url.h"
42
#include "convert.h"            /* for html_quote_string prototype */
43
#include "retr.h"               /* for output_stream */
44
#include "c-strcase.h"
45
46
/* Translate a symbolic permission string into its numeric form, e.g.
   "rwxr-xr-x" becomes 0755.

   S points at the nine permission characters (the caller passes the
   text following the file-type letter).  Only the first nine characters
   are examined; anything after them (such as an ACL marker) is ignored.
   An `s' in an execute position counts as execute; apart from that,
   setuid/setgid/sticky are not interpreted.

   Returns the permission bits, or 0 when S is shorter than nine
   characters.  */
static int
symperms (const char *s)
{
  int mode = 0;
  const char *triad;

  if (strlen (s) < 9)
    return 0;

  /* Walk the user, group and other triads in that order.  */
  for (triad = s; triad < s + 9; triad += 3)
    {
      int bits = 0;

      if (triad[0] == 'r')
        bits |= 4;
      if (triad[1] == 'w')
        bits |= 2;
      if (triad[2] == 'x' || triad[2] == 's')
        bits |= 1;

      mode = (mode << 3) | bits;
    }

  return mode;
}
64
65
66
/* Normalize one line of listing text so that it can be parsed
   consistently.

   LINE holds LEN characters.  Every <CR> and <LF> at the end of the
   line is overwritten with a NUL, and every <TAB> up to the first NUL
   is replaced by a <SPACE>.

   Returns the length of the line without the stripped trailing
   characters; 0 if LEN is not positive or nothing remains.  */
static int
clean_line (char *line, int len)
{
  char *p;

  if (len <= 0)
    return 0;

  /* Chop the line terminators off, last character first.  */
  for (; len > 0; len--)
    {
      char last = line[len - 1];

      if (last != '\n' && last != '\r')
        break;
      line[len - 1] = '\0';
    }

  if (len == 0)
    return 0;

  /* Tabs become plain blanks.  */
  for (p = line; *p != '\0'; p++)
    {
      if (*p == '\t')
        *p = ' ';
    }

  return len;
}
84
85
/* Convert the Un*x-ish style directory listing read from FP to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FP are considered to be produced by the standard Unix `ls -la'
   output (whatever that might be).  BSD (no group) and SYSV (with
   group) listings are handled.

   If IGNORE_PERMS is non-zero, the permission string of an entry is
   not parsed: 0755 is used for directories and 0644 for everything
   else.  Otherwise the permissions are computed by symperms().

   Lines beginning with "total", lines on which no English month name
   is found, the entries `.' and `..', and symbolic links without a
   ` -> ' part are skipped.

   The timestamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored.

   Returns the first element of the list, or NULL if no entry was
   parsed.  */
93
static struct fileinfo *
94
ftp_parse_unix_ls (FILE *fp, int ignore_perms)
95
6.21k
{
96
6.21k
  static const char *months[] = {
97
6.21k
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
98
6.21k
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
99
6.21k
  };
100
6.21k
  int next, len, i, error, ignore;
101
6.21k
  int year, month, day;         /* for time analysis */
102
6.21k
  int hour, min, sec, ptype;
103
6.21k
  struct tm timestruct, *tnow;
104
6.21k
  time_t timenow;
105
6.21k
  size_t bufsize = 0;
106
107
6.21k
  char *line = NULL, *tok, *ptok;      /* tokenizer */
108
6.21k
  struct fileinfo *dir, *l, cur;       /* list creation */
109
110
6.21k
  dir = l = NULL;
111
112
  /* Line loop to end of file: */
113
61.0k
  while ((len = getline (&line, &bufsize, fp)) > 0)
114
54.8k
    {
115
54.8k
      len = clean_line (line, len);
116
      /* Skip if total...  */
117
54.8k
      if (!c_strncasecmp (line, "total", 5))
118
830
        continue;
119
      /* Get the first token (permissions).  */
120
53.9k
      tok = strtok (line, " ");
121
53.9k
      if (!tok)
122
6.60k
        continue;
123
124
47.3k
      cur.name = NULL;
125
47.3k
      cur.linkto = NULL;
126
127
      /* Decide whether we deal with a file or a directory.  */
128
47.3k
      switch (*tok)
129
47.3k
        {
130
6.24k
        case '-':
131
6.24k
          cur.type = FT_PLAINFILE;
132
6.24k
          DEBUGP (("PLAINFILE; "));
133
6.24k
          break;
134
4.55k
        case 'd':
135
4.55k
          cur.type = FT_DIRECTORY;
136
4.55k
          DEBUGP (("DIRECTORY; "));
137
4.55k
          break;
138
6.54k
        case 'l':
139
6.54k
          cur.type = FT_SYMLINK;
140
6.54k
          DEBUGP (("SYMLINK; "));
141
6.54k
          break;
142
30.0k
        default:
143
30.0k
          cur.type = FT_UNKNOWN;
144
30.0k
          DEBUGP (("UNKNOWN; "));
145
30.0k
          break;
146
47.3k
        }
147
148
47.3k
      if (ignore_perms)
149
30.5k
        {
150
30.5k
          switch (cur.type)
151
30.5k
            {
152
4.14k
            case FT_PLAINFILE:
153
4.14k
              cur.perms = 0644;
154
4.14k
              break;
155
3.00k
            case FT_DIRECTORY:
156
3.00k
              cur.perms = 0755;
157
3.00k
              break;
158
23.4k
            default:
159
              /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
160
23.4k
              cur.perms = 0644;
161
30.5k
            }
162
30.5k
          DEBUGP (("implicit perms %0o; ", (unsigned) cur.perms));
163
30.5k
        }
164
16.8k
       else
165
16.8k
         {
             /* Skip the leading file-type character; symperms() wants
                only the permission characters.  */
166
16.8k
           cur.perms = symperms (tok + 1);
167
16.8k
           DEBUGP (("perms %0o; ", (unsigned) cur.perms));
168
16.8k
         }
169
170
47.3k
      error = ignore = 0;       /* Erroneous and ignoring entries are
                                   treated equally for now.  */
172
47.3k
      year = hour = min = sec = 0; /* Silence the compiler.  */
173
47.3k
      month = day = 0;
174
47.3k
      ptype = TT_DAY;
175
      /* A negative `next' means that no month name has been seen yet.  */
47.3k
      next = -1;
176
      /* While there are tokens on the line, parse them.  Next is the
         number of tokens left until the filename.

         Use the month-name token as the "anchor" (the place where the
         position wrt the file name is "known").  When a month name is
         encountered, `next' is set to 5.  It is decremented at the top
         of every iteration, so the following tokens are seen with
         next == 4 (day), next == 3 (time or year) and next == 2 (file
         name).  Also, the token preceding the month is parsed to get
         the file size.

         This tactic is quite dubious when it comes to
         internationalization issues (non-English month names), but it
         works for now.  */
187
      /* Start with TOK at the beginning of the line, so that on the
         first iteration PTOK == LINE stands for the permissions
         token.  */
47.3k
      tok = line;
188
165k
      while (ptok = tok,
189
165k
             (tok = strtok (NULL, " ")) != NULL)
190
132k
        {
191
132k
          --next;
192
132k
          if (next < 0)         /* a month name was not encountered */
193
80.5k
            {
194
911k
              for (i = 0; i < 12; i++)
195
855k
                if (!c_strcasecmp (tok, months[i]))
196
24.5k
                  break;
197
              /* If we got a month, it means the token before it is the
                 size, and the filename is three tokens away.  */
199
80.5k
              if (i != 12)
200
24.5k
                {
201
24.5k
                  wgint size;
202
203
                  /* Parse the previous token with str_to_wgint.  */
204
24.5k
                  if (ptok == line)
205
754
                    {
206
                      /* Something has gone wrong during parsing: the
                         month is the first token after the permissions,
                         so there is no size token before it.  */
207
754
                      error = 1;
208
754
                      break;
209
754
                    }
210
23.8k
                  errno = 0;
211
23.8k
                  size = str_to_wgint (ptok, NULL, 10);
212
23.8k
                  if (size == WGINT_MAX && errno == ERANGE)
213
                    /* Out of range -- ignore the size.  #### Should
                       we refuse to start the download.  */
215
987
                    cur.size = 0;
216
22.8k
                  else
217
22.8k
                    cur.size = size;
218
23.8k
                  DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
219
220
23.8k
                  month = i;
221
23.8k
                  next = 5;
222
23.8k
                  DEBUGP (("month: %s; ", months[month]));
223
23.8k
                }
224
80.5k
            }
225
52.3k
          else if (next == 4)   /* days */
226
19.9k
            {
              /* The characters are not checked for being digits; a
                 token that is not a one- or two-digit number yields a
                 meaningless day.  */
227
19.9k
              if (tok[1])       /* two-digit... */
228
835
                day = 10 * (*tok - '0') + tok[1] - '0';
229
19.1k
              else              /* ...or one-digit */
230
19.1k
                day = *tok - '0';
231
19.9k
              DEBUGP (("day: %d; ", day));
232
19.9k
            }
233
32.4k
          else if (next == 3)
234
18.5k
            {
235
              /* This ought to be either the time, or the year.  Let's
                 be flexible!

                 If we have a number x, it's a year.  If we have x:y,
                 it's hours and minutes.  If we have x:y:z, z are
                 seconds.  */
241
18.5k
              year = 0;
242
18.5k
              min = hour = sec = 0;
243
              /* We must deal with digits.  */
244
18.5k
              if (c_isdigit (*tok))
245
10.2k
                {
246
                  /* Suppose it's year.  Limit to year 99999 to avoid integer overflow. */
247
24.3k
                  for (; c_isdigit (*tok) && year <= 99999; tok++)
248
14.1k
                    year = (*tok - '0') + 10 * year;
249
10.2k
                  if (*tok == ':')
250
2.95k
                    {
251
2.95k
                      int n;
252
                      /* This means these were hours!  */
253
2.95k
                      hour = year;
254
2.95k
                      year = 0;
255
2.95k
                      ptype = TT_HOUR_MIN;
256
2.95k
                      ++tok;
257
                      /* Get the minutes (at most two digits)...  */
258
4.18k
                      for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
259
1.23k
                        min = (*tok - '0') + 10 * min;
260
2.95k
                      if (*tok == ':')
261
1.59k
                        {
262
                          /* ...and the seconds (at most two digits).  */
263
1.59k
                          ++tok;
264
3.25k
                          for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
265
1.66k
                            sec = (*tok - '0') + 10 * sec;
266
1.59k
                        }
267
2.95k
                    }
268
10.2k
                }
269
18.5k
              if (year)
270
6.40k
                DEBUGP (("year: %d (no tm); ", year));
271
12.0k
              else
272
12.0k
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
273
18.5k
            }
274
13.9k
          else if (next == 2)    /* The file name */
275
13.9k
            {
276
13.9k
              int fnlen;
277
13.9k
              char *p;
278
279
              /* Since the file name may contain a SPC, it is possible
                 for strtok to handle it wrong.  */
281
13.9k
              fnlen = strlen (tok);
              /* The token ends before the end of the cleaned line, so
                 strtok must have overwritten a space after it.  */
282
13.9k
              if (fnlen < len - (tok - line))
283
2.27k
                {
284
                  /* So we have a SPC in the file name.  Restore the
                     original.  */
286
2.27k
                  tok[fnlen] = ' ';
287
                  /* If the file is a symbolic link, it should have a
                     ` -> ' somewhere.  */
289
2.27k
                  if (cur.type == FT_SYMLINK)
290
1.42k
                    {
291
1.42k
                      p = strstr (tok, " -> ");
292
1.42k
                      if (!p)
293
713
                        {
294
713
                          error = 1;
295
713
                          break;
296
713
                        }
297
713
                      cur.linkto = xstrdup (p + 4);
298
713
                      DEBUGP (("link to: %s\n", cur.linkto));
299
                      /* And separate it from the file name.  */
300
713
                      *p = '\0';
301
713
                    }
302
2.27k
                }
303
              /* If we have the filename, add it to the list of files or
                 directories.  */
305
              /* "." and ".." are an exception!  */
306
13.2k
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
307
1.42k
                {
308
1.42k
                  DEBUGP (("\nIgnoring `.' and `..'; "));
309
1.42k
                  ignore = 1;
310
1.42k
                  break;
311
1.42k
                }
312
              /* Some FTP sites choose to have ls -F as their default
                 LIST output, which marks the symlinks with a trailing
                 `@', directory names with a trailing `/' and
                 executables with a trailing `*'.  This is no problem
                 unless encountering a symbolic link ending with `@',
                 or an executable ending with `*' on a server without
                 default -F output.  I believe these cases are very
                 rare.  */
320
11.7k
              fnlen = strlen (tok); /* re-calculate `fnlen' */
321
11.7k
              cur.name = xmalloc (fnlen + 1);
322
11.7k
              memcpy (cur.name, tok, fnlen + 1);
323
11.7k
              if (fnlen)
324
11.7k
                {
325
11.7k
                  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
326
938
                    {
327
938
                      cur.name[fnlen - 1] = '\0';
328
938
                      DEBUGP (("trailing `/' on dir.\n"));
329
938
                    }
330
10.8k
                  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
331
973
                    {
332
973
                      cur.name[fnlen - 1] = '\0';
333
973
                      DEBUGP (("trailing `@' on link.\n"));
334
973
                    }
335
9.88k
                  else if (cur.type == FT_PLAINFILE
336
9.88k
                           && (cur.perms & 0111)
337
9.88k
                           && cur.name[fnlen - 1] == '*')
338
266
                    {
339
266
                      cur.name[fnlen - 1] = '\0';
340
266
                      DEBUGP (("trailing `*' on exec.\n"));
341
266
                    }
342
11.7k
                } /* if (fnlen) */
343
0
              else
344
0
                error = 1;
              /* The file name is the last token of interest.  */
345
11.7k
              break;
346
13.2k
            }
347
0
          else
            /* Not reached: every path through the next == 2 branch
               leaves the loop, so `next' never gets below 2 once a
               month has been seen.  */
348
0
            abort ();
349
132k
        } /* while */
350
351
      /* An entry without a name (no month or no file name token) or a
         symlink without a target is unusable.  */
47.3k
      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
352
37.3k
        error = 1;
353
354
47.3k
      DEBUGP (("%s\n", cur.name ? cur.name : ""));
355
356
47.3k
      if (error || ignore)
357
37.3k
        {
358
37.3k
          DEBUGP (("Skipping.\n"));
359
37.3k
          xfree (cur.name);
360
37.3k
          xfree (cur.linkto);
361
37.3k
          continue;
362
37.3k
        }
363
364
      /* Append a copy of CUR to the list; the copy takes over the
         name and linkto strings.  */
10.0k
      if (!dir)
365
670
        {
366
670
          l = dir = xnew (struct fileinfo);
367
670
          memcpy (l, &cur, sizeof (cur));
368
670
          l->prev = l->next = NULL;
369
670
        }
370
9.41k
      else
371
9.41k
        {
372
9.41k
          cur.prev = l;
373
9.41k
          l->next = xnew (struct fileinfo);
374
9.41k
          l = l->next;
375
9.41k
          memcpy (l, &cur, sizeof (cur));
376
9.41k
          l->next = NULL;
377
9.41k
        }
378
      /* Get the current time.  */
379
10.0k
      timenow = time (NULL);
380
10.0k
      tnow = localtime (&timenow);
381
      /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr).  */
382
10.0k
      timestruct.tm_sec   = sec;
383
10.0k
      timestruct.tm_min   = min;
384
10.0k
      timestruct.tm_hour  = hour;
385
10.0k
      timestruct.tm_mday  = day;
386
10.0k
      timestruct.tm_mon   = month;
      /* YEAR is 0 when the listing gave a time of day instead of a
         year, or when that token did not start with a digit.  */
387
10.0k
      if (year == 0)
388
4.56k
        {
389
          /* Some listings will not specify the year if it is "obvious"
             that the file was from the previous year.  E.g. if today
             is 97-01-12, and you see a file of Dec 15th, its year is
             1996, not 1997.  Thanks to Vladimir Volovich for
             mentioning this!  */
394
4.56k
          if (month > tnow->tm_mon)
395
2.46k
            timestruct.tm_year = tnow->tm_year - 1;
396
2.10k
          else
397
2.10k
            timestruct.tm_year = tnow->tm_year;
398
4.56k
        }
399
5.51k
      else
400
5.51k
        timestruct.tm_year = year;
      /* struct tm counts years from 1900; values below 1900 are stored
         unchanged.  */
401
10.0k
      if (timestruct.tm_year >= 1900)
402
766
        timestruct.tm_year -= 1900;
403
10.0k
      timestruct.tm_wday  = 0;
404
10.0k
      timestruct.tm_yday  = 0;
405
10.0k
      timestruct.tm_isdst = -1;
406
10.0k
      l->tstamp = mktime (&timestruct); /* store the time-stamp */
407
10.0k
      l->ptype = ptype;
408
10.0k
    }
409
410
6.21k
  xfree (line);
411
6.21k
  return dir;
412
6.21k
}
413
414
static struct fileinfo *
415
ftp_parse_winnt_ls (FILE *fp)
416
849
{
417
849
  int len;
418
849
  int year, month, day;         /* for time analysis */
419
849
  int hour, min;
420
849
  size_t bufsize = 0;
421
849
  struct tm timestruct;
422
423
849
  char *line = NULL, *tok;             /* tokenizer */
424
849
  char *filename;
425
849
  struct fileinfo *dir, *l, cur; /* list creation */
426
427
849
  dir = l = NULL;
428
849
  cur.name = NULL;
429
430
  /* Line loop to end of file: */
431
4.59k
  while ((len = getline (&line, &bufsize, fp)) > 0)
432
3.74k
    {
433
3.74k
      len = clean_line (line, len);
434
435
      /* Name begins at 39 column of the listing if date presented in `mm-dd-yy'
436
         format or at 41 column if date presented in `mm-dd-yyyy' format. Thus,
437
         we cannot extract name before we parse date. Using this information we
438
         also can recognize filenames that begin with a series of space
439
         characters (but who really wants to use such filenames anyway?). */
440
3.74k
      if (len < 40) continue;
441
2.01k
      filename = line + 39;
442
443
      /* First column: mm-dd-yy or mm-dd-yyyy. Should atoi() on the month fail,
444
         january will be assumed.  */
445
2.01k
      tok = strtok(line, "-");
446
2.01k
      if (tok == NULL) continue;
447
1.94k
      month = atoi(tok);
448
1.94k
      if (month < 0) month = 0; else month--;
449
1.94k
      tok = strtok(NULL, "-");
450
1.94k
      if (tok == NULL) continue;
451
1.80k
      day = atoi(tok);
452
1.80k
      tok = strtok(NULL, " ");
453
1.80k
      if (tok == NULL) continue;
454
1.66k
      year = atoi(tok);
455
      /* Assuming the epoch starting at 1.1.1970 */
456
1.66k
      if (year <= 70)
457
1.20k
        {
458
1.20k
          year += 100;
459
1.20k
        }
460
461
      else if (year >= 1900)
461
319
        {
462
319
          year -= 1900;
463
319
          if (len < 42) continue;
464
191
          filename += 2;
465
191
        }
466
      /* Now it is possible to determine the position of the first symbol in
467
         filename. */
468
1.53k
      xfree (cur.name);
469
1.53k
      memset(&cur, 0, sizeof (cur));
470
1.53k
      cur.name = xstrdup(filename);
471
1.53k
      DEBUGP (("Name: '%s'\n", cur.name));
472
473
474
      /* Second column: hh:mm[AP]M, listing does not contain value for
475
         seconds */
476
1.53k
      tok = strtok(NULL,  ":");
477
1.53k
      if (tok == NULL) continue;
478
1.43k
      hour = atoi(tok);
479
1.43k
      tok = strtok(NULL,  "M");
480
1.43k
      if (tok == NULL) continue;
481
1.29k
      min = atoi(tok);
482
      /* Adjust hour from AM/PM. Just for the record, the sequence goes
483
         11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
484
1.29k
      if (tok[0] && tok[1]) tok+=2;
485
1.29k
      if (hour >= 12 || hour < 0)  hour  = 0;
486
1.29k
      if (*tok == 'P') hour += 12;
487
488
1.29k
      DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
489
1.29k
              year+1900, month, day, hour, min));
490
491
      /* Build the time-stamp (copy & paste from above) */
492
1.29k
      timestruct.tm_sec   = 0;
493
1.29k
      timestruct.tm_min   = min;
494
1.29k
      timestruct.tm_hour  = hour;
495
1.29k
      timestruct.tm_mday  = day;
496
1.29k
      timestruct.tm_mon   = month;
497
1.29k
      timestruct.tm_year  = year;
498
1.29k
      timestruct.tm_wday  = 0;
499
1.29k
      timestruct.tm_yday  = 0;
500
1.29k
      timestruct.tm_isdst = -1;
501
1.29k
      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
502
1.29k
      cur.ptype = TT_HOUR_MIN;
503
504
1.29k
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));
505
506
      /* Third column: Either file length, or <DIR>. We also set the
507
         permissions (guessed as 0644 for plain files and 0755 for
508
         directories as the listing does not give us a clue) and filetype
509
         here. */
510
1.29k
      tok = strtok(NULL, " ");
511
1.29k
      if (tok == NULL) continue;
512
614
      while ((tok != NULL) && (*tok == '\0'))  tok = strtok(NULL, " ");
513
614
      if (tok == NULL) continue;
514
614
      if (*tok == '<')
515
76
        {
516
76
          cur.type  = FT_DIRECTORY;
517
76
          cur.size  = 0;
518
76
          cur.perms = 0755;
519
76
          DEBUGP (("Directory\n"));
520
76
        }
521
538
      else
522
538
        {
523
538
          wgint size;
524
538
          cur.type  = FT_PLAINFILE;
525
538
          errno = 0;
526
538
          size = str_to_wgint (tok, NULL, 10);
527
538
          if (size == WGINT_MAX && errno == ERANGE)
528
151
            cur.size = 0;       /* overflow */
529
387
          else
530
387
            cur.size = size;
531
538
          cur.perms = 0644;
532
538
          DEBUGP (("File, size %s bytes\n", number_to_static_string (cur.size)));
533
538
        }
534
535
614
      cur.linkto = NULL;
536
537
      /* And put everything into the linked list */
538
614
      if (!dir)
539
177
        {
540
177
          l = dir = xnew (struct fileinfo);
541
177
          memcpy (l, &cur, sizeof (cur));
542
177
          l->prev = l->next = NULL;
543
177
        }
544
437
      else
545
437
        {
546
437
          cur.prev = l;
547
437
          l->next = xnew (struct fileinfo);
548
437
          l = l->next;
549
437
          memcpy (l, &cur, sizeof (cur));
550
437
          l->next = NULL;
551
437
        }
552
614
      cur.name = NULL;
553
614
    }
554
555
849
  xfree (cur.name);
556
849
  xfree (line);
557
849
  return dir;
558
849
}
559
560
561
562
/* Convert the VMS-style directory listing stored in "file" to a
563
   linked list of fileinfo (system-independent) entries.  The contents
564
   of FILE are considered to be produced by the standard VMS
565
   "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
566
   more or less.  (Different VMS FTP servers may have different headers,
567
   and may not supply the same data, but all should be subsets of this.)
568
569
   VMS normally provides local (server) time and date information.
570
   Define the logical name or environment variable
571
   "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
572
   times if different from the remote local times.
573
574
   2005-02-23 SMS.
575
   Added code to eliminate "^" escape characters from ODS5 extended file
576
   names.  The TCPIP FTP server (V5.4) seems to prefer requests which do
577
   not use the escaped names which it provides.
578
*/
579
580
16.7k
/* Permissions given to an entry of a VMS listing according to its type
   (plain file or directory) at the point where ftp_parse_vms_ls()
   classifies the name.  */
#define VMS_DEFAULT_PROT_FILE 0644
581
390
#define VMS_DEFAULT_PROT_DIR 0755
582
583
/* 2005-02-23 SMS.
584
   eat_carets().
585
586
   Delete ODS5 extended file name escape characters ("^") in the
587
   original buffer.
588
   Note that the current scheme does not handle all EFN cases, but it
589
   could be made more complicated.
590
*/
591
592
static void eat_carets( char *str)
593
/* char *str;      Source pointer. */
594
17.0k
{
595
17.0k
  char *strd;   /* Destination pointer. */
596
17.0k
  char hdgt;
597
17.0k
  unsigned char uchr;
598
599
  /* Skip ahead to the first "^", if any. */
600
91.8k
  while ((*str != '\0') && (*str != '^'))
601
74.7k
     str++;
602
603
  /* If no caret was found, quit early. */
604
17.0k
  if (*str != '\0')
605
1.72k
  {
606
    /* Shift characters leftward as carets are found. */
607
1.72k
    strd = str;
608
9.55k
    while (*str != '\0')
609
7.82k
    {
610
7.82k
      uchr = *str;
611
7.82k
      if (uchr == '^')
612
3.33k
      {
613
        /* Found a caret.  Skip it, and check the next character. */
614
3.33k
        if ((char_prop[(unsigned char) str[1]] & 64) && (char_prop[(unsigned char) str[2]] & 64))
615
411
        {
616
          /* Hex digit.  Get char code from this and next hex digit. */
617
411
          uchr = *(++str);
618
411
          if (uchr <= '9')
619
206
          {
620
206
            hdgt = uchr - '0';           /* '0' - '9' -> 0 - 9. */
621
206
          }
622
205
          else
623
205
          {
624
205
            hdgt = ((uchr - 'A') & 7) + 10;    /* [Aa] - [Ff] -> 10 - 15. */
625
205
          }
626
411
          hdgt <<= 4;                   /* X16. */
627
411
          uchr = *(++str);              /* Next char must be hex digit. */
628
411
          if (uchr <= '9')
629
201
          {
630
201
            uchr = hdgt + uchr - '0';
631
201
          }
632
210
          else
633
210
          {
634
210
            uchr = hdgt + ((uchr - 'A') & 15) + 10;
635
210
          }
636
411
        }
637
2.92k
        else if (uchr == '_')
638
0
        {
639
          /* Convert escaped "_" to " ". */
640
0
          uchr = ' ';
641
0
        }
642
2.92k
        else if (uchr == '/')
643
0
        {
644
          /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
645
          /* Note that this is a left-over from Info-ZIP code, and is
646
             probably of little value here, except perhaps to avoid
647
             directory confusion which an unconverted slash might cause.
648
          */
649
0
          uchr = '?';
650
0
        }
651
        /* Else, not a hex digit.  Must be a simple escaped character
652
           (or Unicode, which is not yet handled here).
653
        */
654
3.33k
      }
655
      /* Else, not a caret.  Use as-is. */
656
7.82k
      *strd = uchr;
657
658
      /* Advance destination and source pointers. */
659
7.82k
      strd++;
660
7.82k
      str++;
661
7.82k
    }
662
    /* Terminate the destination string. */
663
1.72k
    *strd = '\0';
664
1.72k
  }
665
17.0k
}
666
667
668
static struct fileinfo *
669
ftp_parse_vms_ls (FILE *fp)
670
2.35k
{
671
2.35k
  int dt, i, j, len;
672
2.35k
  int perms;
673
2.35k
  size_t bufsize = 0;
674
2.35k
  time_t timenow;
675
2.35k
  struct tm *timestruct;
676
2.35k
  char date_str[32];
677
678
2.35k
  char *line = NULL, *tok; /* tokenizer */
679
2.35k
  struct fileinfo *dir, *l, cur; /* list creation */
680
681
2.35k
  dir = l = NULL;
682
683
  /* Skip blank lines, Directory heading, and more blank lines. */
684
685
3.12k
  for (j = 0; (i = getline (&line, &bufsize, fp)) > 0; )
686
3.09k
    {
687
3.09k
      i = clean_line (line, i);
688
3.09k
      if (i <= 0)
689
758
        continue; /* Ignore blank line. */
690
691
2.33k
      if ((j == 0) && (line[i - 1] == ']'))
692
7
        {
693
          /* Found Directory heading line.  Next non-blank line
694
          is significant. */
695
7
          j = 1;
696
7
        }
697
2.33k
      else if (!strncmp (line, "Total of ", 9))
698
1
        {
699
          /* Found "Total of ..." footing line.  No valid data
700
             will follow (empty directory). */
701
1
          i = 0; /* Arrange for early exit. */
702
1
          break;
703
1
        }
704
2.33k
      else
705
2.33k
        {
706
2.33k
          break; /* Must be significant data. */
707
2.33k
        }
708
2.33k
    }
709
710
  /* Read remainder of file until the next blank line or EOF. */
711
712
2.35k
  cur.name = NULL;
713
18.7k
  while (i > 0)
714
17.0k
    {
715
17.0k
      char *p;
716
717
      /* The first token is the file name.  After a long name, other
718
         data may be on the following line.  A valid directory name ends
719
         in ".DIR;1" (any case), although some VMS FTP servers may omit
720
         the version number (";1").
721
      */
722
723
17.0k
      tok = strtok(line, " ");
724
17.0k
      if (tok == NULL) tok = line;
725
17.0k
      DEBUGP (("file name:   '%s'\n", tok));
726
727
      /* Stripping the version number on a VMS system would be wrong.
728
         It may be foolish on a non-VMS system, too, but that's someone
729
         else's problem.  (Define PRESERVE_VMS_VERSIONS for proper
730
         operation on other operating systems.)
731
732
         2005-02-23 SMS.
733
         ODS5 extended file names may contain escaped semi-colons, so
734
         the version number is identified as right-side decimal digits
735
         led by a non-escaped semi-colon.  It may be absent.
736
      */
737
738
17.0k
#if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
739
19.9k
      for (p = tok + strlen (tok); (--p > tok) && c_isdigit(*p); );
740
17.0k
      if (p > tok && (*p == ';') && (*(p - 1) != '^'))
741
433
        {
742
433
          *p = '\0';
743
433
        }
744
17.0k
#endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
745
746
      /* 2005-02-23 SMS.
747
         Eliminate "^" escape characters from ODS5 extended file name.
748
         (A caret is invalid in an ODS2 name, so this is always safe.)
749
      */
750
17.0k
      eat_carets (tok);
751
17.0k
      DEBUGP (("file name-^: '%s'\n", tok));
752
753
      /* Differentiate between a directory and any other file.  A VMS
754
         listing may not include file protections (permissions).  Set a
755
         default permissions value (according to the file type), which
756
         may be overwritten later.  Store directory names without the
757
         ".DIR;1" file type and version number, as the plain name is
758
         what will work in a CWD command.
759
      */
760
17.0k
      len = strlen (tok);
761
17.0k
      if (len >= 4 && !c_strncasecmp(tok + (len - 4), ".DIR", 4))
762
196
        {
763
196
          *(tok + (len - 4)) = '\0'; /* Discard ".DIR". */
764
196
          cur.type  = FT_DIRECTORY;
765
196
          cur.perms = VMS_DEFAULT_PROT_DIR;
766
196
          DEBUGP (("Directory (nv)\n"));
767
196
        }
768
16.8k
      else if (len >= 6 && !c_strncasecmp (tok + len - 6, ".DIR;1", 6))
769
194
        {
770
194
          *(tok + (len - 6)) = '\0'; /* Discard ".DIR;1". */
771
194
          cur.type  = FT_DIRECTORY;
772
194
          cur.perms = VMS_DEFAULT_PROT_DIR;
773
194
          DEBUGP (("Directory (v)\n"));
774
194
        }
775
16.7k
      else
776
16.7k
        {
777
16.7k
          cur.type  = FT_PLAINFILE;
778
16.7k
          cur.perms = VMS_DEFAULT_PROT_FILE;
779
16.7k
          DEBUGP (("File\n"));
780
16.7k
        }
781
17.0k
      xfree (cur.name);
782
17.0k
      cur.name = xstrdup (tok);
783
17.0k
      DEBUGP (("Name: '%s'\n", cur.name));
784
785
      /* Null the date and time string. */
786
17.0k
      *date_str = '\0';
787
788
      /* VMS lacks symbolic links. */
789
17.0k
      cur.linkto = NULL;
790
791
      /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
792
         hence useless for an integrity check based on byte-count.
793
         Set size to unknown.
794
      */
795
17.0k
      cur.size = 0;
796
797
      /* Get token 2, if any.  A long name may force all other data onto
798
         a second line.  If needed, read the second line.
799
      */
800
801
17.0k
      tok = strtok (NULL, " ");
802
17.0k
      if (tok == NULL)
803
6.05k
        {
804
6.05k
          DEBUGP (("Getting additional line.\n"));
805
6.05k
          i = getline (&line, &bufsize, fp);
806
6.05k
          if (i <= 0)
807
618
            {
808
618
              DEBUGP (("EOF.  Leaving listing parser.\n"));
809
618
              break;
810
618
            }
811
812
          /* Second line must begin with " ".  Otherwise, it's a first
813
             line (and we may be confused).
814
          */
815
5.43k
          i = clean_line (line, i);
816
5.43k
          if (i <= 0)
817
30
            {
818
              /* Blank line.  End of significant file listing. */
819
30
              DEBUGP (("Blank line.  Leaving listing parser.\n"));
820
30
              break;
821
30
            }
822
5.40k
          else if (line[0] != ' ')
823
4.95k
            {
824
4.95k
              DEBUGP (("Non-blank in column 1.  Must be a new file name?\n"));
825
4.95k
              continue;
826
4.95k
            }
827
455
          else
828
455
            {
829
455
              tok = strtok (line, " ");
830
455
              if (tok == NULL)
831
10
                {
832
                  /* Unexpected non-empty but apparently blank line. */
833
10
                  DEBUGP (("Null token.  Leaving listing parser.\n"));
834
10
                  break;
835
10
                }
836
455
            }
837
5.43k
        }
838
839
      /* Analyze tokens.  (Order is not significant, except date must
840
         precede time.)
841
842
         Size:       ddd or ddd/ddd (where "ddd" is a decimal number)
843
         Date:       DD-MMM-YYYY
844
         Time:       HH:MM or HH:MM:SS or HH:MM:SS.CC
845
         Owner:      [user] or [user,group]
846
         Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
847
         subset thereof, for System, Owner, Group, World.
848
849
         If permission is lacking, info may be replaced by the string:
850
         "No privilege for attempted operation".
851
      */
852
56.8k
      while (tok != NULL)
853
45.3k
        {
854
45.3k
          DEBUGP (("Token: >%s<: ", tok));
855
856
45.3k
          if ((strlen (tok) < 12) && (strchr( tok, '-') != NULL))
857
3.35k
            {
858
              /* Date. */
859
3.35k
              DEBUGP (("Date.\n"));
860
3.35k
          snprintf(date_str, sizeof(date_str), "%s ", tok);
861
3.35k
            }
862
42.0k
          else if ((strlen (tok) < 12) && (strchr( tok, ':') != NULL))
863
1.81k
            {
864
              /* Time. */
865
1.81k
              DEBUGP (("Time. "));
866
1.81k
              strncat( date_str,
867
1.81k
                       tok,
868
1.81k
                       (sizeof( date_str)- strlen (date_str) - 1));
869
1.81k
              DEBUGP (("Date time: >%s<\n", date_str));
870
1.81k
            }
871
40.2k
          else if (strchr (tok, '[') != NULL)
872
4.43k
            {
873
              /* Owner.  (Ignore.) */
874
4.43k
              DEBUGP (("Owner.\n"));
875
4.43k
            }
876
35.7k
          else if (strchr (tok, '(') != NULL)
877
1.07k
            {
878
              /* Protections (permissions). */
879
1.07k
              perms = 0;
880
1.07k
              j = 0;
881
              /*FIXME: Should not be using the variable like this. */
882
43.8k
              for (i = 0; i < (int) strlen(tok); i++)
883
42.8k
                {
884
42.8k
                  switch (tok[ i])
885
42.8k
                    {
886
1.12k
                    case '(':
887
1.12k
                      break;
888
6
                    case ')':
889
6
                      break;
890
24.1k
                    case ',':
891
24.1k
                      if (j == 0)
892
326
                        {
893
326
                          perms = 0;
894
326
                        }
895
23.8k
                      else if (j < 4)
896
360
                        {
897
360
                          perms <<= 3;
898
360
                        }
899
24.1k
                      j++;
900
24.1k
                      break;
901
371
                    case 'R':
902
371
                      perms |= 4;
903
371
                      break;
904
194
                    case 'W':
905
194
                      perms |= 2;
906
194
                      break;
907
414
                    case 'E':
908
414
                      perms |= 1;
909
414
                      break;
910
394
                    case 'D':
911
394
                      perms |= 2;
912
394
                      break;
913
42.8k
                    }
914
42.8k
                }
915
1.07k
              cur.perms = perms;
916
1.07k
              DEBUGP (("Prot.  perms = %0o.\n", (unsigned) cur.perms));
917
1.07k
            }
918
34.6k
          else
919
34.6k
            {
920
              /* Nondescript.  Probably size(s), probably in blocks.
921
                 Could be "No privilege ..." message.  (Ignore.)
922
              */
923
34.6k
              DEBUGP (("Ignored (size?).\n"));
924
34.6k
            }
925
926
45.3k
          tok = strtok (NULL, " ");
927
45.3k
        }
928
929
      /* Tokens exhausted.  Interpret the data, and fill in the
930
         structure.
931
      */
932
      /* Fill tm timestruct according to date-time string.  Fractional
933
         seconds are ignored.  Default to current time, if conversion
934
         fails.
935
      */
936
11.4k
      timenow = time( NULL);
937
11.4k
      timestruct = localtime( &timenow );
938
11.4k
      strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
939
940
      /* Convert struct tm local time to time_t local time. */
941
11.4k
      timenow = mktime (timestruct);
942
      /* Offset local time according to environment variable (seconds). */
943
11.4k
      if ((tok = getenv ( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
944
0
        {
945
0
          dt = atoi (tok);
946
0
          DEBUGP (("Time differential = %d.\n", dt));
947
0
        }
948
11.4k
      else
949
11.4k
        dt = 0;
950
951
11.4k
      if (dt >= 0)
952
11.4k
        timenow += dt;
953
0
      else
954
0
        timenow -= (-dt);
955
956
11.4k
      cur.tstamp = timenow; /* Store the time-stamp. */
957
11.4k
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));
958
11.4k
      cur.ptype = TT_HOUR_MIN;
959
960
      /* Add the data for this item to the linked list, */
961
11.4k
      if (!dir)
962
1.74k
        {
963
1.74k
          l = dir = xmalloc (sizeof (struct fileinfo));
964
1.74k
          cur.prev = cur.next = NULL;
965
1.74k
          memcpy (l, &cur, sizeof (cur));
966
1.74k
        }
967
9.74k
      else
968
9.74k
        {
969
9.74k
          cur.prev = l;
970
9.74k
       cur.next = NULL;
971
9.74k
          l->next = xmalloc (sizeof (struct fileinfo));
972
9.74k
          l = l->next;
973
9.74k
          memcpy (l, &cur, sizeof (cur));
974
9.74k
        }
975
11.4k
      cur.name = NULL;
976
977
11.4k
      i = getline (&line, &bufsize, fp);
978
11.4k
      if (i > 0)
979
9.82k
        {
980
9.82k
          i = clean_line (line, i);
981
9.82k
          if (i <= 0)
982
19
            {
983
              /* Blank line.  End of significant file listing. */
984
19
              break;
985
19
            }
986
9.82k
        }
987
11.4k
    }
988
989
2.35k
  xfree (cur.name);
990
2.35k
  xfree (line);
991
2.35k
  return dir;
992
2.35k
}
993
994
995
/* This function switches between the correct parsing routine depending on
996
   the SYSTEM_TYPE. The system type should be based on the result of the
997
   "SYST" response of the FTP server. According to this response we will
998
   use one of the three different listing parsers that cover most of the FTP
999
   servers used nowadays.  */
1000
1001
struct fileinfo *
1002
ftp_parse_ls (const char *file, const enum stype system_type)
1003
0
{
1004
0
  FILE *fp;
1005
0
  struct fileinfo *fi;
1006
1007
0
  fp = fopen (file, "rb");
1008
0
  if (!fp)
1009
0
    {
1010
0
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1011
0
      return NULL;
1012
0
    }
1013
1014
0
  fi = ftp_parse_ls_fp (fp, system_type);
1015
0
  fclose(fp);
1016
1017
0
  return fi;
1018
0
}
1019
1020
struct fileinfo *
1021
ftp_parse_ls_fp (FILE *fp, const enum stype system_type)
1022
9.42k
{
1023
9.42k
  switch (system_type)
1024
9.42k
    {
1025
2.35k
    case ST_UNIX:
1026
2.35k
      return ftp_parse_unix_ls (fp, 0);
1027
2.35k
    case ST_WINNT:
1028
2.35k
      {
1029
        /* Detect whether the listing is simulating the UNIX format */
1030
2.35k
        int   c = fgetc(fp);
1031
2.35k
        rewind(fp);
1032
1033
        /* If the first character of the file is '0'-'9', it's WINNT
1034
           format. */
1035
2.35k
        if (c >= '0' && c <='9')
1036
849
          return ftp_parse_winnt_ls (fp);
1037
1.50k
        else
1038
1.50k
          return ftp_parse_unix_ls (fp, 1);
1039
2.35k
      }
1040
2.35k
    case ST_VMS:
1041
2.35k
      return ftp_parse_vms_ls (fp);
1042
2.35k
    case ST_MACOS:
1043
2.35k
      return ftp_parse_unix_ls (fp, 1);
1044
0
    default:
1045
0
      logprintf (LOG_NOTQUIET, _("\
1046
0
Unsupported listing type, trying Unix listing parser.\n"));
1047
0
      return ftp_parse_unix_ls (fp, 0);
1048
9.42k
    }
1049
9.42k
}
1050
1051
/* Stuff for creating FTP index. */
1052
1053
/* The function creates an HTML index containing references to given
1054
   directories and files on the appropriate host.  The references are
1055
   FTP.  */
1056
uerr_t
1057
ftp_index (const char *file, struct url *u, struct fileinfo *f)
1058
0
{
1059
0
  FILE *fp;
1060
0
  char *upwd;
1061
0
  char *htcldir;                /* HTML-clean dir name */
1062
0
  char *htclfile;               /* HTML-clean file name */
1063
0
  char *urlclfile;              /* URL-clean file name */
1064
1065
0
  if (!output_stream)
1066
0
    {
1067
0
      fp = fopen (file, "wb");
1068
0
      if (!fp)
1069
0
        {
1070
0
          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1071
0
          return FOPENERR;
1072
0
        }
1073
0
    }
1074
0
  else
1075
0
    fp = output_stream;
1076
0
  if (u->user)
1077
0
    {
1078
0
      char *tmpu, *tmpp;        /* temporary, clean user and passwd */
1079
1080
0
      tmpu = url_escape (u->user);
1081
0
      tmpp = u->passwd ? url_escape (u->passwd) : NULL;
1082
0
      if (tmpp)
1083
0
        upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
1084
0
      else
1085
0
        upwd = concat_strings (tmpu, "@", (char *) 0);
1086
0
      xfree (tmpu);
1087
0
      xfree (tmpp);
1088
0
    }
1089
0
  else
1090
0
    upwd = xstrdup ("");
1091
1092
0
  htcldir = html_quote_string (u->dir);
1093
1094
0
  fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
1095
0
  fprintf (fp, "<html>\n<head>\n<title>");
1096
0
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1097
0
  fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
1098
0
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1099
0
  fprintf (fp, "</h1>\n<hr>\n<pre>\n");
1100
1101
0
  while (f)
1102
0
    {
1103
0
      fprintf (fp, "  ");
1104
0
      if (f->tstamp != -1)
1105
0
        {
1106
          /* #### Should we translate the months?  Or, even better, use
1107
             ISO 8601 dates?  */
1108
0
          static const char *months[] = {
1109
0
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1110
0
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1111
0
          };
1112
0
          time_t tstamp = f->tstamp;
1113
0
          struct tm *ptm = localtime (&tstamp);
1114
1115
0
          fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
1116
0
                  ptm->tm_mday);
1117
0
          if (f->ptype == TT_HOUR_MIN)
1118
0
            fprintf (fp, "%02d:%02d  ", ptm->tm_hour, ptm->tm_min);
1119
0
          else
1120
0
            fprintf (fp, "       ");
1121
0
        }
1122
0
      else
1123
0
        fprintf (fp, _("time unknown       "));
1124
0
      switch (f->type)
1125
0
        {
1126
0
        case FT_PLAINFILE:
1127
0
          fprintf (fp, _("File        "));
1128
0
          break;
1129
0
        case FT_DIRECTORY:
1130
0
          fprintf (fp, _("Directory   "));
1131
0
          break;
1132
0
        case FT_SYMLINK:
1133
0
          fprintf (fp, _("Link        "));
1134
0
          break;
1135
0
        default:
1136
0
          fprintf (fp, _("Not sure    "));
1137
0
          break;
1138
0
        }
1139
0
      htclfile = html_quote_string (f->name);
1140
0
      urlclfile = url_escape_unsafe_and_reserved (f->name);
1141
0
      fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
1142
0
      if (*u->dir != '/')
1143
0
        putc ('/', fp);
1144
      /* XXX: Should probably URL-escape dir components here, rather
1145
       * than just HTML-escape, for consistency with the next bit where
1146
       * we use urlclfile for the file component. Anyway, this is safer
1147
       * than what we had... */
1148
0
      fprintf (fp, "%s", htcldir);
1149
0
      if (*u->dir)
1150
0
        putc ('/', fp);
1151
0
      fprintf (fp, "%s", urlclfile);
1152
0
      if (f->type == FT_DIRECTORY)
1153
0
        putc ('/', fp);
1154
0
      fprintf (fp, "\">%s", htclfile);
1155
0
      if (f->type == FT_DIRECTORY)
1156
0
        putc ('/', fp);
1157
0
      fprintf (fp, "</a> ");
1158
0
      if (f->type == FT_PLAINFILE)
1159
0
        fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
1160
0
      else if (f->type == FT_SYMLINK)
1161
0
        fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
1162
0
      putc ('\n', fp);
1163
0
      xfree (htclfile);
1164
0
      xfree (urlclfile);
1165
0
      f = f->next;
1166
0
    }
1167
0
  fprintf (fp, "</pre>\n</body>\n</html>\n");
1168
0
  xfree (htcldir);
1169
0
  xfree (upwd);
1170
0
  if (!output_stream)
1171
0
    fclose (fp);
1172
0
  else
1173
0
    fflush (fp);
1174
0
  return FTPOK;
1175
0
}