Coverage Report

Created: 2023-11-19 07:52

/src/wget/src/ftp-ls.c
Line
Count
Source (jump to first uncovered line)
1
/* Parsing FTP `ls' output.
2
   Copyright (C) 1996-2011, 2015, 2018-2023 Free Software Foundation,
3
   Inc.
4
5
This file is part of GNU Wget.
6
7
GNU Wget is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3 of the License, or
10
(at your option) any later version.
11
12
GNU Wget is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
GNU General Public License for more details.
16
17
You should have received a copy of the GNU General Public License
18
along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20
Additional permission under GNU GPL version 3 section 7
21
22
If you modify this program, or any covered work, by linking or
23
combining it with the OpenSSL project's OpenSSL library (or a
24
modified version of that library), containing parts covered by the
25
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26
grants you additional permission to convey the resulting work.
27
Corresponding Source for a non-source form of such a combination
28
shall include the source code for the parts of OpenSSL used as well
29
as that of the covered work.  */
30
31
#include "wget.h"
32
33
#include <stdio.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <unistd.h>
37
#include <errno.h>
38
#include <time.h>
39
#include "utils.h"
40
#include "ftp.h"
41
#include "url.h"
42
#include "convert.h"            /* for html_quote_string prototype */
43
#include "retr.h"               /* for output_stream */
44
#include "c-strcase.h"
45
46
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S points at the nine permission characters (the
   caller passes the token just past the file-type character).
   Strings shorter than nine characters yield 0.

   An `s' (setuid/setgid) or `t' (sticky) in an execute position means
   that the execute bit is set as well, so both count like `x'.  The
   special bits themselves are not reported, and their capital forms
   (`S', `T'), which mean "special bit without execute", count as
   unset.  ACLs are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  if (strlen (s) < 9)
    return 0;
  /* One rwx triplet per iteration: user, group, other.  */
  for (i = 0; i < 3; i++, s += 3)
    {
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's' || s[2] == 't'));
    }
  return perms;
}
64
65
66
/* Normalizes a listing line in place so that it can be tokenized
   consistently.  Trailing <CR> and <LF> characters are overwritten
   with NUL, and every <TAB> up to the first NUL is turned into a
   <SPACE>.  LEN is the number of bytes in LINE; the return value is
   the length left after the line ending has been removed (0 for an
   empty line or a non-positive LEN).  */
static int
clean_line (char *line, int len)
{
  char *tab;

  if (len <= 0)
    return 0;

  /* Chop the end-of-line characters, last one first.  */
  for (; len > 0; len--)
    {
      char last = line[len - 1];

      if (last != '\n' && last != '\r')
        break;
      line[len - 1] = '\0';
    }

  if (len == 0)
    return 0;

  /* Blank out the tabs.  strchr stops at the first NUL, just like a
     character-by-character walk of the string would.  */
  for (tab = strchr (line, '\t'); tab != NULL; tab = strchr (tab + 1, '\t'))
    *tab = ' ';

  return len;
}
84
85
/* Convert the Un*x-ish style directory listing stored in FILE to a
86
   linked list of fileinfo (system-independent) entries.  The contents
87
   of FILE are considered to be produced by the standard Unix `ls -la'
88
   output (whatever that might be).  BSD (no group) and SYSV (with
89
   group) listings are handled.
90
91
   The timestamps are stored in a separate variable, time_t
92
   compatible (I hope).  The timezones are ignored.  */
93
static struct fileinfo *
94
ftp_parse_unix_ls (FILE *fp, int ignore_perms)
95
6.11k
{
96
6.11k
  static const char *months[] = {
97
6.11k
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
98
6.11k
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
99
6.11k
  };
100
6.11k
  int next, len, i, error, ignore;
101
6.11k
  int year, month, day;         /* for time analysis */
102
6.11k
  int hour, min, sec, ptype;
103
6.11k
  struct tm timestruct, *tnow;
104
6.11k
  time_t timenow;
105
6.11k
  size_t bufsize = 0;
106
107
6.11k
  char *line = NULL, *tok, *ptok;      /* tokenizer */
108
6.11k
  struct fileinfo *dir, *l, cur;       /* list creation */
109
110
6.11k
  dir = l = NULL;
111
112
  /* Line loop to end of file: */
113
60.6k
  while ((len = getline (&line, &bufsize, fp)) > 0)
114
54.5k
    {
115
54.5k
      len = clean_line (line, len);
116
      /* Skip if total...  */
117
54.5k
      if (!c_strncasecmp (line, "total", 5))
118
830
        continue;
119
      /* Get the first token (permissions).  */
120
53.7k
      tok = strtok (line, " ");
121
53.7k
      if (!tok)
122
6.33k
        continue;
123
124
47.3k
      cur.name = NULL;
125
47.3k
      cur.linkto = NULL;
126
127
      /* Decide whether we deal with a file or a directory.  */
128
47.3k
      switch (*tok)
129
47.3k
        {
130
5.74k
        case '-':
131
5.74k
          cur.type = FT_PLAINFILE;
132
5.74k
          DEBUGP (("PLAINFILE; "));
133
5.74k
          break;
134
3.72k
        case 'd':
135
3.72k
          cur.type = FT_DIRECTORY;
136
3.72k
          DEBUGP (("DIRECTORY; "));
137
3.72k
          break;
138
5.97k
        case 'l':
139
5.97k
          cur.type = FT_SYMLINK;
140
5.97k
          DEBUGP (("SYMLINK; "));
141
5.97k
          break;
142
31.9k
        default:
143
31.9k
          cur.type = FT_UNKNOWN;
144
31.9k
          DEBUGP (("UNKNOWN; "));
145
31.9k
          break;
146
47.3k
        }
147
148
47.3k
      if (ignore_perms)
149
30.5k
        {
150
30.5k
          switch (cur.type)
151
30.5k
            {
152
3.81k
            case FT_PLAINFILE:
153
3.81k
              cur.perms = 0644;
154
3.81k
              break;
155
2.45k
            case FT_DIRECTORY:
156
2.45k
              cur.perms = 0755;
157
2.45k
              break;
158
24.2k
            default:
159
              /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
160
24.2k
              cur.perms = 0644;
161
30.5k
            }
162
30.5k
          DEBUGP (("implicit perms %0o; ", (unsigned) cur.perms));
163
30.5k
        }
164
16.8k
       else
165
16.8k
         {
166
16.8k
           cur.perms = symperms (tok + 1);
167
16.8k
           DEBUGP (("perms %0o; ", (unsigned) cur.perms));
168
16.8k
         }
169
170
47.3k
      error = ignore = 0;       /* Erroneous and ignoring entries are
171
                                   treated equally for now.  */
172
47.3k
      year = hour = min = sec = 0; /* Silence the compiler.  */
173
47.3k
      month = day = 0;
174
47.3k
      ptype = TT_DAY;
175
47.3k
      next = -1;
176
      /* While there are tokens on the line, parse them.  Next is the
177
         number of tokens left until the filename.
178
179
         Use the month-name token as the "anchor" (the place where the
180
         position wrt the file name is "known").  When a month name is
181
         encountered, `next' is set to 5.  Also, the preceding
182
         characters are parsed to get the file size.
183
184
         This tactic is quite dubious when it comes to
185
         internationalization issues (non-English month names), but it
186
         works for now.  */
187
47.3k
      tok = line;
188
166k
      while (ptok = tok,
189
166k
             (tok = strtok (NULL, " ")) != NULL)
190
133k
        {
191
133k
          --next;
192
133k
          if (next < 0)         /* a month name was not encountered */
193
81.3k
            {
194
925k
              for (i = 0; i < 12; i++)
195
869k
                if (!c_strcasecmp (tok, months[i]))
196
25.1k
                  break;
197
              /* If we got a month, it means the token before it is the
198
                 size, and the filename is three tokens away.  */
199
81.3k
              if (i != 12)
200
25.1k
                {
201
25.1k
                  wgint size;
202
203
                  /* Parse the previous token with str_to_wgint.  */
204
25.1k
                  if (ptok == line)
205
785
                    {
206
                      /* Something has gone wrong during parsing. */
207
785
                      error = 1;
208
785
                      break;
209
785
                    }
210
24.3k
                  errno = 0;
211
24.3k
                  size = str_to_wgint (ptok, NULL, 10);
212
24.3k
                  if (size == WGINT_MAX && errno == ERANGE)
213
                    /* Out of range -- ignore the size.  #### Should
214
                       we refuse to start the download.  */
215
999
                    cur.size = 0;
216
23.3k
                  else
217
23.3k
                    cur.size = size;
218
24.3k
                  DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
219
220
24.3k
                  month = i;
221
24.3k
                  next = 5;
222
24.3k
                  DEBUGP (("month: %s; ", months[month]));
223
24.3k
                }
224
81.3k
            }
225
51.8k
          else if (next == 4)   /* days */
226
20.3k
            {
227
20.3k
              if (tok[1])       /* two-digit... */
228
1.28k
                day = 10 * (*tok - '0') + tok[1] - '0';
229
19.0k
              else              /* ...or one-digit */
230
19.0k
                day = *tok - '0';
231
20.3k
              DEBUGP (("day: %d; ", day));
232
20.3k
            }
233
31.5k
          else if (next == 3)
234
18.4k
            {
235
              /* This ought to be either the time, or the year.  Let's
236
                 be flexible!
237
238
                 If we have a number x, it's a year.  If we have x:y,
239
                 it's hours and minutes.  If we have x:y:z, z are
240
                 seconds.  */
241
18.4k
              year = 0;
242
18.4k
              min = hour = sec = 0;
243
              /* We must deal with digits.  */
244
18.4k
              if (c_isdigit (*tok))
245
10.2k
                {
246
                  /* Suppose it's year.  Limit to year 99999 to avoid integer overflow. */
247
24.9k
                  for (; c_isdigit (*tok) && year <= 99999; tok++)
248
14.7k
                    year = (*tok - '0') + 10 * year;
249
10.2k
                  if (*tok == ':')
250
3.49k
                    {
251
3.49k
                      int n;
252
                      /* This means these were hours!  */
253
3.49k
                      hour = year;
254
3.49k
                      year = 0;
255
3.49k
                      ptype = TT_HOUR_MIN;
256
3.49k
                      ++tok;
257
                      /* Get the minutes...  */
258
5.41k
                      for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
259
1.91k
                        min = (*tok - '0') + 10 * min;
260
3.49k
                      if (*tok == ':')
261
1.77k
                        {
262
                          /* ...and the seconds.  */
263
1.77k
                          ++tok;
264
3.65k
                          for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
265
1.87k
                            sec = (*tok - '0') + 10 * sec;
266
1.77k
                        }
267
3.49k
                    }
268
10.2k
                }
269
18.4k
              if (year)
270
6.15k
                DEBUGP (("year: %d (no tm); ", year));
271
12.2k
              else
272
12.2k
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
273
18.4k
            }
274
13.0k
          else if (next == 2)    /* The file name */
275
13.0k
            {
276
13.0k
              int fnlen;
277
13.0k
              char *p;
278
279
              /* Since the file name may contain a SPC, it is possible
280
                 for strtok to handle it wrong.  */
281
13.0k
              fnlen = strlen (tok);
282
13.0k
              if (fnlen < len - (tok - line))
283
2.50k
                {
284
                  /* So we have a SPC in the file name.  Restore the
285
                     original.  */
286
2.50k
                  tok[fnlen] = ' ';
287
                  /* If the file is a symbolic link, it should have a
288
                     ` -> ' somewhere.  */
289
2.50k
                  if (cur.type == FT_SYMLINK)
290
1.43k
                    {
291
1.43k
                      p = strstr (tok, " -> ");
292
1.43k
                      if (!p)
293
715
                        {
294
715
                          error = 1;
295
715
                          break;
296
715
                        }
297
715
                      cur.linkto = xstrdup (p + 4);
298
715
                      DEBUGP (("link to: %s\n", cur.linkto));
299
                      /* And separate it from the file name.  */
300
715
                      *p = '\0';
301
715
                    }
302
2.50k
                }
303
              /* If we have the filename, add it to the list of files or
304
                 directories.  */
305
              /* "." and ".." are an exception!  */
306
12.3k
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
307
1.42k
                {
308
1.42k
                  DEBUGP (("\nIgnoring `.' and `..'; "));
309
1.42k
                  ignore = 1;
310
1.42k
                  break;
311
1.42k
                }
312
              /* Some FTP sites choose to have ls -F as their default
313
                 LIST output, which marks the symlinks with a trailing
314
                 `@', directory names with a trailing `/' and
315
                 executables with a trailing `*'.  This is no problem
316
                 unless encountering a symbolic link ending with `@',
317
                 or an executable ending with `*' on a server without
318
                 default -F output.  I believe these cases are very
319
                 rare.  */
320
10.9k
              fnlen = strlen (tok); /* re-calculate `fnlen' */
321
10.9k
              cur.name = xmalloc (fnlen + 1);
322
10.9k
              memcpy (cur.name, tok, fnlen + 1);
323
10.9k
              if (fnlen)
324
10.9k
                {
325
10.9k
                  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
326
1.07k
                    {
327
1.07k
                      cur.name[fnlen - 1] = '\0';
328
1.07k
                      DEBUGP (("trailing `/' on dir.\n"));
329
1.07k
                    }
330
9.84k
                  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
331
717
                    {
332
717
                      cur.name[fnlen - 1] = '\0';
333
717
                      DEBUGP (("trailing `@' on link.\n"));
334
717
                    }
335
9.12k
                  else if (cur.type == FT_PLAINFILE
336
9.12k
                           && (cur.perms & 0111)
337
9.12k
                           && cur.name[fnlen - 1] == '*')
338
195
                    {
339
195
                      cur.name[fnlen - 1] = '\0';
340
195
                      DEBUGP (("trailing `*' on exec.\n"));
341
195
                    }
342
10.9k
                } /* if (fnlen) */
343
0
              else
344
0
                error = 1;
345
10.9k
              break;
346
12.3k
            }
347
0
          else
348
0
            abort ();
349
133k
        } /* while */
350
351
47.3k
      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
352
37.9k
        error = 1;
353
354
47.3k
      DEBUGP (("%s\n", cur.name ? cur.name : ""));
355
356
47.3k
      if (error || ignore)
357
37.9k
        {
358
37.9k
          DEBUGP (("Skipping.\n"));
359
37.9k
          xfree (cur.name);
360
37.9k
          xfree (cur.linkto);
361
37.9k
          continue;
362
37.9k
        }
363
364
9.46k
      if (!dir)
365
660
        {
366
660
          l = dir = xnew (struct fileinfo);
367
660
          memcpy (l, &cur, sizeof (cur));
368
660
          l->prev = l->next = NULL;
369
660
        }
370
8.80k
      else
371
8.80k
        {
372
8.80k
          cur.prev = l;
373
8.80k
          l->next = xnew (struct fileinfo);
374
8.80k
          l = l->next;
375
8.80k
          memcpy (l, &cur, sizeof (cur));
376
8.80k
          l->next = NULL;
377
8.80k
        }
378
      /* Get the current time.  */
379
9.46k
      timenow = time (NULL);
380
9.46k
      tnow = localtime (&timenow);
381
      /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr).  */
382
9.46k
      timestruct.tm_sec   = sec;
383
9.46k
      timestruct.tm_min   = min;
384
9.46k
      timestruct.tm_hour  = hour;
385
9.46k
      timestruct.tm_mday  = day;
386
9.46k
      timestruct.tm_mon   = month;
387
9.46k
      if (year == 0)
388
4.59k
        {
389
          /* Some listings will not specify the year if it is "obvious"
390
             that the file was from the previous year.  E.g. if today
391
             is 97-01-12, and you see a file of Dec 15th, its year is
392
             1996, not 1997.  Thanks to Vladimir Volovich for
393
             mentioning this!  */
394
4.59k
          if (month > tnow->tm_mon)
395
767
            timestruct.tm_year = tnow->tm_year - 1;
396
3.82k
          else
397
3.82k
            timestruct.tm_year = tnow->tm_year;
398
4.59k
        }
399
4.86k
      else
400
4.86k
        timestruct.tm_year = year;
401
9.46k
      if (timestruct.tm_year >= 1900)
402
910
        timestruct.tm_year -= 1900;
403
9.46k
      timestruct.tm_wday  = 0;
404
9.46k
      timestruct.tm_yday  = 0;
405
9.46k
      timestruct.tm_isdst = -1;
406
9.46k
      l->tstamp = mktime (&timestruct); /* store the time-stamp */
407
9.46k
      l->ptype = ptype;
408
9.46k
    }
409
410
6.11k
  xfree (line);
411
6.11k
  return dir;
412
6.11k
}
413
414
static struct fileinfo *
415
ftp_parse_winnt_ls (FILE *fp)
416
899
{
417
899
  int len;
418
899
  int year, month, day;         /* for time analysis */
419
899
  int hour, min;
420
899
  size_t bufsize = 0;
421
899
  struct tm timestruct;
422
423
899
  char *line = NULL, *tok;             /* tokenizer */
424
899
  char *filename;
425
899
  struct fileinfo *dir, *l, cur; /* list creation */
426
427
899
  dir = l = NULL;
428
899
  cur.name = NULL;
429
430
  /* Line loop to end of file: */
431
4.76k
  while ((len = getline (&line, &bufsize, fp)) > 0)
432
3.86k
    {
433
3.86k
      len = clean_line (line, len);
434
435
      /* Name begins at 39 column of the listing if date presented in `mm-dd-yy'
436
         format or at 41 column if date presented in `mm-dd-yyyy' format. Thus,
437
         we cannot extract name before we parse date. Using this information we
438
         also can recognize filenames that begin with a series of space
439
         characters (but who really wants to use such filenames anyway?). */
440
3.86k
      if (len < 40) continue;
441
1.79k
      filename = line + 39;
442
443
      /* First column: mm-dd-yy or mm-dd-yyyy. Should atoi() on the month fail,
444
         january will be assumed.  */
445
1.79k
      tok = strtok(line, "-");
446
1.79k
      if (tok == NULL) continue;
447
1.72k
      month = atoi(tok);
448
1.72k
      if (month < 0) month = 0; else month--;
449
1.72k
      tok = strtok(NULL, "-");
450
1.72k
      if (tok == NULL) continue;
451
1.60k
      day = atoi(tok);
452
1.60k
      tok = strtok(NULL, " ");
453
1.60k
      if (tok == NULL) continue;
454
1.52k
      year = atoi(tok);
455
      /* Assuming the epoch starting at 1.1.1970 */
456
1.52k
      if (year <= 70)
457
1.12k
        {
458
1.12k
          year += 100;
459
1.12k
        }
460
399
      else if (year >= 1900)
461
274
        {
462
274
          year -= 1900;
463
274
          if (len < 42) continue;
464
185
          filename += 2;
465
185
        }
466
      /* Now it is possible to determine the position of the first symbol in
467
         filename. */
468
1.43k
      xfree (cur.name);
469
1.43k
      memset(&cur, 0, sizeof (cur));
470
1.43k
      cur.name = xstrdup(filename);
471
1.43k
      DEBUGP (("Name: '%s'\n", cur.name));
472
473
474
      /* Second column: hh:mm[AP]M, listing does not contain value for
475
         seconds */
476
1.43k
      tok = strtok(NULL,  ":");
477
1.43k
      if (tok == NULL) continue;
478
1.32k
      hour = atoi(tok);
479
1.32k
      tok = strtok(NULL,  "M");
480
1.32k
      if (tok == NULL) continue;
481
1.18k
      min = atoi(tok);
482
      /* Adjust hour from AM/PM. Just for the record, the sequence goes
483
         11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
484
1.18k
      if (tok[0] && tok[1]) tok+=2;
485
1.18k
      if (hour >= 12 || hour < 0)  hour  = 0;
486
1.18k
      if (*tok == 'P') hour += 12;
487
488
1.18k
      DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
489
1.18k
              year+1900, month, day, hour, min));
490
491
      /* Build the time-stamp (copy & paste from above) */
492
1.18k
      timestruct.tm_sec   = 0;
493
1.18k
      timestruct.tm_min   = min;
494
1.18k
      timestruct.tm_hour  = hour;
495
1.18k
      timestruct.tm_mday  = day;
496
1.18k
      timestruct.tm_mon   = month;
497
1.18k
      timestruct.tm_year  = year;
498
1.18k
      timestruct.tm_wday  = 0;
499
1.18k
      timestruct.tm_yday  = 0;
500
1.18k
      timestruct.tm_isdst = -1;
501
1.18k
      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
502
1.18k
      cur.ptype = TT_HOUR_MIN;
503
504
1.18k
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));
505
506
      /* Third column: Either file length, or <DIR>. We also set the
507
         permissions (guessed as 0644 for plain files and 0755 for
508
         directories as the listing does not give us a clue) and filetype
509
         here. */
510
1.18k
      tok = strtok(NULL, " ");
511
1.18k
      if (tok == NULL) continue;
512
478
      while ((tok != NULL) && (*tok == '\0'))  tok = strtok(NULL, " ");
513
478
      if (tok == NULL) continue;
514
478
      if (*tok == '<')
515
76
        {
516
76
          cur.type  = FT_DIRECTORY;
517
76
          cur.size  = 0;
518
76
          cur.perms = 0755;
519
76
          DEBUGP (("Directory\n"));
520
76
        }
521
402
      else
522
402
        {
523
402
          wgint size;
524
402
          cur.type  = FT_PLAINFILE;
525
402
          errno = 0;
526
402
          size = str_to_wgint (tok, NULL, 10);
527
402
          if (size == WGINT_MAX && errno == ERANGE)
528
69
            cur.size = 0;       /* overflow */
529
333
          else
530
333
            cur.size = size;
531
402
          cur.perms = 0644;
532
402
          DEBUGP (("File, size %s bytes\n", number_to_static_string (cur.size)));
533
402
        }
534
535
478
      cur.linkto = NULL;
536
537
      /* And put everything into the linked list */
538
478
      if (!dir)
539
162
        {
540
162
          l = dir = xnew (struct fileinfo);
541
162
          memcpy (l, &cur, sizeof (cur));
542
162
          l->prev = l->next = NULL;
543
162
        }
544
316
      else
545
316
        {
546
316
          cur.prev = l;
547
316
          l->next = xnew (struct fileinfo);
548
316
          l = l->next;
549
316
          memcpy (l, &cur, sizeof (cur));
550
316
          l->next = NULL;
551
316
        }
552
478
      cur.name = NULL;
553
478
    }
554
555
899
  xfree (cur.name);
556
899
  xfree (line);
557
899
  return dir;
558
899
}
559
560
561
562
/* Convert the VMS-style directory listing stored in "file" to a
563
   linked list of fileinfo (system-independent) entries.  The contents
564
   of FILE are considered to be produced by the standard VMS
565
   "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
566
   more or less.  (Different VMS FTP servers may have different headers,
567
   and may not supply the same data, but all should be subsets of this.)
568
569
   VMS normally provides local (server) time and date information.
570
   Define the logical name or environment variable
571
   "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
572
   times if different from the remote local times.
573
574
   2005-02-23 SMS.
575
   Added code to eliminate "^" escape characters from ODS5 extended file
576
   names.  The TCPIP FTP server (V5.4) seems to prefer requests which do
577
   not use the escaped names which it provides.
578
*/
579
580
17.1k
#define VMS_DEFAULT_PROT_FILE 0644
581
393
#define VMS_DEFAULT_PROT_DIR 0755
582
583
/* Return the value (0 - 15) of the hexadecimal digit C, or -1 if C is
   not a hexadecimal digit.  */
static int
vms_hex_digit_value (unsigned char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return -1;
}

/* 2005-02-23 SMS.
   eat_carets().

   Delete ODS5 extended file name escape characters ("^") in the
   original buffer.  STR is modified in place and never grows:

     "^" followed by two hex digits  -> the character with that code
     "^_"                            -> " "
     "^/"                            -> "?"
     "^" followed by anything else   -> that character
     "^" at the very end             -> kept as is (nothing to escape)

   Note that the current scheme does not handle all EFN cases (e.g.
   Unicode escapes), but it could be made more complicated.
*/

static void eat_carets( char *str)
/* char *str;      Source pointer. */
{
  char *strd;   /* Destination pointer. */
  unsigned char uchr;

  /* Skip ahead to the first "^", if any. */
  while ((*str != '\0') && (*str != '^'))
     str++;

  /* If no caret was found, quit early. */
  if (*str == '\0')
    return;

  /* Shift characters leftward as carets are found. */
  strd = str;
  while (*str != '\0')
    {
      uchr = *str;
      if (uchr == '^')
        {
          /* Found a caret.  Look at what it escapes.  The second digit
             is only inspected when the first one is a hex digit, hence
             not the terminating NUL.  */
          int hi = vms_hex_digit_value ((unsigned char) str[1]);
          int lo = (hi >= 0) ? vms_hex_digit_value ((unsigned char) str[2]) : -1;

          if (hi >= 0 && lo >= 0)
            {
              /* Two hex digits.  Build the char code and consume both. */
              uchr = (unsigned char) ((hi << 4) | lo);
              str += 2;
            }
          else if (str[1] != '\0')
            {
              /* Simple escaped character: drop the caret and take the
                 character after it.  */
              uchr = *(++str);
              if (uchr == '_')
                {
                  /* Convert escaped "_" to " ". */
                  uchr = ' ';
                }
              else if (uchr == '/')
                {
                  /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
                  /* Note that this is a left-over from Info-ZIP code, and is
                     probably of little value here, except perhaps to avoid
                     directory confusion which an unconverted slash might cause.
                  */
                  uchr = '?';
                }
            }
          /* Else, a lone caret at the end of the string.  Use as-is. */
        }
      /* Else, not a caret.  Use as-is. */
      *strd = uchr;

      /* Advance destination and source pointers. */
      strd++;
      str++;
    }
  /* Terminate the destination string. */
  *strd = '\0';
}
666
667
668
static struct fileinfo *
669
ftp_parse_vms_ls (FILE *fp)
670
2.33k
{
671
2.33k
  int dt, i, j, len;
672
2.33k
  int perms;
673
2.33k
  size_t bufsize = 0;
674
2.33k
  time_t timenow;
675
2.33k
  struct tm *timestruct;
676
2.33k
  char date_str[32];
677
678
2.33k
  char *line = NULL, *tok; /* tokenizer */
679
2.33k
  struct fileinfo *dir, *l, cur; /* list creation */
680
681
2.33k
  dir = l = NULL;
682
683
  /* Skip blank lines, Directory heading, and more blank lines. */
684
685
3.10k
  for (j = 0; (i = getline (&line, &bufsize, fp)) > 0; )
686
3.07k
    {
687
3.07k
      i = clean_line (line, i);
688
3.07k
      if (i <= 0)
689
761
        continue; /* Ignore blank line. */
690
691
2.31k
      if ((j == 0) && (line[i - 1] == ']'))
692
6
        {
693
          /* Found Directory heading line.  Next non-blank line
694
          is significant. */
695
6
          j = 1;
696
6
        }
697
2.31k
      else if (!strncmp (line, "Total of ", 9))
698
1
        {
699
          /* Found "Total of ..." footing line.  No valid data
700
             will follow (empty directory). */
701
1
          i = 0; /* Arrange for early exit. */
702
1
          break;
703
1
        }
704
2.30k
      else
705
2.30k
        {
706
2.30k
          break; /* Must be significant data. */
707
2.30k
        }
708
2.31k
    }
709
710
  /* Read remainder of file until the next blank line or EOF. */
711
712
2.33k
  cur.name = NULL;
713
19.2k
  while (i > 0)
714
17.5k
    {
715
17.5k
      char *p;
716
717
      /* The first token is the file name.  After a long name, other
718
         data may be on the following line.  A valid directory name ends
719
         in ".DIR;1" (any case), although some VMS FTP servers may omit
720
         the version number (";1").
721
      */
722
723
17.5k
      tok = strtok(line, " ");
724
17.5k
      if (tok == NULL) tok = line;
725
17.5k
      DEBUGP (("file name:   '%s'\n", tok));
726
727
      /* Stripping the version number on a VMS system would be wrong.
728
         It may be foolish on a non-VMS system, too, but that's someone
729
         else's problem.  (Define PRESERVE_VMS_VERSIONS for proper
730
         operation on other operating systems.)
731
732
         2005-02-23 SMS.
733
         ODS5 extended file names may contain escaped semi-colons, so
734
         the version number is identified as right-side decimal digits
735
         led by a non-escaped semi-colon.  It may be absent.
736
      */
737
738
17.5k
#if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
739
20.4k
      for (p = tok + strlen (tok); (--p > tok) && c_isdigit(*p); );
740
17.5k
      if (p > tok && (*p == ';') && (*(p - 1) != '^'))
741
397
        {
742
397
          *p = '\0';
743
397
        }
744
17.5k
#endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
745
746
      /* 2005-02-23 SMS.
747
         Eliminate "^" escape characters from ODS5 extended file name.
748
         (A caret is invalid in an ODS2 name, so this is always safe.)
749
      */
750
17.5k
      eat_carets (tok);
751
17.5k
      DEBUGP (("file name-^: '%s'\n", tok));
752
753
      /* Differentiate between a directory and any other file.  A VMS
754
         listing may not include file protections (permissions).  Set a
755
         default permissions value (according to the file type), which
756
         may be overwritten later.  Store directory names without the
757
         ".DIR;1" file type and version number, as the plain name is
758
         what will work in a CWD command.
759
      */
760
17.5k
      len = strlen (tok);
761
17.5k
      if (len >= 4 && !c_strncasecmp(tok + (len - 4), ".DIR", 4))
762
199
        {
763
199
          *(tok + (len - 4)) = '\0'; /* Discard ".DIR". */
764
199
          cur.type  = FT_DIRECTORY;
765
199
          cur.perms = VMS_DEFAULT_PROT_DIR;
766
199
          DEBUGP (("Directory (nv)\n"));
767
199
        }
768
17.3k
      else if (len >= 6 && !c_strncasecmp (tok + len - 6, ".DIR;1", 6))
769
194
        {
770
194
          *(tok + (len - 6)) = '\0'; /* Discard ".DIR;1". */
771
194
          cur.type  = FT_DIRECTORY;
772
194
          cur.perms = VMS_DEFAULT_PROT_DIR;
773
194
          DEBUGP (("Directory (v)\n"));
774
194
        }
775
17.1k
      else
776
17.1k
        {
777
17.1k
          cur.type  = FT_PLAINFILE;
778
17.1k
          cur.perms = VMS_DEFAULT_PROT_FILE;
779
17.1k
          DEBUGP (("File\n"));
780
17.1k
        }
781
17.5k
      xfree (cur.name);
782
17.5k
      cur.name = xstrdup (tok);
783
17.5k
      DEBUGP (("Name: '%s'\n", cur.name));
784
785
      /* Null the date and time string. */
786
17.5k
      *date_str = '\0';
787
788
      /* VMS lacks symbolic links. */
789
17.5k
      cur.linkto = NULL;
790
791
      /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
792
         hence useless for an integrity check based on byte-count.
793
         Set size to unknown.
794
      */
795
17.5k
      cur.size = 0;
796
797
      /* Get token 2, if any.  A long name may force all other data onto
798
         a second line.  If needed, read the second line.
799
      */
800
801
17.5k
      tok = strtok (NULL, " ");
802
17.5k
      if (tok == NULL)
803
6.11k
        {
804
6.11k
          DEBUGP (("Getting additional line.\n"));
805
6.11k
          i = getline (&line, &bufsize, fp);
806
6.11k
          if (i <= 0)
807
582
            {
808
582
              DEBUGP (("EOF.  Leaving listing parser.\n"));
809
582
              break;
810
582
            }
811
812
          /* Second line must begin with " ".  Otherwise, it's a first
813
             line (and we may be confused).
814
          */
815
5.53k
          i = clean_line (line, i);
816
5.53k
          if (i <= 0)
817
32
            {
818
              /* Blank line.  End of significant file listing. */
819
32
              DEBUGP (("Blank line.  Leaving listing parser.\n"));
820
32
              break;
821
32
            }
822
5.50k
          else if (line[0] != ' ')
823
5.09k
            {
824
5.09k
              DEBUGP (("Non-blank in column 1.  Must be a new file name?\n"));
825
5.09k
              continue;
826
5.09k
            }
827
412
          else
828
412
            {
829
412
              tok = strtok (line, " ");
830
412
              if (tok == NULL)
831
11
                {
832
                  /* Unexpected non-empty but apparently blank line. */
833
11
                  DEBUGP (("Null token.  Leaving listing parser.\n"));
834
11
                  break;
835
11
                }
836
412
            }
837
5.53k
        }
838
839
      /* Analyze tokens.  (Order is not significant, except date must
840
         precede time.)
841
842
         Size:       ddd or ddd/ddd (where "ddd" is a decimal number)
843
         Date:       DD-MMM-YYYY
844
         Time:       HH:MM or HH:MM:SS or HH:MM:SS.CC
845
         Owner:      [user] or [user,group]
846
         Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
847
         subset thereof, for System, Owner, Group, World.
848
849
         If permission is lacking, info may be replaced by the string:
850
         "No privilege for attempted operation".
851
      */
852
59.7k
      while (tok != NULL)
853
47.9k
        {
854
47.9k
          DEBUGP (("Token: >%s<: ", tok));
855
856
47.9k
          if ((strlen (tok) < 12) && (strchr( tok, '-') != NULL))
857
3.40k
            {
858
              /* Date. */
859
3.40k
              DEBUGP (("Date.\n"));
860
3.40k
          snprintf(date_str, sizeof(date_str), "%s ", tok);
861
3.40k
            }
862
44.5k
          else if ((strlen (tok) < 12) && (strchr( tok, ':') != NULL))
863
2.08k
            {
864
              /* Time. */
865
2.08k
              DEBUGP (("Time. "));
866
2.08k
              strncat( date_str,
867
2.08k
                       tok,
868
2.08k
                       (sizeof( date_str)- strlen (date_str) - 1));
869
2.08k
              DEBUGP (("Date time: >%s<\n", date_str));
870
2.08k
            }
871
42.4k
          else if (strchr (tok, '[') != NULL)
872
4.15k
            {
873
              /* Owner.  (Ignore.) */
874
4.15k
              DEBUGP (("Owner.\n"));
875
4.15k
            }
876
38.2k
          else if (strchr (tok, '(') != NULL)
877
1.20k
            {
878
              /* Protections (permissions). */
879
1.20k
              perms = 0;
880
1.20k
              j = 0;
881
              /*FIXME: Should not be using the variable like this. */
882
49.8k
              for (i = 0; i < (int) strlen(tok); i++)
883
48.6k
                {
884
48.6k
                  switch (tok[ i])
885
48.6k
                    {
886
1.22k
                    case '(':
887
1.22k
                      break;
888
3
                    case ')':
889
3
                      break;
890
38.4k
                    case ',':
891
38.4k
                      if (j == 0)
892
334
                        {
893
334
                          perms = 0;
894
334
                        }
895
38.0k
                      else if (j < 4)
896
378
                        {
897
378
                          perms <<= 3;
898
378
                        }
899
38.4k
                      j++;
900
38.4k
                      break;
901
237
                    case 'R':
902
237
                      perms |= 4;
903
237
                      break;
904
234
                    case 'W':
905
234
                      perms |= 2;
906
234
                      break;
907
240
                    case 'E':
908
240
                      perms |= 1;
909
240
                      break;
910
265
                    case 'D':
911
265
                      perms |= 2;
912
265
                      break;
913
48.6k
                    }
914
48.6k
                }
915
1.20k
              cur.perms = perms;
916
1.20k
              DEBUGP (("Prot.  perms = %0o.\n", (unsigned) cur.perms));
917
1.20k
            }
918
37.0k
          else
919
37.0k
            {
920
              /* Nondescript.  Probably size(s), probably in blocks.
921
                 Could be "No privilege ..." message.  (Ignore.)
922
              */
923
37.0k
              DEBUGP (("Ignored (size?).\n"));
924
37.0k
            }
925
926
47.9k
          tok = strtok (NULL, " ");
927
47.9k
        }
928
929
      /* Tokens exhausted.  Interpret the data, and fill in the
930
         structure.
931
      */
932
      /* Fill tm timestruct according to date-time string.  Fractional
933
         seconds are ignored.  Default to current time, if conversion
934
         fails.
935
      */
936
11.8k
      timenow = time( NULL);
937
11.8k
      timestruct = localtime( &timenow );
938
11.8k
      strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
939
940
      /* Convert struct tm local time to time_t local time. */
941
11.8k
      timenow = mktime (timestruct);
942
      /* Offset local time according to environment variable (seconds). */
943
11.8k
      if ((tok = getenv ( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
944
0
        {
945
0
          dt = atoi (tok);
946
0
          DEBUGP (("Time differential = %d.\n", dt));
947
0
        }
948
11.8k
      else
949
11.8k
        dt = 0;
950
951
11.8k
      if (dt >= 0)
952
11.8k
        timenow += dt;
953
0
      else
954
0
        timenow -= (-dt);
955
956
11.8k
      cur.tstamp = timenow; /* Store the time-stamp. */
957
11.8k
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));
958
11.8k
      cur.ptype = TT_HOUR_MIN;
959
960
      /* Add the data for this item to the linked list, */
961
11.8k
      if (!dir)
962
1.74k
        {
963
1.74k
          l = dir = xmalloc (sizeof (struct fileinfo));
964
1.74k
          cur.prev = cur.next = NULL;
965
1.74k
          memcpy (l, &cur, sizeof (cur));
966
1.74k
        }
967
10.1k
      else
968
10.1k
        {
969
10.1k
          cur.prev = l;
970
10.1k
       cur.next = NULL;
971
10.1k
          l->next = xmalloc (sizeof (struct fileinfo));
972
10.1k
          l = l->next;
973
10.1k
          memcpy (l, &cur, sizeof (cur));
974
10.1k
        }
975
11.8k
      cur.name = NULL;
976
977
11.8k
      i = getline (&line, &bufsize, fp);
978
11.8k
      if (i > 0)
979
10.1k
        {
980
10.1k
          i = clean_line (line, i);
981
10.1k
          if (i <= 0)
982
17
            {
983
              /* Blank line.  End of significant file listing. */
984
17
              break;
985
17
            }
986
10.1k
        }
987
11.8k
    }
988
989
2.33k
  xfree (cur.name);
990
2.33k
  xfree (line);
991
2.33k
  return dir;
992
2.33k
}
993
994
995
/* This function switches between the correct parsing routine depending on
996
   the SYSTEM_TYPE. The system type should be based on the result of the
997
   "SYST" response of the FTP server. According to this response we will
998
   use one of the three different listing parsers that cover most of the FTP
999
   servers used nowadays.  */
1000
1001
struct fileinfo *
1002
ftp_parse_ls (const char *file, const enum stype system_type)
1003
0
{
1004
0
  FILE *fp;
1005
0
  struct fileinfo *fi;
1006
1007
0
  fp = fopen (file, "rb");
1008
0
  if (!fp)
1009
0
    {
1010
0
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1011
0
      return NULL;
1012
0
    }
1013
1014
0
  fi = ftp_parse_ls_fp (fp, system_type);
1015
0
  fclose(fp);
1016
1017
0
  return fi;
1018
0
}
1019
1020
struct fileinfo *
1021
ftp_parse_ls_fp (FILE *fp, const enum stype system_type)
1022
9.34k
{
1023
9.34k
  switch (system_type)
1024
9.34k
    {
1025
2.33k
    case ST_UNIX:
1026
2.33k
      return ftp_parse_unix_ls (fp, 0);
1027
2.33k
    case ST_WINNT:
1028
2.33k
      {
1029
        /* Detect whether the listing is simulating the UNIX format */
1030
2.33k
        int   c = fgetc(fp);
1031
2.33k
        rewind(fp);
1032
1033
        /* If the first character of the file is '0'-'9', it's WINNT
1034
           format. */
1035
2.33k
        if (c >= '0' && c <='9')
1036
899
          return ftp_parse_winnt_ls (fp);
1037
1.43k
        else
1038
1.43k
          return ftp_parse_unix_ls (fp, 1);
1039
2.33k
      }
1040
2.33k
    case ST_VMS:
1041
2.33k
      return ftp_parse_vms_ls (fp);
1042
2.33k
    case ST_MACOS:
1043
2.33k
      return ftp_parse_unix_ls (fp, 1);
1044
0
    default:
1045
0
      logprintf (LOG_NOTQUIET, _("\
1046
0
Unsupported listing type, trying Unix listing parser.\n"));
1047
0
      return ftp_parse_unix_ls (fp, 0);
1048
9.34k
    }
1049
9.34k
}
1050
1051
/* Stuff for creating FTP index. */
1052
1053
/* The function creates an HTML index containing references to given
1054
   directories and files on the appropriate host.  The references are
1055
   FTP.  */
1056
uerr_t
1057
ftp_index (const char *file, struct url *u, struct fileinfo *f)
1058
0
{
1059
0
  FILE *fp;
1060
0
  char *upwd;
1061
0
  char *htcldir;                /* HTML-clean dir name */
1062
0
  char *htclfile;               /* HTML-clean file name */
1063
0
  char *urlclfile;              /* URL-clean file name */
1064
1065
0
  if (!output_stream)
1066
0
    {
1067
0
      fp = fopen (file, "wb");
1068
0
      if (!fp)
1069
0
        {
1070
0
          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1071
0
          return FOPENERR;
1072
0
        }
1073
0
    }
1074
0
  else
1075
0
    fp = output_stream;
1076
0
  if (u->user)
1077
0
    {
1078
0
      char *tmpu, *tmpp;        /* temporary, clean user and passwd */
1079
1080
0
      tmpu = url_escape (u->user);
1081
0
      tmpp = u->passwd ? url_escape (u->passwd) : NULL;
1082
0
      if (tmpp)
1083
0
        upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
1084
0
      else
1085
0
        upwd = concat_strings (tmpu, "@", (char *) 0);
1086
0
      xfree (tmpu);
1087
0
      xfree (tmpp);
1088
0
    }
1089
0
  else
1090
0
    upwd = xstrdup ("");
1091
1092
0
  htcldir = html_quote_string (u->dir);
1093
1094
0
  fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
1095
0
  fprintf (fp, "<html>\n<head>\n<title>");
1096
0
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1097
0
  fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
1098
0
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1099
0
  fprintf (fp, "</h1>\n<hr>\n<pre>\n");
1100
1101
0
  while (f)
1102
0
    {
1103
0
      fprintf (fp, "  ");
1104
0
      if (f->tstamp != -1)
1105
0
        {
1106
          /* #### Should we translate the months?  Or, even better, use
1107
             ISO 8601 dates?  */
1108
0
          static const char *months[] = {
1109
0
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1110
0
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1111
0
          };
1112
0
          time_t tstamp = f->tstamp;
1113
0
          struct tm *ptm = localtime (&tstamp);
1114
1115
0
          fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
1116
0
                  ptm->tm_mday);
1117
0
          if (f->ptype == TT_HOUR_MIN)
1118
0
            fprintf (fp, "%02d:%02d  ", ptm->tm_hour, ptm->tm_min);
1119
0
          else
1120
0
            fprintf (fp, "       ");
1121
0
        }
1122
0
      else
1123
0
        fprintf (fp, _("time unknown       "));
1124
0
      switch (f->type)
1125
0
        {
1126
0
        case FT_PLAINFILE:
1127
0
          fprintf (fp, _("File        "));
1128
0
          break;
1129
0
        case FT_DIRECTORY:
1130
0
          fprintf (fp, _("Directory   "));
1131
0
          break;
1132
0
        case FT_SYMLINK:
1133
0
          fprintf (fp, _("Link        "));
1134
0
          break;
1135
0
        default:
1136
0
          fprintf (fp, _("Not sure    "));
1137
0
          break;
1138
0
        }
1139
0
      htclfile = html_quote_string (f->name);
1140
0
      urlclfile = url_escape_unsafe_and_reserved (f->name);
1141
0
      fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
1142
0
      if (*u->dir != '/')
1143
0
        putc ('/', fp);
1144
      /* XXX: Should probably URL-escape dir components here, rather
1145
       * than just HTML-escape, for consistency with the next bit where
1146
       * we use urlclfile for the file component. Anyway, this is safer
1147
       * than what we had... */
1148
0
      fprintf (fp, "%s", htcldir);
1149
0
      if (*u->dir)
1150
0
        putc ('/', fp);
1151
0
      fprintf (fp, "%s", urlclfile);
1152
0
      if (f->type == FT_DIRECTORY)
1153
0
        putc ('/', fp);
1154
0
      fprintf (fp, "\">%s", htclfile);
1155
0
      if (f->type == FT_DIRECTORY)
1156
0
        putc ('/', fp);
1157
0
      fprintf (fp, "</a> ");
1158
0
      if (f->type == FT_PLAINFILE)
1159
0
        fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
1160
0
      else if (f->type == FT_SYMLINK)
1161
0
        fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
1162
0
      putc ('\n', fp);
1163
0
      xfree (htclfile);
1164
0
      xfree (urlclfile);
1165
0
      f = f->next;
1166
0
    }
1167
0
  fprintf (fp, "</pre>\n</body>\n</html>\n");
1168
0
  xfree (htcldir);
1169
0
  xfree (upwd);
1170
0
  if (!output_stream)
1171
0
    fclose (fp);
1172
0
  else
1173
0
    fflush (fp);
1174
0
  return FTPOK;
1175
0
}