Coverage Report

Created: 2024-09-08 06:46

/src/jq/src/util.c
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * Parts (strptime()) Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
3
 * All rights reserved.
4
 *
5
 * This code was contributed to The NetBSD Foundation by Klaus Klein.
6
 * Heavily optimised by David Laight
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
 * POSSIBILITY OF SUCH DAMAGE.
28
 */
29
30
#include <sys/types.h>
31
#include <sys/stat.h>
32
#include <assert.h>
33
#include <errno.h>
34
#include <fcntl.h>
35
#include <limits.h>
36
#include <string.h>
37
#include <unistd.h>
38
#include <stdlib.h>
39
#include <stddef.h>
40
#ifdef HAVE_ALLOCA_H
41
# include <alloca.h>
42
#elif !defined alloca
43
# ifdef __GNUC__
44
#  define alloca __builtin_alloca
45
# elif defined _MSC_VER
46
#  include <malloc.h>
47
#  define alloca _alloca
48
# elif !defined HAVE_ALLOCA
49
#  ifdef  __cplusplus
50
extern "C"
51
#  endif
52
void *alloca (size_t);
53
# endif
54
#endif
55
56
#ifdef WIN32
57
#include <windows.h>
58
#include <processenv.h>
59
#include <shellapi.h>
60
#include <wchar.h>
61
#include <wtypes.h>
62
#endif
63
64
65
#include "util.h"
66
#include "jq.h"
67
#include "jv_alloc.h"
68
69
#ifdef WIN32
70
FILE *fopen(const char *fname, const char *mode) {
71
  size_t sz = sizeof(wchar_t) * MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
72
  wchar_t *wfname = alloca(sz + 2); // +2 is not needed, but just in case
73
  MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname, sz);
74
75
  sz = sizeof(wchar_t) * MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0);
76
  wchar_t *wmode = alloca(sz + 2); // +2 is not needed, but just in case
77
  MultiByteToWideChar(CP_UTF8, 0, mode, -1, wmode, sz);
78
  return _wfopen(wfname, wmode);
79
}
80
#endif
81
82
1.84k
/*
 * Expand a leading "~/" in `path' to the user's home directory.
 *
 * `path' must be a jv string. Paths that do not start with "~/" are returned
 * unchanged. Otherwise `path' is freed and the result is either the
 * "<home>/<rest>" string or an invalid jv carrying a "Could not expand"
 * message when the home directory cannot be determined.
 */
jv expand_path(jv path) {
  assert(jv_get_kind(path) == JV_KIND_STRING);
  const char *text = jv_string_value(path);

  /* Nothing to do unless the path begins with "~/". */
  if (jv_string_length_bytes(jv_copy(path)) <= 1 || text[0] != '~' || text[1] != '/')
    return path;

  jv expanded;
  jv home = get_home();
  if (!jv_is_valid(home)) {
    /* NOTE(review): jv_invalid_get_msg() presumably consumes `home'; it is
     * not freed separately on this branch. */
    jv reason = jv_invalid_get_msg(home);
    expanded = jv_invalid_with_msg(jv_string_fmt("Could not expand %s. (%s)", text, jv_string_value(reason)));
    jv_free(reason);
  } else {
    expanded = jv_string_fmt("%s/%s", jv_string_value(home), text + 2);
    jv_free(home);
  }

  /* `text' points into `path', so release it only after formatting. */
  jv_free(path);
  return expanded;
}
100
101
1.85k
/*
 * Look up the user's home directory from the environment.
 *
 * $HOME is preferred. On WIN32 builds the lookup falls back to
 * %USERPROFILE%, then to %HOMEDRIVE%%HOMEPATH% (a missing HOMEDRIVE is
 * treated as empty). Returns a jv string on success, otherwise an invalid jv
 * with the message "Could not find home directory.".
 */
jv get_home() {
  const char *dir = getenv("HOME");
  if (dir != NULL)
    return jv_string(dir);

#ifdef WIN32
  dir = getenv("USERPROFILE");
  if (dir != NULL)
    return jv_string(dir);

  dir = getenv("HOMEPATH");
  if (dir != NULL) {
    const char *drive = getenv("HOMEDRIVE");
    if (drive == NULL)
      drive = "";
    return jv_string_fmt("%s%s", drive, dir);
  }
#endif

  return jv_invalid_with_msg(jv_string("Could not find home directory."));
}
127
128
129
5.53k
/*
 * Resolve `path' (a jv string) to a canonical absolute path.
 *
 * Uses realpath() (or _fullpath() on WIN32). If resolution fails the
 * original `path' is returned unchanged; otherwise `path' is freed and a new
 * jv string holding the resolved path is returned.
 */
jv jq_realpath(jv path) {
  /* pathconf() returns long; keeping it in an int could truncate. */
  long path_max;
  char *buf = NULL;
#ifdef _PC_PATH_MAX
  path_max = pathconf(jv_string_value(path), _PC_PATH_MAX);
#else
  path_max = PATH_MAX;
#endif
  /* With no usable limit `buf' stays NULL and the resolver allocates. */
  if (path_max > 0) {
    buf = jv_mem_alloc((size_t)path_max);
  }
#ifdef WIN32
  char *tmp = _fullpath(buf, jv_string_value(path), (size_t)path_max);
#else
  char *tmp = realpath(jv_string_value(path), buf);
#endif
  if (tmp == NULL) {
    jv_mem_free(buf);
    return path;
  }
  jv_free(path);
  path = jv_string(tmp);
  /*
   * Release the result with the allocator that produced it: our own buffer
   * came from jv_mem_alloc(), a resolver-allocated one from malloc().
   */
  if (tmp == buf)
    jv_mem_free(buf);
  else
    free(tmp);
  return path;
}
154
155
/*
 * Find the first occurrence of the `needlelen'-byte sequence `needle' inside
 * the `haystacklen'-byte region `haystack'.
 *
 * Delegates to memmem() when HAVE_MEMMEM is defined. The fallback returns
 * NULL for an empty haystack or a needle longer than the haystack, and a
 * pointer to the start of the haystack for an empty needle in a non-empty
 * haystack.
 */
const void *_jq_memmem(const void *haystack, size_t haystacklen,
                       const void *needle, size_t needlelen) {
#ifdef HAVE_MEMMEM
  return (const void*)memmem(haystack, haystacklen, needle, needlelen);
#else
  const char *hay = haystack;

  if (haystacklen == 0 || needlelen > haystacklen)
    return NULL;

  /* Last offset at which the needle still fits inside the haystack. */
  const size_t last_start = haystacklen - needlelen;
  for (size_t start = 0; start <= last_start; start++) {
    /* Skip memcmp() for an empty needle so `needle' is never touched. */
    if (needlelen == 0 || memcmp(hay + start, needle, needlelen) == 0)
      return hay + start;
  }
  return NULL;
#endif /* !HAVE_MEMMEM */
}
180
181
struct jq_util_input_state {
182
  jq_util_msg_cb err_cb;
183
  void *err_cb_data;
184
  jv_parser *parser;
185
  FILE* current_input;
186
  char **files;
187
  int nfiles;
188
  int curr_file;
189
  int failures;
190
  jv slurped;
191
  char buf[4096];
192
  size_t buf_valid_len;
193
  jv current_filename;
194
  size_t current_line;
195
};
196
197
0
/*
 * Default "could not open file" reporter.
 *
 * `data' is the FILE* to write to and `fname' the file that failed to open;
 * the reason is taken from the current value of errno.
 */
static void fprinter(void *data, const char *fname) {
  FILE *out = (FILE *)data;
  const char *reason = strerror(errno);

  fprintf(out, "jq: error: Could not open file %s: %s\n", fname, reason);
}
200
201
// If parser == NULL -> RAW
202
0
/*
 * Allocate a zeroed input state.
 *
 * `err_cb' / `err_cb_data' are stored for reporting files that cannot be
 * opened; when `err_cb' is NULL the default reporter writing to stderr is
 * installed instead (and `err_cb_data' is ignored). The slurp accumulator
 * and current filename start out as invalid jv values.
 */
jq_util_input_state *jq_util_input_init(jq_util_msg_cb err_cb, void *err_cb_data) {
  jq_util_input_state *st = jv_mem_calloc(1, sizeof(*st));

  if (err_cb != NULL) {
    st->err_cb = err_cb;
    st->err_cb_data = err_cb_data;
  } else {
    st->err_cb = fprinter;
    st->err_cb_data = stderr;
  }
  st->slurped = jv_invalid();
  st->current_filename = jv_invalid();

  return st;
}
215
216
0
/*
 * Attach `parser' to `state' and select the slurp mode.
 *
 * A NULL parser means raw input. When `slurp' is nonzero the accumulator is
 * initialised to "" for raw input or to an empty array for parsed input;
 * otherwise it is left invalid. Must be called before any slurp accumulator
 * has been set up (asserted).
 */
void jq_util_input_set_parser(jq_util_input_state *state, jv_parser *parser, int slurp) {
  assert(!jv_is_valid(state->slurped));
  state->parser = parser;

  if (!slurp) {
    state->slurped = jv_invalid();
    return;
  }

  state->slurped = (parser == NULL) ? jv_string("") : jv_array();
}
227
228
0
/*
 * Destroy the input state referenced by `*state' and set `*state' to NULL.
 *
 * A NULL `*state' is a no-op. Frees the attached parser (if any), the queued
 * file names, the slurp accumulator and the current filename. An input file
 * that is still open is closed as well; stdin is never closed.
 */
void jq_util_input_free(jq_util_input_state **state) {
  jq_util_input_state *old_state = *state;
  *state = NULL;
  if (old_state == NULL)
    return;

  /* Don't leak the stream when the state is torn down mid-file. */
  if (old_state->current_input != NULL && old_state->current_input != stdin)
    fclose(old_state->current_input);

  if (old_state->parser != NULL)
    jv_parser_free(old_state->parser);

  /* The name list was built with jv_mem_realloc()/jv_mem_strdup(), so it is
   * released through the matching jv_mem_free(). */
  for (int i = 0; i < old_state->nfiles; i++)
    jv_mem_free(old_state->files[i]);
  jv_mem_free(old_state->files);

  jv_free(old_state->slurped);
  jv_free(old_state->current_filename);
  jv_mem_free(old_state);
}
243
244
0
/*
 * Append a copy of `fname' to the list of inputs queued in `state'.
 */
void jq_util_input_add_input(jq_util_input_state *state, const char *fname) {
  /* Grow the name array by one slot, then fill the new last slot. */
  const size_t wanted = (state->nfiles + 1) * sizeof(state->files[0]);

  state->files = jv_mem_realloc(state->files, wanted);
  state->files[state->nfiles] = jv_mem_strdup(fname);
  state->nfiles++;
}
248
249
0
/*
 * Report how many input failures `state' has recorded so far.
 */
int jq_util_input_errors(jq_util_input_state *state) {
  const int failed = state->failures;

  return failed;
}
252
253
0
/*
 * Hand out the next queued input name and advance the cursor, or return NULL
 * once every queued name has been consumed.
 */
static const char *next_file(jq_util_input_state *state) {
  if (state->curr_file >= state->nfiles)
    return NULL;

  const char *name = state->files[state->curr_file];
  state->curr_file++;
  return name;
}
258
259
0
/*
 * Refill state->buf with the next chunk (at most one line) of input.
 *
 * If there is no current stream, or it has hit EOF or an error, the stream
 * is retired and the next queued file ("-" meaning stdin) is opened. One
 * fgets() call is then made on the current stream, and state->buf_valid_len
 * is set to the number of bytes obtained (0 when nothing was read).
 *
 * Returns nonzero when every queued file has been taken and the current
 * stream is absent, at EOF, or in error; zero otherwise.
 */
static int jq_util_input_read_more(jq_util_input_state *state) {
  if (!state->current_input || feof(state->current_input) || ferror(state->current_input)) {
    if (state->current_input && ferror(state->current_input)) {
      // System-level input error on the stream. It will be closed (below).
      // TODO: report it. Can't use 'state->err_cb()' as it is hard-coded for
      //       'open' related problems.
      fprintf(stderr,"jq: error: %s\n", strerror(errno));
    }
    if (state->current_input) {
      if (state->current_input == stdin) {
        clearerr(stdin); // perhaps we can read again; anyways, we don't fclose(stdin)
      } else {
        fclose(state->current_input);
      }
      state->current_input = NULL;
      jv_free(state->current_filename);
      state->current_filename = jv_invalid();
      state->current_line = 0 ;
    }
    const char *f = next_file(state);
    if (f != NULL) {
      // A previous fopen() failure leaves current_filename set while
      // current_input is NULL, so the cleanup above is skipped; release the
      // old name here before replacing it.
      jv_free(state->current_filename);
      if (!strcmp(f, "-")) {
        state->current_input = stdin;
        state->current_filename = jv_string("<stdin>");
      } else {
        state->current_input = fopen(f, "r");
        state->current_filename = jv_string(f);
        if (!state->current_input) {
          state->err_cb(state->err_cb_data, f);
          state->failures++;
        }
      }
      state->current_line = 0;
    }
  }

  state->buf[0] = 0;
  state->buf_valid_len = 0;
  if (state->current_input) {
    char *res;
    // Pre-fill with a nonzero byte so the terminating NUL written by fgets()
    // can be located afterwards (see the backwards scan below).
    memset(state->buf, 0xff, sizeof(state->buf));

    // Retry reads that were merely interrupted by a signal.
    while (!(res = fgets(state->buf, sizeof(state->buf), state->current_input)) &&
           ferror(state->current_input) && errno == EINTR)
      clearerr(state->current_input);
    if (res == NULL) {
      state->buf[0] = 0;
      if (ferror(state->current_input))
        state->failures++;
    } else {
      const char *p = memchr(state->buf, '\n', sizeof(state->buf));

      if (p != NULL)
        state->current_line++;

      if (p == NULL && state->parser != NULL) {
        /*
         * There should be no NULs in JSON texts (but JSON text
         * sequences are another story).
         */
        state->buf_valid_len = strlen(state->buf);
      } else if (p == NULL && feof(state->current_input)) {
        size_t i;

        /*
         * XXX We don't know how many bytes we've read!
         *
         * We can't use getline() because there need not be any newlines
         * in the input.  The only entirely correct choices are: use
         * fgetc() or fread().  Using fread() will complicate buffer
         * management here.
         *
         * For now we check how much fgets() read by scanning backwards for the
         * terminating '\0'. This only works because we previously memset our
         * buffer with something nonzero.
         */
        for (i = sizeof(state->buf) - 1; i > 0; i--) {
          if (state->buf[i] == '\0')
            break;
        }
        state->buf_valid_len = i;
      } else if (p == NULL) {
        // No newline and not at EOF: fgets() filled the whole buffer.
        state->buf_valid_len = sizeof(state->buf) - 1;
      } else {
        // Everything up to and including the newline is valid.
        state->buf_valid_len = (p - state->buf) + 1;
      }
    }
  }
  return state->curr_file == state->nfiles &&
      (!state->current_input || feof(state->current_input) || ferror(state->current_input));
}
350
351
0
/*
 * Input callback adapter: `data' is the jq_util_input_state to read from.
 * The `jq' argument is not used.
 */
jv jq_util_input_next_input_cb(jq_state *jq, void *data) {
  jq_util_input_state *input = (jq_util_input_state *)data;

  return jq_util_input_next_input(input);
}
354
355
// Return the current_filename:current_line
356
0
/*
 * Describe the current input position as "<filename>:<line>".
 *
 * Requires that the input callback installed on `jq' is
 * jq_util_input_next_input_cb (asserted; in builds without assertions an
 * invalid jv with an error message is returned instead). Returns
 * "<unknown>" when no filename has been recorded yet.
 */
jv jq_util_input_get_position(jq_state *jq) {
  jq_input_cb installed = NULL;
  void *installed_data = NULL;

  jq_get_input_cb(jq, &installed, &installed_data);
  assert(installed == jq_util_input_next_input_cb);
  if (installed != jq_util_input_next_input_cb)
    return jv_invalid_with_msg(jv_string("Invalid jq_util_input API usage"));

  jq_util_input_state *input = (jq_util_input_state *)installed_data;

  // current_filename may legitimately not be a string yet: a JSON parser
  // error can be reported before any filename has been set.
  if (jv_get_kind(input->current_filename) != JV_KIND_STRING)
    return jv_string("<unknown>");

  return jv_string_fmt("%s:%lu", jv_string_value(input->current_filename), (unsigned long)input->current_line);
}
374
375
0
/*
 * Return a copy of the current input's filename.
 *
 * Yields an invalid jv with the message "Unknown input filename" when the
 * input callback installed on `jq' is not jq_util_input_next_input_cb.
 */
jv jq_util_input_get_current_filename(jq_state* jq) {
  jq_input_cb installed = NULL;
  void *installed_data = NULL;

  jq_get_input_cb(jq, &installed, &installed_data);
  if (installed != jq_util_input_next_input_cb)
    return jv_invalid_with_msg(jv_string("Unknown input filename"));

  jq_util_input_state *input = (jq_util_input_state *)installed_data;
  return jv_copy(input->current_filename);
}
385
386
0
/*
 * Return the current input's line counter as a jv number.
 *
 * Yields an invalid jv with the message "Unknown input line number" when the
 * input callback installed on `jq' is not jq_util_input_next_input_cb.
 */
jv jq_util_input_get_current_line(jq_state* jq) {
  jq_input_cb installed = NULL;
  void *installed_data = NULL;

  jq_get_input_cb(jq, &installed, &installed_data);
  if (installed != jq_util_input_next_input_cb)
    return jv_invalid_with_msg(jv_string("Unknown input line number"));

  jq_util_input_state *input = (jq_util_input_state *)installed_data;
  return jv_number(input->current_line);
}
396
397
398
// Blocks to read one more input from stdin and/or given files
399
// When slurping, it returns just one value
400
0
/*
 * Produce the next input value.
 *
 * Raw mode (no parser): returns one line with its trailing newline removed,
 * or the final unterminated fragment. Parsed mode: returns the next value
 * from the parser, or an invalid jv carrying a parse error message.
 * When slurping, everything is accumulated and handed back as a single value
 * once the input is exhausted. An invalid jv without a message is returned
 * when there is nothing (more) to deliver.
 */
jv jq_util_input_next_input(jq_util_input_state *state) {
  int at_end = 0;   // nonzero once the reader reports the final chunk
  int pending = 0;  // bytes still buffered in the parser (slurp mode only)
  jv result = jv_invalid(); // need more input

  do {
    if (state->parser != NULL) {
      // Parsed input: top up the parser only when it has drained its buffer.
      if (jv_parser_remaining(state->parser) == 0) {
        at_end = jq_util_input_read_more(state);
        jv_parser_set_buf(state->parser, state->buf, state->buf_valid_len, !at_end);
      }
      result = jv_parser_next(state->parser);

      if (!jv_is_valid(state->slurped)) {
        // Not slurping: hand back any value or parse error immediately.
        if (jv_is_valid(result) || jv_invalid_has_msg(jv_copy(result)))
          return result;
      } else {
        // An input lacking a trailing newline may carry several values on
        // one line, so remember whether the parser still holds data.
        pending = jv_parser_remaining(state->parser);
        if (jv_is_valid(result)) {
          state->slurped = jv_array_append(state->slurped, result);
          result = jv_invalid();
        } else if (jv_invalid_has_msg(jv_copy(result))) {
          return result; // parse error: reported as-is, not slurped
        }
      }
    } else {
      // Raw input
      at_end = jq_util_input_read_more(state);
      if (state->buf_valid_len != 0) {
        const size_t len = state->buf_valid_len;

        if (jv_is_valid(state->slurped)) {
          // Slurped raw input: keep appending chunks verbatim.
          state->slurped = jv_string_concat(state->slurped, jv_string_sized(state->buf, len));
        } else {
          if (!jv_is_valid(result))
            result = jv_string("");
          if (state->buf[len - 1] == '\n') {
            // Whole line: drop the newline and deliver it.
            state->buf[len - 1] = 0;
            return jv_string_concat(result, jv_string_sized(state->buf, len - 1));
          }
          // Partial line: stash it and keep reading.
          result = jv_string_concat(result, jv_string_sized(state->buf, len));
          state->buf[0] = '\0';
          state->buf_valid_len = 0;
        }
      }
    }
  } while (!at_end || pending);

  if (jv_is_valid(state->slurped)) {
    result = state->slurped;
    state->slurped = jv_invalid();
  }
  return result;
}
453
454
#ifndef HAVE_STRPTIME
455
/* http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/lib/libc/time/strptime.c?only_with_tag=HEAD
456
 * NetBSD implementation strptime().
457
 * Format description: https://netbsd.gw.com/cgi-bin/man-cgi?strptime+3+NetBSD-current
458
 * Adapted by https://github.com/res2001 (https://github.com/res2001/strptime).
459
*/
460
461
#include <ctype.h>
462
#include <string.h>
463
#include <time.h>
464
#include <stdint.h>
465
466
static const unsigned char *conv_num(const unsigned char *, int *, unsigned int, unsigned int);
467
static const unsigned char *find_string(const unsigned char *, int *, const char * const *, const char * const *, int);
468
469
/*
470
 * We do not implement alternate representations. However, we always
471
 * check whether a given modifier is allowed for a certain conversion.
472
 */
473
#define ALT_E     0x01
474
#define ALT_O     0x02
475
#define LEGAL_ALT(x)  { if (alt_format & ~(x)) return NULL; }
476
477
#define TM_YEAR_BASE  1900
478
479
#define TM_SUNDAY       0
480
#define TM_MONDAY       1
481
#define TM_TUESDAY      2
482
#define TM_WEDNESDAY    3
483
#define TM_THURSDAY     4
484
#define TM_FRIDAY       5
485
#define TM_SATURDAY     6
486
487
/* Bit flags recording which broken-down time fields have been parsed. */
#define S_YEAR      (1 << 0)
#define S_MON     (1 << 1)
#define S_YDAY      (1 << 2)
#define S_MDAY      (1 << 3)
#define S_WDAY      (1 << 4)
#define S_HOUR      (1 << 5)

/*
 * Test a single flag in a flag word. The argument is parenthesized so that
 * an expression such as `a | b' is tested as a whole.
 */
#define HAVE_MDAY(s)  ((s) & S_MDAY)
#define HAVE_MON(s)   ((s) & S_MON)
#define HAVE_WDAY(s)  ((s) & S_WDAY)
#define HAVE_YDAY(s)  ((s) & S_YDAY)
#define HAVE_YEAR(s)  ((s) & S_YEAR)
#define HAVE_HOUR(s)  ((s) & S_HOUR)
500
501
#define SECSPERMIN      60
502
#define MINSPERHOUR     60
503
#define SECSPERHOUR     (SECSPERMIN * MINSPERHOUR)
504
#define HOURSPERDAY     24
505
506
#define HERE_D_T_FMT    "%a %b %e %H:%M:%S %Y"
507
#define HERE_D_FMT      "%y/%m/%d"
508
#define HERE_T_FMT_AMPM "%I:%M:%S %p"
509
#define HERE_T_FMT      "%H:%M:%S"
510
511
#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
512
513
/*
514
** Since everything in isleap is modulo 400 (or a factor of 400), we know that
515
**  isleap(y) == isleap(y % 400)
516
** and so
517
**  isleap(a + b) == isleap((a + b) % 400)
518
** or
519
**  isleap(a + b) == isleap(a % 400 + b % 400)
520
** This is true even if % means modulo rather than Fortran remainder
521
** (which is allowed by C89 but not by C99 or later).
522
** We use this to avoid addition overflow problems.
523
*/
524
525
#define isleap_sum(a, b)  isleap((a) % 400 + (b) % 400)
526
527
#ifdef _MSC_VER
528
#define tzname              _tzname
529
#define strncasecmp         _strnicmp
530
#endif
531
532
#ifdef TM_ZONE
533
static char* utc = "UTC";
534
#endif
535
/* RFC-822/RFC-2822 */
536
static const char *const nast[] = {
537
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
538
};
539
static const char *const nadt[] = {
540
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
541
};
542
static const char *const weekday_name[] =
543
{
544
    "Sunday", "Monday", "Tuesday", "Wednesday",
545
    "Thursday", "Friday", "Saturday"
546
};
547
static const char *const ab_weekday_name[] =
548
{
549
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
550
};
551
static const char *const month_name[] =
552
{
553
    "January", "February", "March", "April", "May", "June",
554
    "July", "August", "September", "October", "November", "December"
555
};
556
static const char *const ab_month_name[] =
557
{
558
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
559
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
560
};
561
static const char *const am_pm[] = {"AM", "PM"};
562
563
564
/*
565
 * Table to determine the ordinal date for the start of a month.
566
 * Ref: http://en.wikipedia.org/wiki/ISO_week_date
567
 */
568
static const int start_of_month[2][13] = {
569
    /* non-leap year */
570
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
571
    /* leap year */
572
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
573
};
574
575
/*
576
 * Calculate the week day of the first day of a year. Valid for
577
 * the Gregorian calendar, which began Sept 14, 1752 in the UK
578
 * and its colonies. Ref:
579
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
580
 */
581
582
static int
first_wday_of(int yr)
{
    /* Split the year into its century and two-digit parts. */
    const int century = yr / 100;
    const int yy = yr % 100;
    /* Gregorian leap-year rule, written out in place. */
    const int leap = (yr % 4) == 0 && ((yr % 100) != 0 || (yr % 400) == 0);

    int wday = 2 * (3 - century % 4);   /* century term */
    wday += yy;                         /* year-within-century term */
    wday += yy / 4;                     /* leap days within the century */
    wday += leap ? 6 : 0;               /* leap-year adjustment */
    wday += 1;

    /* 0 = Sunday ... 6 = Saturday */
    return wday % 7;
}
588
589
#define delim(p)  ((p) == '\0' || isspace((unsigned char)(p)))
590
591
/*
 * Consume a time zone name token at *bp.
 *
 * The token runs up to the next NUL or whitespace byte (at most 511 bytes
 * are considered). When `mandatory' is nonzero *bp is advanced past the
 * token even if it is rejected. A token is accepted only if its first byte
 * is alphanumeric; on acceptance *bp is advanced, tm->tm_isdst is cleared
 * and 1 is returned. Otherwise 0 is returned.
 *
 * No time zone database lookup is performed, so the UTC offset of the named
 * zone is unknown: TM_GMTOFF (where available) is left untouched.
 */
static int
fromzone(const unsigned char **bp, struct tm *tm, int mandatory)
{
    char buf[512], *p;
    const unsigned char *rp;

    /* Copy the token, stopping at NUL/whitespace or when buf is full. */
    for (p = buf, rp = *bp;
         *rp != '\0' && !isspace(*rp) && p < &buf[sizeof(buf) - 1];
         rp++)
        *p++ = *rp;
    *p = '\0';

    if (mandatory)
        *bp = rp;
    if (!isalnum((unsigned char)*buf))
        return 0;

    *bp = rp;
    tm->tm_isdst = 0; /* XXX */
#ifdef TM_ZONE
    /* The name lives in a local buffer, so it cannot be stored here. */
    tm->TM_ZONE = NULL; /* XXX */
#endif
    return 1;
}
622
623
char* strptime(const char *buf, const char *fmt, struct tm *tm)
624
{
625
    unsigned char c;
626
    const unsigned char *bp, *ep, *zname;
627
    int alt_format, i, split_year = 0, neg = 0, state = 0,
628
        day_offset = -1, week_offset = 0, offs, mandatory;
629
    const char *new_fmt;
630
631
    bp = (const unsigned char *)buf;
632
633
    while (bp != NULL && (c = *fmt++) != '\0') {
634
        /* Clear `alternate' modifier prior to new conversion. */
635
        alt_format = 0;
636
        i = 0;
637
638
        /* Eat up white-space. */
639
        if (isspace(c)) {
640
            while (isspace(*bp))
641
                bp++;
642
            continue;
643
        }
644
645
        if (c != '%')
646
            goto literal;
647
648
649
again:    switch (c = *fmt++) {
650
        case '%': /* "%%" is converted to "%". */
651
literal:
652
            if (c != *bp++)
653
                return NULL;
654
            LEGAL_ALT(0);
655
            continue;
656
657
        /*
658
         * "Alternative" modifiers. Just set the appropriate flag
659
         * and start over again.
660
         */
661
        case 'E': /* "%E?" alternative conversion modifier. */
662
            LEGAL_ALT(0);
663
            alt_format |= ALT_E;
664
            goto again;
665
666
        case 'O': /* "%O?" alternative conversion modifier. */
667
            LEGAL_ALT(0);
668
            alt_format |= ALT_O;
669
            goto again;
670
671
        /*
672
         * "Complex" conversion rules, implemented through recursion.
673
         */
674
        case 'c': /* Date and time, using the locale's format. */
675
//            new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
676
            new_fmt = HERE_D_T_FMT;
677
            state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
678
            goto recurse;
679
680
        case 'F': /* The date as "%Y-%m-%d". */
681
            new_fmt = "%Y-%m-%d";
682
            LEGAL_ALT(0);
683
            state |= S_MON | S_MDAY | S_YEAR;
684
            goto recurse;
685
686
        case 'R': /* The time as "%H:%M". */
687
            new_fmt = "%H:%M";
688
            LEGAL_ALT(0);
689
            goto recurse;
690
691
        case 'r': /* The time in 12-hour clock representation. */
692
//            new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
693
            new_fmt = HERE_T_FMT_AMPM;
694
            LEGAL_ALT(0);
695
            goto recurse;
696
697
        case 'X': /* The time, using the locale's format. */
698
            /* fall through */
699
700
        case 'T': /* The time as "%H:%M:%S". */
701
            new_fmt = HERE_T_FMT;
702
            LEGAL_ALT(0);
703
704
recurse:
705
            bp = (const unsigned char *)strptime((const char *)bp,
706
                                new_fmt, tm);
707
            LEGAL_ALT(ALT_E);
708
            continue;
709
710
        case 'x': /* The date, using the locale's format. */
711
            /* fall through */
712
713
        case 'D': /* The date as "%y/%m/%d". */
714
        {
715
            new_fmt = HERE_D_FMT;
716
            LEGAL_ALT(0);
717
            state |= S_MON | S_MDAY | S_YEAR;
718
            const int year = split_year ? tm->tm_year : 0;
719
720
            bp = (const unsigned char *)strptime((const char *)bp,
721
                                new_fmt, tm);
722
            LEGAL_ALT(ALT_E);
723
            tm->tm_year += year;
724
            if (split_year && tm->tm_year % (2000 - TM_YEAR_BASE) <= 68)
725
                tm->tm_year -= 2000 - TM_YEAR_BASE;
726
            split_year = 1;
727
            continue;
728
        }
729
        /*
730
         * "Elementary" conversion rules.
731
         */
732
        case 'A': /* The day of week, using the locale's form. */
733
        case 'a':
734
            bp = find_string(bp, &tm->tm_wday, weekday_name, ab_weekday_name, 7);
735
            LEGAL_ALT(0);
736
            state |= S_WDAY;
737
            continue;
738
739
        case 'B': /* The month, using the locale's form. */
740
        case 'b':
741
        case 'h':
742
            bp = find_string(bp, &tm->tm_mon, month_name, ab_month_name, 12);
743
            LEGAL_ALT(0);
744
            state |= S_MON;
745
            continue;
746
747
        case 'C': /* The century number. */
748
            i = 20;
749
            bp = conv_num(bp, &i, 0, 99);
750
751
            i = i * 100 - TM_YEAR_BASE;
752
            if (split_year)
753
                i += tm->tm_year % 100;
754
            split_year = 1;
755
            tm->tm_year = i;
756
            LEGAL_ALT(ALT_E);
757
            state |= S_YEAR;
758
            continue;
759
760
        case 'd': /* The day of month. */
761
        case 'e':
762
            bp = conv_num(bp, &tm->tm_mday, 1, 31);
763
            LEGAL_ALT(ALT_O);
764
            state |= S_MDAY;
765
            continue;
766
767
        case 'k': /* The hour (24-hour clock representation). */
768
            LEGAL_ALT(0);
769
            /* FALLTHROUGH */
770
        case 'H':
771
            bp = conv_num(bp, &tm->tm_hour, 0, 23);
772
            LEGAL_ALT(ALT_O);
773
            state |= S_HOUR;
774
            continue;
775
776
        case 'l': /* The hour (12-hour clock representation). */
777
            LEGAL_ALT(0);
778
            /* FALLTHROUGH */
779
        case 'I':
780
            bp = conv_num(bp, &tm->tm_hour, 1, 12);
781
            if (tm->tm_hour == 12)
782
                tm->tm_hour = 0;
783
            LEGAL_ALT(ALT_O);
784
            state |= S_HOUR;
785
            continue;
786
787
        case 'j': /* The day of year. */
788
            i = 1;
789
            bp = conv_num(bp, &i, 1, 366);
790
            tm->tm_yday = i - 1;
791
            LEGAL_ALT(0);
792
            state |= S_YDAY;
793
            continue;
794
795
        case 'M': /* The minute. */
796
            bp = conv_num(bp, &tm->tm_min, 0, 59);
797
            LEGAL_ALT(ALT_O);
798
            continue;
799
800
        case 'm': /* The month. */
801
            i = 1;
802
            bp = conv_num(bp, &i, 1, 12);
803
            tm->tm_mon = i - 1;
804
            LEGAL_ALT(ALT_O);
805
            state |= S_MON;
806
            continue;
807
808
        case 'p': /* The locale's equivalent of AM/PM. */
809
            bp = find_string(bp, &i, am_pm, NULL, 2);
810
            if (HAVE_HOUR(state) && tm->tm_hour > 11)
811
                return NULL;
812
            tm->tm_hour += i * 12;
813
            LEGAL_ALT(0);
814
            continue;
815
816
        case 'S': /* The seconds. */
817
            bp = conv_num(bp, &tm->tm_sec, 0, 61);
818
            LEGAL_ALT(ALT_O);
819
            continue;
820
821
        case 's': {     /* seconds since the epoch */
822
#ifdef _WIN32
823
            const time_t TIME_MAX = INT32_MAX;
824
#else
825
            const time_t TIME_MAX = INT64_MAX;
826
#endif
827
            time_t sse, d;
828
829
            if (*bp < '0' || *bp > '9') {
830
                bp = NULL;
831
                continue;
832
            }
833
834
            sse = *bp++ - '0';
835
            while (*bp >= '0' && *bp <= '9') {
836
                d = *bp++ - '0';
837
                if (sse > TIME_MAX/10) {
838
                    bp = NULL;
839
                    break;
840
                }
841
                sse *= 10;
842
                if (sse > TIME_MAX - d) {
843
                    bp = NULL;
844
                    break;
845
                }
846
                sse += d;
847
            }
848
            if (bp == NULL)
849
                continue;
850
851
#ifdef _WIN32
852
            if (localtime_s(tm, &sse))
853
#else
854
            if (localtime_r(&sse, tm) == NULL)
855
#endif
856
                bp = NULL;
857
            else
858
                state |= S_YDAY | S_WDAY | S_MON | S_MDAY | S_YEAR;
859
            continue;
860
            }
861
862
        case 'U': /* The week of year, beginning on sunday. */
863
        case 'W': /* The week of year, beginning on monday. */
864
            /*
865
             * This is bogus, as we can not assume any valid
866
             * information present in the tm structure at this
867
             * point to calculate a real value, so save the
868
             * week for now in case it can be used later.
869
             */
870
            bp = conv_num(bp, &i, 0, 53);
871
            LEGAL_ALT(ALT_O);
872
            if (c == 'U')
873
                day_offset = TM_SUNDAY;
874
            else
875
                day_offset = TM_MONDAY;
876
            week_offset = i;
877
            continue;
878
879
        case 'w': /* The day of week, beginning on sunday. */
880
            bp = conv_num(bp, &tm->tm_wday, 0, 6);
881
            LEGAL_ALT(ALT_O);
882
            state |= S_WDAY;
883
            continue;
884
885
        case 'u': /* The day of week, monday = 1. */
886
            bp = conv_num(bp, &i, 1, 7);
887
            tm->tm_wday = i % 7;
888
            LEGAL_ALT(ALT_O);
889
            state |= S_WDAY;
890
            continue;
891
892
        case 'g': /* The year corresponding to the ISO week
893
                 * number but without the century.
894
                 */
895
            bp = conv_num(bp, &i, 0, 99);
896
            continue;
897
898
        case 'G': /* The year corresponding to the ISO week
899
                 * number with century.
900
                 */
901
            do
902
                bp++;
903
            while (isdigit(*bp));
904
            continue;
905
906
        case 'V': /* The ISO 8601:1988 week number as decimal */
907
            bp = conv_num(bp, &i, 0, 53);
908
            continue;
909
910
        case 'Y': /* The year. */
911
            i = TM_YEAR_BASE; /* just for data sanity... */
912
            bp = conv_num(bp, &i, 0, 9999);
913
            tm->tm_year = i - TM_YEAR_BASE;
914
            LEGAL_ALT(ALT_E);
915
            state |= S_YEAR;
916
            continue;
917
918
        case 'y': /* The year within 100 years of the epoch. */
919
            /* LEGAL_ALT(ALT_E | ALT_O); */
920
            bp = conv_num(bp, &i, 0, 99);
921
922
            if (split_year)
923
                /* preserve century */
924
                i += (tm->tm_year / 100) * 100;
925
            else {
926
                split_year = 1;
927
                if (i <= 68)
928
                    i = i + 2000 - TM_YEAR_BASE;
929
            }
930
            tm->tm_year = i;
931
            state |= S_YEAR;
932
            continue;
933
934
        case 'Z':       // time zone name
935
        case 'z':       //
936
#ifdef _WIN32
937
            _tzset();
938
#else
939
            tzset();
940
#endif
941
            mandatory = c == 'z';
942
            /*
943
             * We recognize all ISO 8601 formats:
944
             * Z  = Zulu time/UTC
945
             * [+-]hhmm
946
             * [+-]hh:mm
947
             * [+-]hh
948
             * We recognize all RFC-822/RFC-2822 formats:
949
             * UT|GMT
950
             *          North American : UTC offsets
951
             * E[DS]T = Eastern : -4 | -5
952
             * C[DS]T = Central : -5 | -6
953
             * M[DS]T = Mountain: -6 | -7
954
             * P[DS]T = Pacific : -7 | -8
955
             *          Nautical/Military
956
             * [A-IL-M] = -1 ... -9 (J not used)
957
             * [N-Y]  = +1 ... +12
958
             * Note: J maybe used to denote non-nautical
959
             *       local time
960
             */
961
            if (mandatory)
962
                while (isspace(*bp))
963
                    bp++;
964
965
            zname = bp;
966
            switch (*bp++) {
967
            case 'G':
968
                if (*bp++ != 'M')
969
                    goto namedzone;
970
                /*FALLTHROUGH*/
971
            case 'U':
972
                if (*bp++ != 'T')
973
                    goto namedzone;
974
                else if (!delim(*bp) && *bp++ != 'C')
975
                    goto namedzone;
976
                /*FALLTHROUGH*/
977
            case 'Z':
978
                if (!delim(*bp))
979
                    goto namedzone;
980
                tm->tm_isdst = 0;
981
#ifdef TM_GMTOFF
982
                tm->TM_GMTOFF = 0;
983
#endif
984
#ifdef TM_ZONE
985
                tm->TM_ZONE = utc;
986
#endif
987
                continue;
988
            case '+':
989
                neg = 0;
990
                break;
991
            case '-':
992
                neg = 1;
993
                break;
994
            default:
995
namedzone:
996
                bp = zname;
997
998
                /* Nautical / Military style */
999
                if (delim(bp[1]) &&
1000
                    ((*bp >= 'A' && *bp <= 'I') ||
1001
                     (*bp >= 'L' && *bp <= 'Y'))) {
1002
#ifdef TM_GMTOFF
1003
                    /* Argh! No 'J'! */
1004
                    if (*bp >= 'A' && *bp <= 'I')
1005
                        tm->TM_GMTOFF =
1006
                            (int)*bp - ('A' - 1);
1007
                    else if (*bp >= 'L' && *bp <= 'M')
1008
                        tm->TM_GMTOFF = (int)*bp - 'A';
1009
                    else if (*bp >= 'N' && *bp <= 'Y')
1010
                        tm->TM_GMTOFF = 'M' - (int)*bp;
1011
                    tm->TM_GMTOFF *= SECSPERHOUR;
1012
#endif
1013
#ifdef TM_ZONE
1014
                    tm->TM_ZONE = NULL; /* XXX */
1015
#endif
1016
                    bp++;
1017
                    continue;
1018
                }
1019
                /* 'J' is local time */
1020
                if (delim(bp[1]) && *bp == 'J') {
1021
#ifdef TM_GMTOFF
1022
                    tm->TM_GMTOFF = -timezone;
1023
#endif
1024
#ifdef TM_ZONE
1025
                    tm->TM_ZONE = NULL; /* XXX */
1026
#endif
1027
                    bp++;
1028
                    continue;
1029
                }
1030
1031
                /*
1032
                 * From our 3 letter hard-coded table
1033
                 * XXX: Can be removed, handled by tzload()
1034
                 */
1035
                if (delim(bp[0]) || delim(bp[1]) ||
1036
                    delim(bp[2]) || !delim(bp[3]))
1037
                    goto loadzone;
1038
                ep = find_string(bp, &i, nast, NULL, 4);
1039
                if (ep != NULL) {
1040
#ifdef TM_GMTOFF
1041
                    tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
1042
#endif
1043
#ifdef TM_ZONE
1044
                    tm->TM_ZONE = __UNCONST(nast[i]);
1045
#endif
1046
                    bp = ep;
1047
                    continue;
1048
                }
1049
                ep = find_string(bp, &i, nadt, NULL, 4);
1050
                if (ep != NULL) {
1051
                    tm->tm_isdst = 1;
1052
#ifdef TM_GMTOFF
1053
                    tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
1054
#endif
1055
#ifdef TM_ZONE
1056
                    tm->TM_ZONE = __UNCONST(nadt[i]);
1057
#endif
1058
                    bp = ep;
1059
                    continue;
1060
                }
1061
                /*
1062
                 * Our current timezone
1063
                 */
1064
                ep = find_string(bp, &i,
1065
                             (const char * const *)tzname,
1066
                              NULL, 2);
1067
                if (ep != NULL) {
1068
                    tm->tm_isdst = i;
1069
#ifdef TM_GMTOFF
1070
                    tm->TM_GMTOFF = -timezone;
1071
#endif
1072
#ifdef TM_ZONE
1073
                    tm->TM_ZONE = tzname[i];
1074
#endif
1075
                    bp = ep;
1076
                    continue;
1077
                }
1078
loadzone:
1079
                /*
1080
                 * The hard way, load the zone!
1081
                 */
1082
                if (fromzone(&bp, tm, mandatory))
1083
                    continue;
1084
                goto out;
1085
            }
1086
            offs = 0;
1087
            for (i = 0; i < 4; ) {
1088
                if (isdigit(*bp)) {
1089
                    offs = offs * 10 + (*bp++ - '0');
1090
                    i++;
1091
                    continue;
1092
                }
1093
                if (i == 2 && *bp == ':') {
1094
                    bp++;
1095
                    continue;
1096
                }
1097
                break;
1098
            }
1099
            if (isdigit(*bp))
1100
                goto out;
1101
            switch (i) {
1102
            case 2:
1103
                offs *= SECSPERHOUR;
1104
                break;
1105
            case 4:
1106
                i = offs % 100;
1107
                offs /= 100;
1108
                if (i >= SECSPERMIN)
1109
                    goto out;
1110
                /* Convert minutes into decimal */
1111
                offs = offs * SECSPERHOUR + i * SECSPERMIN;
1112
                break;
1113
            default:
1114
out:
1115
                if (mandatory)
1116
                    return NULL;
1117
                bp = zname;
1118
                continue;
1119
            }
1120
            /* ISO 8601 & RFC 3339 limit to 23:59 max */
1121
            if (offs >= (HOURSPERDAY * SECSPERHOUR))
1122
                goto out;
1123
            if (neg)
1124
                offs = -offs;
1125
            tm->tm_isdst = 0; /* XXX */
1126
#ifdef TM_GMTOFF
1127
            tm->TM_GMTOFF = offs;
1128
#endif
1129
#ifdef TM_ZONE
1130
            tm->TM_ZONE = NULL; /* XXX */
1131
#endif
1132
            continue;
1133
1134
        /*
1135
         * Miscellaneous conversions.
1136
         */
1137
        case 'n': /* Any kind of white-space. */
1138
        case 't':
1139
            while (isspace(*bp))
1140
                bp++;
1141
            LEGAL_ALT(0);
1142
            continue;
1143
1144
1145
        default:  /* Unknown/unsupported conversion. */
1146
            return NULL;
1147
        }
1148
    }
1149
1150
    if (!HAVE_YDAY(state) && HAVE_YEAR(state)) {
1151
        if (HAVE_MON(state) && HAVE_MDAY(state)) {
1152
            /* calculate day of year (ordinal date) */
1153
            tm->tm_yday =  start_of_month[isleap_sum(tm->tm_year,
1154
                TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
1155
            state |= S_YDAY;
1156
        } else if (day_offset != -1) {
1157
            /*
1158
             * Set the date to the first Sunday (or Monday)
1159
             * of the specified week of the year.
1160
             */
1161
            if (!HAVE_WDAY(state)) {
1162
                tm->tm_wday = day_offset;
1163
                state |= S_WDAY;
1164
            }
1165
            tm->tm_yday = (7 -
1166
                first_wday_of(tm->tm_year + TM_YEAR_BASE) +
1167
                day_offset) % 7 + (week_offset - 1) * 7 +
1168
                tm->tm_wday  - day_offset;
1169
            state |= S_YDAY;
1170
        }
1171
    }
1172
1173
    if (HAVE_YDAY(state) && HAVE_YEAR(state)) {
1174
        int isleap;
1175
1176
        if (!HAVE_MON(state)) {
1177
            /* calculate month of day of year */
1178
            i = 0;
1179
            isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
1180
            while (tm->tm_yday >= start_of_month[isleap][i])
1181
                i++;
1182
            if (i > 12) {
1183
                i = 1;
1184
                tm->tm_yday -= start_of_month[isleap][12];
1185
                tm->tm_year++;
1186
            }
1187
            tm->tm_mon = i - 1;
1188
            state |= S_MON;
1189
        }
1190
1191
        if (!HAVE_MDAY(state)) {
1192
            /* calculate day of month */
1193
            isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
1194
            tm->tm_mday = tm->tm_yday -
1195
                start_of_month[isleap][tm->tm_mon] + 1;
1196
            state |= S_MDAY;
1197
        }
1198
1199
        if (!HAVE_WDAY(state)) {
1200
            /* calculate day of week */
1201
            i = 0;
1202
            week_offset = first_wday_of(tm->tm_year);
1203
            while (i++ <= tm->tm_yday) {
1204
                if (week_offset++ >= 6)
1205
                    week_offset = 0;
1206
            }
1207
            tm->tm_wday = week_offset;
1208
            state |= S_WDAY;
1209
        }
1210
    }
1211
1212
    return (char*)bp;
1213
}
1214
1215
1216
/*
 * Parse an unsigned decimal number at the start of buf.
 *
 * buf  - input text; must begin with an ASCII digit for the call to succeed
 * dest - receives the parsed value on success; untouched on failure
 * llim - smallest acceptable value
 * ulim - largest acceptable value; it also bounds the field width, since
 *        at most as many digits are consumed as ulim has decimal digits
 *
 * Returns a pointer to the first unconsumed character, or NULL when the
 * input does not start with a digit or the value is outside [llim, ulim].
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim, unsigned int ulim)
{
    unsigned int value = 0;
    /* Loses one decimal digit per consumed input digit; zero means stop. */
    unsigned int budget = ulim;

    if (!(*buf >= '0' && *buf <= '9'))
        return NULL;

    for (;;) {
        value = value * 10 + (unsigned int)(*buf - '0');
        buf++;
        budget /= 10;

        /* Already too large, or the field width is used up. */
        if (value > ulim || budget == 0)
            break;
        /* Next character is not a digit: the number ends here. */
        if (*buf < '0' || *buf > '9')
            break;
    }

    if (value < llim || value > ulim)
        return NULL;

    *dest = (int)value;
    return buf;
}
1242
1243
/*
 * Match the text at bp, ignoring case, against the entries of up to two
 * name tables.
 *
 * bp  - NUL-terminated input text
 * tgt - on a match, receives the index (0 .. c-1) of the matching entry
 * n1  - first table of c names; if NULL nothing is searched at all
 * n2  - optional second table of c names, consulted only when n1 yields
 *       no match; may be NULL
 * c   - number of entries in each table
 *
 * Returns a pointer just past the matched text, or NULL when no entry
 * matches (*tgt is left untouched in that case).  Within a table the
 * first entry that is a prefix of the input wins.
 *
 * Empty (or NULL) entries never match.  A zero-length name would
 * otherwise compare equal to any input while consuming nothing; this
 * matters for tables such as tzname[], whose DST slot can be "" for
 * zones without daylight saving time.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
        const char * const *n2, int c)
{
    int i;
    size_t len;

    /* check full name - then abbreviated ones */
    for (; n1 != NULL; n1 = n2, n2 = NULL) {
        for (i = 0; i < c; i++, n1++) {
            /* An absent or empty name cannot identify anything. */
            if (*n1 == NULL || **n1 == '\0')
                continue;
            len = strlen(*n1);
            if (strncasecmp(*n1, (const char *)bp, len) == 0) {
                *tgt = i;
                return bp + len;
            }
        }
    }

    /* Nothing matched */
    return NULL;
}
1264
#endif