Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/date/lib/parse_posix.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * The MIT License (MIT)
3
 *
4
 * Copyright (c) 2021 MongoDB, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
25
#include "timelib.h"
#include "timelib_private.h"

#include <limits.h>
27
28
// This section adds the missing 'strndup' implementation on Windows.
29
#if TIMELIB_USE_BUILTIN_STRNDUP == 1
30
# include <stdlib.h>
31
# include <string.h>
32
33
/**
 * char* timelib_strndup(const char* s, size_t n)
 *
 * Returns a pointer to a copy of 's' with at most 'n' characters
 * in memory obtained from 'malloc', or 'NULL' if insufficient
 * memory was available.  The result is always NUL terminated.
 *
 * At most 'n' bytes of 's' are inspected, so 's' does not have to be
 * NUL terminated as long as it holds at least 'n' readable bytes.
 */
static char* timelib_strndup(const char* s, size_t n)
{
  /* Bounded search for the terminator: never look beyond the first 'n' bytes */
  const char* terminator = (const char*)memchr(s, '\0', n);
  size_t len = terminator ? (size_t)(terminator - s) : n;
  char* result = (char*)malloc(len + 1);

  if (!result) {
    return NULL;
  }

  memcpy(result, s, len);
  result[len] = '\0';
  return result;
}
57
#endif
58
59
/* Forward declarations */
60
static timelib_posix_trans_info *timelib_posix_trans_info_ctor(void);
61
static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts);
62
63
/* "<" [+-]? .+? ">" */
64
static char *read_description_numeric_abbr(char **ptr)
65
9
{
66
9
  const char *begin = *ptr + 1;
67
68
  // skip '<'
69
9
  (*ptr)++;
70
71
54
  while (**ptr != '\0' && **ptr != '>') {
72
45
    (*ptr)++;
73
45
  }
74
75
9
  if (**ptr == '\0') {
76
0
    return NULL;
77
0
  }
78
79
9
  if (**ptr == '>') {
80
9
    (*ptr)++;
81
9
  }
82
83
  // Abbreviation may not be empty
84
9
  if (*ptr - begin - 1 < 1) {
85
0
    return NULL;
86
0
  }
87
88
9
  return timelib_strndup(begin, *ptr - begin - 1);
89
9
}
90
91
/* [A-Za-z]+ */
92
static char *read_description_abbr(char **ptr)
93
8.32k
{
94
8.32k
  const char *begin = *ptr;
95
96
  // Find the end
97
33.5k
  while ((**ptr >= 'A' && **ptr <= 'Z') || (**ptr >= 'a' && **ptr <= 'z')) {
98
25.2k
    (*ptr)++;
99
25.2k
  }
100
101
  // Abbreviation may not be empty
102
8.32k
  if (*ptr - begin < 1) {
103
0
    return NULL;
104
0
  }
105
106
8.32k
  return timelib_strndup(begin, *ptr - begin);
107
8.32k
}
108
109
/* "<" [+-]? .+? ">" | [A-Za-z]+ */
110
/*
 * Reads a time zone abbreviation: the bracketed form when '*ptr' starts with
 * '<', the plain alphabetic form otherwise. Returns whatever the selected
 * reader returns (NULL on failure).
 */
static char *read_description(char **ptr)
{
  return (**ptr == '<')
    ? read_description_numeric_abbr(ptr)
    : read_description_abbr(ptr);
}
118
119
/* [+-]? */
120
/*
 * Consumes an optional sign character at '*ptr'.
 *
 * Returns -1 for '-', and 1 for '+' or when no sign is present. '*ptr' is
 * advanced only when a sign character was consumed.
 */
static int read_sign(char **ptr)
{
  switch (**ptr) {
    case '-':
      (*ptr)++;
      return -1;

    case '+':
      (*ptr)++;
      return 1;

    default:
      return 1;
  }
}
133
134
/* [0-9]+ */
135
static timelib_sll read_number(char **ptr)
136
15.2k
{
137
15.2k
  const char *begin = *ptr;
138
15.2k
  int acc = 0;
139
140
  // skip leading 0's
141
24.5k
  while (**ptr == '0') {
142
9.25k
    (*ptr)++;
143
9.25k
  }
144
145
22.4k
  while (**ptr >= '0' && **ptr <= '9') {
146
7.20k
    acc = acc * 10;
147
7.20k
    acc += (**ptr) - '0';
148
7.20k
    (*ptr)++;
149
7.20k
  }
150
151
15.2k
  if (begin == *ptr) {
152
0
    return TIMELIB_UNSET;
153
0
  }
154
155
15.2k
  return acc;
156
15.2k
}
157
158
/* [+-]? [0-9]+ ( ":" [0-9]+ ( ":" [0-9]+ )? )? */
159
static timelib_sll read_offset(char **ptr)
160
8.33k
{
161
8.33k
  const char *begin;
162
8.33k
  int bias = read_sign(ptr);
163
8.33k
  int hours = 0;
164
8.33k
  int minutes = 0;
165
8.33k
  int seconds = 0;
166
167
8.33k
  begin = *ptr;
168
169
  // read through to : or non-digit for hours
170
8.33k
  hours = read_number(ptr);
171
8.33k
  if (hours == TIMELIB_UNSET) {
172
0
    return hours;
173
0
  }
174
175
  // check for optional minutes
176
8.33k
  if (**ptr == ':') {
177
9
    (*ptr)++; // skip ':'
178
9
    minutes = read_number(ptr);
179
9
    if (minutes == TIMELIB_UNSET) {
180
0
      return minutes;
181
0
    }
182
9
  }
183
184
  // check for optional seconds
185
8.33k
  if (**ptr == ':') {
186
0
    (*ptr)++; // skip ':'
187
0
    seconds = read_number(ptr);
188
0
    if (seconds == TIMELIB_UNSET) {
189
0
      return seconds;
190
0
    }
191
0
  }
192
193
8.33k
  if (begin == *ptr) {
194
0
    return TIMELIB_UNSET;
195
0
  }
196
197
  // multiplication with -1, because the offset in the identifier is the
198
  // 'wrong' way around as for example EST5 is UTC-5 (and not +5)
199
8.33k
  return -1 * bias * (hours * 3600 + minutes * 60 + seconds);
200
8.33k
}
201
202
203
// Mm.w.d
204
static timelib_posix_trans_info* read_trans_spec_mwd(char **ptr)
205
2.31k
{
206
2.31k
  timelib_posix_trans_info *tmp = timelib_posix_trans_info_ctor();
207
208
2.31k
  tmp->type = TIMELIB_POSIX_TRANS_TYPE_MWD;
209
210
  // Skip 'M'
211
2.31k
  (*ptr)++;
212
213
2.31k
  tmp->mwd.month = read_number(ptr);
214
2.31k
  if (tmp->mwd.month == TIMELIB_UNSET) {
215
0
    goto fail;
216
0
  }
217
218
  // check for '.' and skip it
219
2.31k
  if (**ptr != '.') {
220
0
    goto fail;
221
0
  }
222
2.31k
  (*ptr)++;
223
224
2.31k
  tmp->mwd.week = read_number(ptr);
225
2.31k
  if (tmp->mwd.week == TIMELIB_UNSET) {
226
0
    goto fail;
227
0
  }
228
229
  // check for '.' and skip it
230
2.31k
  if (**ptr != '.') {
231
0
    goto fail;
232
0
  }
233
2.31k
  (*ptr)++;
234
235
2.31k
  tmp->mwd.dow = read_number(ptr);
236
2.31k
  if (tmp->mwd.dow == TIMELIB_UNSET) {
237
0
    goto fail;
238
0
  }
239
240
2.31k
  return tmp;
241
242
0
fail:
243
0
  timelib_posix_trans_info_dtor(tmp);
244
0
  return NULL;
245
2.31k
}
246
247
// (Jn | n | Mm.w.d) ( /time )?
248
static timelib_posix_trans_info* read_transition_spec(char **ptr)
249
2.31k
{
250
2.31k
  timelib_posix_trans_info *tmp;
251
252
2.31k
  if (**ptr == 'M') {
253
2.31k
    tmp = read_trans_spec_mwd(ptr);
254
2.31k
    if (!tmp) {
255
0
      return NULL;
256
0
    }
257
2.31k
  } else {
258
0
    tmp = timelib_posix_trans_info_ctor();
259
260
0
    if (**ptr == 'J') {
261
0
      tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29;
262
0
      (*ptr)++;
263
0
    }
264
265
0
    tmp->days = read_number(ptr);
266
0
    if (tmp->days == TIMELIB_UNSET) {
267
0
      goto fail;
268
0
    }
269
0
  }
270
271
  // Check for the optional hour
272
2.31k
  if (**ptr == '/') {
273
1.15k
    (*ptr)++;
274
1.15k
    tmp->hour = read_offset(ptr);
275
1.15k
    if (tmp->hour == TIMELIB_UNSET) {
276
0
      goto fail;
277
0
    }
278
    // as the bias for normal offsets = -1, we need to reverse it here
279
1.15k
    tmp->hour = -tmp->hour;
280
1.15k
  }
281
282
2.31k
  return tmp;
283
284
0
fail:
285
0
  timelib_posix_trans_info_dtor(tmp);
286
0
  return NULL;
287
2.31k
}
288
289
static timelib_posix_trans_info* timelib_posix_trans_info_ctor(void)
290
2.31k
{
291
2.31k
  timelib_posix_trans_info *tmp;
292
293
2.31k
  tmp = timelib_calloc(1, sizeof(timelib_posix_trans_info));
294
2.31k
  tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29;
295
2.31k
  tmp->hour = 2 * 3600;
296
297
2.31k
  return tmp;
298
2.31k
}
299
300
/* Releases a transition info created by timelib_posix_trans_info_ctor(). */
static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts)
{
  // The structure holds no pointers of its own, one free is enough
  timelib_free(ts);
}
304
305
/*
 * Destroys a parsed POSIX string: frees the members that are set (both
 * transition infos and both abbreviations) and then the structure itself.
 * 'ps' is dereferenced unconditionally.
 */
void timelib_posix_str_dtor(timelib_posix_str *ps)
{
  // transition rules
  if (ps->dst_begin != NULL) {
    timelib_posix_trans_info_dtor(ps->dst_begin);
  }
  if (ps->dst_end != NULL) {
    timelib_posix_trans_info_dtor(ps->dst_end);
  }

  // abbreviations
  if (ps->std != NULL) {
    timelib_free(ps->std);
  }
  if (ps->dst != NULL) {
    timelib_free(ps->dst);
  }

  timelib_free(ps);
}
322
323
timelib_posix_str* timelib_parse_posix_str(const char *posix)
324
7.17k
{
325
7.17k
  timelib_posix_str *tmp = timelib_calloc(1, sizeof(timelib_posix_str));
326
7.17k
  char *ptr = (char*) posix;
327
328
  // read standard description (ie. EST or <-03>)
329
7.17k
  tmp->std = read_description(&ptr);
330
7.17k
  if (!tmp->std) {
331
0
    timelib_posix_str_dtor(tmp);
332
0
    return NULL;
333
0
  }
334
335
  // read required offset
336
7.17k
  tmp->std_offset = read_offset(&ptr);
337
7.17k
  if (tmp->std_offset == TIMELIB_UNSET) {
338
0
    timelib_posix_str_dtor(tmp);
339
0
    return NULL;
340
0
  }
341
342
  // if we're at the end return, otherwise we'll continue to try to parse
343
  // the dst abbreviation and spec
344
7.17k
  if (*ptr == '\0') {
345
6.01k
    return tmp;
346
6.01k
  }
347
348
  // assume dst is there, and initialise offset
349
1.15k
  tmp->dst_offset = tmp->std_offset + 3600;
350
351
1.15k
  tmp->dst = read_description(&ptr);
352
1.15k
  if (!tmp->dst) {
353
0
    timelib_posix_str_dtor(tmp);
354
0
    return NULL;
355
0
  }
356
357
  // if we have a "," here, then the dst offset is the standard offset +
358
  // 3600 seconds, otherwise, try to parse the dst offset
359
1.15k
  if (*ptr != ',' && *ptr != '\0') {
360
1
    tmp->dst_offset = read_offset(&ptr);
361
1
    if (tmp->dst_offset == TIMELIB_UNSET) {
362
0
      timelib_posix_str_dtor(tmp);
363
0
      return NULL;
364
0
    }
365
1
  }
366
367
  // if we *don't* have a "," here, we're missing the dst transitions
368
  // ,start[/time],end[/time]
369
1.15k
  if (*ptr != ',') {
370
0
    timelib_posix_str_dtor(tmp);
371
0
    return NULL;
372
0
  }
373
374
1.15k
  ptr++; // skip ','
375
376
  // start[/time]
377
1.15k
  tmp->dst_begin = read_transition_spec(&ptr);
378
1.15k
  if (!tmp->dst_begin) {
379
0
    timelib_posix_str_dtor(tmp);
380
0
    return NULL;
381
0
  }
382
383
  // if we *don't* have a "," here, we're missing the dst end transition
384
  // ,end[/time]
385
1.15k
  if (*ptr != ',') {
386
0
    timelib_posix_str_dtor(tmp);
387
0
    return NULL;
388
0
  }
389
390
1.15k
  ptr++; // skip ','
391
392
  // end[/time]
393
1.15k
  tmp->dst_end = read_transition_spec(&ptr);
394
1.15k
  if (!tmp->dst_end) {
395
0
    timelib_posix_str_dtor(tmp);
396
0
    return NULL;
397
0
  }
398
399
  // make sure there is no trailing data
400
1.15k
  if (*ptr != '\0') {
401
0
    timelib_posix_str_dtor(tmp);
402
0
    return NULL;
403
0
  }
404
405
1.15k
  return tmp;
406
1.15k
}
407
408
/* Number of days in each month, January first. The first index selects the
 * row: 0 for a normal year, 1 for a leap year. */
static const int month_lengths[2][MONTHS_PER_YEAR] = {
  /* normal year */
  { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
  /* leap year */
  { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};
412
413
/* This function is adapted from the 'localtime.c' function 'transtime' as bundled with the 'tzcode' project
414
 * from IANA, and is public domain licensed. */
415
/*
 * Returns the number of seconds between the start of 'year' and midnight of
 * the day described by 'psi'. The transition's time of day (psi->hour) is not
 * included. Returns 0 for an unknown transition type.
 */
static timelib_sll calc_transition(timelib_posix_trans_info *psi, timelib_sll year)
{
  int is_leap = timelib_is_leap(year);
  int idx, day, zeller_month, zeller_year, century, year_of_century, first_dow;
  timelib_sll total;

  if (psi->type == TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29) {
    /* Day numbers start at 1 and never count February 29th, so from day 60
     * onwards a leap year needs one extra day. */
    timelib_sll day_index = (psi->days - 1);

    if (is_leap && psi->days >= 60) {
      day_index++;
    }

    return day_index * SECS_PER_DAY;
  }

  if (psi->type == TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29) {
    /* Zero-based day of the year, leap days included */
    return psi->days * SECS_PER_DAY;
  }

  if (psi->type != TIMELIB_POSIX_TRANS_TYPE_MWD) {
    return 0;
  }

  /*
   * Mm.n.d - nth "dth day" of month m.
   *
   * Zeller's Congruence gives the day-of-week of the first day of the
   * month; January and February are treated as part of the previous year.
   */
  zeller_month = (psi->mwd.month + 9) % 12 + 1;
  zeller_year = (psi->mwd.month <= 2) ? (year - 1) : year;
  century = zeller_year / 100;
  year_of_century = zeller_year % 100;
  first_dow = ((26 * zeller_month - 2) / 10 + 1 + year_of_century + year_of_century / 4 + century / 4 - 2 * century) % 7;
  if (first_dow < 0) {
    first_dow += DAYS_PER_WEEK;
  }

  /* Day-of-month (zero-origin) of the first requested weekday in the month */
  day = psi->mwd.dow - first_dow;
  if (day < 0) {
    day += DAYS_PER_WEEK;
  }

  /* Step forward a week at a time up to the requested week, stopping early
   * when the next step would leave the month. */
  idx = 1;
  while (idx < psi->mwd.week && day + DAYS_PER_WEEK < month_lengths[is_leap][psi->mwd.month - 1]) {
    day += DAYS_PER_WEEK;
    ++idx;
  }

  /* "day" is now the day-of-month (zero-origin) of the day we want; add the
   * full months that come before it. */
  total = day * SECS_PER_DAY;
  for (idx = 0; idx < psi->mwd.month - 1; ++idx) {
    total += month_lengths[is_leap][idx] * SECS_PER_DAY;
  }

  return total;
}
478
479
/*
 * Returns floor(n/4) - floor(n/100) + floor(n/400) for n = y - 1: the number
 * of leap days that precede January 1st of year 'y', counted from a fixed
 * origin. Only differences between two results are meaningful.
 *
 * C's '/' truncates towards zero, which gives a wrong count once 'y - 1' is
 * negative. The year is therefore first split into whole 400-year cycles
 * (97 leap years each) and a remainder in 0..399, for which truncating and
 * flooring division agree. Results for positive years are unchanged.
 */
static timelib_sll count_leap_years(timelib_sll y)
{
  timelib_sll cycles;
  timelib_sll rem;

  /* Because we want this for Jan 1, the leap day hasn't happened yet, so
   * subtract one year before we calculate */
  y--;

  cycles = y / 400;
  rem = y % 400;
  if (rem < 0) {
    /* turn the truncated quotient/remainder into the floored ones */
    rem += 400;
    cycles--;
  }

  return (cycles * 97) + (rem / 4) - (rem / 100);
}
487
488
/*
 * Returns the number of seconds between January 1st 1970 and January 1st of
 * 'year': DAYS_PER_YEAR days for every year in between, corrected by the
 * difference in leap year counts.
 */
timelib_sll timelib_ts_at_start_of_year(timelib_sll year)
{
  timelib_sll leap_days = count_leap_years(year) - count_leap_years(1970);
  timelib_sll days = ((year - 1970) * DAYS_PER_YEAR) + leap_days;

  return SECS_PER_DAY * days;
}
499
500
/*
 * Appends the two transitions of 'year' (into and out of DST) to
 * 'transitions', earliest first, and raises transitions->count by two.
 *
 * Both transition rules of tz->posix_info are dereferenced unconditionally.
 */
void timelib_get_transitions_for_year(timelib_tzinfo *tz, timelib_sll year, timelib_posix_transitions *transitions)
{
  timelib_sll year_start = timelib_ts_at_start_of_year(year);
  timelib_sll to_dst;
  timelib_sll to_std;
  int dst_slot;
  int std_slot;

  /* The rule's time is corrected with the offset in effect just before the
   * transition: the standard offset when going into DST ... */
  to_dst = year_start
    + calc_transition(tz->posix_info->dst_begin, year)
    + tz->posix_info->dst_begin->hour
    - tz->posix_info->std_offset;

  /* ... and the DST offset when going back to standard time */
  to_std = year_start
    + calc_transition(tz->posix_info->dst_end, year)
    + tz->posix_info->dst_end->hour
    - tz->posix_info->dst_offset;

  /* Store the earlier of the two transitions first */
  dst_slot = (to_dst < to_std) ? 0 : 1;
  std_slot = 1 - dst_slot;

  transitions->times[transitions->count + dst_slot] = to_dst;
  transitions->types[transitions->count + dst_slot] = tz->posix_info->type_index_dst_type;

  transitions->times[transitions->count + std_slot] = to_std;
  transitions->types[transitions->count + std_slot] = tz->posix_info->type_index_std_type;

  transitions->count += 2;
}
530
531
/*
 * Looks up the type information that applies at 'ts' according to the POSIX
 * string of 'tz'.
 *
 * When 'transition_time' is not NULL it receives the time of the transition
 * that started the returned period. Returns NULL when 'ts' does not lie
 * before any of the calculated transitions.
 */
ttinfo* timelib_fetch_posix_timezone_offset(timelib_tzinfo *tz, timelib_sll ts, timelib_sll *transition_time)
{
  timelib_posix_transitions transitions = { 0 };
  timelib_time              gmt;
  timelib_sll               year;
  size_t                    idx;

  /* Without an end-of-DST rule the standard type applies all year round */
  if (tz->posix_info->dst_end == NULL) {
    if (transition_time != NULL) {
      *transition_time = tz->trans[tz->bit64.timecnt - 1];
    }
    return &(tz->type[tz->posix_info->type_index_std_type]);
  }

  /* The year (UTC) in which 'ts' falls */
  timelib_unixtime2gmt(&gmt, ts);
  year = gmt.y;

  /* Collect the transitions of the previous, the current, and the next year */
  timelib_get_transitions_for_year(tz, year - 1, &transitions);
  timelib_get_transitions_for_year(tz, year,     &transitions);
  timelib_get_transitions_for_year(tz, year + 1, &transitions);

  /* The first transition that lies after 'ts' ends the period we are after;
   * the transition before it started that period */
  for (idx = 1; idx < transitions.count; idx++) {
    if (ts >= transitions.times[idx]) {
      continue;
    }

    if (transition_time != NULL) {
      *transition_time = transitions.times[idx - 1];
    }
    return &(tz->type[transitions.types[idx - 1]]);
  }

  return NULL;
}