Coverage Report

Created: 2026-04-29 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext/gettext-tools/src/format-php.c
Line
Count
Source
1
/* PHP format strings.
2
   Copyright (C) 2001-2026 Free Software Foundation, Inc.
3
4
   This program is free software: you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; either version 3 of the License, or
7
   (at your option) any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16
17
/* Written by Bruno Haible.  */
18
19
#include <config.h>
20
21
#include <stdbool.h>
22
#include <stdlib.h>
23
24
#include "format.h"
25
#include "c-ctype.h"
26
#include "xalloc.h"
27
#include "xvasprintf.h"
28
#include "format-invalid.h"
29
#include "gettext.h"
30
31
0
#define _(str) gettext (str)
32
33
/* PHP format strings are described in
34
   https://www.php.net/manual/en/function.sprintf.php, and are implemented in
35
   php-8.1.0/ext/standard/formatted_print.c.
36
   A directive
37
   - starts with '%' or '%m$' where m is a positive integer,
38
   - is optionally followed by any of the characters '0', '-', ' ', or
39
     "'<anychar>", each of which acts as a flag,
40
   - is optionally followed by a width specification: a nonempty digit
41
     sequence,
42
   - is optionally followed by '.' and a precision specification: an [optional?]
43
     nonempty digit sequence,
44
     (It's optional per <https://www.php.net/manual/en/function.sprintf.php>,
45
     but this is actually buggy: <https://github.com/php/php-src/issues/18897>.)
46
   - is optionally followed by a size specifier 'l', which is ignored,
47
   - is finished by a specifier
48
       - 's', that needs a string argument,
49
       - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
50
       - 'e', 'E', 'f', 'F', 'g', 'G', 'h', 'H', that need a floating-point
51
         argument,
52
       - 'c', that needs a character argument.
53
   Additionally there is the directive '%%', which takes no argument.
54
   Numbered and unnumbered argument specifications can be used in the same
55
   string.  Numbered argument specifications have no influence on the
56
   "current argument index", that is incremented each time an argument is read.
57
 */
58
59
/* Kind of argument that a directive consumes.  The value is chosen from the
   conversion specifier by the switch statement in format_parse.  */
enum format_arg_type
60
{
61
  FAT_INTEGER,
62
  FAT_FLOAT,
63
  FAT_CHARACTER,
64
  FAT_STRING
65
};
66
67
/* One argument consumed by a format string: its position (1-based, since
   format_parse rejects argument number 0 and starts unnumbered arguments
   at 1) and the type required by the directive that reads it.  */
struct numbered_arg
68
{
69
  size_t number;
70
  enum format_arg_type type;
71
};
72
73
/* Description of a parsed format string, as built by format_parse.  */
struct spec
74
{
75
  /* Number of directives seen, '%%' included.  */
  size_t directives;
76
  /* We consider a directive as "likely intentional" if it does not contain a
77
     space.  This prevents xgettext from flagging strings like "100% complete"
78
     as 'php-format' if they don't occur in a context that requires a format
79
     string.  */
80
  size_t likely_intentional_directives;
81
  /* Number of elements in 'numbered', and the array itself (may be NULL when
     the count is 0).  format_parse sorts it by argument number and merges
     entries that refer to the same argument.  */
  size_t numbered_arg_count;
82
  struct numbered_arg *numbered;
83
};
84
85
86
/* Comparison callback for qsort: orders two 'struct numbered_arg' elements
   by ascending 'number'.  Returns 1, -1 or 0.  The 'type' field is not
   looked at.  */
static int
87
numbered_arg_compare (const void *p1, const void *p2)
88
0
{
89
0
  size_t n1 = ((const struct numbered_arg *) p1)->number;
90
0
  size_t n2 = ((const struct numbered_arg *) p2)->number;
91
92
0
  /* Explicit comparisons instead of a subtraction, because the operands are
     unsigned.  */
  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
93
0
}
94
95
/* Parse FORMAT as a PHP format string.
   On success, return a freshly allocated 'struct spec' whose 'numbered'
   array is sorted by argument number with duplicates merged.
   On failure, store an error message in *INVALID_REASON, free the
   intermediate array and return NULL.
   TRANSLATED is not referenced in this function body.
   FDI is not referenced directly either; it is presumably consumed by the
   FDI_SET macro (defined outside this file) to mark the start, end and
   error position of each directive -- confirm in format.h.  The TEST
   program in this file passes NULL for it.  */
static void *
96
format_parse (const char *format, bool translated, char *fdi,
97
              char **invalid_reason)
98
0
{
99
0
  /* Not referenced directly below; presumably used by FDI_SET to compute
     offsets into FDI -- confirm against the macro definition.  */
  const char *const format_start = format;
100
101
0
  size_t directives = 0;
102
0
  size_t likely_intentional_directives = 0;
103
0
  size_t numbered_arg_count = 0;
104
0
  struct numbered_arg *numbered = NULL;
105
0
  size_t numbered_allocated = 0;
106
0
  /* The "current argument index": advanced only by directives without an
     explicit 'm$' argument number.  */
  size_t unnumbered_arg_count = 0;
107
108
0
  for (; *format != '\0';)
109
0
    if (*format++ == '%')
110
0
      {
111
        /* A directive.  */
112
0
        FDI_SET (format - 1, FMTDIR_START);
113
0
        directives++;
114
0
        bool likely_intentional = true;
115
116
0
        if (*format != '%')
117
0
          {
118
            /* A complex directive.  */
119
120
0
            /* Tentatively take the next unnumbered argument; this is undone
               below if an explicit 'm$' follows.  */
            size_t number = ++unnumbered_arg_count;
121
0
            if (c_isdigit (*format))
122
0
              {
123
0
                /* Look ahead with F without committing: the digits are an
                   argument number only if a '$' follows.  Otherwise FORMAT
                   is left untouched and the digits are parsed again as
                   flags/width below.  */
                const char *f = format;
124
0
                size_t m = 0;
125
126
0
                do
127
0
                  {
128
0
                    /* NOTE(review): no overflow check here; for a very long
                       digit sequence the size_t arithmetic wraps around.  */
                    m = 10 * m + (*f - '0');
129
0
                    f++;
130
0
                  }
131
0
                while (c_isdigit (*f));
132
133
0
                if (*f == '$')
134
0
                  {
135
0
                    if (m == 0)
136
0
                      {
137
0
                        *invalid_reason = INVALID_ARGNO_0 (directives);
138
0
                        FDI_SET (f, FMTDIR_ERROR);
139
0
                        goto bad_format;
140
0
                      }
141
0
                    number = m;
142
0
                    format = ++f;
143
0
                    /* A numbered specification does not advance the current
                       argument index.  */
                    --unnumbered_arg_count;
144
0
                  }
145
0
              }
146
147
            /* Parse flags.  */
148
0
            for (;;)
149
0
              {
150
0
                if (*format == '0' || *format == '-' || *format == ' ')
151
0
                  {
152
0
                    if (*format == ' ')
153
0
                      likely_intentional = false;
154
0
                    format++;
155
0
                  }
156
0
                else if (*format == '\'')
157
0
                  {
158
0
                    /* The quote and the character after it form a single
                       flag; that character must exist.  */
                    format++;
159
0
                    if (*format == '\0')
160
0
                      {
161
0
                        *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
162
0
                        FDI_SET (format - 1, FMTDIR_ERROR);
163
0
                        goto bad_format;
164
0
                      }
165
0
                    format++;
166
0
                  }
167
0
                else
168
0
                  break;
169
0
              }
170
171
            /* Parse width.  */
172
0
            if (c_isdigit (*format))
173
0
              {
174
0
                do
175
0
                  format++;
176
0
                while (c_isdigit (*format));
177
0
              }
178
179
            /* Parse precision.  */
180
0
            if (*format == '.')
181
0
              {
182
0
                format++;
183
184
0
                if (c_isdigit (*format))
185
0
                  {
186
0
                    do
187
0
                      format++;
188
0
                    while (c_isdigit (*format));
189
0
                  }
190
0
                else
191
0
                  /* A '.' without digits: step back onto the '.', which the
                     specifier switch below then rejects.  */
                  --format;     /* will jump to bad_format */
192
0
              }
193
194
            /* Parse size.  */
195
0
            if (*format == 'l')
196
0
              format++;
197
198
0
            /* Map the conversion specifier to the argument type it needs.  */
            enum format_arg_type type;
199
0
            switch (*format)
200
0
              {
201
0
              case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
202
0
                type = FAT_INTEGER;
203
0
                break;
204
0
              case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
205
0
              case 'h': case 'H':
206
0
                type = FAT_FLOAT;
207
0
                break;
208
0
              case 'c':
209
0
                type = FAT_CHARACTER;
210
0
                break;
211
0
              case 's':
212
0
                type = FAT_STRING;
213
0
                break;
214
0
              default:
215
0
                /* Either the string ended inside the directive, or the
                   character is not a known specifier.  */
                if (*format == '\0')
216
0
                  {
217
0
                    *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
218
0
                    FDI_SET (format - 1, FMTDIR_ERROR);
219
0
                  }
220
0
                else
221
0
                  {
222
0
                    *invalid_reason =
223
0
                      INVALID_CONVERSION_SPECIFIER (directives, *format);
224
0
                    FDI_SET (format, FMTDIR_ERROR);
225
0
                  }
226
0
                goto bad_format;
227
0
              }
228
229
0
            /* Append (number, type), growing the array when it is full
               (capacity sequence 0, 1, 3, 7, ...).  */
            if (numbered_allocated == numbered_arg_count)
230
0
              {
231
0
                numbered_allocated = 2 * numbered_allocated + 1;
232
0
                numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
233
0
              }
234
0
            numbered[numbered_arg_count].number = number;
235
0
            numbered[numbered_arg_count].type = type;
236
0
            numbered_arg_count++;
237
0
          }
238
239
0
        if (likely_intentional)
240
0
          likely_intentional_directives++;
241
0
        /* Here FORMAT points at the conversion specifier, or at the second
           '%' of a '%%' directive.  */
        FDI_SET (format, FMTDIR_END);
242
243
0
        format++;
244
0
      }
245
246
  /* Sort the numbered argument array, and eliminate duplicates.  */
247
0
  if (numbered_arg_count > 1)
248
0
    {
249
0
      qsort (numbered, numbered_arg_count,
250
0
             sizeof (struct numbered_arg), numbered_arg_compare);
251
252
      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
253
0
      bool err = false;
254
0
      size_t i, j;
255
0
      for (i = j = 0; i < numbered_arg_count; i++)
256
0
        if (j > 0 && numbered[i].number == numbered[j-1].number)
257
0
          {
258
0
            /* Same argument referenced again: the types must agree.  */
            enum format_arg_type type1 = numbered[i].type;
259
0
            enum format_arg_type type2 = numbered[j-1].type;
260
261
0
            enum format_arg_type type_both;
262
0
            if (type1 == type2)
263
0
              type_both = type1;
264
0
            else
265
0
              {
266
                /* Incompatible types.  */
267
0
                type_both = type1;
268
0
                /* Only the first conflict sets the message; the loop keeps
                   going so that the array is still compacted.  */
                if (!err)
269
0
                  *invalid_reason =
270
0
                    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
271
0
                err = true;
272
0
              }
273
274
0
            numbered[j-1].type = type_both;
275
0
          }
276
0
        else
277
0
          {
278
0
            if (j < i)
279
0
              {
280
0
                numbered[j].number = numbered[i].number;
281
0
                numbered[j].type = numbered[i].type;
282
0
              }
283
0
            j++;
284
0
          }
285
0
      numbered_arg_count = j;
286
0
      if (err)
287
        /* *invalid_reason has already been set above.  */
288
0
        goto bad_format;
289
0
    }
290
291
0
  /* Success: hand ownership of the array over to the result.  */
  struct spec *result = XMALLOC (struct spec);
292
0
  result->directives = directives;
293
0
  result->likely_intentional_directives = likely_intentional_directives;
294
0
  result->numbered_arg_count = numbered_arg_count;
295
0
  result->numbered = numbered;
296
0
  return result;
297
298
0
 bad_format:
299
0
  /* Failure: release the partially built array; *invalid_reason was set at
     the point of the goto.  */
  if (numbered != NULL)
300
0
    free (numbered);
301
0
  return NULL;
302
0
}
303
304
/* Release a descriptor returned by format_parse: first its 'numbered'
   array, then the 'struct spec' itself.  DESCR is dereferenced
   unconditionally, so it must not be NULL.  */
static void
305
format_free (void *descr)
306
0
{
307
0
  struct spec *spec = (struct spec *) descr;
308
309
0
  if (spec->numbered != NULL)
310
0
    free (spec->numbered);
311
0
  free (spec);
312
0
}
313
314
/* Return the number of directives recorded in DESCR (a 'struct spec').
   The stored count is a size_t and is converted to the 'int' return type
   implicitly.  */
static int
315
format_get_number_of_directives (void *descr)
316
0
{
317
0
  struct spec *spec = (struct spec *) descr;
318
319
0
  return spec->directives;
320
0
}
321
322
/* Return true if DESCR (a 'struct spec') contains no directive that was
   counted as "likely intentional", i.e. every complex directive in it
   carried a space flag and there was no '%%'.  This also holds when the
   string has no directives at all.  */
static bool
323
format_is_unlikely_intentional (void *descr)
324
0
{
325
0
  struct spec *spec = (struct spec *) descr;
326
327
0
  return spec->likely_intentional_directives == 0;
328
0
}
329
330
/* Compare the argument lists of a msgid (MSGID_DESCR) and a msgstr
   (MSGSTR_DESCR), both 'struct spec' descriptors.
   Return true if an incompatibility was found, false otherwise.
   An argument used in the msgstr but not in the msgid is always an error.
   An argument used in the msgid but not in the msgstr is an error only if
   EQUALITY is true.  An argument used in both must have the same type.
   If ERROR_LOGGER is non-NULL, it is called with ERROR_LOGGER_DATA and a
   message for the first problem found; PRETTY_MSGID and PRETTY_MSGSTR are
   the names inserted into that message.  */
static bool
331
format_check (void *msgid_descr, void *msgstr_descr, bool equality,
332
              formatstring_error_logger_t error_logger, void *error_logger_data,
333
              const char *pretty_msgid, const char *pretty_msgstr)
334
0
{
335
0
  struct spec *spec1 = (struct spec *) msgid_descr;
336
0
  struct spec *spec2 = (struct spec *) msgstr_descr;
337
0
  bool err = false;
338
339
0
  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
340
0
    {
341
0
      size_t n1 = spec1->numbered_arg_count;
342
0
      size_t n2 = spec2->numbered_arg_count;
343
344
      /* Check that the argument numbers are the same.
345
         Both arrays are sorted.  We search for the first difference.  */
346
0
      {
347
0
        size_t i, j;
348
0
        for (i = 0, j = 0; i < n1 || j < n2; )
349
0
          {
350
0
            /* cmp > 0: the next argument exists only in the msgstr;
               cmp < 0: it exists only in the msgid;
               cmp == 0: it exists in both.
               An exhausted array counts as having an infinitely large next
               number.  */
            int cmp = (i >= n1 ? 1 :
351
0
                       j >= n2 ? -1 :
352
0
                       spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
353
0
                       spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
354
0
                       0);
355
356
0
            if (cmp > 0)
357
0
              {
358
0
                if (error_logger)
359
0
                  error_logger (error_logger_data,
360
0
                                _("a format specification for argument %zu, as in '%s', doesn't exist in '%s'"),
361
0
                                spec2->numbered[j].number, pretty_msgstr,
362
0
                                pretty_msgid);
363
0
                err = true;
364
0
                break;
365
0
              }
366
0
            else if (cmp < 0)
367
0
              {
368
0
                if (equality)
369
0
                  {
370
0
                    if (error_logger)
371
0
                      error_logger (error_logger_data,
372
0
                                    _("a format specification for argument %zu doesn't exist in '%s'"),
373
0
                                    spec1->numbered[i].number, pretty_msgstr);
374
0
                    err = true;
375
0
                    break;
376
0
                  }
377
0
                else
378
0
                  /* Without EQUALITY, the msgstr may leave out arguments.  */
                  i++;
379
0
              }
380
0
            else
381
0
              j++, i++;
382
0
          }
383
0
      }
384
      /* Check the argument types are the same.  */
385
0
      if (!err)
386
0
        {
387
0
          /* The pass above has established that every argument number of
             spec2 also occurs in spec1, so advancing I until the numbers
             match cannot run past the end of spec1->numbered.  */
          size_t i, j;
388
0
          for (i = 0, j = 0; j < n2; )
389
0
            {
390
0
              if (spec1->numbered[i].number == spec2->numbered[j].number)
391
0
                {
392
0
                  if (spec1->numbered[i].type != spec2->numbered[j].type)
393
0
                    {
394
0
                      if (error_logger)
395
0
                        error_logger (error_logger_data,
396
0
                                      _("format specifications in '%s' and '%s' for argument %zu are not the same"),
397
0
                                      pretty_msgid, pretty_msgstr,
398
0
                                      spec2->numbered[j].number);
399
0
                      err = true;
400
0
                      break;
401
0
                    }
402
0
                  j++, i++;
403
0
                }
404
0
              else
405
0
                i++;
406
0
            }
407
0
        }
408
0
    }
409
410
0
  return err;
411
0
}
412
413
414
/* Parser descriptor for PHP format strings: the five entry points defined
   in this file, given positionally.  The member order is dictated by
   'struct formatstring_parser', which is declared outside this file
   (presumably in format.h -- confirm there before reordering).  */
struct formatstring_parser formatstring_php =
415
{
416
  format_parse,
417
  format_free,
418
  format_get_number_of_directives,
419
  format_is_unlikely_intentional,
420
  format_check
421
};
422
423
424
#ifdef TEST
425
426
/* Test program: Print the argument list specification returned by
427
   format_parse for strings read from standard input.  */
428
429
#include <stdio.h>
430
431
/* Print to standard output a compact rendering of DESCR (a 'struct spec',
   or NULL).
   NULL is printed as "INVALID".  Otherwise the output is a parenthesized,
   space-separated list with one item per argument position up to the
   highest one used: 'i', 'f', 'c' or 's' for an integer, floating-point,
   character or string argument, and '_' for a position that no directive
   refers to.
   Aborts if the argument numbers are not strictly increasing or a type
   value is unknown.  */
static void
432
format_print (void *descr)
433
{
434
  struct spec *spec = (struct spec *) descr;
435
436
  if (spec == NULL)
437
    {
438
      printf ("INVALID");
439
      return;
440
    }
441
442
  printf ("(");
443
  /* Next argument position that has not been printed yet.  */
  size_t last = 1;
444
  for (size_t i = 0; i < spec->numbered_arg_count; i++)
445
    {
446
      size_t number = spec->numbered[i].number;
447
448
      if (i > 0)
449
        printf (" ");
450
      /* The array must be sorted and free of duplicates.  */
      if (number < last)
451
        abort ();
452
      /* Fill the gap of unused argument positions.  */
      for (; last < number; last++)
453
        printf ("_ ");
454
      switch (spec->numbered[i].type)
455
        {
456
        case FAT_INTEGER:
457
          printf ("i");
458
          break;
459
        case FAT_FLOAT:
460
          printf ("f");
461
          break;
462
        case FAT_CHARACTER:
463
          printf ("c");
464
          break;
465
        case FAT_STRING:
466
          printf ("s");
467
          break;
468
        default:
469
          abort ();
470
        }
471
      last = number + 1;
472
    }
473
  printf (")");
474
}
475
476
/* Test driver: for each line read from standard input, strip the trailing
   newline, parse the line with format_parse, and print the resulting
   argument list via format_print.  When parsing failed, the reason is
   printed on the following line.  Returns 0 at end of input.  */
int
477
main ()
478
{
479
  for (;;)
480
    {
481
      /* A fresh buffer is requested from getline on every iteration and
         freed at the bottom of the loop.  */
      char *line = NULL;
482
      size_t line_size = 0;
483
      /* NOTE(review): getline returns ssize_t; it is stored in an int
         here.  */
      int line_len = getline (&line, &line_size, stdin);
484
      /* NOTE(review): leaving the loop here skips the free (line) below;
         check whether getline can have allocated a buffer on this path.  */
      if (line_len < 0)
485
        break;
486
      if (line_len > 0 && line[line_len - 1] == '\n')
487
        line[--line_len] = '\0';
488
489
      char *invalid_reason = NULL;
490
      void *descr = format_parse (line, false, NULL, &invalid_reason);
491
492
      format_print (descr);
493
      printf ("\n");
494
      if (descr == NULL)
495
        printf ("%s\n", invalid_reason);
496
497
      /* NOTE(review): descr is not passed to format_free before the next
         iteration.  */
      free (invalid_reason);
498
      free (line);
499
    }
500
501
  return 0;
502
}
503
504
/*
505
 * For Emacs M-x compile
506
 * Local Variables:
507
 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DTEST format-php.c ../gnulib-lib/libgettextlib.la"
508
 * End:
509
 */
510
511
#endif /* TEST */