Coverage Report

Created: 2026-03-12 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext/gettext-tools/src/format-python-brace.c
Line
Count
Source
1
/* Python brace format strings.
2
   Copyright (C) 2004-2026 Free Software Foundation, Inc.
3
4
   This program is free software: you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; either version 3 of the License, or
7
   (at your option) any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16
17
/* Written by Daiki Ueno and Bruno Haible.  */
18
19
#include <config.h>
20
21
#include <stdbool.h>
22
#include <stdlib.h>
23
#include <string.h>
24
25
#include "format.h"
26
#include "c-ctype.h"
27
#include "xalloc.h"
28
#include "xvasprintf.h"
29
#include "format-invalid.h"
30
#include "gettext.h"
31
32
0
#define _(str) gettext (str)
33
34
/* Python brace format strings are defined by PEP3101 together with the
35
   'format' method of the string class.
36
   Documentation:
37
     https://peps.python.org/pep-3101/
38
     https://docs.python.org/3/library/string.html#formatstrings
39
   Here we assume Python >= 3.1, which allows unnamed argument specifications.
40
   A format string directive here consists of
41
     - an opening brace '{',
42
     - optionally:
43
       - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
44
       - an optional sequence of
45
           - getattr ('.' identifier) or
46
           - getitem ('[' identifier ']')
47
         operators,
48
     - optionally, a ':' and a format specifier, where a format specifier is
49
       - either a format directive of the form '{' ... '}' without a format
50
         specifier, or
51
       - of the form [[fill]align][sign][#][0][minimumwidth][.precision][type]
52
         where
53
           - the fill character is any character,
54
           - the align flag is one of '<', '>', '=', '^',
55
           - the sign is one of '+', '-', ' ',
56
           - the # flag is '#',
57
           - the 0 flag is '0',
58
           - minimumwidth is a non-empty sequence of digits,
59
           - precision is a non-empty sequence of digits,
60
           - type is one of
61
             - 'b', 'c', 'd', 'o', 'x', 'X', 'n' for integers,
62
             - 'e', 'E', 'f', 'F', 'g', 'G', 'n', '%' for floating-point values,
63
     - a closing brace '}'.
64
   Numbered (identifier being a number) and unnamed argument specifications
65
   cannot be used in the same string.
66
   Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
67
*/
68
69
/* One argument referenced by a format string.  */
struct named_arg
{
  /* NUL-terminated argument name, owned by this entry (released with
     free () when the containing spec is destroyed).  */
  char *name;
};
73
74
/* Result of parsing one format string.  */
struct spec
{
  /* Number of top-level directives that were recorded.  */
  size_t directives;
  /* Number of entries of NAMED that are in use.  */
  size_t named_arg_count;
  /* Number of entries for which NAMED has room.  */
  size_t allocated;
  /* Growable array of the referenced arguments, or NULL while empty.  */
  struct named_arg *named;
};
81
82
83
/* Forward declaration of local functions.  */
84
/* Releases every argument name stored in SPEC and the array holding them.
   Declared here because format_parse calls it before its definition.  */
static void free_named_args (struct spec *spec);
85
86
87
/* Bookkeeping shared by all top-level directives of one format string,
   used to reject strings that mix numbered and unnamed arguments.  */
struct toplevel_counters
{
  /* How many arguments so far had an identifier starting with a digit.  */
  size_t numbered_arg_counter;
  /* How many arguments so far had no identifier at all.  */
  size_t unnamed_arg_counter;
};
94
95
96
/* All the parse_* functions (except parse_upto) follow the same
97
   calling convention.  FORMATP shall point to the beginning of a token.
98
   If parsing succeeds, FORMATP will point to the next character after
99
   the token, and true is returned.  Otherwise, FORMATP will be
100
   unchanged and false is returned.  */
101
102
/* Parses an identifier of the form [_A-Za-z][_0-9A-Za-z]* at *FORMATP.
   On success, advances *FORMATP past the identifier and returns true.
   Otherwise leaves *FORMATP untouched and returns false.
   SPEC, FDI and INVALID_REASON are accepted for uniformity with the other
   parse_* functions but are not used here.  */
static bool
parse_named_field (struct spec *spec,
                   const char **formatp,
                   char *fdi, char **invalid_reason)
{
  const char *p = *formatp;

  /* The first character must be a letter or an underscore.  */
  if (!((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z') || *p == '_'))
    return false;

  /* The following characters may additionally be digits.  */
  for (p++;
       (*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z') || *p == '_'
       || (*p >= '0' && *p <= '9');
       p++)
    ;

  *formatp = p;
  return true;
}
122
123
/* Parses a non-empty run of ASCII digits at *FORMATP.
   On success, advances *FORMATP past the digits and returns true.
   Otherwise leaves *FORMATP untouched and returns false.
   SPEC, FDI and INVALID_REASON are accepted for uniformity with the other
   parse_* functions but are not used here.  */
static bool
parse_numeric_field (struct spec *spec,
                     const char **formatp,
                     char *fdi, char **invalid_reason)
{
  const char *p = *formatp;

  if (!(*p >= '0' && *p <= '9'))
    return false;

  for (p++; *p >= '0' && *p <= '9'; p++)
    ;

  *formatp = p;
  return true;
}
142
143
/* Parses a directive.
144
   When this function is invoked, *formatp points to the start of the directive,
145
   i.e. to the '{' character.
146
   When this function returns true, *formatp points to the first character after
147
   the directive, i.e. in most cases to the character after the '}' character.
148
 */
149
static bool
150
parse_directive (struct spec *spec,
151
                 const char **formatp, struct toplevel_counters *toplevel,
152
                 char *fdi, char **invalid_reason)
153
0
{
154
0
  const char *format = *formatp;
155
0
  const char *const format_start = format;
156
157
0
  char c;
158
159
0
  c = *++format;
160
0
  if (c == '{')
161
0
    {
162
      /* An escaped '{'.  */
163
0
      *formatp = ++format;
164
0
      return true;
165
0
    }
166
167
0
  const char *name_start = format;
168
0
  if (parse_named_field (spec, &format, fdi, invalid_reason)
169
0
      || parse_numeric_field (spec, &format, fdi, invalid_reason))
170
0
    {
171
      /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
172
         name.  If must not recurse, but can be specified in a chain, such
173
         as "foo.bar.baz[0]".  */
174
0
      for (;;)
175
0
        {
176
0
          c = *format;
177
178
0
          if (c == '.')
179
0
            {
180
0
              format++;
181
0
              if (!parse_named_field (spec, &format, fdi, invalid_reason))
182
0
                {
183
0
                  if (*format == '\0')
184
0
                    {
185
0
                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
186
0
                      FDI_SET (format - 1, FMTDIR_ERROR);
187
0
                    }
188
0
                  else
189
0
                    {
190
0
                      *invalid_reason =
191
0
                        (c_isprint (*format)
192
0
                         ? xasprintf (_("In the directive number %zu, '%c' cannot start a getattr argument."),
193
0
                                      spec->directives, *format)
194
0
                         : xasprintf (_("In the directive number %zu, a getattr argument starts with a character that is not alphabetical or underscore."),
195
0
                                      spec->directives));
196
0
                      FDI_SET (format, FMTDIR_ERROR);
197
0
                    }
198
0
                  return false;
199
0
                }
200
0
            }
201
0
          else if (c == '[')
202
0
            {
203
0
              format++;
204
0
              if (!parse_named_field (spec, &format, fdi, invalid_reason)
205
0
                  && !parse_numeric_field (spec, &format, fdi, invalid_reason))
206
0
                {
207
0
                  if (*format == '\0')
208
0
                    {
209
0
                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
210
0
                      FDI_SET (format - 1, FMTDIR_ERROR);
211
0
                    }
212
0
                  else
213
0
                    {
214
0
                      *invalid_reason =
215
0
                        (c_isprint (*format)
216
0
                         ? xasprintf (_("In the directive number %zu, '%c' cannot start a getitem argument."),
217
0
                                      spec->directives, *format)
218
0
                         : xasprintf (_("In the directive number %zu, a getitem argument starts with a character that is not alphanumerical or underscore."),
219
0
                                      spec->directives));
220
0
                      FDI_SET (format, FMTDIR_ERROR);
221
0
                    }
222
0
                  return false;
223
0
                }
224
225
0
              if (*format != ']')
226
0
                {
227
0
                  *invalid_reason =
228
0
                    xasprintf (_("In the directive number %zu, there is an unterminated getitem argument."),
229
0
                               spec->directives);
230
0
                  FDI_SET (format - 1, FMTDIR_ERROR);
231
0
                  return false;
232
0
                }
233
0
              format++;
234
0
            }
235
0
          else
236
0
            break;
237
0
        }
238
0
    }
239
0
  else
240
0
    {
241
0
      if (*format == '\0')
242
0
        {
243
0
          *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
244
0
          FDI_SET (format - 1, FMTDIR_ERROR);
245
0
          return false;
246
0
        }
247
0
      if (!(toplevel != NULL && (*format == ':' || *format == '}')))
248
0
        {
249
0
          *invalid_reason =
250
0
            (c_isprint (*format)
251
0
             ? xasprintf (_("In the directive number %zu, '%c' cannot start a field name."),
252
0
                          spec->directives, *format)
253
0
             : xasprintf (_("In the directive number %zu, a field name starts with a character that is not alphanumerical or underscore."),
254
0
                          spec->directives));
255
0
          FDI_SET (format, FMTDIR_ERROR);
256
0
          return false;
257
0
        }
258
0
    }
259
0
  const char *name_end = format;
260
261
0
  if (*format == ':')
262
0
    {
263
0
      if (toplevel == NULL)
264
0
        {
265
0
          *invalid_reason =
266
0
            xasprintf (_("In the directive number %zu, no more nesting is allowed in a format specifier."),
267
0
                       spec->directives);
268
0
          FDI_SET (format, FMTDIR_ERROR);
269
0
          return false;
270
0
        }
271
272
0
      format++;
273
274
      /* Format specifiers.  Although a format specifier can be any
275
         string in theory, we can only recognize two types of format
276
         specifiers below, because otherwise we would need to evaluate
277
         Python expressions by ourselves:
278
279
           - A nested format directive expanding to an argument
280
           - The Standard Format Specifiers, as described in PEP3101,
281
             not including a nested format directive  */
282
0
      if (*format == '{')
283
0
        {
284
          /* Nested format directive.  */
285
0
          if (!parse_directive (spec, &format, NULL, fdi, invalid_reason))
286
0
            {
287
              /* FDI and INVALID_REASON will be set by a recursive call of
288
                 parse_directive.  */
289
0
              return false;
290
0
            }
291
0
        }
292
0
      else
293
0
        {
294
          /* Standard format specifiers is in the form:
295
             [[fill]align][sign][#][0][minimumwidth][.precision][type]  */
296
297
          /* Look ahead two characters to skip [[fill]align].  */
298
0
          int c1 = format[0];
299
0
          if (c1 == '\0')
300
0
            {
301
0
              *invalid_reason =
302
0
                xasprintf (_("The directive number %zu is unterminated."),
303
0
                           spec->directives);
304
0
              FDI_SET (format - 1, FMTDIR_ERROR);
305
0
              return false;
306
0
            }
307
308
0
          int c2 = format[1];
309
310
0
          if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
311
0
            format += 2;
312
0
          else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
313
0
            format++;
314
315
0
          if (*format == '+' || *format == '-' || *format == ' ')
316
0
            format++;
317
0
          if (*format == '#')
318
0
            format++;
319
0
          if (*format == '0')
320
0
            format++;
321
322
          /* Parse the optional minimumwidth.  */
323
0
          while (c_isdigit (*format))
324
0
            format++;
325
326
          /* Parse the optional .precision.  */
327
0
          if (*format == '.')
328
0
            {
329
0
              format++;
330
0
              if (c_isdigit (*format))
331
0
                do
332
0
                  format++;
333
0
                while (c_isdigit (*format));
334
0
              else
335
0
                format--;
336
0
            }
337
338
0
          switch (*format)
339
0
            {
340
0
            case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
341
0
            case 'n':
342
0
            case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
343
0
            case '%':
344
0
              format++;
345
0
              break;
346
0
            default:
347
0
              break;
348
0
            }
349
0
        }
350
0
    }
351
352
0
  if (*format != '}')
353
0
    {
354
0
      *invalid_reason =
355
0
        xasprintf (_("The directive number %zu is unterminated."),
356
0
                   spec->directives);
357
0
      FDI_SET (format - 1, FMTDIR_ERROR);
358
0
      return false;
359
0
    }
360
361
0
  if (toplevel != NULL)
362
0
    {
363
0
      FDI_SET (name_start - 1, FMTDIR_START);
364
365
0
      size_t n = name_end - name_start;
366
0
      char *name;
367
0
      if (n == 0)
368
0
        {
369
0
          if (toplevel->numbered_arg_counter > 0)
370
0
            {
371
0
              *invalid_reason =
372
0
                xstrdup (_("The string refers to arguments both through absolute argument numbers and through unnamed argument specifications."));
373
0
              FDI_SET (format, FMTDIR_ERROR);
374
0
              return false;
375
0
            }
376
0
          name = xasprintf ("%zu", toplevel->unnamed_arg_counter);
377
0
          toplevel->unnamed_arg_counter++;
378
0
        }
379
0
      else
380
0
        {
381
0
          name = XNMALLOC (n + 1, char);
382
0
          memcpy (name, name_start, n);
383
0
          name[n] = '\0';
384
0
          if (name_start[0] >= '0' && name_start[0] <= '9')
385
0
            {
386
0
              if (toplevel->unnamed_arg_counter > 0)
387
0
                {
388
0
                  *invalid_reason =
389
0
                    xstrdup (_("The string refers to arguments both through absolute argument numbers and through unnamed argument specifications."));
390
0
                  FDI_SET (format, FMTDIR_ERROR);
391
0
                  return false;
392
0
                }
393
0
              toplevel->numbered_arg_counter++;
394
0
            }
395
0
        }
396
397
0
      spec->directives++;
398
399
0
      if (spec->allocated == spec->named_arg_count)
400
0
        {
401
0
          spec->allocated = 2 * spec->allocated + 1;
402
0
          spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
403
0
        }
404
0
      spec->named[spec->named_arg_count].name = name;
405
0
      spec->named_arg_count++;
406
407
0
      FDI_SET (format, FMTDIR_END);
408
0
    }
409
410
0
  *formatp = ++format;
411
0
  return true;
412
0
}
413
414
/* Scans the format string at *FORMATP until TERMINATOR or the end of the
   string, parsing every directive that starts with '{' on the way.
   TOPLEVEL is passed through to parse_directive.
   Returns true and stores the stop position in *FORMATP on success.
   Returns false, leaving *FORMATP unchanged, as soon as a directive fails
   to parse; FDI and *INVALID_REASON are then set by parse_directive.  */
static bool
parse_upto (struct spec *spec,
            const char **formatp, struct toplevel_counters *toplevel,
            char terminator, char *fdi, char **invalid_reason)
{
  const char *cursor = *formatp;

  while (*cursor != terminator && *cursor != '\0')
    {
      if (*cursor != '{')
        {
          /* Ordinary character.  */
          cursor++;
          continue;
        }

      if (!parse_directive (spec, &cursor, toplevel, fdi, invalid_reason))
        return false;
    }

  *formatp = cursor;
  return true;
}
435
436
static int
437
named_arg_compare (const void *p1, const void *p2)
438
0
{
439
0
  return strcmp (((const struct named_arg *) p1)->name,
440
0
                 ((const struct named_arg *) p2)->name);
441
0
}
442
443
static void *
444
format_parse (const char *format, bool translated,
445
              char *fdi, char **invalid_reason)
446
0
{
447
0
  struct spec spec;
448
0
  spec.directives = 0;
449
0
  spec.named_arg_count = 0;
450
0
  spec.allocated = 0;
451
0
  spec.named = NULL;
452
453
0
  struct toplevel_counters toplevel;
454
0
  toplevel.numbered_arg_counter = 0;
455
0
  toplevel.unnamed_arg_counter = 0;
456
0
  if (!parse_upto (&spec, &format, &toplevel, '\0', fdi, invalid_reason))
457
0
    {
458
0
      free_named_args (&spec);
459
0
      return NULL;
460
0
    }
461
462
  /* Sort the named argument array, and eliminate duplicates.  */
463
0
  if (spec.named_arg_count > 1)
464
0
    {
465
0
      qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
466
0
             named_arg_compare);
467
468
      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
469
0
      size_t i, j;
470
0
      for (i = j = 0; i < spec.named_arg_count; i++)
471
0
        if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
472
0
          free (spec.named[i].name);
473
0
        else
474
0
          {
475
0
            if (j < i)
476
0
              spec.named[j].name = spec.named[i].name;
477
0
            j++;
478
0
          }
479
0
      spec.named_arg_count = j;
480
0
    }
481
482
0
  struct spec *result = XMALLOC (struct spec);
483
0
  *result = spec;
484
0
  return result;
485
0
}
486
487
static void
488
free_named_args (struct spec *spec)
489
0
{
490
0
  if (spec->named != NULL)
491
0
    {
492
0
      size_t i;
493
0
      for (i = 0; i < spec->named_arg_count; i++)
494
0
        free (spec->named[i].name);
495
0
      free (spec->named);
496
0
    }
497
0
}
498
499
/* Destroys the 'struct spec' DESCR: first the names it holds, then the
   structure itself.  */
static void
format_free (void *descr)
{
  struct spec *parsed = (struct spec *) descr;

  free_named_args (parsed);
  free (parsed);
}
507
508
static int
509
format_get_number_of_directives (void *descr)
510
0
{
511
0
  struct spec *spec = (struct spec *) descr;
512
513
0
  return spec->directives;
514
0
}
515
516
static bool
517
format_check (void *msgid_descr, void *msgstr_descr, bool equality,
518
              formatstring_error_logger_t error_logger, void *error_logger_data,
519
              const char *pretty_msgid, const char *pretty_msgstr)
520
0
{
521
0
  struct spec *spec1 = (struct spec *) msgid_descr;
522
0
  struct spec *spec2 = (struct spec *) msgstr_descr;
523
0
  bool err = false;
524
525
0
  if (spec1->named_arg_count + spec2->named_arg_count > 0)
526
0
    {
527
0
      size_t n1 = spec1->named_arg_count;
528
0
      size_t n2 = spec2->named_arg_count;
529
530
      /* Check the argument names in spec2 are contained in those of spec1.
531
         Both arrays are sorted.  We search for the first difference.  */
532
0
      size_t i, j;
533
0
      for (i = 0, j = 0; i < n1 || j < n2; )
534
0
        {
535
0
          int cmp = (i >= n1 ? 1 :
536
0
                     j >= n2 ? -1 :
537
0
                     strcmp (spec1->named[i].name, spec2->named[j].name));
538
539
0
          if (cmp > 0)
540
0
            {
541
0
              if (error_logger)
542
0
                error_logger (error_logger_data,
543
0
                              _("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
544
0
                              spec2->named[j].name, pretty_msgstr,
545
0
                              pretty_msgid);
546
0
              err = true;
547
0
              break;
548
0
            }
549
0
          else if (cmp < 0)
550
0
            {
551
0
              if (equality)
552
0
                {
553
0
                  if (error_logger)
554
0
                    error_logger (error_logger_data,
555
0
                                  _("a format specification for argument '%s' doesn't exist in '%s'"),
556
0
                                  spec1->named[i].name, pretty_msgstr);
557
0
                  err = true;
558
0
                  break;
559
0
                }
560
0
              else
561
0
                i++;
562
0
            }
563
0
          else
564
0
            j++, i++;
565
0
        }
566
0
    }
567
568
0
  return err;
569
0
}
570
571
572
/* Parser descriptor for Python brace format strings.  The initializers are
   positional: the parse, free, directive-count and check functions defined
   above, with NULL in the fourth slot.  NOTE(review): the meaning of each
   slot is fixed by 'struct formatstring_parser', which is declared outside
   this file (presumably in "format.h") -- confirm there.  */
struct formatstring_parser formatstring_python_brace =
573
{
574
  format_parse,
575
  format_free,
576
  format_get_number_of_directives,
577
  NULL,
578
  format_check
579
};
580
581
582
#ifdef TEST
583
584
/* Test program: Print the argument list specification returned by
585
   format_parse for strings read from standard input.  */
586
587
#include <stdio.h>
588
589
static void
590
format_print (void *descr)
591
{
592
  struct spec *spec = (struct spec *) descr;
593
594
  if (spec == NULL)
595
    {
596
      printf ("INVALID");
597
      return;
598
    }
599
600
  printf ("{");
601
  for (size_t i = 0; i < spec->named_arg_count; i++)
602
    {
603
      if (i > 0)
604
        printf (", ");
605
      printf ("'%s'", spec->named[i].name);
606
    }
607
  printf ("}");
608
}
609
610
/* Test driver: reads format strings from standard input, one per line,
   and prints for each the parsed argument list, followed by the reason
   when the string is invalid.  */
int
main (void)
{
  /* Buffer managed by getline; reused for every line and released once
     after the loop (it must be freed even when getline fails).  */
  char *line = NULL;
  size_t line_size = 0;

  for (;;)
    {
      /* getline returns ssize_t; storing it in an int could truncate.  */
      ssize_t line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        break;
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      char *invalid_reason = NULL;
      void *descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
    }

  free (line);
  return 0;
}
637
638
/*
639
 * For Emacs M-x compile
640
 * Local Variables:
641
 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"
642
 * End:
643
 */
644
645
#endif /* TEST */