/src/gettext/gettext-tools/src/format-php.c
Line | Count | Source |
1 | | /* PHP format strings. |
2 | | Copyright (C) 2001-2026 Free Software Foundation, Inc. |
3 | | |
4 | | This program is free software: you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License as published by |
6 | | the Free Software Foundation; either version 3 of the License, or |
7 | | (at your option) any later version. |
8 | | |
9 | | This program is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
16 | | |
17 | | /* Written by Bruno Haible. */ |
18 | | |
19 | | #include <config.h> |
20 | | |
21 | | #include <stdbool.h> |
22 | | #include <stdlib.h> |
23 | | |
24 | | #include "format.h" |
25 | | #include "c-ctype.h" |
26 | | #include "xalloc.h" |
27 | | #include "xvasprintf.h" |
28 | | #include "format-invalid.h" |
29 | | #include "gettext.h" |
30 | | |
31 | 0 | #define _(str) gettext (str) |
32 | | |
33 | | /* PHP format strings are described in |
34 | | https://www.php.net/manual/en/function.sprintf.php, and are implemented in |
35 | | php-8.1.0/ext/standard/formatted_print.c. |
36 | | A directive |
37 | | - starts with '%' or '%m$' where m is a positive integer, |
38 | | - is optionally followed by any of the characters '0', '-', ' ', or |
39 | | "'<anychar>", each of which acts as a flag, |
40 | | - is optionally followed by a width specification: a nonempty digit |
41 | | sequence, |
42 | | - is optionally followed by '.' and a precision specification: an [optional?] |
43 | | nonempty digit sequence, |
44 | | (It's optional per <https://www.php.net/manual/en/function.sprintf.php>, |
45 | | but this is actually buggy: <https://github.com/php/php-src/issues/18897>.) |
46 | | - is optionally followed by a size specifier 'l', which is ignored, |
47 | | - is finished by a specifier |
48 | | - 's', that needs a string argument, |
49 | | - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument, |
50 | | - 'e', 'E', 'f', 'F', 'g', 'G', 'h', 'H', that need a floating-point |
51 | | argument, |
52 | | - 'c', that needs a character argument. |
53 | | Additionally there is the directive '%%', which takes no argument. |
54 | | Numbered and unnumbered argument specifications can be used in the same |
55 | | string. Numbered argument specifications have no influence on the |
56 | | "current argument index", that is incremented each time an argument is read. |
57 | | */ |
58 | | |
/* Kind of argument consumed by a PHP format directive, as determined by
   the directive's conversion specifier.  */
enum format_arg_type
{
  FAT_INTEGER,    /* specifiers 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT,      /* specifiers 'e', 'E', 'f', 'F', 'g', 'G', 'h', 'H' */
  FAT_CHARACTER,  /* specifier 'c' */
  FAT_STRING      /* specifier 's' */
};
66 | | |
/* One argument referenced by a format string: its 1-based position and
   the type the directive expects for it.  */
struct numbered_arg
{
  /* Argument position; either the explicit 'm' of a '%m$' directive or
     the running index assigned to an unnumbered directive.  */
  size_t number;
  /* Type required by the directive's conversion specifier.  */
  enum format_arg_type type;
};
72 | | |
/* Result of parsing a PHP format string.  */
struct spec
{
  /* Total number of directives seen, including '%%'.  */
  size_t directives;
  /* We consider a directive as "likely intentional" if it does not contain a
     space.  This prevents xgettext from flagging strings like "100% complete"
     as 'php-format' if they don't occur in a context that requires a format
     string.  */
  size_t likely_intentional_directives;
  /* Number of elements in NUMBERED.  */
  size_t numbered_arg_count;
  /* Arguments consumed by the format string.  format_parse sorts this array
     by argument number and merges entries with equal numbers.  May be NULL
     when there are no arguments.  */
  struct numbered_arg *numbered;
};
84 | | |
85 | | |
86 | | static int |
87 | | numbered_arg_compare (const void *p1, const void *p2) |
88 | 0 | { |
89 | 0 | size_t n1 = ((const struct numbered_arg *) p1)->number; |
90 | 0 | size_t n2 = ((const struct numbered_arg *) p2)->number; |
91 | |
|
92 | 0 | return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); |
93 | 0 | } |
94 | | |
95 | | static void * |
96 | | format_parse (const char *format, bool translated, char *fdi, |
97 | | char **invalid_reason) |
98 | 0 | { |
99 | 0 | const char *const format_start = format; |
100 | |
|
101 | 0 | size_t directives = 0; |
102 | 0 | size_t likely_intentional_directives = 0; |
103 | 0 | size_t numbered_arg_count = 0; |
104 | 0 | struct numbered_arg *numbered = NULL; |
105 | 0 | size_t numbered_allocated = 0; |
106 | 0 | size_t unnumbered_arg_count = 0; |
107 | |
|
108 | 0 | for (; *format != '\0';) |
109 | 0 | if (*format++ == '%') |
110 | 0 | { |
111 | | /* A directive. */ |
112 | 0 | FDI_SET (format - 1, FMTDIR_START); |
113 | 0 | directives++; |
114 | 0 | bool likely_intentional = true; |
115 | |
|
116 | 0 | if (*format != '%') |
117 | 0 | { |
118 | | /* A complex directive. */ |
119 | |
|
120 | 0 | size_t number = ++unnumbered_arg_count; |
121 | 0 | if (c_isdigit (*format)) |
122 | 0 | { |
123 | 0 | const char *f = format; |
124 | 0 | size_t m = 0; |
125 | |
|
126 | 0 | do |
127 | 0 | { |
128 | 0 | m = 10 * m + (*f - '0'); |
129 | 0 | f++; |
130 | 0 | } |
131 | 0 | while (c_isdigit (*f)); |
132 | |
|
133 | 0 | if (*f == '$') |
134 | 0 | { |
135 | 0 | if (m == 0) |
136 | 0 | { |
137 | 0 | *invalid_reason = INVALID_ARGNO_0 (directives); |
138 | 0 | FDI_SET (f, FMTDIR_ERROR); |
139 | 0 | goto bad_format; |
140 | 0 | } |
141 | 0 | number = m; |
142 | 0 | format = ++f; |
143 | 0 | --unnumbered_arg_count; |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | /* Parse flags. */ |
148 | 0 | for (;;) |
149 | 0 | { |
150 | 0 | if (*format == '0' || *format == '-' || *format == ' ') |
151 | 0 | { |
152 | 0 | if (*format == ' ') |
153 | 0 | likely_intentional = false; |
154 | 0 | format++; |
155 | 0 | } |
156 | 0 | else if (*format == '\'') |
157 | 0 | { |
158 | 0 | format++; |
159 | 0 | if (*format == '\0') |
160 | 0 | { |
161 | 0 | *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
162 | 0 | FDI_SET (format - 1, FMTDIR_ERROR); |
163 | 0 | goto bad_format; |
164 | 0 | } |
165 | 0 | format++; |
166 | 0 | } |
167 | 0 | else |
168 | 0 | break; |
169 | 0 | } |
170 | | |
171 | | /* Parse width. */ |
172 | 0 | if (c_isdigit (*format)) |
173 | 0 | { |
174 | 0 | do |
175 | 0 | format++; |
176 | 0 | while (c_isdigit (*format)); |
177 | 0 | } |
178 | | |
179 | | /* Parse precision. */ |
180 | 0 | if (*format == '.') |
181 | 0 | { |
182 | 0 | format++; |
183 | |
|
184 | 0 | if (c_isdigit (*format)) |
185 | 0 | { |
186 | 0 | do |
187 | 0 | format++; |
188 | 0 | while (c_isdigit (*format)); |
189 | 0 | } |
190 | 0 | else |
191 | 0 | --format; /* will jump to bad_format */ |
192 | 0 | } |
193 | | |
194 | | /* Parse size. */ |
195 | 0 | if (*format == 'l') |
196 | 0 | format++; |
197 | |
|
198 | 0 | enum format_arg_type type; |
199 | 0 | switch (*format) |
200 | 0 | { |
201 | 0 | case 'b': case 'd': case 'u': case 'o': case 'x': case 'X': |
202 | 0 | type = FAT_INTEGER; |
203 | 0 | break; |
204 | 0 | case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': |
205 | 0 | case 'h': case 'H': |
206 | 0 | type = FAT_FLOAT; |
207 | 0 | break; |
208 | 0 | case 'c': |
209 | 0 | type = FAT_CHARACTER; |
210 | 0 | break; |
211 | 0 | case 's': |
212 | 0 | type = FAT_STRING; |
213 | 0 | break; |
214 | 0 | default: |
215 | 0 | if (*format == '\0') |
216 | 0 | { |
217 | 0 | *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
218 | 0 | FDI_SET (format - 1, FMTDIR_ERROR); |
219 | 0 | } |
220 | 0 | else |
221 | 0 | { |
222 | 0 | *invalid_reason = |
223 | 0 | INVALID_CONVERSION_SPECIFIER (directives, *format); |
224 | 0 | FDI_SET (format, FMTDIR_ERROR); |
225 | 0 | } |
226 | 0 | goto bad_format; |
227 | 0 | } |
228 | | |
229 | 0 | if (numbered_allocated == numbered_arg_count) |
230 | 0 | { |
231 | 0 | numbered_allocated = 2 * numbered_allocated + 1; |
232 | 0 | numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg)); |
233 | 0 | } |
234 | 0 | numbered[numbered_arg_count].number = number; |
235 | 0 | numbered[numbered_arg_count].type = type; |
236 | 0 | numbered_arg_count++; |
237 | 0 | } |
238 | | |
239 | 0 | if (likely_intentional) |
240 | 0 | likely_intentional_directives++; |
241 | 0 | FDI_SET (format, FMTDIR_END); |
242 | |
|
243 | 0 | format++; |
244 | 0 | } |
245 | | |
246 | | /* Sort the numbered argument array, and eliminate duplicates. */ |
247 | 0 | if (numbered_arg_count > 1) |
248 | 0 | { |
249 | 0 | qsort (numbered, numbered_arg_count, |
250 | 0 | sizeof (struct numbered_arg), numbered_arg_compare); |
251 | | |
252 | | /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ |
253 | 0 | bool err = false; |
254 | 0 | size_t i, j; |
255 | 0 | for (i = j = 0; i < numbered_arg_count; i++) |
256 | 0 | if (j > 0 && numbered[i].number == numbered[j-1].number) |
257 | 0 | { |
258 | 0 | enum format_arg_type type1 = numbered[i].type; |
259 | 0 | enum format_arg_type type2 = numbered[j-1].type; |
260 | |
|
261 | 0 | enum format_arg_type type_both; |
262 | 0 | if (type1 == type2) |
263 | 0 | type_both = type1; |
264 | 0 | else |
265 | 0 | { |
266 | | /* Incompatible types. */ |
267 | 0 | type_both = type1; |
268 | 0 | if (!err) |
269 | 0 | *invalid_reason = |
270 | 0 | INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number); |
271 | 0 | err = true; |
272 | 0 | } |
273 | |
|
274 | 0 | numbered[j-1].type = type_both; |
275 | 0 | } |
276 | 0 | else |
277 | 0 | { |
278 | 0 | if (j < i) |
279 | 0 | { |
280 | 0 | numbered[j].number = numbered[i].number; |
281 | 0 | numbered[j].type = numbered[i].type; |
282 | 0 | } |
283 | 0 | j++; |
284 | 0 | } |
285 | 0 | numbered_arg_count = j; |
286 | 0 | if (err) |
287 | | /* *invalid_reason has already been set above. */ |
288 | 0 | goto bad_format; |
289 | 0 | } |
290 | | |
291 | 0 | struct spec *result = XMALLOC (struct spec); |
292 | 0 | result->directives = directives; |
293 | 0 | result->likely_intentional_directives = likely_intentional_directives; |
294 | 0 | result->numbered_arg_count = numbered_arg_count; |
295 | 0 | result->numbered = numbered; |
296 | 0 | return result; |
297 | | |
298 | 0 | bad_format: |
299 | 0 | if (numbered != NULL) |
300 | 0 | free (numbered); |
301 | 0 | return NULL; |
302 | 0 | } |
303 | | |
304 | | static void |
305 | | format_free (void *descr) |
306 | 0 | { |
307 | 0 | struct spec *spec = (struct spec *) descr; |
308 | |
|
309 | 0 | if (spec->numbered != NULL) |
310 | 0 | free (spec->numbered); |
311 | 0 | free (spec); |
312 | 0 | } |
313 | | |
314 | | static int |
315 | | format_get_number_of_directives (void *descr) |
316 | 0 | { |
317 | 0 | struct spec *spec = (struct spec *) descr; |
318 | |
|
319 | 0 | return spec->directives; |
320 | 0 | } |
321 | | |
322 | | static bool |
323 | | format_is_unlikely_intentional (void *descr) |
324 | 0 | { |
325 | 0 | struct spec *spec = (struct spec *) descr; |
326 | |
|
327 | 0 | return spec->likely_intentional_directives == 0; |
328 | 0 | } |
329 | | |
330 | | static bool |
331 | | format_check (void *msgid_descr, void *msgstr_descr, bool equality, |
332 | | formatstring_error_logger_t error_logger, void *error_logger_data, |
333 | | const char *pretty_msgid, const char *pretty_msgstr) |
334 | 0 | { |
335 | 0 | struct spec *spec1 = (struct spec *) msgid_descr; |
336 | 0 | struct spec *spec2 = (struct spec *) msgstr_descr; |
337 | 0 | bool err = false; |
338 | |
|
339 | 0 | if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) |
340 | 0 | { |
341 | 0 | size_t n1 = spec1->numbered_arg_count; |
342 | 0 | size_t n2 = spec2->numbered_arg_count; |
343 | | |
344 | | /* Check that the argument numbers are the same. |
345 | | Both arrays are sorted. We search for the first difference. */ |
346 | 0 | { |
347 | 0 | size_t i, j; |
348 | 0 | for (i = 0, j = 0; i < n1 || j < n2; ) |
349 | 0 | { |
350 | 0 | int cmp = (i >= n1 ? 1 : |
351 | 0 | j >= n2 ? -1 : |
352 | 0 | spec1->numbered[i].number > spec2->numbered[j].number ? 1 : |
353 | 0 | spec1->numbered[i].number < spec2->numbered[j].number ? -1 : |
354 | 0 | 0); |
355 | |
|
356 | 0 | if (cmp > 0) |
357 | 0 | { |
358 | 0 | if (error_logger) |
359 | 0 | error_logger (error_logger_data, |
360 | 0 | _("a format specification for argument %zu, as in '%s', doesn't exist in '%s'"), |
361 | 0 | spec2->numbered[j].number, pretty_msgstr, |
362 | 0 | pretty_msgid); |
363 | 0 | err = true; |
364 | 0 | break; |
365 | 0 | } |
366 | 0 | else if (cmp < 0) |
367 | 0 | { |
368 | 0 | if (equality) |
369 | 0 | { |
370 | 0 | if (error_logger) |
371 | 0 | error_logger (error_logger_data, |
372 | 0 | _("a format specification for argument %zu doesn't exist in '%s'"), |
373 | 0 | spec1->numbered[i].number, pretty_msgstr); |
374 | 0 | err = true; |
375 | 0 | break; |
376 | 0 | } |
377 | 0 | else |
378 | 0 | i++; |
379 | 0 | } |
380 | 0 | else |
381 | 0 | j++, i++; |
382 | 0 | } |
383 | 0 | } |
384 | | /* Check the argument types are the same. */ |
385 | 0 | if (!err) |
386 | 0 | { |
387 | 0 | size_t i, j; |
388 | 0 | for (i = 0, j = 0; j < n2; ) |
389 | 0 | { |
390 | 0 | if (spec1->numbered[i].number == spec2->numbered[j].number) |
391 | 0 | { |
392 | 0 | if (spec1->numbered[i].type != spec2->numbered[j].type) |
393 | 0 | { |
394 | 0 | if (error_logger) |
395 | 0 | error_logger (error_logger_data, |
396 | 0 | _("format specifications in '%s' and '%s' for argument %zu are not the same"), |
397 | 0 | pretty_msgid, pretty_msgstr, |
398 | 0 | spec2->numbered[j].number); |
399 | 0 | err = true; |
400 | 0 | break; |
401 | 0 | } |
402 | 0 | j++, i++; |
403 | 0 | } |
404 | 0 | else |
405 | 0 | i++; |
406 | 0 | } |
407 | 0 | } |
408 | 0 | } |
409 | |
|
410 | 0 | return err; |
411 | 0 | } |
412 | | |
413 | | |
/* Parser description for PHP format strings: the entry points defined in
   this file, listed in the member order of 'struct formatstring_parser'
   (declared outside this file).  */
struct formatstring_parser formatstring_php =
{
  format_parse,                     /* parse a format string */
  format_free,                      /* release a parse result */
  format_get_number_of_directives,  /* count of directives in a parse result */
  format_is_unlikely_intentional,   /* no "likely intentional" directive? */
  format_check                      /* compare msgid and msgstr parse results */
};
422 | | |
423 | | |
424 | | #ifdef TEST |
425 | | |
426 | | /* Test program: Print the argument list specification returned by |
427 | | format_parse for strings read from standard input. */ |
428 | | |
429 | | #include <stdio.h> |
430 | | |
431 | | static void |
432 | | format_print (void *descr) |
433 | | { |
434 | | struct spec *spec = (struct spec *) descr; |
435 | | |
436 | | if (spec == NULL) |
437 | | { |
438 | | printf ("INVALID"); |
439 | | return; |
440 | | } |
441 | | |
442 | | printf ("("); |
443 | | size_t last = 1; |
444 | | for (size_t i = 0; i < spec->numbered_arg_count; i++) |
445 | | { |
446 | | size_t number = spec->numbered[i].number; |
447 | | |
448 | | if (i > 0) |
449 | | printf (" "); |
450 | | if (number < last) |
451 | | abort (); |
452 | | for (; last < number; last++) |
453 | | printf ("_ "); |
454 | | switch (spec->numbered[i].type) |
455 | | { |
456 | | case FAT_INTEGER: |
457 | | printf ("i"); |
458 | | break; |
459 | | case FAT_FLOAT: |
460 | | printf ("f"); |
461 | | break; |
462 | | case FAT_CHARACTER: |
463 | | printf ("c"); |
464 | | break; |
465 | | case FAT_STRING: |
466 | | printf ("s"); |
467 | | break; |
468 | | default: |
469 | | abort (); |
470 | | } |
471 | | last = number + 1; |
472 | | } |
473 | | printf (")"); |
474 | | } |
475 | | |
/* Test driver: read lines from standard input, parse each one as a PHP
   format string, and print the resulting argument list (or "INVALID"
   followed by the reason).  Returns 0.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      /* getline returns ssize_t; keep the full width instead of
         truncating to int.  */
      ssize_t line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      char *invalid_reason = NULL;
      void *descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the parse result so the loop does not leak.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
503 | | |
504 | | /* |
505 | | * For Emacs M-x compile |
506 | | * Local Variables: |
507 | | * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DTEST format-php.c ../gnulib-lib/libgettextlib.la" |
508 | | * End: |
509 | | */ |
510 | | |
511 | | #endif /* TEST */ |