/src/gettext-0.26/gettext-tools/src/format-sh.c
Line | Count | Source |
1 | | /* Shell format strings. |
2 | | Copyright (C) 2003-2025 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2003. |
4 | | |
5 | | This program is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 3 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
17 | | |
18 | | #ifdef HAVE_CONFIG_H |
19 | | # include <config.h> |
20 | | #endif |
21 | | |
22 | | #include <stdbool.h> |
23 | | #include <stdlib.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include "format.h" |
27 | | #include "c-ctype.h" |
28 | | #include "xalloc.h" |
29 | | #include "format-invalid.h" |
30 | | #include "gettext.h" |
31 | | |
/* Shorthand: pass STR through gettext ().  */
#define _(str) gettext (str)
33 | | |
34 | | /* Shell format strings are simply strings subject to variable substitution. |
35 | | A variable substitution starts with '$' and is finished by either |
36 | | - a nonempty sequence of alphanumeric ASCII characters or underscores, |
37 | | the first being not a digit, or |
38 | | - an opening brace '{', a nonempty sequence of alphanumeric ASCII |
39 | | characters or underscores, the first being not a digit, and a closing brace '}'. |
40 | | We don't support variable references like $1, $$ or $? since they make |
41 | | no sense when 'envsubst' is invoked. |
42 | | We don't support non-ASCII variable names, to avoid dependencies w.r.t. the |
43 | | current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1 |
44 | | encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030, |
45 | | SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these |
46 | | encodings. |
47 | | We don't support the POSIX syntax for default or alternate values: |
48 | | ${variable-default} ${variable:-default} |
49 | | ${variable=default} ${variable:=default} |
50 | | ${variable+replacement} ${variable:+replacement} |
51 | | ${variable?ignored} ${variable:?ignored} |
52 | | because the translator might be tempted to change the default value; if |
53 | | we allow it we have a security problem; if we don't allow it the translator |
54 | | will be surprised. |
55 | | */ |
56 | | |
/* One shell variable referenced by a format string.  */
struct named_arg
{
  /* NUL-terminated variable name; allocated by format_parse and released
     by format_free.  */
  char *name;
};
61 | | |
/* Result of parsing one shell format string.  */
struct spec
{
  /* Number of '$' substitutions encountered; repeated references to the
     same variable are each counted.  */
  size_t directives;

  /* Number of valid entries in NAMED.  */
  size_t named_arg_count;

  /* Array of referenced variable names.  format_parse leaves it sorted
     with duplicates removed; it is NULL when no variable was found.  */
  struct named_arg *named;
};
68 | | |
69 | | |
70 | | static int |
71 | | named_arg_compare (const void *p1, const void *p2) |
72 | 0 | { |
73 | 0 | return strcmp (((const struct named_arg *) p1)->name, |
74 | 0 | ((const struct named_arg *) p2)->name); |
75 | 0 | } |
76 | | |
/* Explanations stored in *invalid_reason when format_parse rejects a
   string.  Each macro expands to an xstrdup'ed copy of the message
   obtained through _().  */

/* The variable name contains a non-ASCII byte.  */
#define INVALID_NON_ASCII_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with a non-ASCII name."))

/* A ${...} reference uses one of the '-', '=', '+', '?', ':' operators.  */
#define INVALID_SHELL_SYNTAX() \
  xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))

/* The reference is not a plain identifier (for example $1, $$ or $?).  */
#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
  xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))

/* The reference is "${}".  */
#define INVALID_EMPTY_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with an empty name."))
85 | | |
86 | | static void * |
87 | | format_parse (const char *format, bool translated, char *fdi, |
88 | | char **invalid_reason) |
89 | 0 | { |
90 | 0 | const char *const format_start = format; |
91 | 0 | struct spec spec; |
92 | 0 | size_t named_allocated; |
93 | 0 | struct spec *result; |
94 | |
|
95 | 0 | spec.directives = 0; |
96 | 0 | spec.named_arg_count = 0; |
97 | 0 | spec.named = NULL; |
98 | 0 | named_allocated = 0; |
99 | |
|
100 | 0 | for (; *format != '\0';) |
101 | 0 | if (*format++ == '$') |
102 | 0 | { |
103 | | /* A variable substitution. */ |
104 | 0 | char *name; |
105 | |
|
106 | 0 | FDI_SET (format - 1, FMTDIR_START); |
107 | 0 | spec.directives++; |
108 | |
|
109 | 0 | if (*format == '{') |
110 | 0 | { |
111 | 0 | const char *name_start; |
112 | 0 | const char *name_end; |
113 | 0 | size_t n; |
114 | |
|
115 | 0 | name_start = ++format; |
116 | 0 | for (; *format != '\0'; format++) |
117 | 0 | { |
118 | 0 | if (*format == '}') |
119 | 0 | break; |
120 | 0 | if (!c_isascii (*format)) |
121 | 0 | { |
122 | 0 | *invalid_reason = INVALID_NON_ASCII_VARIABLE (); |
123 | 0 | FDI_SET (format, FMTDIR_ERROR); |
124 | 0 | goto bad_format; |
125 | 0 | } |
126 | 0 | if (format > name_start |
127 | 0 | && (*format == '-' || *format == '=' || *format == '+' |
128 | 0 | || *format == '?' || *format == ':')) |
129 | 0 | { |
130 | 0 | *invalid_reason = INVALID_SHELL_SYNTAX (); |
131 | 0 | FDI_SET (format, FMTDIR_ERROR); |
132 | 0 | goto bad_format; |
133 | 0 | } |
134 | 0 | if (!(c_isalnum (*format) || *format == '_') |
135 | 0 | || (format == name_start && c_isdigit (*format))) |
136 | 0 | { |
137 | 0 | *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE (); |
138 | 0 | FDI_SET (format, FMTDIR_ERROR); |
139 | 0 | goto bad_format; |
140 | 0 | } |
141 | 0 | } |
142 | 0 | if (*format == '\0') |
143 | 0 | { |
144 | 0 | *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
145 | 0 | FDI_SET (format - 1, FMTDIR_ERROR); |
146 | 0 | goto bad_format; |
147 | 0 | } |
148 | 0 | name_end = format++; |
149 | |
|
150 | 0 | n = name_end - name_start; |
151 | 0 | if (n == 0) |
152 | 0 | { |
153 | 0 | *invalid_reason = INVALID_EMPTY_VARIABLE (); |
154 | 0 | FDI_SET (format - 1, FMTDIR_ERROR); |
155 | 0 | goto bad_format; |
156 | 0 | } |
157 | 0 | name = XNMALLOC (n + 1, char); |
158 | 0 | memcpy (name, name_start, n); |
159 | 0 | name[n] = '\0'; |
160 | 0 | } |
161 | 0 | else if (c_isalpha (*format) || *format == '_') |
162 | 0 | { |
163 | 0 | const char *name_start; |
164 | 0 | const char *name_end; |
165 | 0 | size_t n; |
166 | |
|
167 | 0 | name_start = format; |
168 | 0 | do |
169 | 0 | format++; |
170 | 0 | while (*format != '\0' && (c_isalnum (*format) || *format == '_')); |
171 | 0 | name_end = format; |
172 | |
|
173 | 0 | n = name_end - name_start; |
174 | 0 | name = XNMALLOC (n + 1, char); |
175 | 0 | memcpy (name, name_start, n); |
176 | 0 | name[n] = '\0'; |
177 | 0 | } |
178 | 0 | else if (*format != '\0') |
179 | 0 | { |
180 | 0 | if (!c_isascii (*format)) |
181 | 0 | { |
182 | 0 | *invalid_reason = INVALID_NON_ASCII_VARIABLE (); |
183 | 0 | FDI_SET (format, FMTDIR_ERROR); |
184 | 0 | goto bad_format; |
185 | 0 | } |
186 | 0 | else |
187 | 0 | { |
188 | 0 | *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE (); |
189 | 0 | FDI_SET (format, FMTDIR_ERROR); |
190 | 0 | goto bad_format; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | else |
194 | 0 | { |
195 | 0 | *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
196 | 0 | FDI_SET (format - 1, FMTDIR_ERROR); |
197 | 0 | goto bad_format; |
198 | 0 | } |
199 | | |
200 | | /* Named argument. */ |
201 | 0 | if (named_allocated == spec.named_arg_count) |
202 | 0 | { |
203 | 0 | named_allocated = 2 * named_allocated + 1; |
204 | 0 | spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg)); |
205 | 0 | } |
206 | 0 | spec.named[spec.named_arg_count].name = name; |
207 | 0 | spec.named_arg_count++; |
208 | |
|
209 | 0 | FDI_SET (format - 1, FMTDIR_END); |
210 | 0 | } |
211 | | |
212 | | /* Sort the named argument array, and eliminate duplicates. */ |
213 | 0 | if (spec.named_arg_count > 1) |
214 | 0 | { |
215 | 0 | size_t i, j; |
216 | |
|
217 | 0 | qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), |
218 | 0 | named_arg_compare); |
219 | | |
220 | | /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ |
221 | 0 | for (i = j = 0; i < spec.named_arg_count; i++) |
222 | 0 | if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) |
223 | 0 | free (spec.named[i].name); |
224 | 0 | else |
225 | 0 | { |
226 | 0 | if (j < i) |
227 | 0 | spec.named[j].name = spec.named[i].name; |
228 | 0 | j++; |
229 | 0 | } |
230 | 0 | spec.named_arg_count = j; |
231 | 0 | } |
232 | |
|
233 | 0 | result = XMALLOC (struct spec); |
234 | 0 | *result = spec; |
235 | 0 | return result; |
236 | | |
237 | 0 | bad_format: |
238 | 0 | if (spec.named != NULL) |
239 | 0 | { |
240 | 0 | size_t i; |
241 | 0 | for (i = 0; i < spec.named_arg_count; i++) |
242 | 0 | free (spec.named[i].name); |
243 | 0 | free (spec.named); |
244 | 0 | } |
245 | 0 | return NULL; |
246 | 0 | } |
247 | | |
248 | | static void |
249 | | format_free (void *descr) |
250 | 0 | { |
251 | 0 | struct spec *spec = (struct spec *) descr; |
252 | |
|
253 | 0 | if (spec->named != NULL) |
254 | 0 | { |
255 | 0 | size_t i; |
256 | 0 | for (i = 0; i < spec->named_arg_count; i++) |
257 | 0 | free (spec->named[i].name); |
258 | 0 | free (spec->named); |
259 | 0 | } |
260 | 0 | free (spec); |
261 | 0 | } |
262 | | |
263 | | static int |
264 | | format_get_number_of_directives (void *descr) |
265 | 0 | { |
266 | 0 | struct spec *spec = (struct spec *) descr; |
267 | |
|
268 | 0 | return spec->directives; |
269 | 0 | } |
270 | | |
271 | | static bool |
272 | | format_check (void *msgid_descr, void *msgstr_descr, bool equality, |
273 | | formatstring_error_logger_t error_logger, void *error_logger_data, |
274 | | const char *pretty_msgid, const char *pretty_msgstr) |
275 | 0 | { |
276 | 0 | struct spec *spec1 = (struct spec *) msgid_descr; |
277 | 0 | struct spec *spec2 = (struct spec *) msgstr_descr; |
278 | 0 | bool err = false; |
279 | |
|
280 | 0 | if (spec1->named_arg_count + spec2->named_arg_count > 0) |
281 | 0 | { |
282 | 0 | size_t i, j; |
283 | 0 | size_t n1 = spec1->named_arg_count; |
284 | 0 | size_t n2 = spec2->named_arg_count; |
285 | | |
286 | | /* Check the argument names in spec2 are contained in those of spec1. |
287 | | Both arrays are sorted. We search for the first difference. */ |
288 | 0 | for (i = 0, j = 0; i < n1 || j < n2; ) |
289 | 0 | { |
290 | 0 | int cmp = (i >= n1 ? 1 : |
291 | 0 | j >= n2 ? -1 : |
292 | 0 | strcmp (spec1->named[i].name, spec2->named[j].name)); |
293 | |
|
294 | 0 | if (cmp > 0) |
295 | 0 | { |
296 | 0 | if (error_logger) |
297 | 0 | error_logger (error_logger_data, |
298 | 0 | _("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"), |
299 | 0 | spec2->named[j].name, pretty_msgstr, |
300 | 0 | pretty_msgid); |
301 | 0 | err = true; |
302 | 0 | break; |
303 | 0 | } |
304 | 0 | else if (cmp < 0) |
305 | 0 | { |
306 | 0 | if (equality) |
307 | 0 | { |
308 | 0 | if (error_logger) |
309 | 0 | error_logger (error_logger_data, |
310 | 0 | _("a format specification for argument '%s' doesn't exist in '%s'"), |
311 | 0 | spec1->named[i].name, pretty_msgstr); |
312 | 0 | err = true; |
313 | 0 | break; |
314 | 0 | } |
315 | 0 | else |
316 | 0 | i++; |
317 | 0 | } |
318 | 0 | else |
319 | 0 | j++, i++; |
320 | 0 | } |
321 | 0 | } |
322 | |
|
323 | 0 | return err; |
324 | 0 | } |
325 | | |
326 | | |
327 | | struct formatstring_parser formatstring_sh = |
328 | | { |
329 | | format_parse, |
330 | | format_free, |
331 | | format_get_number_of_directives, |
332 | | NULL, |
333 | | format_check |
334 | | }; |
335 | | |
336 | | |
337 | | #ifdef TEST |
338 | | |
339 | | /* Test program: Print the argument list specification returned by |
340 | | format_parse for strings read from standard input. */ |
341 | | |
342 | | #include <stdio.h> |
343 | | |
344 | | static void |
345 | | format_print (void *descr) |
346 | | { |
347 | | struct spec *spec = (struct spec *) descr; |
348 | | size_t i; |
349 | | |
350 | | if (spec == NULL) |
351 | | { |
352 | | printf ("INVALID"); |
353 | | return; |
354 | | } |
355 | | |
356 | | printf ("{"); |
357 | | for (i = 0; i < spec->named_arg_count; i++) |
358 | | { |
359 | | if (i > 0) |
360 | | printf (", "); |
361 | | printf ("'%s'", spec->named[i].name); |
362 | | } |
363 | | printf ("}"); |
364 | | } |
365 | | |
366 | | int |
367 | | main () |
368 | | { |
369 | | for (;;) |
370 | | { |
371 | | char *line = NULL; |
372 | | size_t line_size = 0; |
373 | | int line_len; |
374 | | char *invalid_reason; |
375 | | void *descr; |
376 | | |
377 | | line_len = getline (&line, &line_size, stdin); |
378 | | if (line_len < 0) |
379 | | break; |
380 | | if (line_len > 0 && line[line_len - 1] == '\n') |
381 | | line[--line_len] = '\0'; |
382 | | |
383 | | invalid_reason = NULL; |
384 | | descr = format_parse (line, false, NULL, &invalid_reason); |
385 | | |
386 | | format_print (descr); |
387 | | printf ("\n"); |
388 | | if (descr == NULL) |
389 | | printf ("%s\n", invalid_reason); |
390 | | |
391 | | free (invalid_reason); |
392 | | free (line); |
393 | | } |
394 | | |
395 | | return 0; |
396 | | } |
397 | | |
398 | | /* |
399 | | * For Emacs M-x compile |
400 | | * Local Variables: |
401 | | * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-sh.c ../gnulib-lib/libgettextlib.la" |
402 | | * End: |
403 | | */ |
404 | | |
405 | | #endif /* TEST */ |