/src/gettext/gettext-tools/src/format-kde-kuit.c
Line | Count | Source |
1 | | /* KUIT (KDE User Interface Text) format strings. |
2 | | Copyright (C) 2015-2026 Free Software Foundation, Inc. |
3 | | |
4 | | This program is free software: you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License as published by |
6 | | the Free Software Foundation; either version 3 of the License, or |
7 | | (at your option) any later version. |
8 | | |
9 | | This program is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
16 | | |
17 | | /* Written by Daiki Ueno. */ |
18 | | |
19 | | #include <config.h> |
20 | | |
21 | | #include <assert.h> |
22 | | #include <stdbool.h> |
23 | | #include <stdcountof.h> |
24 | | #include <stdlib.h> |
25 | | |
26 | | #include "format.h" |
27 | | #include "attribute.h" |
28 | | #include "unistr.h" |
29 | | #include "xalloc.h" |
30 | | #include "xvasprintf.h" |
31 | | #include "gettext.h" |
32 | | |
33 | | #if IN_LIBGETTEXTPO |
34 | | /* Use included markup parser to avoid extra dependency from |
35 | | libgettextpo to libxml2. */ |
36 | | # ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP |
37 | | # define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1 |
38 | | # endif |
39 | | #else |
40 | | # define FORMAT_KDE_KUIT_USE_LIBXML2 1 |
41 | | #endif |
42 | | |
43 | | #if FORMAT_KDE_KUIT_USE_LIBXML2 |
44 | | # include <libxml/parser.h> |
45 | | #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP |
46 | | # include "markup.h" |
47 | | #endif |
48 | | |
49 | | |
50 | 0 | #define _(str) gettext (str) |
51 | | |
52 | | |
53 | | /* KUIT (KDE User Interface Text) is an XML-like markup which augments |
54 | | translatable strings with semantic information: |
55 | | https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup |
56 | | KUIT can be seen as a fragment of a well-formed XML document, |
57 | | except that it allows '&' as a Qt accelerator marker and '%' as a |
58 | | format directive. */ |
59 | | |
/* Descriptor for a parsed KUIT format string.  It is a thin wrapper
   around the descriptor of the underlying KDE format string.  */
struct spec
{
  /* The descriptor obtained from formatstring_kde.parse for the same
     string.  */
  void *base;
};
65 | | |
66 | | #define XML_NS "https://www.gnu.org/s/gettext/kde" |
67 | | |
68 | | struct char_range |
69 | | { |
70 | | ucs4_t start; |
71 | | ucs4_t end; |
72 | | }; |
73 | | |
74 | | /* Character ranges for NameStartChar defined in: |
75 | | https://www.w3.org/TR/REC-xml/#NT-NameStartChar */ |
76 | | static const struct char_range name_chars1[] = |
77 | | { |
78 | | { ':', ':' }, |
79 | | { 'A', 'Z' }, |
80 | | { '_', '_' }, |
81 | | { 'a', 'z' }, |
82 | | { 0xC0, 0xD6 }, |
83 | | { 0xD8, 0xF6 }, |
84 | | { 0xF8, 0x2FF }, |
85 | | { 0x370, 0x37D }, |
86 | | { 0x37F, 0x1FFF }, |
87 | | { 0x200C, 0x200D }, |
88 | | { 0x2070, 0x218F }, |
89 | | { 0x2C00, 0x2FEF }, |
90 | | { 0x3001, 0xD7FF }, |
91 | | { 0xF900, 0xFDCF }, |
92 | | { 0xFDF0, 0xFFFD }, |
93 | | { 0x10000, 0xEFFFF } |
94 | | }; |
95 | | |
96 | | /* Character ranges for NameChar, excluding NameStartChar: |
97 | | https://www.w3.org/TR/REC-xml/#NT-NameChar */ |
98 | | static const struct char_range name_chars2[] = |
99 | | { |
100 | | { '-', '-' }, |
101 | | { '.', '.' }, |
102 | | { '0', '9' }, |
103 | | { 0xB7, 0xB7 }, |
104 | | { 0x0300, 0x036F }, |
105 | | { 0x203F, 0x2040 } |
106 | | }; |
107 | | |
108 | | /* Return true if INPUT is an XML reference. */ |
109 | | static bool |
110 | | is_reference (const char *input) |
111 | 0 | { |
112 | 0 | const char *str = input; |
113 | 0 | const char *str_limit = str + strlen (input); |
114 | 0 | ucs4_t uc; |
115 | |
|
116 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str); |
117 | 0 | assert (uc == '&'); |
118 | |
|
119 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str); |
120 | | |
121 | | /* CharRef */ |
122 | 0 | if (uc == '#') |
123 | 0 | { |
124 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str); |
125 | 0 | if (uc == 'x') |
126 | 0 | { |
127 | 0 | while (str < str_limit) |
128 | 0 | { |
129 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, |
130 | 0 | str_limit - str); |
131 | 0 | if (!(('0' <= uc && uc <= '9') |
132 | 0 | || ('A' <= uc && uc <= 'F') |
133 | 0 | || ('a' <= uc && uc <= 'f'))) |
134 | 0 | break; |
135 | 0 | } |
136 | 0 | return uc == ';'; |
137 | 0 | } |
138 | 0 | else if ('0' <= uc && uc <= '9') |
139 | 0 | { |
140 | 0 | while (str < str_limit) |
141 | 0 | { |
142 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, |
143 | 0 | str_limit - str); |
144 | 0 | if (!('0' <= uc && uc <= '9')) |
145 | 0 | break; |
146 | 0 | } |
147 | 0 | return uc == ';'; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | else |
151 | 0 | { |
152 | | /* EntityRef */ |
153 | 0 | { |
154 | 0 | bool isNameStartChar = false; |
155 | 0 | for (int i = 0; i < countof (name_chars1); i++) |
156 | 0 | if (name_chars1[i].start <= uc && uc <= name_chars1[i].end) |
157 | 0 | { |
158 | 0 | isNameStartChar = true; |
159 | 0 | break; |
160 | 0 | } |
161 | |
|
162 | 0 | if (!isNameStartChar) |
163 | 0 | return false; |
164 | 0 | } |
165 | | |
166 | 0 | while (str < str_limit) |
167 | 0 | { |
168 | 0 | str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str); |
169 | |
|
170 | 0 | bool isNameChar = false; |
171 | 0 | for (int i = 0; i < countof (name_chars1); i++) |
172 | 0 | if (name_chars1[i].start <= uc && uc <= name_chars1[i].end) |
173 | 0 | { |
174 | 0 | isNameChar = true; |
175 | 0 | break; |
176 | 0 | } |
177 | 0 | if (!isNameChar) |
178 | 0 | for (int i = 0; i < countof (name_chars2); i++) |
179 | 0 | if (name_chars2[i].start <= uc && uc <= name_chars2[i].end) |
180 | 0 | { |
181 | 0 | isNameChar = true; |
182 | 0 | break; |
183 | 0 | } |
184 | |
|
185 | 0 | if (!isNameChar) |
186 | 0 | return false; |
187 | 0 | } |
188 | 0 | return uc == ';'; |
189 | 0 | } |
190 | | |
191 | 0 | return false; |
192 | 0 | } |
193 | | |
194 | | |
195 | | static void * |
196 | | format_parse (const char *format, bool translated, char *fdi, |
197 | | char **invalid_reason) |
198 | 0 | { |
199 | 0 | struct spec spec; |
200 | 0 | spec.base = NULL; |
201 | | |
202 | | /* Preprocess the input, putting the content in a <gt:kuit> element. */ |
203 | 0 | const char *str = format; |
204 | 0 | const char *str_limit = str + strlen (format); |
205 | |
|
206 | 0 | size_t amp_count; |
207 | 0 | for (amp_count = 0; str < str_limit; amp_count++) |
208 | 0 | { |
209 | 0 | const char *amp = strchrnul (str, '&'); |
210 | 0 | if (*amp != '&') |
211 | 0 | break; |
212 | 0 | str = amp + 1; |
213 | 0 | } |
214 | |
|
215 | 0 | char *buffer = |
216 | 0 | xmalloc (amp_count * 4 |
217 | 0 | + strlen (format) |
218 | 0 | + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>") |
219 | 0 | + 1); |
220 | 0 | *buffer = '\0'; |
221 | |
|
222 | 0 | { |
223 | 0 | char *bp = buffer; |
224 | 0 | bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">"); |
225 | 0 | str = format; |
226 | 0 | while (str < str_limit) |
227 | 0 | { |
228 | 0 | const char *amp = strchrnul (str, '&'); |
229 | |
|
230 | 0 | bp = stpncpy (bp, str, amp - str); |
231 | 0 | if (*amp != '&') |
232 | 0 | break; |
233 | | |
234 | 0 | bp = stpcpy (bp, is_reference (amp) ? "&" : "&"); |
235 | 0 | str = amp + 1; |
236 | 0 | } |
237 | 0 | stpcpy (bp, "</gt:kuit>"); |
238 | 0 | } |
239 | |
|
240 | | #if FORMAT_KDE_KUIT_USE_LIBXML2 |
241 | | { |
242 | | xmlDocPtr doc = xmlReadMemory (buffer, strlen (buffer), "", NULL, |
243 | | XML_PARSE_NONET |
244 | | | XML_PARSE_NOWARNING |
245 | | | XML_PARSE_NOERROR |
246 | | | XML_PARSE_NOBLANKS); |
247 | | if (doc == NULL) |
248 | | { |
249 | | const xmlError *err = xmlGetLastError (); |
250 | | *invalid_reason = |
251 | | xasprintf (_("error while parsing: %s"), |
252 | | err->message); |
253 | | free (buffer); |
254 | | xmlFreeDoc (doc); |
255 | | return NULL; |
256 | | } |
257 | | |
258 | | free (buffer); |
259 | | xmlFreeDoc (doc); |
260 | | } |
261 | | #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP |
262 | | { |
263 | 0 | markup_parser_ty parser; |
264 | 0 | memset (&parser, 0, sizeof (markup_parser_ty)); |
265 | |
|
266 | 0 | markup_parse_context_ty *context = |
267 | 0 | markup_parse_context_new (&parser, 0, NULL); |
268 | |
|
269 | 0 | if (!markup_parse_context_parse (context, buffer, strlen (buffer))) |
270 | 0 | { |
271 | 0 | *invalid_reason = |
272 | 0 | xasprintf (_("error while parsing: %s"), |
273 | 0 | markup_parse_context_get_error (context)); |
274 | 0 | free (buffer); |
275 | 0 | markup_parse_context_free (context); |
276 | 0 | return NULL; |
277 | 0 | } |
278 | | |
279 | 0 | if (!markup_parse_context_end_parse (context)) |
280 | 0 | { |
281 | 0 | *invalid_reason = |
282 | 0 | xasprintf (_("error while parsing: %s"), |
283 | 0 | markup_parse_context_get_error (context)); |
284 | 0 | free (buffer); |
285 | 0 | markup_parse_context_free (context); |
286 | 0 | return NULL; |
287 | 0 | } |
288 | | |
289 | 0 | free (buffer); |
290 | 0 | markup_parse_context_free (context); |
291 | 0 | } |
292 | | #else |
293 | | /* No support for XML. */ |
294 | | free (buffer); |
295 | | #endif |
296 | | |
297 | 0 | spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason); |
298 | 0 | if (spec.base == NULL) |
299 | 0 | return NULL; |
300 | | |
301 | 0 | struct spec *result = XMALLOC (struct spec); |
302 | 0 | *result = spec; |
303 | 0 | return result; |
304 | 0 | } |
305 | | |
306 | | static void |
307 | | format_free (void *descr) |
308 | 0 | { |
309 | 0 | struct spec *spec = descr; |
310 | 0 | formatstring_kde.free (spec->base); |
311 | 0 | free (spec); |
312 | 0 | } |
313 | | |
314 | | static int |
315 | | format_get_number_of_directives (void *descr) |
316 | 0 | { |
317 | 0 | struct spec *spec = descr; |
318 | 0 | return formatstring_kde.get_number_of_directives (spec->base); |
319 | 0 | } |
320 | | |
321 | | static bool |
322 | | format_check (void *msgid_descr, void *msgstr_descr, bool equality, |
323 | | formatstring_error_logger_t error_logger, void *error_logger_data, |
324 | | const char *pretty_msgid, const char *pretty_msgstr) |
325 | 0 | { |
326 | 0 | struct spec *msgid_spec = msgid_descr; |
327 | 0 | struct spec *msgstr_spec = msgstr_descr; |
328 | |
|
329 | 0 | return formatstring_kde.check (msgid_spec->base, msgstr_spec->base, equality, |
330 | 0 | error_logger, error_logger_data, |
331 | 0 | pretty_msgid, pretty_msgstr); |
332 | 0 | } |
333 | | |
334 | | struct formatstring_parser formatstring_kde_kuit = |
335 | | { |
336 | | format_parse, |
337 | | format_free, |
338 | | format_get_number_of_directives, |
339 | | NULL, |
340 | | format_check |
341 | | }; |
342 | | |
343 | | |
344 | | #ifdef TEST_KUIT |
345 | | |
346 | | /* Test program: Print the argument list specification returned by |
347 | | format_parse for strings read from standard input. */ |
348 | | |
349 | | #include <stdio.h> |
350 | | |
/* One numbered argument of a KDE format string, as seen by the test
   program.  NOTE(review): presumably this has to match the layout used
   by the KDE format string parser -- confirm against format-kde.c.  */
struct kde_numbered_arg
{
  size_t number;  /* the argument number */
};
355 | | |
356 | | struct kde_spec |
357 | | { |
358 | | size_t directives; |
359 | | size_t numbered_arg_count; |
360 | | struct kde_numbered_arg *numbered |
361 | | COUNTED_BY (numbered_arg_count); |
362 | | }; |
363 | | |
364 | | static void |
365 | | format_print (void *descr) |
366 | | { |
367 | | struct spec *spec = (struct spec *) descr; |
368 | | |
369 | | if (spec == NULL) |
370 | | { |
371 | | printf ("INVALID"); |
372 | | return; |
373 | | } |
374 | | |
375 | | struct kde_spec *kspec = (struct kde_spec *) spec->base; |
376 | | |
377 | | if (kspec == NULL) |
378 | | { |
379 | | printf ("INVALID"); |
380 | | return; |
381 | | } |
382 | | |
383 | | printf ("("); |
384 | | size_t last = 1; |
385 | | for (size_t i = 0; i < kspec->numbered_arg_count; i++) |
386 | | { |
387 | | size_t number = kspec->numbered[i].number; |
388 | | |
389 | | if (i > 0) |
390 | | printf (" "); |
391 | | if (number < last) |
392 | | abort (); |
393 | | for (; last < number; last++) |
394 | | printf ("_ "); |
395 | | printf ("*"); |
396 | | last = number + 1; |
397 | | } |
398 | | printf (")"); |
399 | | } |
400 | | |
/* Test driver: read format strings from standard input, one per line.
   For each line, print the argument list specification returned by
   format_parse, followed, if the string is invalid, by a line with the
   reason.  */
int
main (void)
{
  /* The line buffer is owned here and reused by getline across
     iterations; it is released once, after the loop.  */
  char *line = NULL;
  size_t line_size = 0;

  for (;;)
    {
      /* getline returns an ssize_t; -1 signals end of input or an error.  */
      ssize_t line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        break;
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      char *invalid_reason = NULL;
      void *descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
    }

  free (line);

  return 0;
}
428 | | |
429 | | /* |
430 | | * For Emacs M-x compile |
431 | | * Local Variables: |
432 | | * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -I/usr/include/libxml2 -DTEST_KUIT format-kde-kuit.c format-kde.c ../gnulib-lib/libgettextlib.la" |
433 | | * End: |
434 | | */ |
435 | | |
436 | | #endif /* TEST */ |