Coverage Report

Created: 2026-05-31 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext/gettext-tools/src/format-kde-kuit.c
Line
Count
Source
1
/* KUIT (KDE User Interface Text) format strings.
2
   Copyright (C) 2015-2026 Free Software Foundation, Inc.
3
4
   This program is free software: you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; either version 3 of the License, or
7
   (at your option) any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16
17
/* Written by Daiki Ueno.  */
18
19
#include <config.h>
20
21
#include <assert.h>
22
#include <stdbool.h>
23
#include <stdcountof.h>
24
#include <stdlib.h>
25
26
#include "format.h"
27
#include "attribute.h"
28
#include "unistr.h"
29
#include "xalloc.h"
30
#include "xvasprintf.h"
31
#include "gettext.h"
32
33
#if IN_LIBGETTEXTPO
34
/* Use included markup parser to avoid extra dependency from
35
   libgettextpo to libxml2.  */
36
# ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP
37
#  define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1
38
# endif
39
#else
40
#  define FORMAT_KDE_KUIT_USE_LIBXML2 1
41
#endif
42
43
#if FORMAT_KDE_KUIT_USE_LIBXML2
44
# include <libxml/parser.h>
45
#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
46
# include "markup.h"
47
#endif
48
49
50
0
#define _(str) gettext (str)
51
52
53
/* KUIT (KDE User Interface Text) is an XML-like markup which augments
54
   translatable strings with semantic information:
55
   https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup
56
   KUIT can be seen as a fragment of a well-formed XML document,
57
   except that it allows '&' as a Qt accelerator marker and '%' as a
58
   format directive.  */
59
60
/* Descriptor handed out by format_parse in this file.  It only wraps the
   descriptor of the underlying KDE format string parser; the KUIT markup
   itself is validated during parsing and leaves no state behind.  */
struct spec
{
  /* A format string descriptor returned from formatstring_kde.parse.
     Released through formatstring_kde.free in format_free.  */
  void *base;
};
65
66
#define XML_NS "https://www.gnu.org/s/gettext/kde"
67
68
/* A range of Unicode code points.  Both bounds are inclusive: the lookups
   in this file test START <= uc && uc <= END.  */
struct char_range
{
  /* First code point of the range.  */
  ucs4_t start;
  /* Last code point of the range.  */
  ucs4_t end;
};
73
74
/* Character ranges for NameStartChar defined in:
75
   https://www.w3.org/TR/REC-xml/#NT-NameStartChar  */
76
static const struct char_range name_chars1[] =
77
  {
78
    { ':', ':' },
79
    { 'A', 'Z' },
80
    { '_', '_' },
81
    { 'a', 'z' },
82
    { 0xC0, 0xD6 },
83
    { 0xD8, 0xF6 },
84
    { 0xF8, 0x2FF },
85
    { 0x370, 0x37D },
86
    { 0x37F, 0x1FFF },
87
    { 0x200C, 0x200D },
88
    { 0x2070, 0x218F },
89
    { 0x2C00, 0x2FEF },
90
    { 0x3001, 0xD7FF },
91
    { 0xF900, 0xFDCF },
92
    { 0xFDF0, 0xFFFD },
93
    { 0x10000, 0xEFFFF }
94
  };
95
96
/* Character ranges for NameChar, excluding NameStartChar:
97
   https://www.w3.org/TR/REC-xml/#NT-NameChar  */
98
static const struct char_range name_chars2[] =
99
  {
100
    { '-', '-' },
101
    { '.', '.' },
102
    { '0', '9' },
103
    { 0xB7, 0xB7 },
104
    { 0x0300, 0x036F },
105
    { 0x203F, 0x2040 }
106
  };
107
108
/* Return true if INPUT is an XML reference.  */
109
static bool
110
is_reference (const char *input)
111
0
{
112
0
  const char *str = input;
113
0
  const char *str_limit = str + strlen (input);
114
0
  ucs4_t uc;
115
116
0
  str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
117
0
  assert (uc == '&');
118
119
0
  str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
120
121
  /* CharRef */
122
0
  if (uc == '#')
123
0
    {
124
0
      str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
125
0
      if (uc == 'x')
126
0
        {
127
0
          while (str < str_limit)
128
0
            {
129
0
              str += u8_mbtouc (&uc, (const unsigned char *) str,
130
0
                                str_limit - str);
131
0
              if (!(('0' <= uc && uc <= '9')
132
0
                    || ('A' <= uc && uc <= 'F')
133
0
                    || ('a' <= uc && uc <= 'f')))
134
0
                break;
135
0
            }
136
0
          return uc == ';';
137
0
        }
138
0
      else if ('0' <= uc && uc <= '9')
139
0
        {
140
0
          while (str < str_limit)
141
0
            {
142
0
              str += u8_mbtouc (&uc, (const unsigned char *) str,
143
0
                                str_limit - str);
144
0
              if (!('0' <= uc && uc <= '9'))
145
0
                break;
146
0
            }
147
0
          return uc == ';';
148
0
        }
149
0
    }
150
0
  else
151
0
    {
152
      /* EntityRef */
153
0
      {
154
0
        bool isNameStartChar = false;
155
0
        for (int i = 0; i < countof (name_chars1); i++)
156
0
          if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
157
0
            {
158
0
              isNameStartChar = true;
159
0
              break;
160
0
            }
161
162
0
        if (!isNameStartChar)
163
0
          return false;
164
0
      }
165
166
0
      while (str < str_limit)
167
0
        {
168
0
          str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
169
170
0
          bool isNameChar = false;
171
0
          for (int i = 0; i < countof (name_chars1); i++)
172
0
            if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
173
0
              {
174
0
                isNameChar = true;
175
0
                break;
176
0
              }
177
0
          if (!isNameChar)
178
0
            for (int i = 0; i < countof (name_chars2); i++)
179
0
              if (name_chars2[i].start <= uc && uc <= name_chars2[i].end)
180
0
                {
181
0
                  isNameChar = true;
182
0
                  break;
183
0
                }
184
185
0
          if (!isNameChar)
186
0
            return false;
187
0
        }
188
0
      return uc == ';';
189
0
    }
190
191
0
  return false;
192
0
}
193
194
195
static void *
196
format_parse (const char *format, bool translated, char *fdi,
197
              char **invalid_reason)
198
0
{
199
0
  struct spec spec;
200
0
  spec.base = NULL;
201
202
  /* Preprocess the input, putting the content in a <gt:kuit> element.  */
203
0
  const char *str = format;
204
0
  const char *str_limit = str + strlen (format);
205
206
0
  size_t amp_count;
207
0
  for (amp_count = 0; str < str_limit; amp_count++)
208
0
    {
209
0
      const char *amp = strchrnul (str, '&');
210
0
      if (*amp != '&')
211
0
        break;
212
0
      str = amp + 1;
213
0
    }
214
215
0
  char *buffer =
216
0
    xmalloc (amp_count * 4
217
0
             + strlen (format)
218
0
             + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
219
0
             + 1);
220
0
  *buffer = '\0';
221
222
0
  {
223
0
    char *bp = buffer;
224
0
    bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
225
0
    str = format;
226
0
    while (str < str_limit)
227
0
      {
228
0
        const char *amp = strchrnul (str, '&');
229
230
0
        bp = stpncpy (bp, str, amp - str);
231
0
        if (*amp != '&')
232
0
          break;
233
234
0
        bp = stpcpy (bp, is_reference (amp) ? "&" : "&amp;");
235
0
        str = amp + 1;
236
0
      }
237
0
    stpcpy (bp, "</gt:kuit>");
238
0
  }
239
240
#if FORMAT_KDE_KUIT_USE_LIBXML2
241
    {
242
      xmlDocPtr doc = xmlReadMemory (buffer, strlen (buffer), "", NULL,
243
                                     XML_PARSE_NONET
244
                                     | XML_PARSE_NOWARNING
245
                                     | XML_PARSE_NOERROR
246
                                     | XML_PARSE_NOBLANKS);
247
      if (doc == NULL)
248
        {
249
          const xmlError *err = xmlGetLastError ();
250
          *invalid_reason =
251
            xasprintf (_("error while parsing: %s"),
252
                       err->message);
253
          free (buffer);
254
          xmlFreeDoc (doc);
255
          return NULL;
256
        }
257
258
      free (buffer);
259
      xmlFreeDoc (doc);
260
    }
261
#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
262
    {
263
0
      markup_parser_ty parser;
264
0
      memset (&parser, 0, sizeof (markup_parser_ty));
265
266
0
      markup_parse_context_ty *context =
267
0
        markup_parse_context_new (&parser, 0, NULL);
268
269
0
      if (!markup_parse_context_parse (context, buffer, strlen (buffer)))
270
0
        {
271
0
          *invalid_reason =
272
0
            xasprintf (_("error while parsing: %s"),
273
0
                       markup_parse_context_get_error (context));
274
0
          free (buffer);
275
0
          markup_parse_context_free (context);
276
0
          return NULL;
277
0
        }
278
279
0
      if (!markup_parse_context_end_parse (context))
280
0
        {
281
0
          *invalid_reason =
282
0
            xasprintf (_("error while parsing: %s"),
283
0
                       markup_parse_context_get_error (context));
284
0
          free (buffer);
285
0
          markup_parse_context_free (context);
286
0
          return NULL;
287
0
        }
288
289
0
      free (buffer);
290
0
      markup_parse_context_free (context);
291
0
    }
292
#else
293
    /* No support for XML.  */
294
    free (buffer);
295
#endif
296
297
0
  spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason);
298
0
  if (spec.base == NULL)
299
0
    return NULL;
300
301
0
  struct spec *result = XMALLOC (struct spec);
302
0
  *result = spec;
303
0
  return result;
304
0
}
305
306
static void
307
format_free (void *descr)
308
0
{
309
0
  struct spec *spec = descr;
310
0
  formatstring_kde.free (spec->base);
311
0
  free (spec);
312
0
}
313
314
static int
315
format_get_number_of_directives (void *descr)
316
0
{
317
0
  struct spec *spec = descr;
318
0
  return formatstring_kde.get_number_of_directives (spec->base);
319
0
}
320
321
static bool
322
format_check (void *msgid_descr, void *msgstr_descr, bool equality,
323
              formatstring_error_logger_t error_logger, void *error_logger_data,
324
              const char *pretty_msgid, const char *pretty_msgstr)
325
0
{
326
0
  struct spec *msgid_spec = msgid_descr;
327
0
  struct spec *msgstr_spec = msgstr_descr;
328
329
0
  return formatstring_kde.check (msgid_spec->base, msgstr_spec->base, equality,
330
0
                                 error_logger, error_logger_data,
331
0
                                 pretty_msgid, pretty_msgstr);
332
0
}
333
334
/* The parser table for KUIT format strings.  The entries are positional;
   their order is dictated by the declaration of struct
   formatstring_parser.  */
struct formatstring_parser formatstring_kde_kuit =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  /* NOTE(review): this slot is left unset; its meaning is defined by
     struct formatstring_parser, which is not declared in this file.  */
  NULL,
  format_check
};
342
343
344
#ifdef TEST_KUIT
345
346
/* Test program: Print the argument list specification returned by
347
   format_parse for strings read from standard input.  */
348
349
#include <stdio.h>
350
351
/* One numbered argument, as read by format_print below.
   NOTE(review): presumably this has to match the layout used by the KDE
   format string parser for its descriptors -- confirm against
   format-kde.c.  */
struct kde_numbered_arg
{
  /* The argument number.  format_print starts counting at 1.  */
  size_t number;
};
355
356
/* The layout that format_print assumes for the descriptor stored in
   'struct spec'.base, i.e. for the value returned by
   formatstring_kde.parse.
   NOTE(review): presumably a copy of the definition in format-kde.c that
   has to be kept in sync with it -- confirm.  */
struct kde_spec
{
  /* Not read by the test program.  */
  size_t directives;
  /* Number of elements in NUMBERED.  */
  size_t numbered_arg_count;
  /* The numbered arguments.  format_print aborts unless their numbers are
     strictly increasing.  */
  struct kde_numbered_arg *numbered
    COUNTED_BY (numbered_arg_count);
};
363
364
static void
365
format_print (void *descr)
366
{
367
  struct spec *spec = (struct spec *) descr;
368
369
  if (spec == NULL)
370
    {
371
      printf ("INVALID");
372
      return;
373
    }
374
375
  struct kde_spec *kspec = (struct kde_spec *) spec->base;
376
377
  if (kspec == NULL)
378
    {
379
      printf ("INVALID");
380
      return;
381
    }
382
383
  printf ("(");
384
  size_t last = 1;
385
  for (size_t i = 0; i < kspec->numbered_arg_count; i++)
386
    {
387
      size_t number = kspec->numbered[i].number;
388
389
      if (i > 0)
390
        printf (" ");
391
      if (number < last)
392
        abort ();
393
      for (; last < number; last++)
394
        printf ("_ ");
395
      printf ("*");
396
      last = number + 1;
397
    }
398
  printf (")");
399
}
400
401
/* Test driver: read format strings from standard input, one per line,
   parse each of them with format_parse and print the resulting argument
   list.  For a string that fails to parse, print the reason on the line
   that follows.  Always return 0.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      /* getline returns ssize_t; an int could truncate the length.  */
      ssize_t line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated the buffer even though it
             failed.  */
          free (line);
          break;
        }
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      char *invalid_reason = NULL;
      void *descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
428
429
/*
430
 * For Emacs M-x compile
431
 * Local Variables:
432
 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -I/usr/include/libxml2 -DTEST_KUIT format-kde-kuit.c format-kde.c ../gnulib-lib/libgettextlib.la"
433
 * End:
434
 */
435
436
#endif /* TEST_KUIT */