Coverage Report

Created: 2026-06-09 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pigeonhole/src/lib-sieve/util/urn.c
Line
Count
Source
1
/* Copyright (c) 2025 Pigeonhole authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "str.h"
5
#include "uri-util.h"
6
7
#include "urn.h"
8
9
/* RFC 8141, Section 2
10
11
   namestring    = assigned-name
12
                 [ rq-components ]
13
                 [ "#" f-component ]
14
   assigned-name = "urn" ":" NID ":" NSS
15
   NID           = (alphanum) 0*30(ldh) (alphanum)
16
   ldh           = alphanum / "-"
17
   NSS           = pchar *(pchar / "/")
18
   rq-components = [ "?+" r-component ]
19
                 [ "?=" q-component ]
20
   r-component   = pchar *( pchar / "/" / "?" )
21
   q-component   = pchar *( pchar / "/" / "?" )
22
   f-component   = fragment
23
 */
24
25
/*
26
 * URN parser
27
 */
28
29
struct urn_parser {
30
  struct uri_parser parser;
31
32
  enum urn_parse_flags flags;
33
34
  struct urn *urn;
35
36
  bool normalizing:1;
37
};
38
39
/* Character class masks for urn_char_lookup[]. In that table bit 0 marks
   letters and digits, bit 1 marks the other characters accepted as pchar,
   bit 2 marks "/" and bit 3 marks "?". Each mask below selects a cumulative
   set of those classes. */

/* alphanum */
const uint16_t urn_alphanum_char_mask = (BIT(0));
/* pchar (excluding pct-encoded, which is handled separately) */
const uint16_t urn_pchar_char_mask = (BIT(0) | BIT(1));
/* pchar / "/" */
const uint16_t urn_pchar_slash_char_mask = (BIT(0) | BIT(1) | BIT(2));
/* pchar / "/" / "?" */
const uint16_t urn_component_char_mask = (BIT(0) | BIT(1) | BIT(2) | BIT(3));
43
44
/* Character classification table, indexed by octet value.

     1 = alphanum
     2 = other pchar characters: remaining unreserved ("-" "." "_" "~"),
         sub-delims ("!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=") and
         ":" / "@"
     4 = "/"
     8 = "?"
     0 = not allowed unencoded ("%" is 0 as well; percent-encoding is
         handled explicitly by the parsers)

   "(" and ")" are sub-delims and therefore pchar, while "{" and "}" are not
   valid URI characters at all.
 */
static unsigned const char urn_char_lookup[256] = {
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 00
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 10
   0,  2,  0,  0,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  4,  // 20
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  0,  2,  0,  8,  // 30
   2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 40
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  2,  // 50
   0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 60
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  2,  0,  // 70

   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 80
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 90
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // a0
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // b0
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // c0
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // d0
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // e0
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // f0
};
63
64
static inline bool urn_char_is_alphanum(unsigned const char ch)
65
0
{
66
0
  return ((urn_char_lookup[ch] & urn_alphanum_char_mask) != 0);
67
0
}
68
69
static inline bool urn_char_is_pchar(unsigned const char ch)
70
0
{
71
0
  return ((urn_char_lookup[ch] & urn_pchar_char_mask) != 0);
72
0
}
73
74
static inline bool urn_char_is_pchar_slash(unsigned const char ch)
75
0
{
76
0
  return ((urn_char_lookup[ch] & urn_pchar_slash_char_mask) != 0);
77
0
}
78
79
static inline bool urn_char_is_component(unsigned const char ch)
80
0
{
81
0
  return ((urn_char_lookup[ch] & urn_component_char_mask) != 0);
82
0
}
83
84
static int
85
urn_parse_scheme(struct urn_parser *urn_parser, const char **scheme_r)
86
0
{
87
0
  struct uri_parser *parser = &urn_parser->parser;
88
89
0
  *scheme_r = NULL;
90
0
  if ((urn_parser->flags & URN_PARSE_SCHEME_EXTERNAL) != 0)
91
0
    return 0;
92
93
0
  if (uri_parse_scheme(parser, scheme_r) <= 0) {
94
0
    parser->cur = parser->begin;
95
0
    return -1;
96
0
  }
97
98
0
  return 1;
99
0
}
100
101
static int uri_parse_nid(struct urn_parser *urn_parser)
102
0
{
103
0
  struct uri_parser *parser = &urn_parser->parser;
104
0
  struct urn *urn = urn_parser->urn;
105
0
  const unsigned char *first = parser->cur;
106
107
  /* NID = (alphanum) 0*30(ldh) (alphanum)
108
   */
109
110
0
  if (parser->cur >= parser->end) {
111
0
    parser->error = "URN is empty";
112
0
    return -1;
113
0
  }
114
115
  /* alphanum */
116
0
  if (!urn_char_is_alphanum(*parser->cur)) {
117
0
    parser->error = p_strdup_printf(parser->pool,
118
0
      "URN NID begins with invalid character %s",
119
0
      uri_char_sanitize(*parser->cur));
120
0
    return -1;
121
0
  }
122
0
  parser->cur++;
123
124
  /* 0*30(ldh) */
125
0
  while (parser->cur < parser->end) {
126
0
    if (!urn_char_is_alphanum(*parser->cur) && *parser->cur != '-')
127
0
      break;
128
0
    if ((parser->cur - first) > 32)
129
0
      break;
130
0
    parser->cur++;
131
0
  }
132
133
  /* alphanum */
134
0
  if (parser->cur >= parser->end) {
135
0
    parser->error = "URN ends without NSS";
136
0
    return -1;
137
0
  }
138
0
  if (*parser->cur != ':') {
139
0
    if ((parser->cur - first) > 32) {
140
0
      parser->error = "URN NID is too long";
141
0
      return -1;
142
0
    }
143
0
    parser->error = p_strdup_printf(parser->pool,
144
0
      "URN NID contains invalid character %s",
145
0
      uri_char_sanitize(*parser->cur));
146
0
    return -1;
147
0
  }
148
0
  if (!urn_char_is_alphanum(*(parser->cur - 1))) {
149
0
    parser->error = p_strdup_printf(parser->pool,
150
0
      "URN NID ends with invalid character %s",
151
0
      uri_char_sanitize(*(parser->cur - 1)));
152
0
    return -1;
153
0
  }
154
0
  if ((parser->cur - first) < 2) {
155
0
    parser->error = "URN NID is too short";
156
0
    return -1;
157
0
  }
158
159
0
  if (urn != NULL)
160
0
    urn->nid = p_strdup_until(parser->pool, first, parser->cur);
161
0
  return 0;
162
0
}
163
164
static int uri_parse_nss(struct urn_parser *urn_parser)
165
0
{
166
0
  struct uri_parser *parser = &urn_parser->parser;
167
0
  struct urn *urn = urn_parser->urn;
168
0
  const unsigned char *first = parser->cur;
169
0
  string_t *nss = NULL;
170
0
  int ret;
171
172
  /* NSS = pchar *(pchar / "/")
173
   */
174
175
0
  if (parser->cur >= parser->end) {
176
0
    parser->error = "URN NSS is empty";
177
0
    return -1;
178
0
  }
179
180
0
  if (!urn_char_is_pchar(*parser->cur)) {
181
0
    parser->error = p_strdup_printf(parser->pool,
182
0
      "URN NSS begins with invalid character %s",
183
0
      uri_char_sanitize(*parser->cur));
184
0
    return -1;
185
0
  }
186
0
  parser->cur++;
187
188
0
  if (urn != NULL)
189
0
    nss = t_str_new(128);
190
191
  /* pchar *( pchar / "/" / "?" ) */
192
0
  while (parser->cur < parser->end) {
193
0
    if (*parser->cur == '%') {
194
0
      const unsigned char *pct = parser->cur;
195
0
      unsigned char ch = 0;
196
197
0
      ret = uri_parse_pct_encoded(parser, &ch);
198
0
      if (ret < 0)
199
0
        return -1;
200
0
      i_assert(ret > 0);
201
202
0
      if (urn != NULL) {
203
0
        str_append_data(nss, first, pct - first);
204
0
        if (!urn_parser->normalizing)
205
0
          str_append_c(nss, ch);
206
0
        else
207
0
          str_printfa(nss, "%%%02X", ch);
208
0
        first = parser->cur;
209
0
      }
210
0
      continue;
211
0
    }
212
0
    if (!urn_char_is_pchar_slash(*parser->cur))
213
0
      break;
214
0
    parser->cur++;
215
0
  }
216
217
0
  if (parser->cur < parser->end &&
218
0
      *parser->cur != '?' && *parser->cur != '#') {
219
0
    parser->error = p_strdup_printf(parser->pool,
220
0
      "URN NSS contains invalid character %s",
221
0
      uri_char_sanitize(*parser->cur));
222
0
    return -1;
223
0
  }
224
0
  if (urn != NULL) {
225
0
    str_append_data(nss, first, parser->cur - first);
226
0
    urn->nss = p_strdup(parser->pool, str_c(nss));
227
0
  }
228
0
  return 0;
229
0
}
230
231
static int urn_parse_assigned_name(struct urn_parser *urn_parser)
232
0
{
233
0
  struct uri_parser *parser = &urn_parser->parser;
234
0
  struct urn *urn = urn_parser->urn;
235
0
  const unsigned char *first = parser->cur;
236
237
  /* assigned-name = "urn" ":" NID ":" NSS
238
     NID           = (alphanum) 0*30(ldh) (alphanum)
239
     ldh           = alphanum / "-"
240
     NSS           = pchar *(pchar / "/")
241
242
     The "urn:" prefix is already parsed at this point.
243
   */
244
245
  /* NID */
246
0
  if (uri_parse_nid(urn_parser) < 0)
247
0
    return -1;
248
249
  /* : */
250
0
  i_assert(*parser->cur == ':');
251
0
  parser->cur++;
252
253
  /* NSS */
254
0
  if (uri_parse_nss(urn_parser) < 0)
255
0
    return -1;
256
257
0
  if (urn != NULL && !urn_parser->normalizing) {
258
0
    urn->assigned_name = p_strconcat(parser->pool,
259
0
      "urn:", t_strdup_until(first, parser->cur), NULL);
260
0
  }
261
0
  return 0;
262
0
}
263
264
static int
265
urn_parse_rq_component(struct urn_parser *urn_parser, bool query,
266
           const char **comp_r)
267
0
{
268
0
  struct uri_parser *parser = &urn_parser->parser;
269
0
  const unsigned char *first = parser->cur;
270
0
  int ret;
271
272
  /* rq-components = [ "?+" r-component ]
273
                   [ "?=" q-component ]
274
     r-component   = pchar *( pchar / "/" / "?" )
275
     q-component   = pchar *( pchar / "/" / "?" )
276
   */
277
278
  /* "?" */
279
0
  if (parser->cur >= parser->end || *parser->cur != '?')
280
0
    return 0;
281
0
  parser->cur++;
282
283
  /* "+" / "=" */
284
0
  if (parser->cur >= parser->end) {
285
0
    parser->error = "URN assinged name ends in bare '?'";
286
0
    return -1;
287
0
  } else if (query && *parser->cur == '+') {
288
0
    parser->error = p_strdup_printf(parser->pool,
289
0
            "URN has second R component");
290
0
    return -1;
291
0
  } else if (!query && *parser->cur == '=') {
292
0
    parser->cur = first;
293
0
    return 0;
294
0
  } else if (*parser->cur != '+' && *parser->cur != '=' ) {
295
0
    parser->error = p_strdup_printf(parser->pool,
296
0
      "URN %sQ component starts with invalid character %s",
297
0
      (query ? "" : "R or "),
298
0
      uri_char_sanitize(*parser->cur));
299
0
    return -1;
300
0
  }
301
0
  parser->cur++;
302
303
  /* pchar *( pchar / "/" / "?" ) */
304
0
  while (parser->cur < parser->end) {
305
0
    if (*parser->cur == '%') {
306
0
      unsigned char ch = 0;
307
308
0
      ret = uri_parse_pct_encoded(parser, &ch);
309
0
      if (ret < 0)
310
0
        return -1;
311
0
      if (ret > 0)
312
0
        continue;
313
0
    }
314
0
    if (*parser->cur == '?' && !query &&
315
0
        parser->cur < parser->end && *(parser->cur + 1) == '=')
316
0
      break;
317
0
    if (!urn_char_is_component(*parser->cur))
318
0
      break;
319
0
    parser->cur++;
320
0
  }
321
322
0
  if (!parser->parse_prefix && parser->cur < parser->end &&
323
0
      (query || *parser->cur != '?') &&
324
0
      *parser->cur != '#') {
325
0
    parser->error = p_strdup_printf(parser->pool,
326
0
      "%s component contains invalid character %s",
327
0
      (query ? "Q" : "R"),
328
0
      uri_char_sanitize(*parser->cur));
329
0
    return -1;
330
0
  }
331
332
0
  if (comp_r != NULL && !urn_parser->normalizing)
333
0
    *comp_r = p_strdup_until(parser->pool, first+2, parser->cur);
334
0
  return 1;
335
0
}
336
337
static int urn_parse_f_component(struct urn_parser *urn_parser)
338
0
{
339
0
  struct uri_parser *parser = &urn_parser->parser;
340
0
  struct urn *urn = urn_parser->urn;
341
0
  const char *fragment;
342
0
  int ret;
343
344
  /* [ "#" f-component ]
345
     f-component   = fragment
346
   */
347
348
0
  ret = uri_parse_fragment(parser, &fragment);
349
0
  if (ret < 0)
350
0
    return -1;
351
0
  if (urn == NULL)
352
0
    return 0;
353
0
  if (ret > 0 && !urn_parser->normalizing)
354
0
    urn->enc_f_component = p_strdup(parser->pool, fragment);
355
0
  return ret;
356
0
}
357
358
static int urn_do_parse(struct urn_parser *urn_parser)
359
0
{
360
0
  struct uri_parser *parser = &urn_parser->parser;
361
0
  struct urn *urn = urn_parser->urn;
362
0
  const char *scheme;
363
0
  int ret;
364
365
  /* "urn:" */
366
0
  ret = urn_parse_scheme(urn_parser, &scheme);
367
0
  if (ret < 0) {
368
0
    parser->error = "Not a valid URI";
369
0
    return -1;
370
0
  }
371
0
  if (ret > 0) {
372
0
    i_assert(scheme != NULL);
373
0
    if (strcasecmp(scheme, "urn") != 0) {
374
0
      parser->error = "Not an URN";
375
0
      return -1;
376
0
    }
377
0
  }
378
379
  /* assigned-name ("urn:" already parsed) */
380
0
  if (urn_parse_assigned_name(urn_parser) < 0)
381
0
    return -1;
382
383
  /* [ "?+" r-component ] */
384
0
  if (urn_parse_rq_component(urn_parser, FALSE,
385
0
           (urn == NULL ?
386
0
            NULL : &urn->enc_r_component)) < 0)
387
0
    return -1;
388
389
  /* [ "?=" q-component ] */
390
0
  if (urn_parse_rq_component(urn_parser, TRUE,
391
0
           (urn == NULL ?
392
0
            NULL : &urn->enc_q_component)) < 0)
393
0
    return -1;
394
395
  /* [ "#" f-component ] */
396
0
  if (urn_parse_f_component(urn_parser) < 0)
397
0
    return -1;
398
399
  /* must be at end of URN now */
400
0
  i_assert(parser->cur == parser->end);
401
402
0
  return 0;
403
0
}
404
405
/* Parse the URN string `urn`.

   flags   - parse flags (e.g. URN_PARSE_SCHEME_EXTERNAL when the "urn:"
             prefix is not part of the input)
   pool    - pool from which the result is allocated; must not be NULL when
             urn_r is not NULL
   urn_r   - when not NULL, receives the parsed URN on success; when NULL the
             input is only validated
   error_r - set to NULL on entry and to an error message on failure

   Returns 0 on success and -1 on failure. */
int urn_parse(const char *urn, enum urn_parse_flags flags, pool_t pool,
              struct urn **urn_r, const char **error_r)
{
  struct urn_parser urn_parser;
  pool_t parser_pool;

  *error_r = NULL;

  i_assert(urn_r == NULL || pool != NULL);

  /* The parser formats error messages using parser->pool, also when only
     validating. Fall back to the data stack pool when the caller did not
     provide one, as urn_normalize() does.
     NOTE(review): presumably p_strdup_printf() cannot take a NULL pool -
     confirm. */
  parser_pool = (pool != NULL ? pool : pool_datastack_create());

  i_zero(&urn_parser);
  uri_parser_init(&urn_parser.parser, parser_pool, urn);

  urn_parser.urn = (urn_r == NULL ? NULL : p_new(pool, struct urn, 1));
  urn_parser.flags = flags;

  if (urn_do_parse(&urn_parser) < 0) {
    *error_r = urn_parser.parser.error;
    return -1;
  }
  if (urn_r != NULL)
    *urn_r = urn_parser.urn;
  return 0;
}
428
429
int urn_validate(const char *urn, enum urn_parse_flags flags,
430
     const char **error_r)
431
0
{
432
0
  return urn_parse(urn, flags, NULL, NULL, error_r);
433
0
}
434
435
/*
436
 * URN construction
437
 */
438
439
const char *urn_create(const struct urn *urn)
440
0
{
441
0
  string_t *urnstr = t_str_new(512);
442
443
0
  uri_append_scheme(urnstr, "urn");
444
0
  if (urn->nid != NULL) {
445
0
    i_assert(urn->nss != NULL);
446
0
    str_append(urnstr, urn->nid);
447
0
    str_append_c(urnstr, ':');
448
0
    i_assert(*urn->nss != '/');
449
0
    uri_data_encode(urnstr, urn_char_lookup,
450
0
        urn_pchar_slash_char_mask, "", urn->nss);
451
0
  } else {
452
0
    const char *suffix, *nid_end, *nss;
453
454
0
    i_assert(urn->assigned_name != NULL);
455
0
    if (!str_begins_icase(urn->assigned_name, "urn:", &suffix))
456
0
      i_unreached();
457
0
    nid_end = strchr(suffix, ':');
458
0
    i_assert(nid_end != NULL);
459
0
    nss = nid_end + 1;
460
0
    i_assert(*nss != '/');
461
0
    str_append(urnstr, suffix);
462
0
  }
463
464
  /* r-component (pre-encoded) */
465
0
  if (urn->enc_r_component != NULL) {
466
0
    str_append(urnstr, "?+");
467
0
    str_append(urnstr, urn->enc_r_component);
468
0
  }
469
  /* q-component (pre-encoded) */
470
0
  if (urn->enc_q_component != NULL) {
471
0
    str_append(urnstr, "?=");
472
0
    str_append(urnstr, urn->enc_q_component);
473
0
  }
474
475
  /* fragment (pre-encoded) */
476
0
  if (urn->enc_f_component != NULL) {
477
0
    str_append_c(urnstr, '#');
478
0
    str_append(urnstr, urn->enc_f_component);
479
0
  }
480
481
0
  return str_c(urnstr);
482
0
}
483
484
/*
485
 * URN normalization and equality
486
 */
487
488
int urn_normalize(const char *urn_in, enum urn_parse_flags flags,
489
      const char **urn_out_r, const char **error_r)
490
0
{
491
0
  struct urn_parser urn_parser;
492
0
  struct urn urn;
493
494
0
  *error_r = NULL;
495
496
0
  i_zero(&urn_parser);
497
0
  uri_parser_init(&urn_parser.parser, pool_datastack_create(), urn_in);
498
499
0
  urn_parser.urn = &urn;
500
0
  urn_parser.flags = flags;
501
0
  urn_parser.normalizing = TRUE;
502
503
0
  if (urn_do_parse(&urn_parser) < 0) {
504
0
    *error_r = urn_parser.parser.error;
505
0
    return -1;
506
0
  }
507
508
0
  string_t *urnstr = t_str_new(512);
509
510
0
  if ((flags & URN_PARSE_SCHEME_EXTERNAL) == 0)
511
0
    uri_append_scheme(urnstr, "urn");
512
513
0
  i_assert(urn.nss != NULL);
514
0
  str_append(urnstr, t_str_lcase(urn.nid));
515
0
  str_append_c(urnstr, ':');
516
0
  i_assert(*urn.nss != '/');
517
0
  str_append(urnstr, urn.nss);
518
519
0
  *urn_out_r = str_c(urnstr);
520
0
  return 0;
521
0
}
522
523
int urn_equals(const char *urn1, const char *urn2, enum urn_parse_flags flags,
524
         const char **error_r)
525
0
{
526
0
  const char *urn1n, *urn2n;
527
528
0
  if (urn_normalize(urn1, flags, &urn1n, error_r) < 0)
529
0
    return -1;
530
0
  if (urn_normalize(urn2, flags, &urn2n, error_r) < 0)
531
0
    return -1;
532
533
0
  if (strcmp(urn1n, urn2n) == 0)
534
0
    return 1;
535
0
  return 0;
536
0
}