/src/dovecot/src/lib/punycode.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2024 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "array.h" |
5 | | #include "str.h" |
6 | | #include "unichar.h" |
7 | | #include "punycode.h" |
8 | | |
9 | | /* Bootstring parameters for Punycode */ |
10 | | |
11 | | static const unsigned int base = 36; /* maximum basic code point */ |
12 | | static const unsigned int tmin = 1; |
13 | | static const unsigned int tmax = 26; |
14 | | static const unsigned int skew = 38; |
15 | | static const unsigned int damp = 700; |
16 | | static const unsigned int initialBias = 72; |
17 | | static const unsigned int initialN = 0x80; |
18 | | static const unsigned int delimiter = u'-'; |
19 | | |
20 | | /* |
21 | | code points digit-values |
22 | | ------------ ---------------------- |
23 | | 41..5A (A-Z) = 0 to 25, respectively |
24 | | 61..7A (a-z) = 0 to 25, respectively |
25 | | 30..39 (0-9) = 26 to 35, respectively |
26 | | */ |
27 | | static inline unsigned int decode_digit(unsigned char cp) |
28 | 0 | { |
29 | 0 | if (cp >= '0' && cp <= '9') |
30 | 0 | return cp - u'0' + 26; |
31 | 0 | else if (cp >= 'A' && cp <= 'Z') |
32 | 0 | return cp - u'A'; |
33 | 0 | else if (cp >= 'a' && cp <= 'z') |
34 | 0 | return cp - u'a'; |
35 | 0 | else |
36 | 0 | return base; |
37 | 0 | } |
38 | | |
39 | | /* Bias adaptation function */ |
40 | | |
41 | | static unsigned int adapt(unsigned int delta, unsigned int numpoints, bool firsttime) |
42 | 0 | { |
43 | 0 | unsigned int k; |
44 | |
|
45 | 0 | delta = firsttime ? delta / damp : delta >> 1; |
46 | | /* delta >> 1 is a faster way of doing delta / 2 */ |
47 | 0 | delta += delta / numpoints; |
48 | |
|
49 | 0 | for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) |
50 | 0 | delta /= base - tmin; |
51 | |
|
52 | 0 | return k + (base - tmin + 1) * delta / (delta + skew); |
53 | 0 | } |
54 | | |
55 | | /* Decodes a punycoded string into output, or returns -1 on error. */ |
56 | | int punycode_decode(const char *input, size_t len, string_t *output) |
57 | 0 | { |
58 | 0 | ARRAY(unichar_t) label; |
59 | 0 | size_t i = 0; |
60 | 0 | size_t out = 0; |
61 | 0 | unsigned int n = initialN, bias = initialBias; |
62 | 0 | const char *delim = NULL; |
63 | 0 | const char *end = CONST_PTR_OFFSET(input, len); |
64 | 0 | const char *ptr = input; |
65 | 0 | t_array_init(&label, len); |
66 | | |
67 | | /* find the rightmost delimiter, if present in string */ |
68 | 0 | delim = strrchr(ptr, delimiter); |
69 | 0 | i_assert(delim == NULL || delim < end); |
70 | | |
71 | | /* no delimiter found, reset to start of string */ |
72 | 0 | if (delim == NULL) |
73 | 0 | delim = input; |
74 | 0 | i_assert(delim <= end); |
75 | | |
76 | 0 | for (ptr = input; ptr < delim; ptr++) { |
77 | 0 | if ((unsigned char)*ptr >= 0x80) |
78 | | /* Has non-ascii input, this cannot be punycoded. */ |
79 | 0 | return -1; |
80 | 0 | i_assert(out < sizeof(label)); |
81 | | /* Add basic code points to label */ |
82 | 0 | unichar_t ch = (unsigned char)*ptr; |
83 | 0 | array_push_back(&label, &ch); |
84 | 0 | } |
85 | | |
86 | 0 | out = array_count(&label); |
87 | | |
88 | | /* Main decoding loop: start from after delimiter */ |
89 | 0 | if (delim != input) |
90 | 0 | ptr = delim + 1; |
91 | 0 | else |
92 | 0 | ptr = input; |
93 | |
|
94 | 0 | i_assert(ptr < end); |
95 | 0 | while (ptr < end) { |
96 | 0 | unsigned int oldi, w, k, digit, t; |
97 | | /* Decode a generalized variable-length integer into delta, |
98 | | which gets added to i. The overflow checking is easier if |
99 | | we increase i as we go, then subtract off its starting |
100 | | value at the end to obtain delta. */ |
101 | |
|
102 | 0 | oldi = i; |
103 | 0 | w = 1; |
104 | 0 | k = base; |
105 | |
|
106 | 0 | while (ptr <= end) { |
107 | | /* ptr points to next digit to decode */ |
108 | 0 | digit = decode_digit(*ptr++); |
109 | 0 | if (digit >= base) |
110 | 0 | return -1; |
111 | 0 | if (digit > (UINT_MAX - i) / w) |
112 | 0 | return -1; |
113 | 0 | i += digit * w; |
114 | 0 | t = k <= bias ? tmin : |
115 | 0 | k >= bias + tmax ? tmax : k - bias; |
116 | 0 | if (digit < t) |
117 | 0 | break; |
118 | 0 | if (w > UINT_MAX / (base - t)) |
119 | 0 | return -1; |
120 | 0 | w *= (base - t); |
121 | 0 | k += base; |
122 | 0 | } |
123 | | |
124 | 0 | bias = adapt(i - oldi, out + 1, oldi == 0); |
125 | | |
126 | | /* i was supposed to wrap around from out+1 to 0, incrementing |
127 | | n each time, so we'll fix that now: */ |
128 | |
|
129 | 0 | if (i / (out + 1) > UINT_MAX - n) |
130 | 0 | return -1; |
131 | | |
132 | 0 | n += i / (out + 1); |
133 | 0 | i %= (out + 1); |
134 | |
|
135 | 0 | if (n < initialN) |
136 | 0 | return -1; |
137 | | |
138 | | /* Insert n at position i of the output: */ |
139 | 0 | if (i <= out) { |
140 | 0 | out++; |
141 | 0 | array_insert(&label, i, &n, 1); |
142 | 0 | } else |
143 | 0 | return -1; |
144 | | |
145 | 0 | i++; |
146 | 0 | } |
147 | | |
148 | 0 | uni_ucs4_to_utf8(array_front(&label), out, output); |
149 | 0 | return 0; |
150 | 0 | } |