/src/postgres/src/common/encnames.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * encnames.c |
4 | | * Encoding names and routines for working with them. |
5 | | * |
6 | | * Portions Copyright (c) 2001-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * IDENTIFICATION |
9 | | * src/common/encnames.c |
10 | | * |
11 | | *------------------------------------------------------------------------- |
12 | | */ |
13 | | #include "c.h" |
14 | | |
15 | | #include <ctype.h> |
16 | | #include <unistd.h> |
17 | | |
18 | | #include "mb/pg_wchar.h" |
19 | | |
20 | | |
21 | | /* ---------- |
22 | | * All encoding names, sorted: *** A L P H A B E T I C *** |
23 | | * |
24 | | * All names must be without irrelevant chars, search routines use |
25 | | * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1 |
26 | | * are always converted to 'iso88591'. All must be lower case. |
27 | | * |
28 | | * The table doesn't contain 'cs' aliases (like csISOLatin1). It's needed? |
29 | | * |
30 | | * Karel Zak, Aug 2001 |
31 | | * ---------- |
32 | | */ |
33 | | typedef struct pg_encname |
34 | | { |
35 | | const char *name; |
36 | | pg_enc encoding; |
37 | | } pg_encname; |
38 | | |
39 | | static const pg_encname pg_encname_tbl[] = |
40 | | { |
41 | | { |
42 | | "abc", PG_WIN1258 |
43 | | }, /* alias for WIN1258 */ |
44 | | { |
45 | | "alt", PG_WIN866 |
46 | | }, /* IBM866 */ |
47 | | { |
48 | | "big5", PG_BIG5 |
49 | | }, /* Big5; Chinese for Taiwan multibyte set */ |
50 | | { |
51 | | "euccn", PG_EUC_CN |
52 | | }, /* EUC-CN; Extended Unix Code for simplified |
53 | | * Chinese */ |
54 | | { |
55 | | "eucjis2004", PG_EUC_JIS_2004 |
56 | | }, /* EUC-JIS-2004; Extended UNIX Code fixed |
57 | | * Width for Japanese, standard JIS X 0213 */ |
58 | | { |
59 | | "eucjp", PG_EUC_JP |
60 | | }, /* EUC-JP; Extended UNIX Code fixed Width for |
61 | | * Japanese, standard OSF */ |
62 | | { |
63 | | "euckr", PG_EUC_KR |
64 | | }, /* EUC-KR; Extended Unix Code for Korean , KS |
65 | | * X 1001 standard */ |
66 | | { |
67 | | "euctw", PG_EUC_TW |
68 | | }, /* EUC-TW; Extended Unix Code for |
69 | | * |
70 | | * traditional Chinese */ |
71 | | { |
72 | | "gb18030", PG_GB18030 |
73 | | }, /* GB18030;GB18030 */ |
74 | | { |
75 | | "gbk", PG_GBK |
76 | | }, /* GBK; Chinese Windows CodePage 936 |
77 | | * simplified Chinese */ |
78 | | { |
79 | | "iso88591", PG_LATIN1 |
80 | | }, /* ISO-8859-1; RFC1345,KXS2 */ |
81 | | { |
82 | | "iso885910", PG_LATIN6 |
83 | | }, /* ISO-8859-10; RFC1345,KXS2 */ |
84 | | { |
85 | | "iso885913", PG_LATIN7 |
86 | | }, /* ISO-8859-13; RFC1345,KXS2 */ |
87 | | { |
88 | | "iso885914", PG_LATIN8 |
89 | | }, /* ISO-8859-14; RFC1345,KXS2 */ |
90 | | { |
91 | | "iso885915", PG_LATIN9 |
92 | | }, /* ISO-8859-15; RFC1345,KXS2 */ |
93 | | { |
94 | | "iso885916", PG_LATIN10 |
95 | | }, /* ISO-8859-16; RFC1345,KXS2 */ |
96 | | { |
97 | | "iso88592", PG_LATIN2 |
98 | | }, /* ISO-8859-2; RFC1345,KXS2 */ |
99 | | { |
100 | | "iso88593", PG_LATIN3 |
101 | | }, /* ISO-8859-3; RFC1345,KXS2 */ |
102 | | { |
103 | | "iso88594", PG_LATIN4 |
104 | | }, /* ISO-8859-4; RFC1345,KXS2 */ |
105 | | { |
106 | | "iso88595", PG_ISO_8859_5 |
107 | | }, /* ISO-8859-5; RFC1345,KXS2 */ |
108 | | { |
109 | | "iso88596", PG_ISO_8859_6 |
110 | | }, /* ISO-8859-6; RFC1345,KXS2 */ |
111 | | { |
112 | | "iso88597", PG_ISO_8859_7 |
113 | | }, /* ISO-8859-7; RFC1345,KXS2 */ |
114 | | { |
115 | | "iso88598", PG_ISO_8859_8 |
116 | | }, /* ISO-8859-8; RFC1345,KXS2 */ |
117 | | { |
118 | | "iso88599", PG_LATIN5 |
119 | | }, /* ISO-8859-9; RFC1345,KXS2 */ |
120 | | { |
121 | | "johab", PG_JOHAB |
122 | | }, /* JOHAB; Extended Unix Code for simplified |
123 | | * Chinese */ |
124 | | { |
125 | | "koi8", PG_KOI8R |
126 | | }, /* _dirty_ alias for KOI8-R (backward |
127 | | * compatibility) */ |
128 | | { |
129 | | "koi8r", PG_KOI8R |
130 | | }, /* KOI8-R; RFC1489 */ |
131 | | { |
132 | | "koi8u", PG_KOI8U |
133 | | }, /* KOI8-U; RFC2319 */ |
134 | | { |
135 | | "latin1", PG_LATIN1 |
136 | | }, /* alias for ISO-8859-1 */ |
137 | | { |
138 | | "latin10", PG_LATIN10 |
139 | | }, /* alias for ISO-8859-16 */ |
140 | | { |
141 | | "latin2", PG_LATIN2 |
142 | | }, /* alias for ISO-8859-2 */ |
143 | | { |
144 | | "latin3", PG_LATIN3 |
145 | | }, /* alias for ISO-8859-3 */ |
146 | | { |
147 | | "latin4", PG_LATIN4 |
148 | | }, /* alias for ISO-8859-4 */ |
149 | | { |
150 | | "latin5", PG_LATIN5 |
151 | | }, /* alias for ISO-8859-9 */ |
152 | | { |
153 | | "latin6", PG_LATIN6 |
154 | | }, /* alias for ISO-8859-10 */ |
155 | | { |
156 | | "latin7", PG_LATIN7 |
157 | | }, /* alias for ISO-8859-13 */ |
158 | | { |
159 | | "latin8", PG_LATIN8 |
160 | | }, /* alias for ISO-8859-14 */ |
161 | | { |
162 | | "latin9", PG_LATIN9 |
163 | | }, /* alias for ISO-8859-15 */ |
164 | | { |
165 | | "mskanji", PG_SJIS |
166 | | }, /* alias for Shift_JIS */ |
167 | | { |
168 | | "muleinternal", PG_MULE_INTERNAL |
169 | | }, |
170 | | { |
171 | | "shiftjis", PG_SJIS |
172 | | }, /* Shift_JIS; JIS X 0202-1991 */ |
173 | | |
174 | | { |
175 | | "shiftjis2004", PG_SHIFT_JIS_2004 |
176 | | }, /* SHIFT-JIS-2004; Shift JIS for Japanese, |
177 | | * standard JIS X 0213 */ |
178 | | { |
179 | | "sjis", PG_SJIS |
180 | | }, /* alias for Shift_JIS */ |
181 | | { |
182 | | "sqlascii", PG_SQL_ASCII |
183 | | }, |
184 | | { |
185 | | "tcvn", PG_WIN1258 |
186 | | }, /* alias for WIN1258 */ |
187 | | { |
188 | | "tcvn5712", PG_WIN1258 |
189 | | }, /* alias for WIN1258 */ |
190 | | { |
191 | | "uhc", PG_UHC |
192 | | }, /* UHC; Korean Windows CodePage 949 */ |
193 | | { |
194 | | "unicode", PG_UTF8 |
195 | | }, /* alias for UTF8 */ |
196 | | { |
197 | | "utf8", PG_UTF8 |
198 | | }, /* alias for UTF8 */ |
199 | | { |
200 | | "vscii", PG_WIN1258 |
201 | | }, /* alias for WIN1258 */ |
202 | | { |
203 | | "win", PG_WIN1251 |
204 | | }, /* _dirty_ alias for windows-1251 (backward |
205 | | * compatibility) */ |
206 | | { |
207 | | "win1250", PG_WIN1250 |
208 | | }, /* alias for Windows-1250 */ |
209 | | { |
210 | | "win1251", PG_WIN1251 |
211 | | }, /* alias for Windows-1251 */ |
212 | | { |
213 | | "win1252", PG_WIN1252 |
214 | | }, /* alias for Windows-1252 */ |
215 | | { |
216 | | "win1253", PG_WIN1253 |
217 | | }, /* alias for Windows-1253 */ |
218 | | { |
219 | | "win1254", PG_WIN1254 |
220 | | }, /* alias for Windows-1254 */ |
221 | | { |
222 | | "win1255", PG_WIN1255 |
223 | | }, /* alias for Windows-1255 */ |
224 | | { |
225 | | "win1256", PG_WIN1256 |
226 | | }, /* alias for Windows-1256 */ |
227 | | { |
228 | | "win1257", PG_WIN1257 |
229 | | }, /* alias for Windows-1257 */ |
230 | | { |
231 | | "win1258", PG_WIN1258 |
232 | | }, /* alias for Windows-1258 */ |
233 | | { |
234 | | "win866", PG_WIN866 |
235 | | }, /* IBM866 */ |
236 | | { |
237 | | "win874", PG_WIN874 |
238 | | }, /* alias for Windows-874 */ |
239 | | { |
240 | | "win932", PG_SJIS |
241 | | }, /* alias for Shift_JIS */ |
242 | | { |
243 | | "win936", PG_GBK |
244 | | }, /* alias for GBK */ |
245 | | { |
246 | | "win949", PG_UHC |
247 | | }, /* alias for UHC */ |
248 | | { |
249 | | "win950", PG_BIG5 |
250 | | }, /* alias for BIG5 */ |
251 | | { |
252 | | "windows1250", PG_WIN1250 |
253 | | }, /* Windows-1251; Microsoft */ |
254 | | { |
255 | | "windows1251", PG_WIN1251 |
256 | | }, /* Windows-1251; Microsoft */ |
257 | | { |
258 | | "windows1252", PG_WIN1252 |
259 | | }, /* Windows-1252; Microsoft */ |
260 | | { |
261 | | "windows1253", PG_WIN1253 |
262 | | }, /* Windows-1253; Microsoft */ |
263 | | { |
264 | | "windows1254", PG_WIN1254 |
265 | | }, /* Windows-1254; Microsoft */ |
266 | | { |
267 | | "windows1255", PG_WIN1255 |
268 | | }, /* Windows-1255; Microsoft */ |
269 | | { |
270 | | "windows1256", PG_WIN1256 |
271 | | }, /* Windows-1256; Microsoft */ |
272 | | { |
273 | | "windows1257", PG_WIN1257 |
274 | | }, /* Windows-1257; Microsoft */ |
275 | | { |
276 | | "windows1258", PG_WIN1258 |
277 | | }, /* Windows-1258; Microsoft */ |
278 | | { |
279 | | "windows866", PG_WIN866 |
280 | | }, /* IBM866 */ |
281 | | { |
282 | | "windows874", PG_WIN874 |
283 | | }, /* Windows-874; Microsoft */ |
284 | | { |
285 | | "windows932", PG_SJIS |
286 | | }, /* alias for Shift_JIS */ |
287 | | { |
288 | | "windows936", PG_GBK |
289 | | }, /* alias for GBK */ |
290 | | { |
291 | | "windows949", PG_UHC |
292 | | }, /* alias for UHC */ |
293 | | { |
294 | | "windows950", PG_BIG5 |
295 | | } /* alias for BIG5 */ |
296 | | }; |
297 | | |
/* ----------
 * These are "official" encoding names.
 *
 * DEF_ENC2NAME(name, codepage) expands to one table initializer: the
 * stringified name followed by the matching PG_<name> constant.  The
 * codepage argument is emitted only in WIN32 builds and is dropped
 * everywhere else.
 * ----------
 */
#ifdef WIN32
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
#else
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
#endif
307 | | |
308 | | const pg_enc2name pg_enc2name_tbl[] = |
309 | | { |
310 | | [PG_SQL_ASCII] = DEF_ENC2NAME(SQL_ASCII, 0), |
311 | | [PG_EUC_JP] = DEF_ENC2NAME(EUC_JP, 20932), |
312 | | [PG_EUC_CN] = DEF_ENC2NAME(EUC_CN, 20936), |
313 | | [PG_EUC_KR] = DEF_ENC2NAME(EUC_KR, 51949), |
314 | | [PG_EUC_TW] = DEF_ENC2NAME(EUC_TW, 0), |
315 | | [PG_EUC_JIS_2004] = DEF_ENC2NAME(EUC_JIS_2004, 20932), |
316 | | [PG_UTF8] = DEF_ENC2NAME(UTF8, 65001), |
317 | | [PG_MULE_INTERNAL] = DEF_ENC2NAME(MULE_INTERNAL, 0), |
318 | | [PG_LATIN1] = DEF_ENC2NAME(LATIN1, 28591), |
319 | | [PG_LATIN2] = DEF_ENC2NAME(LATIN2, 28592), |
320 | | [PG_LATIN3] = DEF_ENC2NAME(LATIN3, 28593), |
321 | | [PG_LATIN4] = DEF_ENC2NAME(LATIN4, 28594), |
322 | | [PG_LATIN5] = DEF_ENC2NAME(LATIN5, 28599), |
323 | | [PG_LATIN6] = DEF_ENC2NAME(LATIN6, 0), |
324 | | [PG_LATIN7] = DEF_ENC2NAME(LATIN7, 0), |
325 | | [PG_LATIN8] = DEF_ENC2NAME(LATIN8, 0), |
326 | | [PG_LATIN9] = DEF_ENC2NAME(LATIN9, 28605), |
327 | | [PG_LATIN10] = DEF_ENC2NAME(LATIN10, 0), |
328 | | [PG_WIN1256] = DEF_ENC2NAME(WIN1256, 1256), |
329 | | [PG_WIN1258] = DEF_ENC2NAME(WIN1258, 1258), |
330 | | [PG_WIN866] = DEF_ENC2NAME(WIN866, 866), |
331 | | [PG_WIN874] = DEF_ENC2NAME(WIN874, 874), |
332 | | [PG_KOI8R] = DEF_ENC2NAME(KOI8R, 20866), |
333 | | [PG_WIN1251] = DEF_ENC2NAME(WIN1251, 1251), |
334 | | [PG_WIN1252] = DEF_ENC2NAME(WIN1252, 1252), |
335 | | [PG_ISO_8859_5] = DEF_ENC2NAME(ISO_8859_5, 28595), |
336 | | [PG_ISO_8859_6] = DEF_ENC2NAME(ISO_8859_6, 28596), |
337 | | [PG_ISO_8859_7] = DEF_ENC2NAME(ISO_8859_7, 28597), |
338 | | [PG_ISO_8859_8] = DEF_ENC2NAME(ISO_8859_8, 28598), |
339 | | [PG_WIN1250] = DEF_ENC2NAME(WIN1250, 1250), |
340 | | [PG_WIN1253] = DEF_ENC2NAME(WIN1253, 1253), |
341 | | [PG_WIN1254] = DEF_ENC2NAME(WIN1254, 1254), |
342 | | [PG_WIN1255] = DEF_ENC2NAME(WIN1255, 1255), |
343 | | [PG_WIN1257] = DEF_ENC2NAME(WIN1257, 1257), |
344 | | [PG_KOI8U] = DEF_ENC2NAME(KOI8U, 21866), |
345 | | [PG_SJIS] = DEF_ENC2NAME(SJIS, 932), |
346 | | [PG_BIG5] = DEF_ENC2NAME(BIG5, 950), |
347 | | [PG_GBK] = DEF_ENC2NAME(GBK, 936), |
348 | | [PG_UHC] = DEF_ENC2NAME(UHC, 949), |
349 | | [PG_GB18030] = DEF_ENC2NAME(GB18030, 54936), |
350 | | [PG_JOHAB] = DEF_ENC2NAME(JOHAB, 0), |
351 | | [PG_SHIFT_JIS_2004] = DEF_ENC2NAME(SHIFT_JIS_2004, 932), |
352 | | }; |
353 | | |
354 | | /* ---------- |
355 | | * These are encoding names for gettext. |
356 | | * |
357 | | * This covers all encodings except MULE_INTERNAL, which is alien to gettext. |
358 | | * ---------- |
359 | | */ |
360 | | const char *pg_enc2gettext_tbl[] = |
361 | | { |
362 | | [PG_SQL_ASCII] = "US-ASCII", |
363 | | [PG_UTF8] = "UTF-8", |
364 | | [PG_MULE_INTERNAL] = NULL, |
365 | | [PG_LATIN1] = "LATIN1", |
366 | | [PG_LATIN2] = "LATIN2", |
367 | | [PG_LATIN3] = "LATIN3", |
368 | | [PG_LATIN4] = "LATIN4", |
369 | | [PG_ISO_8859_5] = "ISO-8859-5", |
370 | | [PG_ISO_8859_6] = "ISO_8859-6", |
371 | | [PG_ISO_8859_7] = "ISO-8859-7", |
372 | | [PG_ISO_8859_8] = "ISO-8859-8", |
373 | | [PG_LATIN5] = "LATIN5", |
374 | | [PG_LATIN6] = "LATIN6", |
375 | | [PG_LATIN7] = "LATIN7", |
376 | | [PG_LATIN8] = "LATIN8", |
377 | | [PG_LATIN9] = "LATIN-9", |
378 | | [PG_LATIN10] = "LATIN10", |
379 | | [PG_KOI8R] = "KOI8-R", |
380 | | [PG_KOI8U] = "KOI8-U", |
381 | | [PG_WIN1250] = "CP1250", |
382 | | [PG_WIN1251] = "CP1251", |
383 | | [PG_WIN1252] = "CP1252", |
384 | | [PG_WIN1253] = "CP1253", |
385 | | [PG_WIN1254] = "CP1254", |
386 | | [PG_WIN1255] = "CP1255", |
387 | | [PG_WIN1256] = "CP1256", |
388 | | [PG_WIN1257] = "CP1257", |
389 | | [PG_WIN1258] = "CP1258", |
390 | | [PG_WIN866] = "CP866", |
391 | | [PG_WIN874] = "CP874", |
392 | | [PG_EUC_CN] = "EUC-CN", |
393 | | [PG_EUC_JP] = "EUC-JP", |
394 | | [PG_EUC_KR] = "EUC-KR", |
395 | | [PG_EUC_TW] = "EUC-TW", |
396 | | [PG_EUC_JIS_2004] = "EUC-JP", |
397 | | [PG_SJIS] = "SHIFT-JIS", |
398 | | [PG_BIG5] = "BIG5", |
399 | | [PG_GBK] = "GBK", |
400 | | [PG_UHC] = "UHC", |
401 | | [PG_GB18030] = "GB18030", |
402 | | [PG_JOHAB] = "JOHAB", |
403 | | [PG_SHIFT_JIS_2004] = "SHIFT_JISX0213", |
404 | | }; |
405 | | |
406 | | |
407 | | /* |
408 | | * Table of encoding names for ICU (currently covers backend encodings only) |
409 | | * |
410 | | * Reference: <https://ssl.icu-project.org/icu-bin/convexp> |
411 | | * |
412 | | * NULL entries are not supported by ICU, or their mapping is unclear. |
413 | | */ |
414 | | static const char *const pg_enc2icu_tbl[] = |
415 | | { |
416 | | [PG_SQL_ASCII] = NULL, |
417 | | [PG_EUC_JP] = "EUC-JP", |
418 | | [PG_EUC_CN] = "EUC-CN", |
419 | | [PG_EUC_KR] = "EUC-KR", |
420 | | [PG_EUC_TW] = "EUC-TW", |
421 | | [PG_EUC_JIS_2004] = NULL, |
422 | | [PG_UTF8] = "UTF-8", |
423 | | [PG_MULE_INTERNAL] = NULL, |
424 | | [PG_LATIN1] = "ISO-8859-1", |
425 | | [PG_LATIN2] = "ISO-8859-2", |
426 | | [PG_LATIN3] = "ISO-8859-3", |
427 | | [PG_LATIN4] = "ISO-8859-4", |
428 | | [PG_LATIN5] = "ISO-8859-9", |
429 | | [PG_LATIN6] = "ISO-8859-10", |
430 | | [PG_LATIN7] = "ISO-8859-13", |
431 | | [PG_LATIN8] = "ISO-8859-14", |
432 | | [PG_LATIN9] = "ISO-8859-15", |
433 | | [PG_LATIN10] = NULL, |
434 | | [PG_WIN1256] = "CP1256", |
435 | | [PG_WIN1258] = "CP1258", |
436 | | [PG_WIN866] = "CP866", |
437 | | [PG_WIN874] = NULL, |
438 | | [PG_KOI8R] = "KOI8-R", |
439 | | [PG_WIN1251] = "CP1251", |
440 | | [PG_WIN1252] = "CP1252", |
441 | | [PG_ISO_8859_5] = "ISO-8859-5", |
442 | | [PG_ISO_8859_6] = "ISO-8859-6", |
443 | | [PG_ISO_8859_7] = "ISO-8859-7", |
444 | | [PG_ISO_8859_8] = "ISO-8859-8", |
445 | | [PG_WIN1250] = "CP1250", |
446 | | [PG_WIN1253] = "CP1253", |
447 | | [PG_WIN1254] = "CP1254", |
448 | | [PG_WIN1255] = "CP1255", |
449 | | [PG_WIN1257] = "CP1257", |
450 | | [PG_KOI8U] = "KOI8-U", |
451 | | }; |
452 | | |
453 | | StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1, |
454 | | "pg_enc2icu_tbl incomplete"); |
455 | | |
456 | | |
457 | | /* |
458 | | * Is this encoding supported by ICU? |
459 | | */ |
460 | | bool |
461 | | is_encoding_supported_by_icu(int encoding) |
462 | 0 | { |
463 | 0 | if (!PG_VALID_BE_ENCODING(encoding)) |
464 | 0 | return false; |
465 | 0 | return (pg_enc2icu_tbl[encoding] != NULL); |
466 | 0 | } |
467 | | |
468 | | /* |
469 | | * Returns ICU's name for encoding, or NULL if not supported |
470 | | */ |
471 | | const char * |
472 | | get_encoding_name_for_icu(int encoding) |
473 | 0 | { |
474 | 0 | if (!PG_VALID_BE_ENCODING(encoding)) |
475 | 0 | return NULL; |
476 | 0 | return pg_enc2icu_tbl[encoding]; |
477 | 0 | } |
478 | | |
479 | | |
480 | | /* ---------- |
481 | | * Encoding checks, for error returns -1 else encoding id |
482 | | * ---------- |
483 | | */ |
/*
 * Look up an encoding by name and accept it only if it also passes
 * PG_VALID_FE_ENCODING().
 *
 * Returns the encoding ID, or -1 if the name is unknown or the encoding
 * fails that check.
 */
int
pg_valid_client_encoding(const char *name)
{
	int			enc = pg_char_to_encoding(name);

	if (enc < 0 || !PG_VALID_FE_ENCODING(enc))
		return -1;

	return enc;
}
497 | | |
/*
 * Look up an encoding by name and accept it only if it also passes
 * PG_VALID_BE_ENCODING().
 *
 * Returns the encoding ID, or -1 if the name is unknown or the encoding
 * fails that check.
 */
int
pg_valid_server_encoding(const char *name)
{
	int			enc = pg_char_to_encoding(name);

	if (enc < 0 || !PG_VALID_BE_ENCODING(enc))
		return -1;

	return enc;
}
511 | | |
/*
 * Report whether an encoding ID passes PG_VALID_BE_ENCODING().
 *
 * The macro's result is returned as-is, converted to int; callers should
 * treat it as a truth value.
 */
int
pg_valid_server_encoding_id(int encoding)
{
	int			is_valid = PG_VALID_BE_ENCODING(encoding);

	return is_valid;
}
517 | | |
/*
 * Normalize an encoding name into *newkey.
 *
 * Only characters accepted by isalnum() are copied; everything else is
 * dropped.  ASCII 'A'..'Z' are folded to lower case by plain arithmetic.
 * The output is always NUL-terminated and is never longer than the input.
 *
 * (Caller's responsibility to provide a large enough buffer, i.e. at least
 * strlen(key) + 1 bytes.)
 *
 * Returns newkey.
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
	char	   *out = newkey;

	while (*key != '\0')
	{
		char		c = *key++;

		/* skip separators and any other irrelevant characters */
		if (!isalnum((unsigned char) c))
			continue;

		if (c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		*out++ = c;
	}
	*out = '\0';

	return newkey;
}
542 | | |
543 | | /* |
544 | | * Search encoding by encoding name |
545 | | * |
546 | | * Returns encoding ID, or -1 if not recognized |
547 | | */ |
548 | | int |
549 | | pg_char_to_encoding(const char *name) |
550 | 2 | { |
551 | 2 | unsigned int nel = lengthof(pg_encname_tbl); |
552 | 2 | const pg_encname *base = pg_encname_tbl, |
553 | 2 | *last = base + nel - 1, |
554 | 2 | *position; |
555 | 2 | int result; |
556 | 2 | char buff[NAMEDATALEN], |
557 | 2 | *key; |
558 | | |
559 | 2 | if (name == NULL || *name == '\0') |
560 | 0 | return -1; |
561 | | |
562 | 2 | if (strlen(name) >= NAMEDATALEN) |
563 | 0 | return -1; /* it's certainly not in the table */ |
564 | | |
565 | 2 | key = clean_encoding_name(name, buff); |
566 | | |
567 | 12 | while (last >= base) |
568 | 12 | { |
569 | 12 | position = base + ((last - base) >> 1); |
570 | 12 | result = key[0] - position->name[0]; |
571 | | |
572 | 12 | if (result == 0) |
573 | 6 | { |
574 | 6 | result = strcmp(key, position->name); |
575 | 6 | if (result == 0) |
576 | 2 | return position->encoding; |
577 | 6 | } |
578 | 10 | if (result < 0) |
579 | 6 | last = position - 1; |
580 | 4 | else |
581 | 4 | base = position + 1; |
582 | 10 | } |
583 | 0 | return -1; |
584 | 2 | } |
585 | | |
586 | | const char * |
587 | | pg_encoding_to_char(int encoding) |
588 | 2 | { |
589 | 2 | if (PG_VALID_ENCODING(encoding)) |
590 | 2 | { |
591 | 2 | const pg_enc2name *p = &pg_enc2name_tbl[encoding]; |
592 | | |
593 | 2 | Assert(encoding == p->encoding); |
594 | 2 | return p->name; |
595 | 2 | } |
596 | 0 | return ""; |
597 | 2 | } |