/src/dcmtk/oficonv/libsrc/citrus_csmapper.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c)2003 Citrus Project, |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | | * SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | #include "dcmtk/config/osconfig.h" |
28 | | #include "citrus_csmapper.h" |
29 | | |
30 | | |
31 | | #include <sys/types.h> |
32 | | #ifdef HAVE_SYS_QUEUE_H |
33 | | #include <sys/queue.h> |
34 | | #else |
35 | | #include "dcmtk/oficonv/queue.h" |
36 | | #endif |
37 | | |
38 | | |
39 | | #include <errno.h> |
40 | | #include <limits.h> |
41 | | #include <stdio.h> |
42 | | #include <stdlib.h> |
43 | | #include <string.h> |
44 | | |
45 | | #include "citrus_bcs.h" |
46 | | #include "citrus_types.h" |
47 | | #include "citrus_region.h" |
48 | | #include "citrus_lock.h" |
49 | | #include "citrus_memstream.h" |
50 | | #include "citrus_mmap.h" |
51 | | #include "citrus_module.h" |
52 | | #include "citrus_hash.h" |
53 | | #include "citrus_mapper.h" |
54 | | #include "citrus_pivot_file.h" |
55 | | #include "citrus_db.h" |
56 | | #include "citrus_db_hash.h" |
57 | | #include "citrus_lookup.h" |
58 | | #include "oficonv_strlcpy.h" |
59 | | |
60 | | static struct _citrus_mapper_area *maparea = NULL; |
61 | | |
62 | | #ifdef WITH_THREADS |
63 | | #ifdef HAVE_WINDOWS_H |
64 | | static SRWLOCK ma_lock = SRWLOCK_INIT; |
65 | | #elif defined(HAVE_PTHREAD_H) |
66 | | static pthread_rwlock_t ma_lock = PTHREAD_RWLOCK_INITIALIZER; |
67 | | #endif |
68 | | #endif |
69 | | |
70 | | #define CS_ALIAS_FILENAME "charset.alias" |
71 | 0 | #define CS_PIVOT_FILENAME "charset.pivot" |
72 | | |
73 | | /* ---------------------------------------------------------------------- */ |
74 | | |
75 | | /* write the full path to the CSMAPPER directory and the given filename (which may be NULL) |
76 | | * to the path_out buffer, which is expected to be OFICONV_PATH_MAX in size. |
77 | | */ |
78 | | static void getCSMapperPath(char *path_out, size_t path_size, const char *filename) |
79 | 0 | { |
80 | 0 | get_data_path(path_out, path_size, OFICONV_CSMAPPER_DIR, filename); |
81 | 0 | } |
82 | | |
83 | | |
84 | | static int |
85 | | get32(struct _citrus_region *r, uint32_t *rval) |
86 | 0 | { |
87 | |
|
88 | 0 | if (_citrus_region_size(r) != 4) |
89 | 0 | return (EFTYPE); |
90 | | |
91 | 0 | memcpy(rval, _citrus_region_head(r), (size_t)4); |
92 | 0 | *rval = be32toh(*rval); |
93 | |
|
94 | 0 | return (0); |
95 | 0 | } |
96 | | |
97 | | static int |
98 | | open_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src) |
99 | 0 | { |
100 | 0 | struct _citrus_region r; |
101 | 0 | int ret; |
102 | |
|
103 | 0 | ret = _citrus_db_lookup_by_string(db, src, &r, NULL); |
104 | 0 | if (ret) |
105 | 0 | return (ret); |
106 | 0 | ret = _citrus_db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _citrus_db_hash_std, NULL); |
107 | 0 | if (ret) |
108 | 0 | return (ret); |
109 | | |
110 | 0 | return (0); |
111 | 0 | } |
112 | | |
113 | | |
114 | 0 | #define NO_SUCH_FILE EOPNOTSUPP |
115 | | static int |
116 | | find_best_pivot_pvdb(const char *src, const char *dst, char *pivot, |
117 | | size_t pvlen, unsigned long *rnorm) |
118 | 0 | { |
119 | 0 | struct _citrus_db *db1, *db2, *db3; |
120 | 0 | struct _citrus_region fr, r1, r2; |
121 | 0 | char buf[LINE_MAX]; |
122 | 0 | char mapper_path[OFICONV_PATH_MAX]; |
123 | 0 | uint32_t val32; |
124 | 0 | unsigned long norm; |
125 | 0 | int i, num, ret; |
126 | |
|
127 | 0 | getCSMapperPath(mapper_path, sizeof(mapper_path), CS_PIVOT_FILENAME ".pvdb"); |
128 | 0 | ret = _citrus_map_file(&fr, mapper_path); |
129 | 0 | if (ret) { |
130 | 0 | if (ret == ENOENT) |
131 | 0 | ret = NO_SUCH_FILE; |
132 | 0 | return (ret); |
133 | 0 | } |
134 | 0 | ret = _citrus_db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _citrus_db_hash_std, NULL); |
135 | 0 | if (ret) |
136 | 0 | goto quit1; |
137 | 0 | ret = open_subdb(&db2, db1, src); |
138 | 0 | if (ret) |
139 | 0 | goto quit2; |
140 | | |
141 | 0 | num = _citrus_db_get_number_of_entries(db2); |
142 | 0 | *rnorm = ULONG_MAX; |
143 | 0 | for (i = 0; i < num; i++) { |
144 | | /* iterate each pivot */ |
145 | 0 | ret = _citrus_db_get_entry(db2, i, &r1, &r2); |
146 | 0 | if (ret) |
147 | 0 | goto quit3; |
148 | | /* r1:pivot name, r2:norm among src and pivot */ |
149 | 0 | ret = get32(&r2, &val32); |
150 | 0 | if (ret) |
151 | 0 | goto quit3; |
152 | 0 | norm = val32; |
153 | 0 | snprintf(buf, sizeof(buf), "%.*s", |
154 | 0 | (int)_citrus_region_size(&r1), (char *)_citrus_region_head(&r1)); |
155 | | /* buf: pivot name */ |
156 | 0 | ret = open_subdb(&db3, db1, buf); |
157 | 0 | if (ret) |
158 | 0 | goto quit3; |
159 | 0 | if (_citrus_db_lookup_by_string(db3, dst, &r2, NULL) != 0) |
160 | | /* don't break the loop, test all src/dst pairs. */ |
161 | 0 | goto quit4; |
162 | | /* r2: norm among pivot and dst */ |
163 | 0 | ret = get32(&r2, &val32); |
164 | 0 | if (ret) |
165 | 0 | goto quit4; |
166 | 0 | norm += val32; |
167 | | /* judge minimum norm */ |
168 | 0 | if (norm < *rnorm) { |
169 | 0 | *rnorm = norm; |
170 | 0 | strlcpy(pivot, buf, pvlen); |
171 | 0 | } |
172 | 0 | quit4: |
173 | 0 | _citrus_db_close(db3); |
174 | 0 | if (ret) |
175 | 0 | goto quit3; |
176 | 0 | } |
177 | 0 | quit3: |
178 | 0 | _citrus_db_close(db2); |
179 | 0 | quit2: |
180 | 0 | _citrus_db_close(db1); |
181 | 0 | quit1: |
182 | 0 | _citrus_unmap_file(&fr); |
183 | 0 | if (ret) |
184 | 0 | return (ret); |
185 | | |
186 | 0 | if (*rnorm == ULONG_MAX) |
187 | 0 | return (ENOENT); |
188 | | |
189 | 0 | return (0); |
190 | 0 | } |
191 | | |
192 | | /* ---------------------------------------------------------------------- */ |
193 | | |
/* A span of characters inside a larger buffer; not NUL-terminated. */
struct zone {
    const char *begin;  /* first character of the span */
    const char *end;    /* one past the last character of the span */
};
197 | | |
198 | | struct parse_arg { |
199 | | char dst[OFICONV_PATH_MAX]; |
200 | | unsigned long norm; |
201 | | }; |
202 | | |
203 | | static int |
204 | | parse_line(struct parse_arg *pa, struct _citrus_region *r) |
205 | 0 | { |
206 | 0 | struct zone z1, z2; |
207 | 0 | char buf[20]; |
208 | 0 | size_t len; |
209 | |
|
210 | 0 | len = _citrus_region_size(r); |
211 | 0 | z1.begin = _citrus_bcs_skip_ws_len(_citrus_region_head(r), &len); |
212 | 0 | if (len == 0) |
213 | 0 | return (EFTYPE); |
214 | 0 | z1.end = _citrus_bcs_skip_nonws_len(z1.begin, &len); |
215 | 0 | if (len == 0) |
216 | 0 | return (EFTYPE); |
217 | 0 | z2.begin = _citrus_bcs_skip_ws_len(z1.end, &len); |
218 | 0 | if (len == 0) |
219 | 0 | return (EFTYPE); |
220 | 0 | z2.end = _citrus_bcs_skip_nonws_len(z2.begin, &len); |
221 | | |
222 | | /* z1 : dst name, z2 : norm */ |
223 | 0 | snprintf(pa->dst, sizeof(pa->dst), |
224 | 0 | "%.*s", (int)(z1.end-z1.begin), z1.begin); |
225 | 0 | snprintf(buf, sizeof(buf), |
226 | 0 | "%.*s", (int)(z2.end-z2.begin), z2.begin); |
227 | 0 | pa->norm = _citrus_bcs_strtoul(buf, NULL, 0); |
228 | |
|
229 | 0 | return (0); |
230 | 0 | } |
231 | | |
232 | | static int |
233 | | find_dst(struct parse_arg *pasrc, const char *dst) |
234 | 0 | { |
235 | 0 | struct _citrus_lookup *cl; |
236 | 0 | struct parse_arg padst; |
237 | 0 | struct _citrus_region data; |
238 | 0 | int ret; |
239 | 0 | char mapper_path[OFICONV_PATH_MAX]; |
240 | |
|
241 | 0 | getCSMapperPath(mapper_path, sizeof(mapper_path), CS_PIVOT_FILENAME); |
242 | 0 | ret = _citrus_lookup_seq_open(&cl, mapper_path, _CITRUS_LOOKUP_CASE_IGNORE); |
243 | 0 | if (ret) |
244 | 0 | return (ret); |
245 | | |
246 | 0 | ret = _citrus_lookup_seq_lookup(cl, pasrc->dst, &data); |
247 | 0 | while (ret == 0) { |
248 | 0 | ret = parse_line(&padst, &data); |
249 | 0 | if (ret) |
250 | 0 | break; |
251 | 0 | if (strcmp(dst, padst.dst) == 0) { |
252 | 0 | pasrc->norm += padst.norm; |
253 | 0 | break; |
254 | 0 | } |
255 | 0 | ret = _citrus_lookup_seq_next(cl, NULL, &data); |
256 | 0 | } |
257 | 0 | _citrus_lookup_seq_close(cl); |
258 | |
|
259 | 0 | return (ret); |
260 | 0 | } |
261 | | |
262 | | static int |
263 | | find_best_pivot_lookup(const char *src, const char *dst, char *pivot, |
264 | | size_t pvlen, unsigned long *rnorm) |
265 | 0 | { |
266 | 0 | struct _citrus_lookup *cl; |
267 | 0 | struct _citrus_region data; |
268 | 0 | struct parse_arg pa; |
269 | 0 | char pivot_min[OFICONV_PATH_MAX]; |
270 | 0 | unsigned long norm_min; |
271 | 0 | int ret; |
272 | 0 | char mapper_path[OFICONV_PATH_MAX]; |
273 | |
|
274 | 0 | getCSMapperPath(mapper_path, sizeof(mapper_path), CS_PIVOT_FILENAME); |
275 | 0 | ret = _citrus_lookup_seq_open(&cl, mapper_path, _CITRUS_LOOKUP_CASE_IGNORE); |
276 | 0 | if (ret) |
277 | 0 | return (ret); |
278 | | |
279 | 0 | norm_min = ULONG_MAX; |
280 | | |
281 | | /* find pivot code */ |
282 | 0 | ret = _citrus_lookup_seq_lookup(cl, src, &data); |
283 | 0 | while (ret == 0) { |
284 | 0 | ret = parse_line(&pa, &data); |
285 | 0 | if (ret) |
286 | 0 | break; |
287 | 0 | ret = find_dst(&pa, dst); |
288 | 0 | if (ret) |
289 | 0 | break; |
290 | 0 | if (pa.norm < norm_min) { |
291 | 0 | norm_min = pa.norm; |
292 | 0 | strlcpy(pivot_min, pa.dst, sizeof(pivot_min)); |
293 | 0 | } |
294 | 0 | ret = _citrus_lookup_seq_next(cl, NULL, &data); |
295 | 0 | } |
296 | 0 | _citrus_lookup_seq_close(cl); |
297 | |
|
298 | 0 | if (ret != ENOENT) |
299 | 0 | return (ret); |
300 | 0 | if (norm_min == ULONG_MAX) |
301 | 0 | return (ENOENT); |
302 | 0 | strlcpy(pivot, pivot_min, pvlen); |
303 | 0 | if (rnorm) |
304 | 0 | *rnorm = norm_min; |
305 | |
|
306 | 0 | return (0); |
307 | 0 | } |
308 | | |
309 | | static int |
310 | | find_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen, |
311 | | unsigned long *rnorm) |
312 | 0 | { |
313 | 0 | int ret; |
314 | |
|
315 | 0 | ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm); |
316 | 0 | if (ret == NO_SUCH_FILE) |
317 | 0 | ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm); |
318 | |
|
319 | 0 | return (ret); |
320 | 0 | } |
321 | | |
322 | | static __inline int |
323 | | open_serial_mapper(struct _citrus_mapper_area * ma, |
324 | | struct _citrus_csmapper * * rcm, |
325 | | const char *src, const char *pivot, const char *dst) |
326 | 0 | { |
327 | 0 | char buf[OFICONV_PATH_MAX]; |
328 | |
|
329 | | #if ( defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 1 ) ) ) |
330 | | #pragma GCC diagnostic push |
331 | | #pragma GCC diagnostic ignored "-Wformat-truncation" |
332 | | #endif |
333 | |
|
334 | 0 | snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst); |
335 | |
|
336 | | #if ( defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 1 ) ) ) |
337 | | #pragma GCC diagnostic pop |
338 | | #endif |
339 | |
|
340 | 0 | return (_citrus_mapper_open_direct(ma, rcm, "mapper_serial", buf)); |
341 | 0 | } |
342 | | |
343 | | static struct _citrus_csmapper *csm_none = NULL; |
344 | | static int |
345 | | get_none(struct _citrus_mapper_area * ma, |
346 | | struct _citrus_csmapper * * rcsm) |
347 | 0 | { |
348 | 0 | int ret; |
349 | |
|
350 | 0 | WLOCK(&ma_lock); |
351 | 0 | if (csm_none) { |
352 | 0 | *rcsm = csm_none; |
353 | 0 | ret = 0; |
354 | 0 | goto quit; |
355 | 0 | } |
356 | | |
357 | 0 | ret = _citrus_mapper_open_direct(ma, &csm_none, "mapper_none", ""); |
358 | 0 | if (ret) |
359 | 0 | goto quit; |
360 | 0 | _citrus_mapper_set_persistent(csm_none); |
361 | |
|
362 | 0 | *rcsm = csm_none; |
363 | 0 | ret = 0; |
364 | 0 | quit: |
365 | 0 | UNLOCK(&ma_lock); |
366 | 0 | return (ret); |
367 | 0 | } |
368 | | |
369 | | |
370 | | void _citrus_cleanup_csmapper_none() |
371 | 0 | { |
372 | 0 | WLOCK(&ma_lock); |
373 | 0 | if (csm_none) { |
374 | 0 | _citrus_mapper_close_nohash(csm_none); |
375 | 0 | csm_none = NULL; |
376 | 0 | } |
377 | 0 | UNLOCK(&ma_lock); |
378 | 0 | return; |
379 | 0 | } |
380 | | |
381 | | |
382 | | int |
383 | | _citrus_csmapper_open(struct _citrus_csmapper * * rcsm, |
384 | | const char * src, const char * dst, uint32_t flags, |
385 | | unsigned long *rnorm) |
386 | 0 | { |
387 | 0 | const char *realsrc, *realdst; |
388 | | #ifdef DCMTK_USE_OFICONV_CHARSET_ALIAS_FILE |
389 | | char buf1[OFICONV_PATH_MAX], buf2[OFICONV_PATH_MAX]; |
390 | | char alias_path[OFICONV_PATH_MAX]; |
391 | | #endif |
392 | 0 | char key[OFICONV_PATH_MAX], pivot[OFICONV_PATH_MAX]; |
393 | 0 | char mapper_path[OFICONV_PATH_MAX]; |
394 | 0 | unsigned long norm; |
395 | 0 | int ret; |
396 | 0 | norm = 0; |
397 | |
|
398 | 0 | getCSMapperPath(mapper_path, sizeof(mapper_path), NULL); |
399 | 0 | ret = _citrus_mapper_create_area(&maparea, mapper_path); |
400 | 0 | if (ret) |
401 | 0 | return (ret); |
402 | | |
403 | | #ifdef DCMTK_USE_OFICONV_CHARSET_ALIAS_FILE |
404 | | // Look up alias names in csmapper/charset.alias.db or csmapper/charset.alias |
405 | | getCSMapperPath(alias_path, sizeof(alias_path), CS_ALIAS_FILENAME); |
406 | | realsrc = _citrus_lookup_alias(alias_path, src, buf1, sizeof(buf1), _CITRUS_LOOKUP_CASE_IGNORE); |
407 | | realdst = _citrus_lookup_alias(alias_path, dst, buf2, sizeof(buf2), _CITRUS_LOOKUP_CASE_IGNORE); |
408 | | #else |
409 | | // Don't use the alias files csmapper/charset.alias.db or csmapper/charset.alias |
410 | 0 | realsrc = src; |
411 | 0 | realdst = dst; |
412 | 0 | #endif |
413 | 0 | if (!strcmp(realsrc, realdst)) { |
414 | 0 | ret = get_none(maparea, rcsm); |
415 | 0 | if (ret == 0 && rnorm != NULL) |
416 | 0 | *rnorm = 0; |
417 | 0 | return (ret); |
418 | 0 | } |
419 | | |
420 | 0 | snprintf(key, sizeof(key), "%s/%s", realsrc, realdst); |
421 | |
|
422 | 0 | ret = _citrus_mapper_open(maparea, rcsm, key); |
423 | 0 | if (ret == 0) { |
424 | 0 | if (rnorm != NULL) |
425 | 0 | *rnorm = 0; |
426 | 0 | return (0); |
427 | 0 | } |
428 | 0 | if (ret != ENOENT || (flags & _CITRUS_CSMAPPER_F_PREVENT_PIVOT)!=0) |
429 | 0 | return (ret); |
430 | | |
431 | 0 | ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm); |
432 | 0 | if (ret) |
433 | 0 | return (ret); |
434 | | |
435 | 0 | ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst); |
436 | 0 | if (ret == 0 && rnorm != NULL) |
437 | 0 | *rnorm = norm; |
438 | |
|
439 | 0 | return (ret); |
440 | 0 | } |
441 | | |
442 | | void |
443 | | _citrus_csmapper_free() |
444 | 0 | { |
445 | 0 | if (maparea) |
446 | 0 | { |
447 | 0 | _citrus_mapper_delete_area(&maparea); |
448 | 0 | } |
449 | 0 | } |