/src/icu/source/common/unames.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | ******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 1999-2014, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | ******************************************************************************  | 
10  |  | *   file name:  unames.c  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 1999oct04  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | */  | 
18  |  |  | 
19  |  | #include "unicode/utypes.h"  | 
20  |  | #include "unicode/putil.h"  | 
21  |  | #include "unicode/uchar.h"  | 
22  |  | #include "unicode/udata.h"  | 
23  |  | #include "unicode/utf.h"  | 
24  |  | #include "unicode/utf16.h"  | 
25  |  | #include "uassert.h"  | 
26  |  | #include "ustr_imp.h"  | 
27  |  | #include "umutex.h"  | 
28  |  | #include "cmemory.h"  | 
29  |  | #include "cstring.h"  | 
30  |  | #include "ucln_cmn.h"  | 
31  |  | #include "udataswp.h"  | 
32  |  | #include "uprops.h"  | 
33  |  |  | 
34  |  | U_NAMESPACE_BEGIN  | 
35  |  |  | 
36  |  | /* prototypes ------------------------------------------------------------- */  | 
37  |  |  | 
/* Name and type of the data item, as passed to udata_openChoice(). */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of 32 consecutive code points:
 * the group index is code>>GROUP_SHIFT, the line inside the group is code&GROUP_MASK.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
44  |  |  | 
45  |  | /*  | 
46  |  |  * This struct was replaced by explicitly accessing equivalent  | 
47  |  |  * fields from triples of uint16_t.  | 
48  |  |  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,  | 
49  |  |  * which broke the assumption that sizeof(Group)==6 and that the ++ operator  | 
50  |  |  * would advance by 6 bytes (3 uint16_t).  | 
51  |  |  *  | 
52  |  |  * We can't just change the data structure because it's loaded from a data file,  | 
53  |  |  * and we don't want to make it less compact, so we changed the access code.  | 
54  |  |  *  | 
55  |  |  * For details see ICU tickets 6331 and 6008.  | 
56  |  | typedef struct { | 
57  |  |     uint16_t groupMSB,  | 
58  |  |              offsetHigh, offsetLow; / * avoid padding * /  | 
59  |  | } Group;  | 
60  |  |  */  | 
/* Indexes of the fields inside one group triple of uint16_t, plus the triple length. */
enum {
    GROUP_MSB,
    GROUP_OFFSET_HIGH,
    GROUP_OFFSET_LOW,
    GROUP_LENGTH
};

/*
 * Get the 32-bit group offset.
 * The two 16-bit halves are combined in unsigned arithmetic so that a high
 * half >= 0x8000 does not left-shift into the sign bit of a signed integer
 * (which is undefined behavior); the result is converted to int32_t at the end.
 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
 * @return group offset (int32_t)
 */
#define GET_GROUP_OFFSET(group) ((int32_t)((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]))

/* Step forward/backward by one group triple. */
#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
77  |  |  | 
/*
 * Header of one algorithmic-name range.
 * NOTE(review): field meanings are not visible in this part of the file;
 * presumably start/end bound a code point range and size is the byte size
 * of the range record - confirm against the code that reads it.
 */
typedef struct {
    uint32_t start;
    uint32_t end;
    uint8_t type;
    uint8_t variant;
    uint16_t size;
} AlgorithmicRange;
83  |  |  | 
/*
 * Index block at the start of the loaded names data.
 * The offsets are added to the address of this struct (as a byte pointer)
 * to reach the corresponding table.
 */
typedef struct {
    uint32_t tokenStringOffset;
    uint32_t groupsOffset;
    uint32_t groupStringOffset;
    uint32_t algNamesOffset;
} UCharNames;
87  |  |  | 
/*
 * Get the groups table from a UCharNames struct.
 * The groups table consists of one uint16_t groupCount followed by
 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
 * and the comment for the old struct Group above.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * can be used with any pointer expression and can be subscripted directly.
 * The argument is evaluated twice; do not pass an expression with side effects.
 *
 * @param names (const UCharNames *) pointer to the UCharNames indexes
 * @return (const uint16_t *) pointer to the groups table
 */
#define GET_GROUPS(names) ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
98  |  |  | 
99  |  | typedef struct { | 
100  |  |     const char *otherName;  | 
101  |  |     UChar32 code;  | 
102  |  | } FindName;  | 
103  |  |  | 
104  | 0  | #define DO_FIND_NAME NULL  | 
105  |  |  | 
106  |  | static UDataMemory *uCharNamesData=NULL;  | 
107  |  | static UCharNames *uCharNames=NULL;  | 
108  |  | static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;  | 
109  |  |  | 
110  |  | /*  | 
111  |  |  * Maximum length of character names (regular & 1.0).  | 
112  |  |  */  | 
113  |  | static int32_t gMaxNameLength=0;  | 
114  |  |  | 
115  |  | /*  | 
116  |  |  * Set of chars used in character names (regular & 1.0).  | 
117  |  |  * Chars are platform-dependent (can be EBCDIC).  | 
118  |  |  */  | 
119  |  | static uint32_t gNameSet[8]={ 0 }; | 
120  |  |  | 
/*
 * Extra pseudo-categories used for extended names; they continue the
 * numbering after the regular categories and index the last three
 * entries of charCatNames[].
 * Each expansion is parenthesized so that the macros stay correct inside
 * larger expressions.
 */
#define U_NONCHARACTER_CODE_POINT (U_CHAR_CATEGORY_COUNT)
#define U_LEAD_SURROGATE (U_CHAR_CATEGORY_COUNT + 1)
#define U_TRAIL_SURROGATE (U_CHAR_CATEGORY_COUNT + 2)

#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
126  |  |  | 
127  |  | static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { | 
128  |  |     "unassigned",  | 
129  |  |     "uppercase letter",  | 
130  |  |     "lowercase letter",  | 
131  |  |     "titlecase letter",  | 
132  |  |     "modifier letter",  | 
133  |  |     "other letter",  | 
134  |  |     "non spacing mark",  | 
135  |  |     "enclosing mark",  | 
136  |  |     "combining spacing mark",  | 
137  |  |     "decimal digit number",  | 
138  |  |     "letter number",  | 
139  |  |     "other number",  | 
140  |  |     "space separator",  | 
141  |  |     "line separator",  | 
142  |  |     "paragraph separator",  | 
143  |  |     "control",  | 
144  |  |     "format",  | 
145  |  |     "private use area",  | 
146  |  |     "surrogate",  | 
147  |  |     "dash punctuation",     | 
148  |  |     "start punctuation",  | 
149  |  |     "end punctuation",  | 
150  |  |     "connector punctuation",  | 
151  |  |     "other punctuation",  | 
152  |  |     "math symbol",  | 
153  |  |     "currency symbol",  | 
154  |  |     "modifier symbol",  | 
155  |  |     "other symbol",  | 
156  |  |     "initial punctuation",  | 
157  |  |     "final punctuation",  | 
158  |  |     "noncharacter",  | 
159  |  |     "lead surrogate",  | 
160  |  |     "trail surrogate"  | 
161  |  | };  | 
162  |  |  | 
163  |  | /* implementation ----------------------------------------------------------- */  | 
164  |  |  | 
165  |  | static UBool U_CALLCONV unames_cleanup(void)  | 
166  | 0  | { | 
167  | 0  |     if(uCharNamesData) { | 
168  | 0  |         udata_close(uCharNamesData);  | 
169  | 0  |         uCharNamesData = NULL;  | 
170  | 0  |     }  | 
171  | 0  |     if(uCharNames) { | 
172  | 0  |         uCharNames = NULL;  | 
173  | 0  |     }  | 
174  | 0  |     gCharNamesInitOnce.reset();  | 
175  | 0  |     gMaxNameLength=0;  | 
176  | 0  |     return TRUE;  | 
177  | 0  | }  | 
178  |  |  | 
179  |  | static UBool U_CALLCONV  | 
180  |  | isAcceptable(void * /*context*/,  | 
181  |  |              const char * /*type*/, const char * /*name*/,  | 
182  | 0  |              const UDataInfo *pInfo) { | 
183  | 0  |     return (UBool)(  | 
184  | 0  |         pInfo->size>=20 &&  | 
185  | 0  |         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&  | 
186  | 0  |         pInfo->charsetFamily==U_CHARSET_FAMILY &&  | 
187  | 0  |         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */  | 
188  | 0  |         pInfo->dataFormat[1]==0x6e &&  | 
189  | 0  |         pInfo->dataFormat[2]==0x61 &&  | 
190  | 0  |         pInfo->dataFormat[3]==0x6d &&  | 
191  | 0  |         pInfo->formatVersion[0]==1);  | 
192  | 0  | }  | 
193  |  |  | 
194  |  | static void U_CALLCONV  | 
195  | 0  | loadCharNames(UErrorCode &status) { | 
196  | 0  |     U_ASSERT(uCharNamesData == NULL);  | 
197  | 0  |     U_ASSERT(uCharNames == NULL);  | 
198  |  | 
  | 
199  | 0  |     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);  | 
200  | 0  |     if(U_FAILURE(status)) { | 
201  | 0  |         uCharNamesData = NULL;  | 
202  | 0  |     } else { | 
203  | 0  |         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);  | 
204  | 0  |     }  | 
205  | 0  |     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);  | 
206  | 0  | }  | 
207  |  |  | 
208  |  |  | 
209  |  | static UBool  | 
210  | 0  | isDataLoaded(UErrorCode *pErrorCode) { | 
211  | 0  |     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);  | 
212  | 0  |     return U_SUCCESS(*pErrorCode);  | 
213  | 0  | }  | 
214  |  |  | 
/*
 * Append one char to a bounded output buffer.
 * The char is stored, and buffer/bufferLength are advanced, only while
 * there is room left; bufferPos is incremented in every case so that it
 * counts the full output length even when the output is truncated.
 * buffer, bufferLength and bufferPos must be modifiable lvalues.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((bufferLength)>0) { \
        *(buffer)++=(c); \
        --(bufferLength); \
    } \
    ++(bufferPos); \
} UPRV_BLOCK_MACRO_END
222  |  |  | 
223  | 0  | #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT  | 
224  |  |  | 
225  |  | /*  | 
226  |  |  * Important: expandName() and compareName() are almost the same -  | 
227  |  |  * apply fixes to both.  | 
228  |  |  *  | 
229  |  |  * UnicodeData.txt uses ';' as a field separator, so no  | 
230  |  |  * field can contain ';' as part of its contents.  | 
231  |  |  * In unames.dat, it is marked as token[';']==-1 only if the  | 
232  |  |  * semicolon is used in the data file - which is iff we  | 
233  |  |  * have Unicode 1.0 names or ISO comments or aliases.  | 
234  |  |  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases  | 
235  |  |  * although we know that it will never be part of a name.  | 
236  |  |  */  | 
237  |  | static uint16_t  | 
238  |  | expandName(UCharNames *names,  | 
239  |  |            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,  | 
240  | 0  |            char *buffer, uint16_t bufferLength) { | 
241  | 0  |     uint16_t *tokens=(uint16_t *)names+8;  | 
242  | 0  |     uint16_t token, tokenCount=*tokens++, bufferPos=0;  | 
243  | 0  |     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;  | 
244  | 0  |     uint8_t c;  | 
245  |  | 
  | 
246  | 0  |     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { | 
247  |  |         /*  | 
248  |  |          * skip the modern name if it is not requested _and_  | 
249  |  |          * if the semicolon byte value is a character, not a token number  | 
250  |  |          */  | 
251  | 0  |         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { | 
252  | 0  |             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;  | 
253  | 0  |             do { | 
254  | 0  |                 while(nameLength>0) { | 
255  | 0  |                     --nameLength;  | 
256  | 0  |                     if(*name++==';') { | 
257  | 0  |                         break;  | 
258  | 0  |                     }  | 
259  | 0  |                 }  | 
260  | 0  |             } while(--fieldIndex>0);  | 
261  | 0  |         } else { | 
262  |  |             /*  | 
263  |  |              * the semicolon byte value is a token number, therefore  | 
264  |  |              * only modern names are stored in unames.dat and there is no  | 
265  |  |              * such requested alternate name here  | 
266  |  |              */  | 
267  | 0  |             nameLength=0;  | 
268  | 0  |         }  | 
269  | 0  |     }  | 
270  |  |  | 
271  |  |     /* write each letter directly, and write a token word per token */  | 
272  | 0  |     while(nameLength>0) { | 
273  | 0  |         --nameLength;  | 
274  | 0  |         c=*name++;  | 
275  |  | 
  | 
276  | 0  |         if(c>=tokenCount) { | 
277  | 0  |             if(c!=';') { | 
278  |  |                 /* implicit letter */  | 
279  | 0  |                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
280  | 0  |             } else { | 
281  |  |                 /* finished */  | 
282  | 0  |                 break;  | 
283  | 0  |             }  | 
284  | 0  |         } else { | 
285  | 0  |             token=tokens[c];  | 
286  | 0  |             if(token==(uint16_t)(-2)) { | 
287  |  |                 /* this is a lead byte for a double-byte token */  | 
288  | 0  |                 token=tokens[c<<8|*name++];  | 
289  | 0  |                 --nameLength;  | 
290  | 0  |             }  | 
291  | 0  |             if(token==(uint16_t)(-1)) { | 
292  | 0  |                 if(c!=';') { | 
293  |  |                     /* explicit letter */  | 
294  | 0  |                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
295  | 0  |                 } else { | 
296  |  |                     /* stop, but skip the semicolon if we are seeking  | 
297  |  |                        extended names and there was no 2.0 name but there  | 
298  |  |                        is a 1.0 name. */  | 
299  | 0  |                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { | 
300  | 0  |                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { | 
301  | 0  |                             continue;  | 
302  | 0  |                         }  | 
303  | 0  |                     }  | 
304  |  |                     /* finished */  | 
305  | 0  |                     break;  | 
306  | 0  |                 }  | 
307  | 0  |             } else { | 
308  |  |                 /* write token word */  | 
309  | 0  |                 uint8_t *tokenString=tokenStrings+token;  | 
310  | 0  |                 while((c=*tokenString++)!=0) { | 
311  | 0  |                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
312  | 0  |                 }  | 
313  | 0  |             }  | 
314  | 0  |         }  | 
315  | 0  |     }  | 
316  |  |  | 
317  |  |     /* zero-terminate */  | 
318  | 0  |     if(bufferLength>0) { | 
319  | 0  |         *buffer=0;  | 
320  | 0  |     }  | 
321  |  | 
  | 
322  | 0  |     return bufferPos;  | 
323  | 0  | }  | 
324  |  |  | 
325  |  | /*  | 
326  |  |  * compareName() is almost the same as expandName() except that it compares  | 
327  |  |  * the currently expanded name to an input name.  | 
328  |  |  * It returns the match/no match result as soon as possible.  | 
329  |  |  */  | 
330  |  | static UBool  | 
331  |  | compareName(UCharNames *names,  | 
332  |  |             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,  | 
333  | 0  |             const char *otherName) { | 
334  | 0  |     uint16_t *tokens=(uint16_t *)names+8;  | 
335  | 0  |     uint16_t token, tokenCount=*tokens++;  | 
336  | 0  |     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;  | 
337  | 0  |     uint8_t c;  | 
338  | 0  |     const char *origOtherName = otherName;  | 
339  |  | 
  | 
340  | 0  |     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { | 
341  |  |         /*  | 
342  |  |          * skip the modern name if it is not requested _and_  | 
343  |  |          * if the semicolon byte value is a character, not a token number  | 
344  |  |          */  | 
345  | 0  |         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { | 
346  | 0  |             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;  | 
347  | 0  |             do { | 
348  | 0  |                 while(nameLength>0) { | 
349  | 0  |                     --nameLength;  | 
350  | 0  |                     if(*name++==';') { | 
351  | 0  |                         break;  | 
352  | 0  |                     }  | 
353  | 0  |                 }  | 
354  | 0  |             } while(--fieldIndex>0);  | 
355  | 0  |         } else { | 
356  |  |             /*  | 
357  |  |              * the semicolon byte value is a token number, therefore  | 
358  |  |              * only modern names are stored in unames.dat and there is no  | 
359  |  |              * such requested alternate name here  | 
360  |  |              */  | 
361  | 0  |             nameLength=0;  | 
362  | 0  |         }  | 
363  | 0  |     }  | 
364  |  |  | 
365  |  |     /* compare each letter directly, and compare a token word per token */  | 
366  | 0  |     while(nameLength>0) { | 
367  | 0  |         --nameLength;  | 
368  | 0  |         c=*name++;  | 
369  |  | 
  | 
370  | 0  |         if(c>=tokenCount) { | 
371  | 0  |             if(c!=';') { | 
372  |  |                 /* implicit letter */  | 
373  | 0  |                 if((char)c!=*otherName++) { | 
374  | 0  |                     return FALSE;  | 
375  | 0  |                 }  | 
376  | 0  |             } else { | 
377  |  |                 /* finished */  | 
378  | 0  |                 break;  | 
379  | 0  |             }  | 
380  | 0  |         } else { | 
381  | 0  |             token=tokens[c];  | 
382  | 0  |             if(token==(uint16_t)(-2)) { | 
383  |  |                 /* this is a lead byte for a double-byte token */  | 
384  | 0  |                 token=tokens[c<<8|*name++];  | 
385  | 0  |                 --nameLength;  | 
386  | 0  |             }  | 
387  | 0  |             if(token==(uint16_t)(-1)) { | 
388  | 0  |                 if(c!=';') { | 
389  |  |                     /* explicit letter */  | 
390  | 0  |                     if((char)c!=*otherName++) { | 
391  | 0  |                         return FALSE;  | 
392  | 0  |                     }  | 
393  | 0  |                 } else { | 
394  |  |                     /* stop, but skip the semicolon if we are seeking  | 
395  |  |                        extended names and there was no 2.0 name but there  | 
396  |  |                        is a 1.0 name. */  | 
397  | 0  |                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { | 
398  | 0  |                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { | 
399  | 0  |                             continue;  | 
400  | 0  |                         }  | 
401  | 0  |                     }  | 
402  |  |                     /* finished */  | 
403  | 0  |                     break;  | 
404  | 0  |                 }  | 
405  | 0  |             } else { | 
406  |  |                 /* write token word */  | 
407  | 0  |                 uint8_t *tokenString=tokenStrings+token;  | 
408  | 0  |                 while((c=*tokenString++)!=0) { | 
409  | 0  |                     if((char)c!=*otherName++) { | 
410  | 0  |                         return FALSE;  | 
411  | 0  |                     }  | 
412  | 0  |                 }  | 
413  | 0  |             }  | 
414  | 0  |         }  | 
415  | 0  |     }  | 
416  |  |  | 
417  |  |     /* complete match? */  | 
418  | 0  |     return (UBool)(*otherName==0);  | 
419  | 0  | }  | 
420  |  |  | 
421  | 0  | static uint8_t getCharCat(UChar32 cp) { | 
422  | 0  |     uint8_t cat;  | 
423  |  | 
  | 
424  | 0  |     if (U_IS_UNICODE_NONCHAR(cp)) { | 
425  | 0  |         return U_NONCHARACTER_CODE_POINT;  | 
426  | 0  |     }  | 
427  |  |  | 
428  | 0  |     if ((cat = u_charType(cp)) == U_SURROGATE) { | 
429  | 0  |         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;  | 
430  | 0  |     }  | 
431  |  | 
  | 
432  | 0  |     return cat;  | 
433  | 0  | }  | 
434  |  |  | 
435  | 0  | static const char *getCharCatName(UChar32 cp) { | 
436  | 0  |     uint8_t cat = getCharCat(cp);  | 
437  |  |  | 
438  |  |     /* Return unknown if the table of names above is not up to  | 
439  |  |        date. */  | 
440  |  | 
  | 
441  | 0  |     if (cat >= UPRV_LENGTHOF(charCatNames)) { | 
442  | 0  |         return "unknown";  | 
443  | 0  |     } else { | 
444  | 0  |         return charCatNames[cat];  | 
445  | 0  |     }  | 
446  | 0  | }  | 
447  |  |  | 
448  | 0  | static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { | 
449  | 0  |     const char *catname = getCharCatName(code);  | 
450  | 0  |     uint16_t length = 0;  | 
451  |  | 
  | 
452  | 0  |     UChar32 cp;  | 
453  | 0  |     int ndigits, i;  | 
454  |  |       | 
455  | 0  |     WRITE_CHAR(buffer, bufferLength, length, '<');  | 
456  | 0  |     while (catname[length - 1]) { | 
457  | 0  |         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);  | 
458  | 0  |     }  | 
459  | 0  |     WRITE_CHAR(buffer, bufferLength, length, '-');  | 
460  | 0  |     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)  | 
461  | 0  |         ;  | 
462  | 0  |     if (ndigits < 4)  | 
463  | 0  |         ndigits = 4;  | 
464  | 0  |     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { | 
465  | 0  |         uint8_t v = (uint8_t)(cp & 0xf);  | 
466  | 0  |         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);  | 
467  | 0  |     }  | 
468  | 0  |     buffer += ndigits;  | 
469  | 0  |     length += static_cast<uint16_t>(ndigits);  | 
470  | 0  |     WRITE_CHAR(buffer, bufferLength, length, '>');  | 
471  |  | 
  | 
472  | 0  |     return length;  | 
473  | 0  | }  | 
474  |  |  | 
475  |  | /*  | 
476  |  |  * getGroup() does a binary search for the group that contains the  | 
477  |  |  * Unicode code point "code".  | 
478  |  |  * The return value is always a valid Group* that may contain "code"  | 
479  |  |  * or else is the highest group before "code".  | 
480  |  |  * If the lowest group is after "code", then that one is returned.  | 
481  |  |  */  | 
482  |  | static const uint16_t *  | 
483  | 0  | getGroup(UCharNames *names, uint32_t code) { | 
484  | 0  |     const uint16_t *groups=GET_GROUPS(names);  | 
485  | 0  |     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),  | 
486  | 0  |              start=0,  | 
487  | 0  |              limit=*groups++,  | 
488  | 0  |              number;  | 
489  |  |  | 
490  |  |     /* binary search for the group of names that contains the one for code */  | 
491  | 0  |     while(start<limit-1) { | 
492  | 0  |         number=(uint16_t)((start+limit)/2);  | 
493  | 0  |         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) { | 
494  | 0  |             limit=number;  | 
495  | 0  |         } else { | 
496  | 0  |             start=number;  | 
497  | 0  |         }  | 
498  | 0  |     }  | 
499  |  |  | 
500  |  |     /* return this regardless of whether it is an exact match */  | 
501  | 0  |     return groups+start*GROUP_LENGTH;  | 
502  | 0  | }  | 
503  |  |  | 
504  |  | /*  | 
505  |  |  * expandGroupLengths() reads a block of compressed lengths of 32 strings and  | 
506  |  |  * expands them into offsets and lengths for each string.  | 
507  |  |  * Lengths are stored with a variable-width encoding in consecutive nibbles:  | 
508  |  |  * If a nibble<0xc, then it is the length itself (0=empty string).  | 
509  |  |  * If a nibble>=0xc, then it forms a length value with the following nibble.  | 
510  |  |  * Calculation see below.  | 
511  |  |  * The offsets and lengths arrays must be at least 33 (one more) long because  | 
512  |  |  * there is no check here at the end if the last nibble is still used.  | 
513  |  |  */  | 
514  |  | static const uint8_t *  | 
515  |  | expandGroupLengths(const uint8_t *s,  | 
516  | 0  |                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { | 
517  |  |     /* read the lengths of the 32 strings in this group and get each string's offset */  | 
518  | 0  |     uint16_t i=0, offset=0, length=0;  | 
519  | 0  |     uint8_t lengthByte;  | 
520  |  |  | 
521  |  |     /* all 32 lengths must be read to get the offset of the first group string */  | 
522  | 0  |     while(i<LINES_PER_GROUP) { | 
523  | 0  |         lengthByte=*s++;  | 
524  |  |  | 
525  |  |         /* read even nibble - MSBs of lengthByte */  | 
526  | 0  |         if(length>=12) { | 
527  |  |             /* double-nibble length spread across two bytes */  | 
528  | 0  |             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);  | 
529  | 0  |             lengthByte&=0xf;  | 
530  | 0  |         } else if((lengthByte /* &0xf0 */)>=0xc0) { | 
531  |  |             /* double-nibble length spread across this one byte */  | 
532  | 0  |             length=(uint16_t)((lengthByte&0x3f)+12);  | 
533  | 0  |         } else { | 
534  |  |             /* single-nibble length in MSBs */  | 
535  | 0  |             length=(uint16_t)(lengthByte>>4);  | 
536  | 0  |             lengthByte&=0xf;  | 
537  | 0  |         }  | 
538  |  | 
  | 
539  | 0  |         *offsets++=offset;  | 
540  | 0  |         *lengths++=length;  | 
541  |  | 
  | 
542  | 0  |         offset+=length;  | 
543  | 0  |         ++i;  | 
544  |  |  | 
545  |  |         /* read odd nibble - LSBs of lengthByte */  | 
546  | 0  |         if((lengthByte&0xf0)==0) { | 
547  |  |             /* this nibble was not consumed for a double-nibble length above */  | 
548  | 0  |             length=lengthByte;  | 
549  | 0  |             if(length<12) { | 
550  |  |                 /* single-nibble length in LSBs */  | 
551  | 0  |                 *offsets++=offset;  | 
552  | 0  |                 *lengths++=length;  | 
553  |  | 
  | 
554  | 0  |                 offset+=length;  | 
555  | 0  |                 ++i;  | 
556  | 0  |             }  | 
557  | 0  |         } else { | 
558  | 0  |             length=0;   /* prevent double-nibble detection in the next iteration */  | 
559  | 0  |         }  | 
560  | 0  |     }  | 
561  |  |  | 
562  |  |     /* now, s is at the first group string */  | 
563  | 0  |     return s;  | 
564  | 0  | }  | 
565  |  |  | 
566  |  | static uint16_t  | 
567  |  | expandGroupName(UCharNames *names, const uint16_t *group,  | 
568  |  |                 uint16_t lineNumber, UCharNameChoice nameChoice,  | 
569  | 0  |                 char *buffer, uint16_t bufferLength) { | 
570  | 0  |     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];  | 
571  | 0  |     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);  | 
572  | 0  |     s=expandGroupLengths(s, offsets, lengths);  | 
573  | 0  |     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,  | 
574  | 0  |                       buffer, bufferLength);  | 
575  | 0  | }  | 
576  |  |  | 
577  |  | static uint16_t  | 
578  |  | getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,  | 
579  | 0  |         char *buffer, uint16_t bufferLength) { | 
580  | 0  |     const uint16_t *group=getGroup(names, code);  | 
581  | 0  |     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { | 
582  | 0  |         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,  | 
583  | 0  |                                buffer, bufferLength);  | 
584  | 0  |     } else { | 
585  |  |         /* group not found */  | 
586  |  |         /* zero-terminate */  | 
587  | 0  |         if(bufferLength>0) { | 
588  | 0  |             *buffer=0;  | 
589  | 0  |         }  | 
590  | 0  |         return 0;  | 
591  | 0  |     }  | 
592  | 0  | }  | 
593  |  |  | 
594  |  | /*  | 
595  |  |  * enumGroupNames() enumerates all the names in a 32-group  | 
596  |  |  * and either calls the enumerator function or finds a given input name.  | 
597  |  |  */  | 
598  |  | static UBool  | 
599  |  | enumGroupNames(UCharNames *names, const uint16_t *group,  | 
600  |  |                UChar32 start, UChar32 end,  | 
601  |  |                UEnumCharNamesFn *fn, void *context,  | 
602  | 0  |                UCharNameChoice nameChoice) { | 
603  | 0  |     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];  | 
604  | 0  |     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);  | 
605  |  | 
  | 
606  | 0  |     s=expandGroupLengths(s, offsets, lengths);  | 
607  | 0  |     if(fn!=DO_FIND_NAME) { | 
608  | 0  |         char buffer[200];  | 
609  | 0  |         uint16_t length;  | 
610  |  | 
  | 
611  | 0  |         while(start<=end) { | 
612  | 0  |             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));  | 
613  | 0  |             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { | 
614  | 0  |                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;  | 
615  | 0  |             }  | 
616  |  |             /* here, we assume that the buffer is large enough */  | 
617  | 0  |             if(length>0) { | 
618  | 0  |                 if(!fn(context, start, nameChoice, buffer, length)) { | 
619  | 0  |                     return FALSE;  | 
620  | 0  |                 }  | 
621  | 0  |             }  | 
622  | 0  |             ++start;  | 
623  | 0  |         }  | 
624  | 0  |     } else { | 
625  | 0  |         const char *otherName=((FindName *)context)->otherName;  | 
626  | 0  |         while(start<=end) { | 
627  | 0  |             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { | 
628  | 0  |                 ((FindName *)context)->code=start;  | 
629  | 0  |                 return FALSE;  | 
630  | 0  |             }  | 
631  | 0  |             ++start;  | 
632  | 0  |         }  | 
633  | 0  |     }  | 
634  | 0  |     return TRUE;  | 
635  | 0  | }  | 
636  |  |  | 
637  |  | /*  | 
638  |  |  * enumExtNames enumerate extended names.  | 
639  |  |  * It only needs to do it if it is called with a real function and not  | 
640  |  |  * with the dummy DO_FIND_NAME, because u_charFromName() does a check  | 
641  |  |  * for extended names by itself.  | 
642  |  |  */   | 
643  |  | static UBool  | 
644  |  | enumExtNames(UChar32 start, UChar32 end,  | 
645  |  |              UEnumCharNamesFn *fn, void *context)  | 
646  | 0  | { | 
647  | 0  |     if(fn!=DO_FIND_NAME) { | 
648  | 0  |         char buffer[200];  | 
649  | 0  |         uint16_t length;  | 
650  |  |           | 
651  | 0  |         while(start<=end) { | 
652  | 0  |             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;  | 
653  |  |             /* here, we assume that the buffer is large enough */  | 
654  | 0  |             if(length>0) { | 
655  | 0  |                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { | 
656  | 0  |                     return FALSE;  | 
657  | 0  |                 }  | 
658  | 0  |             }  | 
659  | 0  |             ++start;  | 
660  | 0  |         }  | 
661  | 0  |     }  | 
662  |  |  | 
663  | 0  |     return TRUE;  | 
664  | 0  | }  | 
665  |  |  | 
666  |  | static UBool  | 
667  |  | enumNames(UCharNames *names,  | 
668  |  |           UChar32 start, UChar32 limit,  | 
669  |  |           UEnumCharNamesFn *fn, void *context,  | 
670  | 0  |           UCharNameChoice nameChoice) { | 
671  | 0  |     uint16_t startGroupMSB, endGroupMSB, groupCount;  | 
672  | 0  |     const uint16_t *group, *groupLimit;  | 
673  |  | 
  | 
674  | 0  |     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);  | 
675  | 0  |     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);  | 
676  |  |  | 
677  |  |     /* find the group that contains start, or the highest before it */  | 
678  | 0  |     group=getGroup(names, start);  | 
679  |  | 
  | 
680  | 0  |     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) { | 
681  |  |         /* enumerate synthetic names between start and the group start */  | 
682  | 0  |         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);  | 
683  | 0  |         if(extLimit>limit) { | 
684  | 0  |             extLimit=limit;  | 
685  | 0  |         }  | 
686  | 0  |         if(!enumExtNames(start, extLimit-1, fn, context)) { | 
687  | 0  |             return FALSE;  | 
688  | 0  |         }  | 
689  | 0  |         start=extLimit;  | 
690  | 0  |     }  | 
691  |  |  | 
692  | 0  |     if(startGroupMSB==endGroupMSB) { | 
693  | 0  |         if(startGroupMSB==group[GROUP_MSB]) { | 
694  |  |             /* if start and limit-1 are in the same group, then enumerate only in that one */  | 
695  | 0  |             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);  | 
696  | 0  |         }  | 
697  | 0  |     } else { | 
698  | 0  |         const uint16_t *groups=GET_GROUPS(names);  | 
699  | 0  |         groupCount=*groups++;  | 
700  | 0  |         groupLimit=groups+groupCount*GROUP_LENGTH;  | 
701  |  | 
  | 
702  | 0  |         if(startGroupMSB==group[GROUP_MSB]) { | 
703  |  |             /* enumerate characters in the partial start group */  | 
704  | 0  |             if((start&GROUP_MASK)!=0) { | 
705  | 0  |                 if(!enumGroupNames(names, group,  | 
706  | 0  |                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,  | 
707  | 0  |                                    fn, context, nameChoice)) { | 
708  | 0  |                     return FALSE;  | 
709  | 0  |                 }  | 
710  | 0  |                 group=NEXT_GROUP(group); /* continue with the next group */  | 
711  | 0  |             }  | 
712  | 0  |         } else if(startGroupMSB>group[GROUP_MSB]) { | 
713  |  |             /* make sure that we start enumerating with the first group after start */  | 
714  | 0  |             const uint16_t *nextGroup=NEXT_GROUP(group);  | 
715  | 0  |             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { | 
716  | 0  |                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;  | 
717  | 0  |                 if (end > limit) { | 
718  | 0  |                     end = limit;  | 
719  | 0  |                 }  | 
720  | 0  |                 if (!enumExtNames(start, end - 1, fn, context)) { | 
721  | 0  |                     return FALSE;  | 
722  | 0  |                 }  | 
723  | 0  |             }  | 
724  | 0  |             group=nextGroup;  | 
725  | 0  |         }  | 
726  |  |  | 
727  |  |         /* enumerate entire groups between the start- and end-groups */  | 
728  | 0  |         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) { | 
729  | 0  |             const uint16_t *nextGroup;  | 
730  | 0  |             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;  | 
731  | 0  |             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) { | 
732  | 0  |                 return FALSE;  | 
733  | 0  |             }  | 
734  | 0  |             nextGroup=NEXT_GROUP(group);  | 
735  | 0  |             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { | 
736  | 0  |                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;  | 
737  | 0  |                 if (end > limit) { | 
738  | 0  |                     end = limit;  | 
739  | 0  |                 }  | 
740  | 0  |                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { | 
741  | 0  |                     return FALSE;  | 
742  | 0  |                 }  | 
743  | 0  |             }  | 
744  | 0  |             group=nextGroup;  | 
745  | 0  |         }  | 
746  |  |  | 
747  |  |         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */  | 
748  | 0  |         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) { | 
749  | 0  |             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);  | 
750  | 0  |         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { | 
751  | 0  |             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;  | 
752  | 0  |             if (next > start) { | 
753  | 0  |                 start = next;  | 
754  | 0  |             }  | 
755  | 0  |         } else { | 
756  | 0  |             return TRUE;  | 
757  | 0  |         }  | 
758  | 0  |     }  | 
759  |  |  | 
760  |  |     /* we have not found a group, which means everything is made of  | 
761  |  |        extended names. */  | 
762  | 0  |     if (nameChoice == U_EXTENDED_CHAR_NAME) { | 
763  | 0  |         if (limit > UCHAR_MAX_VALUE + 1) { | 
764  | 0  |             limit = UCHAR_MAX_VALUE + 1;  | 
765  | 0  |         }  | 
766  | 0  |         return enumExtNames(start, limit - 1, fn, context);  | 
767  | 0  |     }  | 
768  |  |       | 
769  | 0  |     return TRUE;  | 
770  | 0  | }  | 
771  |  |  | 
772  |  | static uint16_t  | 
773  |  | writeFactorSuffix(const uint16_t *factors, uint16_t count,  | 
774  |  |                   const char *s, /* suffix elements */  | 
775  |  |                   uint32_t code,  | 
776  |  |                   uint16_t indexes[8], /* output fields from here */  | 
777  |  |                   const char *elementBases[8], const char *elements[8],  | 
778  | 0  |                   char *buffer, uint16_t bufferLength) { | 
779  | 0  |     uint16_t i, factor, bufferPos=0;  | 
780  | 0  |     char c;  | 
781  |  |  | 
782  |  |     /* write elements according to the factors */  | 
783  |  |  | 
784  |  |     /*  | 
785  |  |      * the factorized elements are determined by modulo arithmetic  | 
786  |  |      * with the factors of this algorithm  | 
787  |  |      *  | 
788  |  |      * note that for fewer operations, count is decremented here  | 
789  |  |      */  | 
790  | 0  |     --count;  | 
791  | 0  |     for(i=count; i>0; --i) { | 
792  | 0  |         factor=factors[i];  | 
793  | 0  |         indexes[i]=(uint16_t)(code%factor);  | 
794  | 0  |         code/=factor;  | 
795  | 0  |     }  | 
796  |  |     /*  | 
797  |  |      * we don't need to calculate the last modulus because start<=code<=end  | 
798  |  |      * guarantees here that code<=factors[0]  | 
799  |  |      */  | 
800  | 0  |     indexes[0]=(uint16_t)code;  | 
801  |  |  | 
802  |  |     /* write each element */  | 
803  | 0  |     for(;;) { | 
804  | 0  |         if(elementBases!=NULL) { | 
805  | 0  |             *elementBases++=s;  | 
806  | 0  |         }  | 
807  |  |  | 
808  |  |         /* skip indexes[i] strings */  | 
809  | 0  |         factor=indexes[i];  | 
810  | 0  |         while(factor>0) { | 
811  | 0  |             while(*s++!=0) {} | 
812  | 0  |             --factor;  | 
813  | 0  |         }  | 
814  | 0  |         if(elements!=NULL) { | 
815  | 0  |             *elements++=s;  | 
816  | 0  |         }  | 
817  |  |  | 
818  |  |         /* write element */  | 
819  | 0  |         while((c=*s++)!=0) { | 
820  | 0  |             WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
821  | 0  |         }  | 
822  |  |  | 
823  |  |         /* we do not need to perform the rest of this loop for i==count - break here */  | 
824  | 0  |         if(i>=count) { | 
825  | 0  |             break;  | 
826  | 0  |         }  | 
827  |  |  | 
828  |  |         /* skip the rest of the strings for this factors[i] */  | 
829  | 0  |         factor=(uint16_t)(factors[i]-indexes[i]-1);  | 
830  | 0  |         while(factor>0) { | 
831  | 0  |             while(*s++!=0) {} | 
832  | 0  |             --factor;  | 
833  | 0  |         }  | 
834  |  | 
  | 
835  | 0  |         ++i;  | 
836  | 0  |     }  | 
837  |  |  | 
838  |  |     /* zero-terminate */  | 
839  | 0  |     if(bufferLength>0) { | 
840  | 0  |         *buffer=0;  | 
841  | 0  |     }  | 
842  |  | 
  | 
843  | 0  |     return bufferPos;  | 
844  | 0  | }  | 
845  |  |  | 
846  |  | /*  | 
847  |  |  * Important:  | 
848  |  |  * Parts of findAlgName() are almost the same as some of getAlgName().  | 
849  |  |  * Fixes must be applied to both.  | 
850  |  |  */  | 
851  |  | static uint16_t  | 
852  |  | getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,  | 
853  | 0  |         char *buffer, uint16_t bufferLength) { | 
854  | 0  |     uint16_t bufferPos=0;  | 
855  |  |  | 
856  |  |     /* Only the normative character name can be algorithmic. */  | 
857  | 0  |     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { | 
858  |  |         /* zero-terminate */  | 
859  | 0  |         if(bufferLength>0) { | 
860  | 0  |             *buffer=0;  | 
861  | 0  |         }  | 
862  | 0  |         return 0;  | 
863  | 0  |     }  | 
864  |  |  | 
865  | 0  |     switch(range->type) { | 
866  | 0  |     case 0: { | 
867  |  |         /* name = prefix hex-digits */  | 
868  | 0  |         const char *s=(const char *)(range+1);  | 
869  | 0  |         char c;  | 
870  |  | 
  | 
871  | 0  |         uint16_t i, count;  | 
872  |  |  | 
873  |  |         /* copy prefix */  | 
874  | 0  |         while((c=*s++)!=0) { | 
875  | 0  |             WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
876  | 0  |         }  | 
877  |  |  | 
878  |  |         /* write hexadecimal code point value */  | 
879  | 0  |         count=range->variant;  | 
880  |  |  | 
881  |  |         /* zero-terminate */  | 
882  | 0  |         if(count<bufferLength) { | 
883  | 0  |             buffer[count]=0;  | 
884  | 0  |         }  | 
885  |  | 
  | 
886  | 0  |         for(i=count; i>0;) { | 
887  | 0  |             if(--i<bufferLength) { | 
888  | 0  |                 c=(char)(code&0xf);  | 
889  | 0  |                 if(c<10) { | 
890  | 0  |                     c+='0';  | 
891  | 0  |                 } else { | 
892  | 0  |                     c+='A'-10;  | 
893  | 0  |                 }  | 
894  | 0  |                 buffer[i]=c;  | 
895  | 0  |             }  | 
896  | 0  |             code>>=4;  | 
897  | 0  |         }  | 
898  |  | 
  | 
899  | 0  |         bufferPos+=count;  | 
900  | 0  |         break;  | 
901  | 0  |     }  | 
902  | 0  |     case 1: { | 
903  |  |         /* name = prefix factorized-elements */  | 
904  | 0  |         uint16_t indexes[8];  | 
905  | 0  |         const uint16_t *factors=(const uint16_t *)(range+1);  | 
906  | 0  |         uint16_t count=range->variant;  | 
907  | 0  |         const char *s=(const char *)(factors+count);  | 
908  | 0  |         char c;  | 
909  |  |  | 
910  |  |         /* copy prefix */  | 
911  | 0  |         while((c=*s++)!=0) { | 
912  | 0  |             WRITE_CHAR(buffer, bufferLength, bufferPos, c);  | 
913  | 0  |         }  | 
914  |  | 
  | 
915  | 0  |         bufferPos+=writeFactorSuffix(factors, count,  | 
916  | 0  |                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);  | 
917  | 0  |         break;  | 
918  | 0  |     }  | 
919  | 0  |     default:  | 
920  |  |         /* undefined type */  | 
921  |  |         /* zero-terminate */  | 
922  | 0  |         if(bufferLength>0) { | 
923  | 0  |             *buffer=0;  | 
924  | 0  |         }  | 
925  | 0  |         break;  | 
926  | 0  |     }  | 
927  |  |  | 
928  | 0  |     return bufferPos;  | 
929  | 0  | }  | 
930  |  |  | 
931  |  | /*  | 
932  |  |  * Important: enumAlgNames() and findAlgName() are almost the same.  | 
933  |  |  * Any fix must be applied to both.  | 
934  |  |  */  | 
935  |  | static UBool  | 
936  |  | enumAlgNames(AlgorithmicRange *range,  | 
937  |  |              UChar32 start, UChar32 limit,  | 
938  |  |              UEnumCharNamesFn *fn, void *context,  | 
939  | 0  |              UCharNameChoice nameChoice) { | 
940  | 0  |     char buffer[200];  | 
941  | 0  |     uint16_t length;  | 
942  |  | 
  | 
943  | 0  |     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { | 
944  | 0  |         return TRUE;  | 
945  | 0  |     }  | 
946  |  |  | 
947  | 0  |     switch(range->type) { | 
948  | 0  |     case 0: { | 
949  | 0  |         char *s, *end;  | 
950  | 0  |         char c;  | 
951  |  |  | 
952  |  |         /* get the full name of the start character */  | 
953  | 0  |         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));  | 
954  | 0  |         if(length<=0) { | 
955  | 0  |             return TRUE;  | 
956  | 0  |         }  | 
957  |  |  | 
958  |  |         /* call the enumerator function with this first character */  | 
959  | 0  |         if(!fn(context, start, nameChoice, buffer, length)) { | 
960  | 0  |             return FALSE;  | 
961  | 0  |         }  | 
962  |  |  | 
963  |  |         /* go to the end of the name; all these names have the same length */  | 
964  | 0  |         end=buffer;  | 
965  | 0  |         while(*end!=0) { | 
966  | 0  |             ++end;  | 
967  | 0  |         }  | 
968  |  |  | 
969  |  |         /* enumerate the rest of the names */  | 
970  | 0  |         while(++start<limit) { | 
971  |  |             /* increment the hexadecimal number on a character-basis */  | 
972  | 0  |             s=end;  | 
973  | 0  |             for (;;) { | 
974  | 0  |                 c=*--s;  | 
975  | 0  |                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) { | 
976  | 0  |                     *s=(char)(c+1);  | 
977  | 0  |                     break;  | 
978  | 0  |                 } else if(c=='9') { | 
979  | 0  |                     *s='A';  | 
980  | 0  |                     break;  | 
981  | 0  |                 } else if(c=='F') { | 
982  | 0  |                     *s='0';  | 
983  | 0  |                 }  | 
984  | 0  |             }  | 
985  |  | 
  | 
986  | 0  |             if(!fn(context, start, nameChoice, buffer, length)) { | 
987  | 0  |                 return FALSE;  | 
988  | 0  |             }  | 
989  | 0  |         }  | 
990  | 0  |         break;  | 
991  | 0  |     }  | 
992  | 0  |     case 1: { | 
993  | 0  |         uint16_t indexes[8];  | 
994  | 0  |         const char *elementBases[8], *elements[8];  | 
995  | 0  |         const uint16_t *factors=(const uint16_t *)(range+1);  | 
996  | 0  |         uint16_t count=range->variant;  | 
997  | 0  |         const char *s=(const char *)(factors+count);  | 
998  | 0  |         char *suffix, *t;  | 
999  | 0  |         uint16_t prefixLength, i, idx;  | 
1000  |  | 
  | 
1001  | 0  |         char c;  | 
1002  |  |  | 
1003  |  |         /* name = prefix factorized-elements */  | 
1004  |  |  | 
1005  |  |         /* copy prefix */  | 
1006  | 0  |         suffix=buffer;  | 
1007  | 0  |         prefixLength=0;  | 
1008  | 0  |         while((c=*s++)!=0) { | 
1009  | 0  |             *suffix++=c;  | 
1010  | 0  |             ++prefixLength;  | 
1011  | 0  |         }  | 
1012  |  |  | 
1013  |  |         /* append the suffix of the start character */  | 
1014  | 0  |         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,  | 
1015  | 0  |                                               s, (uint32_t)start-range->start,  | 
1016  | 0  |                                               indexes, elementBases, elements,  | 
1017  | 0  |                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));  | 
1018  |  |  | 
1019  |  |         /* call the enumerator function with this first character */  | 
1020  | 0  |         if(!fn(context, start, nameChoice, buffer, length)) { | 
1021  | 0  |             return FALSE;  | 
1022  | 0  |         }  | 
1023  |  |  | 
1024  |  |         /* enumerate the rest of the names */  | 
1025  | 0  |         while(++start<limit) { | 
1026  |  |             /* increment the indexes in lexical order bound by the factors */  | 
1027  | 0  |             i=count;  | 
1028  | 0  |             for (;;) { | 
1029  | 0  |                 idx=(uint16_t)(indexes[--i]+1);  | 
1030  | 0  |                 if(idx<factors[i]) { | 
1031  |  |                     /* skip one index and its element string */  | 
1032  | 0  |                     indexes[i]=idx;  | 
1033  | 0  |                     s=elements[i];  | 
1034  | 0  |                     while(*s++!=0) { | 
1035  | 0  |                     }  | 
1036  | 0  |                     elements[i]=s;  | 
1037  | 0  |                     break;  | 
1038  | 0  |                 } else { | 
1039  |  |                     /* reset this index to 0 and its element string to the first one */  | 
1040  | 0  |                     indexes[i]=0;  | 
1041  | 0  |                     elements[i]=elementBases[i];  | 
1042  | 0  |                 }  | 
1043  | 0  |             }  | 
1044  |  |  | 
1045  |  |             /* to make matters a little easier, just append all elements to the suffix */  | 
1046  | 0  |             t=suffix;  | 
1047  | 0  |             length=prefixLength;  | 
1048  | 0  |             for(i=0; i<count; ++i) { | 
1049  | 0  |                 s=elements[i];  | 
1050  | 0  |                 while((c=*s++)!=0) { | 
1051  | 0  |                     *t++=c;  | 
1052  | 0  |                     ++length;  | 
1053  | 0  |                 }  | 
1054  | 0  |             }  | 
1055  |  |             /* zero-terminate */  | 
1056  | 0  |             *t=0;  | 
1057  |  | 
  | 
1058  | 0  |             if(!fn(context, start, nameChoice, buffer, length)) { | 
1059  | 0  |                 return FALSE;  | 
1060  | 0  |             }  | 
1061  | 0  |         }  | 
1062  | 0  |         break;  | 
1063  | 0  |     }  | 
1064  | 0  |     default:  | 
1065  |  |         /* undefined type */  | 
1066  | 0  |         break;  | 
1067  | 0  |     }  | 
1068  |  |  | 
1069  | 0  |     return TRUE;  | 
1070  | 0  | }  | 
1071  |  |  | 
1072  |  | /*  | 
1073  |  |  * findAlgName() is almost the same as enumAlgNames() except that it  | 
1074  |  |  * returns the code point for a name if it fits into the range.  | 
1075  |  |  * It returns 0xffff otherwise.  | 
1076  |  |  */  | 
1077  |  | static UChar32  | 
1078  | 0  | findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) { | 
1079  | 0  |     UChar32 code;  | 
1080  |  | 
  | 
1081  | 0  |     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { | 
1082  | 0  |         return 0xffff;  | 
1083  | 0  |     }  | 
1084  |  |  | 
1085  | 0  |     switch(range->type) { | 
1086  | 0  |     case 0: { | 
1087  |  |         /* name = prefix hex-digits */  | 
1088  | 0  |         const char *s=(const char *)(range+1);  | 
1089  | 0  |         char c;  | 
1090  |  | 
  | 
1091  | 0  |         uint16_t i, count;  | 
1092  |  |  | 
1093  |  |         /* compare prefix */  | 
1094  | 0  |         while((c=*s++)!=0) { | 
1095  | 0  |             if((char)c!=*otherName++) { | 
1096  | 0  |                 return 0xffff;  | 
1097  | 0  |             }  | 
1098  | 0  |         }  | 
1099  |  |  | 
1100  |  |         /* read hexadecimal code point value */  | 
1101  | 0  |         count=range->variant;  | 
1102  | 0  |         code=0;  | 
1103  | 0  |         for(i=0; i<count; ++i) { | 
1104  | 0  |             c=*otherName++;  | 
1105  | 0  |             if('0'<=c && c<='9') { | 
1106  | 0  |                 code=(code<<4)|(c-'0');  | 
1107  | 0  |             } else if('A'<=c && c<='F') { | 
1108  | 0  |                 code=(code<<4)|(c-'A'+10);  | 
1109  | 0  |             } else { | 
1110  | 0  |                 return 0xffff;  | 
1111  | 0  |             }  | 
1112  | 0  |         }  | 
1113  |  |  | 
1114  |  |         /* does it fit into the range? */  | 
1115  | 0  |         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) { | 
1116  | 0  |             return code;  | 
1117  | 0  |         }  | 
1118  | 0  |         break;  | 
1119  | 0  |     }  | 
1120  | 0  |     case 1: { | 
1121  | 0  |         char buffer[64];  | 
1122  | 0  |         uint16_t indexes[8];  | 
1123  | 0  |         const char *elementBases[8], *elements[8];  | 
1124  | 0  |         const uint16_t *factors=(const uint16_t *)(range+1);  | 
1125  | 0  |         uint16_t count=range->variant;  | 
1126  | 0  |         const char *s=(const char *)(factors+count), *t;  | 
1127  | 0  |         UChar32 start, limit;  | 
1128  | 0  |         uint16_t i, idx;  | 
1129  |  | 
  | 
1130  | 0  |         char c;  | 
1131  |  |  | 
1132  |  |         /* name = prefix factorized-elements */  | 
1133  |  |  | 
1134  |  |         /* compare prefix */  | 
1135  | 0  |         while((c=*s++)!=0) { | 
1136  | 0  |             if((char)c!=*otherName++) { | 
1137  | 0  |                 return 0xffff;  | 
1138  | 0  |             }  | 
1139  | 0  |         }  | 
1140  |  |  | 
1141  | 0  |         start=(UChar32)range->start;  | 
1142  | 0  |         limit=(UChar32)(range->end+1);  | 
1143  |  |  | 
1144  |  |         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */  | 
1145  | 0  |         writeFactorSuffix(factors, count, s, 0,  | 
1146  | 0  |                           indexes, elementBases, elements, buffer, sizeof(buffer));  | 
1147  |  |  | 
1148  |  |         /* compare the first suffix */  | 
1149  | 0  |         if(0==uprv_strcmp(otherName, buffer)) { | 
1150  | 0  |             return start;  | 
1151  | 0  |         }  | 
1152  |  |  | 
1153  |  |         /* enumerate and compare the rest of the suffixes */  | 
1154  | 0  |         while(++start<limit) { | 
1155  |  |             /* increment the indexes in lexical order bound by the factors */  | 
1156  | 0  |             i=count;  | 
1157  | 0  |             for (;;) { | 
1158  | 0  |                 idx=(uint16_t)(indexes[--i]+1);  | 
1159  | 0  |                 if(idx<factors[i]) { | 
1160  |  |                     /* skip one index and its element string */  | 
1161  | 0  |                     indexes[i]=idx;  | 
1162  | 0  |                     s=elements[i];  | 
1163  | 0  |                     while(*s++!=0) {} | 
1164  | 0  |                     elements[i]=s;  | 
1165  | 0  |                     break;  | 
1166  | 0  |                 } else { | 
1167  |  |                     /* reset this index to 0 and its element string to the first one */  | 
1168  | 0  |                     indexes[i]=0;  | 
1169  | 0  |                     elements[i]=elementBases[i];  | 
1170  | 0  |                 }  | 
1171  | 0  |             }  | 
1172  |  |  | 
1173  |  |             /* to make matters a little easier, just compare all elements of the suffix */  | 
1174  | 0  |             t=otherName;  | 
1175  | 0  |             for(i=0; i<count; ++i) { | 
1176  | 0  |                 s=elements[i];  | 
1177  | 0  |                 while((c=*s++)!=0) { | 
1178  | 0  |                     if(c!=*t++) { | 
1179  | 0  |                         s=""; /* does not match */  | 
1180  | 0  |                         i=99;  | 
1181  | 0  |                     }  | 
1182  | 0  |                 }  | 
1183  | 0  |             }  | 
1184  | 0  |             if(i<99 && *t==0) { | 
1185  | 0  |                 return start;  | 
1186  | 0  |             }  | 
1187  | 0  |         }  | 
1188  | 0  |         break;  | 
1189  | 0  |     }  | 
1190  | 0  |     default:  | 
1191  |  |         /* undefined type */  | 
1192  | 0  |         break;  | 
1193  | 0  |     }  | 
1194  |  |  | 
1195  | 0  |     return 0xffff;  | 
1196  | 0  | }  | 
1197  |  |  | 
1198  |  | /* sets of name characters, maximum name lengths ---------------------------- */  | 
1199  |  |  | 
/*
 * Bit set over byte values, stored as 8 uint32_t words (set[8] = 256 bits).
 * c is reduced to its low 8 bits; bits 7..5 select the word, bits 4..0 the
 * bit inside that word.
 * The argument c is parenthesized so that any expression (for example a|b
 * or a ternary) is converted as a whole before shifting/masking.
 * Note that c is evaluated twice: do not pass expressions with side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1202  |  |  | 
1203  |  | static int32_t  | 
1204  | 0  | calcStringSetLength(uint32_t set[8], const char *s) { | 
1205  | 0  |     int32_t length=0;  | 
1206  | 0  |     char c;  | 
1207  |  | 
  | 
1208  | 0  |     while((c=*s++)!=0) { | 
1209  | 0  |         SET_ADD(set, c);  | 
1210  | 0  |         ++length;  | 
1211  | 0  |     }  | 
1212  | 0  |     return length;  | 
1213  | 0  | }  | 
1214  |  |  | 
1215  |  | static int32_t  | 
1216  | 0  | calcAlgNameSetsLengths(int32_t maxNameLength) { | 
1217  | 0  |     AlgorithmicRange *range;  | 
1218  | 0  |     uint32_t *p;  | 
1219  | 0  |     uint32_t rangeCount;  | 
1220  | 0  |     int32_t length;  | 
1221  |  |  | 
1222  |  |     /* enumerate algorithmic ranges */  | 
1223  | 0  |     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);  | 
1224  | 0  |     rangeCount=*p;  | 
1225  | 0  |     range=(AlgorithmicRange *)(p+1);  | 
1226  | 0  |     while(rangeCount>0) { | 
1227  | 0  |         switch(range->type) { | 
1228  | 0  |         case 0:  | 
1229  |  |             /* name = prefix + (range->variant times) hex-digits */  | 
1230  |  |             /* prefix */  | 
1231  | 0  |             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;  | 
1232  | 0  |             if(length>maxNameLength) { | 
1233  | 0  |                 maxNameLength=length;  | 
1234  | 0  |             }  | 
1235  | 0  |             break;  | 
1236  | 0  |         case 1: { | 
1237  |  |             /* name = prefix factorized-elements */  | 
1238  | 0  |             const uint16_t *factors=(const uint16_t *)(range+1);  | 
1239  | 0  |             const char *s;  | 
1240  | 0  |             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;  | 
1241  |  |  | 
1242  |  |             /* prefix length */  | 
1243  | 0  |             s=(const char *)(factors+count);  | 
1244  | 0  |             length=calcStringSetLength(gNameSet, s);  | 
1245  | 0  |             s+=length+1; /* start of factor suffixes */  | 
1246  |  |  | 
1247  |  |             /* get the set and maximum factor suffix length for each factor */  | 
1248  | 0  |             for(i=0; i<count; ++i) { | 
1249  | 0  |                 maxFactorLength=0;  | 
1250  | 0  |                 for(factor=factors[i]; factor>0; --factor) { | 
1251  | 0  |                     factorLength=calcStringSetLength(gNameSet, s);  | 
1252  | 0  |                     s+=factorLength+1;  | 
1253  | 0  |                     if(factorLength>maxFactorLength) { | 
1254  | 0  |                         maxFactorLength=factorLength;  | 
1255  | 0  |                     }  | 
1256  | 0  |                 }  | 
1257  | 0  |                 length+=maxFactorLength;  | 
1258  | 0  |             }  | 
1259  |  | 
  | 
1260  | 0  |             if(length>maxNameLength) { | 
1261  | 0  |                 maxNameLength=length;  | 
1262  | 0  |             }  | 
1263  | 0  |             break;  | 
1264  | 0  |         }  | 
1265  | 0  |         default:  | 
1266  |  |             /* unknown type */  | 
1267  | 0  |             break;  | 
1268  | 0  |         }  | 
1269  |  |  | 
1270  | 0  |         range=(AlgorithmicRange *)((uint8_t *)range+range->size);  | 
1271  | 0  |         --rangeCount;  | 
1272  | 0  |     }  | 
1273  | 0  |     return maxNameLength;  | 
1274  | 0  | }  | 
1275  |  |  | 
1276  |  | static int32_t  | 
1277  | 0  | calcExtNameSetsLengths(int32_t maxNameLength) { | 
1278  | 0  |     int32_t i, length;  | 
1279  |  | 
  | 
1280  | 0  |     for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) { | 
1281  |  |         /*  | 
1282  |  |          * for each category, count the length of the category name  | 
1283  |  |          * plus 9=  | 
1284  |  |          * 2 for <>  | 
1285  |  |          * 1 for -  | 
1286  |  |          * 6 for most hex digits per code point  | 
1287  |  |          */  | 
1288  | 0  |         length=9+calcStringSetLength(gNameSet, charCatNames[i]);  | 
1289  | 0  |         if(length>maxNameLength) { | 
1290  | 0  |             maxNameLength=length;  | 
1291  | 0  |         }  | 
1292  | 0  |     }  | 
1293  | 0  |     return maxNameLength;  | 
1294  | 0  | }  | 
1295  |  |  | 
1296  |  | static int32_t  | 
1297  |  | calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,  | 
1298  |  |                   uint32_t set[8],  | 
1299  | 0  |                   const uint8_t **pLine, const uint8_t *lineLimit) { | 
1300  | 0  |     const uint8_t *line=*pLine;  | 
1301  | 0  |     int32_t length=0, tokenLength;  | 
1302  | 0  |     uint16_t c, token;  | 
1303  |  | 
  | 
1304  | 0  |     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { | 
1305  | 0  |         if(c>=tokenCount) { | 
1306  |  |             /* implicit letter */  | 
1307  | 0  |             SET_ADD(set, c);  | 
1308  | 0  |             ++length;  | 
1309  | 0  |         } else { | 
1310  | 0  |             token=tokens[c];  | 
1311  | 0  |             if(token==(uint16_t)(-2)) { | 
1312  |  |                 /* this is a lead byte for a double-byte token */  | 
1313  | 0  |                 c=c<<8|*line++;  | 
1314  | 0  |                 token=tokens[c];  | 
1315  | 0  |             }  | 
1316  | 0  |             if(token==(uint16_t)(-1)) { | 
1317  |  |                 /* explicit letter */  | 
1318  | 0  |                 SET_ADD(set, c);  | 
1319  | 0  |                 ++length;  | 
1320  | 0  |             } else { | 
1321  |  |                 /* count token word */  | 
1322  | 0  |                 if(tokenLengths!=NULL) { | 
1323  |  |                     /* use cached token length */  | 
1324  | 0  |                     tokenLength=tokenLengths[c];  | 
1325  | 0  |                     if(tokenLength==0) { | 
1326  | 0  |                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);  | 
1327  | 0  |                         tokenLengths[c]=(int8_t)tokenLength;  | 
1328  | 0  |                     }  | 
1329  | 0  |                 } else { | 
1330  | 0  |                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);  | 
1331  | 0  |                 }  | 
1332  | 0  |                 length+=tokenLength;  | 
1333  | 0  |             }  | 
1334  | 0  |         }  | 
1335  | 0  |     }  | 
1336  |  | 
  | 
1337  | 0  |     *pLine=line;  | 
1338  | 0  |     return length;  | 
1339  | 0  | }  | 
1340  |  |  | 
1341  |  | static void  | 
1342  | 0  | calcGroupNameSetsLengths(int32_t maxNameLength) { | 
1343  | 0  |     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];  | 
1344  |  | 
  | 
1345  | 0  |     uint16_t *tokens=(uint16_t *)uCharNames+8;  | 
1346  | 0  |     uint16_t tokenCount=*tokens++;  | 
1347  | 0  |     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;  | 
1348  |  | 
  | 
1349  | 0  |     int8_t *tokenLengths;  | 
1350  |  | 
  | 
1351  | 0  |     const uint16_t *group;  | 
1352  | 0  |     const uint8_t *s, *line, *lineLimit;  | 
1353  |  | 
  | 
1354  | 0  |     int32_t groupCount, lineNumber, length;  | 
1355  |  | 
  | 
1356  | 0  |     tokenLengths=(int8_t *)uprv_malloc(tokenCount);  | 
1357  | 0  |     if(tokenLengths!=NULL) { | 
1358  | 0  |         uprv_memset(tokenLengths, 0, tokenCount);  | 
1359  | 0  |     }  | 
1360  |  | 
  | 
1361  | 0  |     group=GET_GROUPS(uCharNames);  | 
1362  | 0  |     groupCount=*group++;  | 
1363  |  |  | 
1364  |  |     /* enumerate all groups */  | 
1365  | 0  |     while(groupCount>0) { | 
1366  | 0  |         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);  | 
1367  | 0  |         s=expandGroupLengths(s, offsets, lengths);  | 
1368  |  |  | 
1369  |  |         /* enumerate all lines in each group */  | 
1370  | 0  |         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) { | 
1371  | 0  |             line=s+offsets[lineNumber];  | 
1372  | 0  |             length=lengths[lineNumber];  | 
1373  | 0  |             if(length==0) { | 
1374  | 0  |                 continue;  | 
1375  | 0  |             }  | 
1376  |  |  | 
1377  | 0  |             lineLimit=line+length;  | 
1378  |  |  | 
1379  |  |             /* read regular name */  | 
1380  | 0  |             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);  | 
1381  | 0  |             if(length>maxNameLength) { | 
1382  | 0  |                 maxNameLength=length;  | 
1383  | 0  |             }  | 
1384  | 0  |             if(line==lineLimit) { | 
1385  | 0  |                 continue;  | 
1386  | 0  |             }  | 
1387  |  |  | 
1388  |  |             /* read Unicode 1.0 name */  | 
1389  | 0  |             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);  | 
1390  | 0  |             if(length>maxNameLength) { | 
1391  | 0  |                 maxNameLength=length;  | 
1392  | 0  |             }  | 
1393  | 0  |             if(line==lineLimit) { | 
1394  | 0  |                 continue;  | 
1395  | 0  |             }  | 
1396  |  |  | 
1397  |  |             /* read ISO comment */  | 
1398  |  |             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/  | 
1399  | 0  |         }  | 
1400  |  | 
  | 
1401  | 0  |         group=NEXT_GROUP(group);  | 
1402  | 0  |         --groupCount;  | 
1403  | 0  |     }  | 
1404  |  | 
  | 
1405  | 0  |     if(tokenLengths!=NULL) { | 
1406  | 0  |         uprv_free(tokenLengths);  | 
1407  | 0  |     }  | 
1408  |  |  | 
1409  |  |     /* set gMax... - name length last for threading */  | 
1410  | 0  |     gMaxNameLength=maxNameLength;  | 
1411  | 0  | }  | 
1412  |  |  | 
1413  |  | static UBool  | 
1414  | 0  | calcNameSetsLengths(UErrorCode *pErrorCode) { | 
1415  | 0  |     static const char extChars[]="0123456789ABCDEF<>-";  | 
1416  | 0  |     int32_t i, maxNameLength;  | 
1417  |  | 
  | 
1418  | 0  |     if(gMaxNameLength!=0) { | 
1419  | 0  |         return TRUE;  | 
1420  | 0  |     }  | 
1421  |  |  | 
1422  | 0  |     if(!isDataLoaded(pErrorCode)) { | 
1423  | 0  |         return FALSE;  | 
1424  | 0  |     }  | 
1425  |  |  | 
1426  |  |     /* set hex digits, used in various names, and <>-, used in extended names */  | 
1427  | 0  |     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) { | 
1428  | 0  |         SET_ADD(gNameSet, extChars[i]);  | 
1429  | 0  |     }  | 
1430  |  |  | 
1431  |  |     /* set sets and lengths from algorithmic names */  | 
1432  | 0  |     maxNameLength=calcAlgNameSetsLengths(0);  | 
1433  |  |  | 
1434  |  |     /* set sets and lengths from extended names */  | 
1435  | 0  |     maxNameLength=calcExtNameSetsLengths(maxNameLength);  | 
1436  |  |  | 
1437  |  |     /* set sets and lengths from group names, set global maximum values */  | 
1438  | 0  |     calcGroupNameSetsLengths(maxNameLength);  | 
1439  |  | 
  | 
1440  | 0  |     return TRUE;  | 
1441  | 0  | }  | 
1442  |  |  | 
1443  |  | U_NAMESPACE_END  | 
1444  |  |  | 
1445  |  | /* public API --------------------------------------------------------------- */  | 
1446  |  |  | 
1447  |  | U_NAMESPACE_USE  | 
1448  |  |  | 
1449  |  | U_CAPI int32_t U_EXPORT2  | 
1450  |  | u_charName(UChar32 code, UCharNameChoice nameChoice,  | 
1451  |  |            char *buffer, int32_t bufferLength,  | 
1452  | 0  |            UErrorCode *pErrorCode) { | 
1453  | 0  |      AlgorithmicRange *algRange;  | 
1454  | 0  |     uint32_t *p;  | 
1455  | 0  |     uint32_t i;  | 
1456  | 0  |     int32_t length;  | 
1457  |  |  | 
1458  |  |     /* check the argument values */  | 
1459  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1460  | 0  |         return 0;  | 
1461  | 0  |     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||  | 
1462  | 0  |               bufferLength<0 || (bufferLength>0 && buffer==NULL)  | 
1463  | 0  |     ) { | 
1464  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1465  | 0  |         return 0;  | 
1466  | 0  |     }  | 
1467  |  |  | 
1468  | 0  |     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { | 
1469  | 0  |         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);  | 
1470  | 0  |     }  | 
1471  |  |  | 
1472  | 0  |     length=0;  | 
1473  |  |  | 
1474  |  |     /* try algorithmic names first */  | 
1475  | 0  |     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);  | 
1476  | 0  |     i=*p;  | 
1477  | 0  |     algRange=(AlgorithmicRange *)(p+1);  | 
1478  | 0  |     while(i>0) { | 
1479  | 0  |         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { | 
1480  | 0  |             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);  | 
1481  | 0  |             break;  | 
1482  | 0  |         }  | 
1483  | 0  |         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);  | 
1484  | 0  |         --i;  | 
1485  | 0  |     }  | 
1486  |  | 
  | 
1487  | 0  |     if(i==0) { | 
1488  | 0  |         if (nameChoice == U_EXTENDED_CHAR_NAME) { | 
1489  | 0  |             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);  | 
1490  | 0  |             if (!length) { | 
1491  |  |                 /* extended character name */  | 
1492  | 0  |                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);  | 
1493  | 0  |             }  | 
1494  | 0  |         } else { | 
1495  |  |             /* normal character name */  | 
1496  | 0  |             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);  | 
1497  | 0  |         }  | 
1498  | 0  |     }  | 
1499  |  | 
  | 
1500  | 0  |     return u_terminateChars(buffer, bufferLength, length, pErrorCode);  | 
1501  | 0  | }  | 
1502  |  |  | 
1503  |  | U_CAPI int32_t U_EXPORT2  | 
1504  |  | u_getISOComment(UChar32 /*c*/,  | 
1505  |  |                 char *dest, int32_t destCapacity,  | 
1506  | 0  |                 UErrorCode *pErrorCode) { | 
1507  |  |     /* check the argument values */  | 
1508  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1509  | 0  |         return 0;  | 
1510  | 0  |     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { | 
1511  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1512  | 0  |         return 0;  | 
1513  | 0  |     }  | 
1514  |  |  | 
1515  | 0  |     return u_terminateChars(dest, destCapacity, 0, pErrorCode);  | 
1516  | 0  | }  | 
1517  |  |  | 
1518  |  | U_CAPI UChar32 U_EXPORT2  | 
1519  |  | u_charFromName(UCharNameChoice nameChoice,  | 
1520  |  |                const char *name,  | 
1521  | 0  |                UErrorCode *pErrorCode) { | 
1522  | 0  |     char upper[120] = {0}; | 
1523  | 0  |     char lower[120] = {0}; | 
1524  | 0  |     FindName findName;  | 
1525  | 0  |     AlgorithmicRange *algRange;  | 
1526  | 0  |     uint32_t *p;  | 
1527  | 0  |     uint32_t i;  | 
1528  | 0  |     UChar32 cp = 0;  | 
1529  | 0  |     char c0;  | 
1530  | 0  |     static constexpr UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */  | 
1531  |  | 
  | 
1532  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1533  | 0  |         return error;  | 
1534  | 0  |     }  | 
1535  |  |  | 
1536  | 0  |     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { | 
1537  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1538  | 0  |         return error;  | 
1539  | 0  |     }  | 
1540  |  |  | 
1541  | 0  |     if(!isDataLoaded(pErrorCode)) { | 
1542  | 0  |         return error;  | 
1543  | 0  |     }  | 
1544  |  |  | 
1545  |  |     /* construct the uppercase and lowercase of the name first */  | 
1546  | 0  |     for(i=0; i<sizeof(upper); ++i) { | 
1547  | 0  |         if((c0=*name++)!=0) { | 
1548  | 0  |             upper[i]=uprv_toupper(c0);  | 
1549  | 0  |             lower[i]=uprv_tolower(c0);  | 
1550  | 0  |         } else { | 
1551  | 0  |             upper[i]=lower[i]=0;  | 
1552  | 0  |             break;  | 
1553  | 0  |         }  | 
1554  | 0  |     }  | 
1555  | 0  |     if(i==sizeof(upper)) { | 
1556  |  |         /* name too long, there is no such character */  | 
1557  | 0  |         *pErrorCode = U_ILLEGAL_CHAR_FOUND;  | 
1558  | 0  |         return error;  | 
1559  | 0  |     }  | 
1560  |  |     // i==strlen(name)==strlen(lower)==strlen(upper)  | 
1561  |  |  | 
1562  |  |     /* try extended names first */  | 
1563  | 0  |     if (lower[0] == '<') { | 
1564  | 0  |         if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') { | 
1565  |  |             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.  | 
1566  | 0  |             uint32_t limit = i;  | 
1567  | 0  |             while (i >= 3 && lower[--i] != '-') {} | 
1568  |  |  | 
1569  |  |             // There should be 1 to 8 hex digits.  | 
1570  | 0  |             int32_t hexLength = limit - (i + 1);  | 
1571  | 0  |             if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) { | 
1572  | 0  |                 uint32_t cIdx;  | 
1573  |  | 
  | 
1574  | 0  |                 lower[i] = 0;  | 
1575  |  | 
  | 
1576  | 0  |                 for (++i; i < limit; ++i) { | 
1577  | 0  |                     if (lower[i] >= '0' && lower[i] <= '9') { | 
1578  | 0  |                         cp = (cp << 4) + lower[i] - '0';  | 
1579  | 0  |                     } else if (lower[i] >= 'a' && lower[i] <= 'f') { | 
1580  | 0  |                         cp = (cp << 4) + lower[i] - 'a' + 10;  | 
1581  | 0  |                     } else { | 
1582  | 0  |                         *pErrorCode = U_ILLEGAL_CHAR_FOUND;  | 
1583  | 0  |                         return error;  | 
1584  | 0  |                     }  | 
1585  |  |                     // Prevent signed-integer overflow and out-of-range code points.  | 
1586  | 0  |                     if (cp > UCHAR_MAX_VALUE) { | 
1587  | 0  |                         *pErrorCode = U_ILLEGAL_CHAR_FOUND;  | 
1588  | 0  |                         return error;  | 
1589  | 0  |                     }  | 
1590  | 0  |                 }  | 
1591  |  |  | 
1592  |  |                 /* Now validate the category name.  | 
1593  |  |                    We could use a binary search, or a trie, if  | 
1594  |  |                    we really wanted to. */  | 
1595  | 0  |                 uint8_t cat = getCharCat(cp);  | 
1596  | 0  |                 for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { | 
1597  |  | 
  | 
1598  | 0  |                     if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { | 
1599  | 0  |                         if (cat == cIdx) { | 
1600  | 0  |                             return cp;  | 
1601  | 0  |                         }  | 
1602  | 0  |                         break;  | 
1603  | 0  |                     }  | 
1604  | 0  |                 }  | 
1605  | 0  |             }  | 
1606  | 0  |         }  | 
1607  |  |  | 
1608  | 0  |         *pErrorCode = U_ILLEGAL_CHAR_FOUND;  | 
1609  | 0  |         return error;  | 
1610  | 0  |     }  | 
1611  |  |  | 
1612  |  |     /* try algorithmic names now */  | 
1613  | 0  |     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);  | 
1614  | 0  |     i=*p;  | 
1615  | 0  |     algRange=(AlgorithmicRange *)(p+1);  | 
1616  | 0  |     while(i>0) { | 
1617  | 0  |         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { | 
1618  | 0  |             return cp;  | 
1619  | 0  |         }  | 
1620  | 0  |         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);  | 
1621  | 0  |         --i;  | 
1622  | 0  |     }  | 
1623  |  |  | 
1624  |  |     /* normal character name */  | 
1625  | 0  |     findName.otherName=upper;  | 
1626  | 0  |     findName.code=error;  | 
1627  | 0  |     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);  | 
1628  | 0  |     if (findName.code == error) { | 
1629  | 0  |          *pErrorCode = U_ILLEGAL_CHAR_FOUND;  | 
1630  | 0  |     }  | 
1631  | 0  |     return findName.code;  | 
1632  | 0  | }  | 
1633  |  |  | 
1634  |  | U_CAPI void U_EXPORT2  | 
1635  |  | u_enumCharNames(UChar32 start, UChar32 limit,  | 
1636  |  |                 UEnumCharNamesFn *fn,  | 
1637  |  |                 void *context,  | 
1638  |  |                 UCharNameChoice nameChoice,  | 
1639  | 0  |                 UErrorCode *pErrorCode) { | 
1640  | 0  |     AlgorithmicRange *algRange;  | 
1641  | 0  |     uint32_t *p;  | 
1642  | 0  |     uint32_t i;  | 
1643  |  | 
  | 
1644  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1645  | 0  |         return;  | 
1646  | 0  |     }  | 
1647  |  |  | 
1648  | 0  |     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { | 
1649  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1650  | 0  |         return;  | 
1651  | 0  |     }  | 
1652  |  |  | 
1653  | 0  |     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { | 
1654  | 0  |         limit = UCHAR_MAX_VALUE + 1;  | 
1655  | 0  |     }  | 
1656  | 0  |     if((uint32_t)start>=(uint32_t)limit) { | 
1657  | 0  |         return;  | 
1658  | 0  |     }  | 
1659  |  |  | 
1660  | 0  |     if(!isDataLoaded(pErrorCode)) { | 
1661  | 0  |         return;  | 
1662  | 0  |     }  | 
1663  |  |  | 
1664  |  |     /* interleave the data-driven ones with the algorithmic ones */  | 
1665  |  |     /* iterate over all algorithmic ranges; assume that they are in ascending order */  | 
1666  | 0  |     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);  | 
1667  | 0  |     i=*p;  | 
1668  | 0  |     algRange=(AlgorithmicRange *)(p+1);  | 
1669  | 0  |     while(i>0) { | 
1670  |  |         /* enumerate the character names before the current algorithmic range */  | 
1671  |  |         /* here: start<limit */  | 
1672  | 0  |         if((uint32_t)start<algRange->start) { | 
1673  | 0  |             if((uint32_t)limit<=algRange->start) { | 
1674  | 0  |                 enumNames(uCharNames, start, limit, fn, context, nameChoice);  | 
1675  | 0  |                 return;  | 
1676  | 0  |             }  | 
1677  | 0  |             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { | 
1678  | 0  |                 return;  | 
1679  | 0  |             }  | 
1680  | 0  |             start=(UChar32)algRange->start;  | 
1681  | 0  |         }  | 
1682  |  |         /* enumerate the character names in the current algorithmic range */  | 
1683  |  |         /* here: algRange->start<=start<limit */  | 
1684  | 0  |         if((uint32_t)start<=algRange->end) { | 
1685  | 0  |             if((uint32_t)limit<=(algRange->end+1)) { | 
1686  | 0  |                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);  | 
1687  | 0  |                 return;  | 
1688  | 0  |             }  | 
1689  | 0  |             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { | 
1690  | 0  |                 return;  | 
1691  | 0  |             }  | 
1692  | 0  |             start=(UChar32)algRange->end+1;  | 
1693  | 0  |         }  | 
1694  |  |         /* continue to the next algorithmic range (here: start<limit) */  | 
1695  | 0  |         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);  | 
1696  | 0  |         --i;  | 
1697  | 0  |     }  | 
1698  |  |     /* enumerate the character names after the last algorithmic range */  | 
1699  | 0  |     enumNames(uCharNames, start, limit, fn, context, nameChoice);  | 
1700  | 0  | }  | 
1701  |  |  | 
1702  |  | U_CAPI int32_t U_EXPORT2  | 
1703  | 0  | uprv_getMaxCharNameLength() { | 
1704  | 0  |     UErrorCode errorCode=U_ZERO_ERROR;  | 
1705  | 0  |     if(calcNameSetsLengths(&errorCode)) { | 
1706  | 0  |         return gMaxNameLength;  | 
1707  | 0  |     } else { | 
1708  | 0  |         return 0;  | 
1709  | 0  |     }  | 
1710  | 0  | }  | 
1711  |  |  | 
1712  |  | /**  | 
1713  |  |  * Converts the char set cset into a Unicode set uset.  | 
1714  |  |  * @param cset Set of 256 bit flags corresponding to a set of chars.  | 
1715  |  |  * @param uset USet to receive characters. Existing contents are deleted.  | 
1716  |  |  */  | 
1717  |  | static void  | 
1718  | 0  | charSetToUSet(uint32_t cset[8], const USetAdder *sa) { | 
1719  | 0  |     UChar us[256];  | 
1720  | 0  |     char cs[256];  | 
1721  |  | 
  | 
1722  | 0  |     int32_t i, length;  | 
1723  | 0  |     UErrorCode errorCode;  | 
1724  |  | 
  | 
1725  | 0  |     errorCode=U_ZERO_ERROR;  | 
1726  |  | 
  | 
1727  | 0  |     if(!calcNameSetsLengths(&errorCode)) { | 
1728  | 0  |         return;  | 
1729  | 0  |     }  | 
1730  |  |  | 
1731  |  |     /* build a char string with all chars that are used in character names */  | 
1732  | 0  |     length=0;  | 
1733  | 0  |     for(i=0; i<256; ++i) { | 
1734  | 0  |         if(SET_CONTAINS(cset, i)) { | 
1735  | 0  |             cs[length++]=(char)i;  | 
1736  | 0  |         }  | 
1737  | 0  |     }  | 
1738  |  |  | 
1739  |  |     /* convert the char string to a UChar string */  | 
1740  | 0  |     u_charsToUChars(cs, us, length);  | 
1741  |  |  | 
1742  |  |     /* add each UChar to the USet */  | 
1743  | 0  |     for(i=0; i<length; ++i) { | 
1744  | 0  |         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ | 
1745  | 0  |             sa->add(sa->set, us[i]);  | 
1746  | 0  |         }  | 
1747  | 0  |     }  | 
1748  | 0  | }  | 
1749  |  |  | 
1750  |  | /**  | 
1751  |  |  * Fills set with characters that are used in Unicode character names.  | 
1752  |  |  * @param set USet to receive characters.  | 
1753  |  |  */  | 
1754  |  | U_CAPI void U_EXPORT2  | 
1755  | 0  | uprv_getCharNameCharacters(const USetAdder *sa) { | 
1756  | 0  |     charSetToUSet(gNameSet, sa);  | 
1757  | 0  | }  | 
1758  |  |  | 
1759  |  | /* data swapping ------------------------------------------------------------ */  | 
1760  |  |  | 
1761  |  | /*  | 
1762  |  |  * The token table contains non-negative entries for token bytes,  | 
1763  |  |  * and -1 for bytes that represent themselves in the data file's charset.  | 
1764  |  |  * -2 entries are used for lead bytes.  | 
1765  |  |  *  | 
1766  |  |  * Direct bytes (-1 entries) must be translated from the input charset family  | 
1767  |  |  * to the output charset family.  | 
1768  |  |  * makeTokenMap() writes a permutation mapping for this.  | 
1769  |  |  * Use it once for single-/lead-byte tokens and once more for all trail byte  | 
1770  |  |  * tokens. (';' is an unused trail byte marked with -1.) | 
1771  |  |  */  | 
1772  |  | static void  | 
1773  |  | makeTokenMap(const UDataSwapper *ds,  | 
1774  |  |              int16_t tokens[], uint16_t tokenCount,  | 
1775  |  |              uint8_t map[256],  | 
1776  | 0  |              UErrorCode *pErrorCode) { | 
1777  | 0  |     UBool usedOutChar[256];  | 
1778  | 0  |     uint16_t i, j;  | 
1779  | 0  |     uint8_t c1, c2;  | 
1780  |  | 
  | 
1781  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
1782  | 0  |         return;  | 
1783  | 0  |     }  | 
1784  |  |  | 
1785  | 0  |     if(ds->inCharset==ds->outCharset) { | 
1786  |  |         /* Same charset family: identity permutation */  | 
1787  | 0  |         for(i=0; i<256; ++i) { | 
1788  | 0  |             map[i]=(uint8_t)i;  | 
1789  | 0  |         }  | 
1790  | 0  |     } else { | 
1791  | 0  |         uprv_memset(map, 0, 256);  | 
1792  | 0  |         uprv_memset(usedOutChar, 0, 256);  | 
1793  |  | 
  | 
1794  | 0  |         if(tokenCount>256) { | 
1795  | 0  |             tokenCount=256;  | 
1796  | 0  |         }  | 
1797  |  |  | 
1798  |  |         /* set the direct bytes (byte 0 always maps to itself) */  | 
1799  | 0  |         for(i=1; i<tokenCount; ++i) { | 
1800  | 0  |             if(tokens[i]==-1) { | 
1801  |  |                 /* convert the direct byte character */  | 
1802  | 0  |                 c1=(uint8_t)i;  | 
1803  | 0  |                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);  | 
1804  | 0  |                 if(U_FAILURE(*pErrorCode)) { | 
1805  | 0  |                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",  | 
1806  | 0  |                                      i, ds->inCharset);  | 
1807  | 0  |                     return;  | 
1808  | 0  |                 }  | 
1809  |  |  | 
1810  |  |                 /* enter the converted character into the map and mark it used */  | 
1811  | 0  |                 map[c1]=c2;  | 
1812  | 0  |                 usedOutChar[c2]=TRUE;  | 
1813  | 0  |             }  | 
1814  | 0  |         }  | 
1815  |  |  | 
1816  |  |         /* set the mappings for the rest of the permutation */  | 
1817  | 0  |         for(i=j=1; i<tokenCount; ++i) { | 
1818  |  |             /* set mappings that were not set for direct bytes */  | 
1819  | 0  |             if(map[i]==0) { | 
1820  |  |                 /* set an output byte value that was not used as an output byte above */  | 
1821  | 0  |                 while(usedOutChar[j]) { | 
1822  | 0  |                     ++j;  | 
1823  | 0  |                 }  | 
1824  | 0  |                 map[i]=(uint8_t)j++;  | 
1825  | 0  |             }  | 
1826  | 0  |         }  | 
1827  |  |  | 
1828  |  |         /*  | 
1829  |  |          * leave mappings at tokenCount and above unset if tokenCount<256  | 
1830  |  |          * because they won't be used  | 
1831  |  |          */  | 
1832  | 0  |     }  | 
1833  | 0  | }  | 
1834  |  |  | 
1835  |  | U_CAPI int32_t U_EXPORT2  | 
1836  |  | uchar_swapNames(const UDataSwapper *ds,  | 
1837  |  |                 const void *inData, int32_t length, void *outData,  | 
1838  | 0  |                 UErrorCode *pErrorCode) { | 
1839  | 0  |     const UDataInfo *pInfo;  | 
1840  | 0  |     int32_t headerSize;  | 
1841  |  | 
  | 
1842  | 0  |     const uint8_t *inBytes;  | 
1843  | 0  |     uint8_t *outBytes;  | 
1844  |  | 
  | 
1845  | 0  |     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,  | 
1846  | 0  |              offset, i, count, stringsCount;  | 
1847  |  | 
  | 
1848  | 0  |     const AlgorithmicRange *inRange;  | 
1849  | 0  |     AlgorithmicRange *outRange;  | 
1850  |  |  | 
1851  |  |     /* udata_swapDataHeader checks the arguments */  | 
1852  | 0  |     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);  | 
1853  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1854  | 0  |         return 0;  | 
1855  | 0  |     }  | 
1856  |  |  | 
1857  |  |     /* check data format and format version */  | 
1858  | 0  |     pInfo=(const UDataInfo *)((const char *)inData+4);  | 
1859  | 0  |     if(!(  | 
1860  | 0  |         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */  | 
1861  | 0  |         pInfo->dataFormat[1]==0x6e &&  | 
1862  | 0  |         pInfo->dataFormat[2]==0x61 &&  | 
1863  | 0  |         pInfo->dataFormat[3]==0x6d &&  | 
1864  | 0  |         pInfo->formatVersion[0]==1  | 
1865  | 0  |     )) { | 
1866  | 0  |         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",  | 
1867  | 0  |                          pInfo->dataFormat[0], pInfo->dataFormat[1],  | 
1868  | 0  |                          pInfo->dataFormat[2], pInfo->dataFormat[3],  | 
1869  | 0  |                          pInfo->formatVersion[0]);  | 
1870  | 0  |         *pErrorCode=U_UNSUPPORTED_ERROR;  | 
1871  | 0  |         return 0;  | 
1872  | 0  |     }  | 
1873  |  |  | 
1874  | 0  |     inBytes=(const uint8_t *)inData+headerSize;  | 
1875  | 0  |     outBytes=(uint8_t *)outData+headerSize;  | 
1876  | 0  |     if(length<0) { | 
1877  | 0  |         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);  | 
1878  | 0  |     } else { | 
1879  | 0  |         length-=headerSize;  | 
1880  | 0  |         if( length<20 ||  | 
1881  | 0  |             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))  | 
1882  | 0  |         ) { | 
1883  | 0  |             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",  | 
1884  | 0  |                              length);  | 
1885  | 0  |             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1886  | 0  |             return 0;  | 
1887  | 0  |         }  | 
1888  | 0  |     }  | 
1889  |  |  | 
1890  | 0  |     if(length<0) { | 
1891  |  |         /* preflighting: iterate through algorithmic ranges */  | 
1892  | 0  |         offset=algNamesOffset;  | 
1893  | 0  |         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));  | 
1894  | 0  |         offset+=4;  | 
1895  |  | 
  | 
1896  | 0  |         for(i=0; i<count; ++i) { | 
1897  | 0  |             inRange=(const AlgorithmicRange *)(inBytes+offset);  | 
1898  | 0  |             offset+=ds->readUInt16(inRange->size);  | 
1899  | 0  |         }  | 
1900  | 0  |     } else { | 
1901  |  |         /* swap data */  | 
1902  | 0  |         const uint16_t *p;  | 
1903  | 0  |         uint16_t *q, *temp;  | 
1904  |  | 
  | 
1905  | 0  |         int16_t tokens[512];  | 
1906  | 0  |         uint16_t tokenCount;  | 
1907  |  | 
  | 
1908  | 0  |         uint8_t map[256], trailMap[256];  | 
1909  |  |  | 
1910  |  |         /* copy the data for inaccessible bytes */  | 
1911  | 0  |         if(inBytes!=outBytes) { | 
1912  | 0  |             uprv_memcpy(outBytes, inBytes, length);  | 
1913  | 0  |         }  | 
1914  |  |  | 
1915  |  |         /* the initial 4 offsets first */  | 
1916  | 0  |         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);  | 
1917  | 0  |         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);  | 
1918  | 0  |         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);  | 
1919  | 0  |         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);  | 
1920  |  |  | 
1921  |  |         /*  | 
1922  |  |          * now the tokens table  | 
1923  |  |          * it needs to be permutated along with the compressed name strings  | 
1924  |  |          */  | 
1925  | 0  |         p=(const uint16_t *)(inBytes+16);  | 
1926  | 0  |         q=(uint16_t *)(outBytes+16);  | 
1927  |  |  | 
1928  |  |         /* read and swap the tokenCount */  | 
1929  | 0  |         tokenCount=ds->readUInt16(*p);  | 
1930  | 0  |         ds->swapArray16(ds, p, 2, q, pErrorCode);  | 
1931  | 0  |         ++p;  | 
1932  | 0  |         ++q;  | 
1933  |  |  | 
1934  |  |         /* read the first 512 tokens and make the token maps */  | 
1935  | 0  |         if(tokenCount<=512) { | 
1936  | 0  |             count=tokenCount;  | 
1937  | 0  |         } else { | 
1938  | 0  |             count=512;  | 
1939  | 0  |         }  | 
1940  | 0  |         for(i=0; i<count; ++i) { | 
1941  | 0  |             tokens[i]=udata_readInt16(ds, p[i]);  | 
1942  | 0  |         }  | 
1943  | 0  |         for(; i<512; ++i) { | 
1944  | 0  |             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */  | 
1945  | 0  |         }  | 
1946  | 0  |         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);  | 
1947  | 0  |         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);  | 
1948  | 0  |         if(U_FAILURE(*pErrorCode)) { | 
1949  | 0  |             return 0;  | 
1950  | 0  |         }  | 
1951  |  |  | 
1952  |  |         /*  | 
1953  |  |          * swap and permutate the tokens  | 
1954  |  |          * go through a temporary array to support in-place swapping  | 
1955  |  |          */  | 
1956  | 0  |         temp=(uint16_t *)uprv_malloc(tokenCount*2);  | 
1957  | 0  |         if(temp==NULL) { | 
1958  | 0  |             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",  | 
1959  | 0  |                              tokenCount);  | 
1960  | 0  |             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
1961  | 0  |             return 0;  | 
1962  | 0  |         }  | 
1963  |  |  | 
1964  |  |         /* swap and permutate single-/lead-byte tokens */  | 
1965  | 0  |         for(i=0; i<tokenCount && i<256; ++i) { | 
1966  | 0  |             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);  | 
1967  | 0  |         }  | 
1968  |  |  | 
1969  |  |         /* swap and permutate trail-byte tokens */  | 
1970  | 0  |         for(; i<tokenCount; ++i) { | 
1971  | 0  |             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);  | 
1972  | 0  |         }  | 
1973  |  |  | 
1974  |  |         /* copy the result into the output and free the temporary array */  | 
1975  | 0  |         uprv_memcpy(q, temp, tokenCount*2);  | 
1976  | 0  |         uprv_free(temp);  | 
1977  |  |  | 
1978  |  |         /*  | 
1979  |  |          * swap the token strings but not a possible padding byte after  | 
1980  |  |          * the terminating NUL of the last string  | 
1981  |  |          */  | 
1982  | 0  |         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),  | 
1983  | 0  |                                     outBytes+tokenStringOffset, pErrorCode);  | 
1984  | 0  |         if(U_FAILURE(*pErrorCode)) { | 
1985  | 0  |             udata_printError(ds, "uchar_swapNames(token strings) failed\n");  | 
1986  | 0  |             return 0;  | 
1987  | 0  |         }  | 
1988  |  |  | 
1989  |  |         /* swap the group table */  | 
1990  | 0  |         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));  | 
1991  | 0  |         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),  | 
1992  | 0  |                            outBytes+groupsOffset, pErrorCode);  | 
1993  |  |  | 
1994  |  |         /*  | 
1995  |  |          * swap the group strings  | 
1996  |  |          * swap the string bytes but not the nibble-encoded string lengths  | 
1997  |  |          */  | 
1998  | 0  |         if(ds->inCharset!=ds->outCharset) { | 
1999  | 0  |             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];  | 
2000  |  | 
  | 
2001  | 0  |             const uint8_t *inStrings, *nextInStrings;  | 
2002  | 0  |             uint8_t *outStrings;  | 
2003  |  | 
  | 
2004  | 0  |             uint8_t c;  | 
2005  |  | 
  | 
2006  | 0  |             inStrings=inBytes+groupStringOffset;  | 
2007  | 0  |             outStrings=outBytes+groupStringOffset;  | 
2008  |  | 
  | 
2009  | 0  |             stringsCount=algNamesOffset-groupStringOffset;  | 
2010  |  |  | 
2011  |  |             /* iterate through string groups until only a few padding bytes are left */  | 
2012  | 0  |             while(stringsCount>32) { | 
2013  | 0  |                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);  | 
2014  |  |  | 
2015  |  |                 /* move past the length bytes */  | 
2016  | 0  |                 stringsCount-=(uint32_t)(nextInStrings-inStrings);  | 
2017  | 0  |                 outStrings+=nextInStrings-inStrings;  | 
2018  | 0  |                 inStrings=nextInStrings;  | 
2019  |  | 
  | 
2020  | 0  |                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */  | 
2021  | 0  |                 stringsCount-=count;  | 
2022  |  |  | 
2023  |  |                 /* swap the string bytes using map[] and trailMap[] */  | 
2024  | 0  |                 while(count>0) { | 
2025  | 0  |                     c=*inStrings++;  | 
2026  | 0  |                     *outStrings++=map[c];  | 
2027  | 0  |                     if(tokens[c]!=-2) { | 
2028  | 0  |                         --count;  | 
2029  | 0  |                     } else { | 
2030  |  |                         /* token lead byte: swap the trail byte, too */  | 
2031  | 0  |                         *outStrings++=trailMap[*inStrings++];  | 
2032  | 0  |                         count-=2;  | 
2033  | 0  |                     }  | 
2034  | 0  |                 }  | 
2035  | 0  |             }  | 
2036  | 0  |         }  | 
2037  |  |  | 
2038  |  |         /* swap the algorithmic ranges */  | 
2039  | 0  |         offset=algNamesOffset;  | 
2040  | 0  |         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));  | 
2041  | 0  |         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);  | 
2042  | 0  |         offset+=4;  | 
2043  |  | 
  | 
2044  | 0  |         for(i=0; i<count; ++i) { | 
2045  | 0  |             if(offset>(uint32_t)length) { | 
2046  | 0  |                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",  | 
2047  | 0  |                                  length, i);  | 
2048  | 0  |                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
2049  | 0  |                 return 0;  | 
2050  | 0  |             }  | 
2051  |  |  | 
2052  | 0  |             inRange=(const AlgorithmicRange *)(inBytes+offset);  | 
2053  | 0  |             outRange=(AlgorithmicRange *)(outBytes+offset);  | 
2054  | 0  |             offset+=ds->readUInt16(inRange->size);  | 
2055  |  | 
  | 
2056  | 0  |             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);  | 
2057  | 0  |             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);  | 
2058  | 0  |             switch(inRange->type) { | 
2059  | 0  |             case 0:  | 
2060  |  |                 /* swap prefix string */  | 
2061  | 0  |                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),  | 
2062  | 0  |                                     outRange+1, pErrorCode);  | 
2063  | 0  |                 if(U_FAILURE(*pErrorCode)) { | 
2064  | 0  |                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",  | 
2065  | 0  |                                      i);  | 
2066  | 0  |                     return 0;  | 
2067  | 0  |                 }  | 
2068  | 0  |                 break;  | 
2069  | 0  |             case 1:  | 
2070  | 0  |                 { | 
2071  |  |                     /* swap factors and the prefix and factor strings */  | 
2072  | 0  |                     uint32_t factorsCount;  | 
2073  |  | 
  | 
2074  | 0  |                     factorsCount=inRange->variant;  | 
2075  | 0  |                     p=(const uint16_t *)(inRange+1);  | 
2076  | 0  |                     q=(uint16_t *)(outRange+1);  | 
2077  | 0  |                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);  | 
2078  |  |  | 
2079  |  |                     /* swap the strings, up to the last terminating NUL */  | 
2080  | 0  |                     p+=factorsCount;  | 
2081  | 0  |                     q+=factorsCount;  | 
2082  | 0  |                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);  | 
2083  | 0  |                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { | 
2084  | 0  |                         --stringsCount;  | 
2085  | 0  |                     }  | 
2086  | 0  |                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);  | 
2087  | 0  |                 }  | 
2088  | 0  |                 break;  | 
2089  | 0  |             default:  | 
2090  | 0  |                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",  | 
2091  | 0  |                                  inRange->type, i);  | 
2092  | 0  |                 *pErrorCode=U_UNSUPPORTED_ERROR;  | 
2093  | 0  |                 return 0;  | 
2094  | 0  |             }  | 
2095  | 0  |         }  | 
2096  | 0  |     }  | 
2097  |  |  | 
2098  | 0  |     return headerSize+(int32_t)offset;  | 
2099  | 0  | }  | 
2100  |  |  | 
2101  |  | /*  | 
2102  |  |  * Hey, Emacs, please set the following:  | 
2103  |  |  *  | 
2104  |  |  * Local Variables:  | 
2105  |  |  * indent-tabs-mode: nil  | 
2106  |  |  * End:  | 
2107  |  |  *  | 
2108  |  |  */  |