/src/icu/source/i18n/collationweights.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*    | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 1999-2015, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  collationweights.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2001mar08 as ucol_wgt.cpp  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | *  | 
18  |  | *   This file contains code for allocating n collation element weights  | 
19  |  | *   between two exclusive limits.  | 
20  |  | *   It is used only internally by the collation tailoring builder.  | 
21  |  | */  | 
22  |  |  | 
23  |  | #include "unicode/utypes.h"  | 
24  |  |  | 
25  |  | #if !UCONFIG_NO_COLLATION  | 
26  |  |  | 
27  |  | #include "cmemory.h"  | 
28  |  | #include "collation.h"  | 
29  |  | #include "collationweights.h"  | 
30  |  | #include "uarrsort.h"  | 
31  |  | #include "uassert.h"  | 
32  |  |  | 
33  |  | #ifdef UCOL_DEBUG  | 
34  |  | #   include <stdio.h>  | 
35  |  | #endif  | 
36  |  |  | 
37  |  | U_NAMESPACE_BEGIN  | 
38  |  |  | 
39  |  | /* collation element weight allocation -------------------------------------- */  | 
40  |  |  | 
41  |  | /* helper functions for CE weights */  | 
42  |  |  | 
/**
 * Extracts one byte from a 32-bit weight.
 * Byte positions are counted from the most significant byte:
 * length==1 selects bits 31..24, length==4 selects bits 7..0.
 *
 * @param weight the 32-bit weight
 * @param length the 1-based position of the byte to read (the "trail" byte
 *               of a weight that is length bytes long)
 * @return the selected byte value (0..0xff)
 */
static inline uint32_t
getWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift = 8 * (4 - length);
    return static_cast<uint32_t>(weight >> shift) & 0xff;
}
47  |  |  | 
/**
 * Replaces the byte at position length (1-based, counted from the most
 * significant byte) with trail, keeps the bytes before it,
 * and clears all bytes after it.
 *
 * @param weight the 32-bit weight
 * @param length the 1-based position of the byte to write
 * @param trail  the new byte value; it is not masked here
 * @return the modified weight
 */
static inline uint32_t
setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) {
    const int32_t shift = 8 * (4 - length);
    // Keeps only the bytes above the one being written.
    const uint32_t leadMask = 0xffffff00 << shift;
    const uint32_t lead = weight & leadMask;
    return static_cast<uint32_t>(lead | (trail << shift));
}
53  |  |  | 
54  |  | static inline uint32_t  | 
55  | 0  | getWeightByte(uint32_t weight, int32_t idx) { | 
56  | 0  |     return getWeightTrail(weight, idx); /* same calculation */  | 
57  | 0  | }  | 
58  |  |  | 
/**
 * Replaces the byte at the 1-based position idx of a 32-bit weight
 * and leaves all other bytes (before and after) unchanged.
 *
 * @param weight the 32-bit weight
 * @param idx    the 1-based byte position, counted from the most significant byte
 * @param byte   the new byte value; it is not masked here
 * @return the modified weight
 */
static inline uint32_t
setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) {
    // Number of bits occupied by bytes 1..idx, and the shift that
    // moves a byte value into position idx.
    const int32_t leadBits = idx * 8;
    const int32_t shift = 32 - leadBits;

    // Mask for the bytes after position idx.
    //
    // Do not use uint32_t>>32 because on some platforms that does not shift at all
    // while we need it to become 0.
    // PowerPC: 0xffffffff>>32 = 0           (wanted)
    // x86:     0xffffffff>>32 = 0xffffffff  (not wanted)
    //
    // ANSI C99 6.5.7 Bitwise shift operators:
    // "If the value of the right operand is negative
    // or is greater than or equal to the width of the promoted left operand,
    // the behavior is undefined."
    const uint32_t tailMask = (leadBits < 32) ? (static_cast<uint32_t>(0xffffffff) >> leadBits) : 0;

    // Mask for the bytes before position idx.
    const uint32_t leadMask = 0xffffff00 << shift;

    // 0xffffffff except a 00 "hole" for the idx-th byte.
    const uint32_t keepMask = tailMask | leadMask;
    return static_cast<uint32_t>((weight & keepMask) | (byte << shift));
}
82  |  |  | 
/**
 * Keeps the leading length bytes of a 32-bit weight and zeroes the rest.
 *
 * @param weight the 32-bit weight
 * @param length the number of leading bytes to keep
 * @return the truncated weight
 */
static inline uint32_t
truncateWeight(uint32_t weight, int32_t length) {
    const int32_t droppedBits = 8 * (4 - length);
    const uint32_t keepMask = 0xffffffff << droppedBits;
    return static_cast<uint32_t>(weight & keepMask);
}
87  |  |  | 
/**
 * Adds 1 to the byte at the 1-based position length of a 32-bit weight
 * using plain integer arithmetic: a carry propagates into the preceding
 * bytes, and the result wraps around modulo 2^32.
 *
 * @param weight the 32-bit weight
 * @param length the 1-based position of the byte to increment
 * @return the incremented weight
 */
static inline uint32_t
incWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift = 8 * (4 - length);
    return static_cast<uint32_t>(weight + (1UL << shift));
}
92  |  |  | 
/**
 * Subtracts 1 from the byte at the 1-based position length of a 32-bit
 * weight using plain integer arithmetic: a borrow propagates into the
 * preceding bytes, and the result wraps around modulo 2^32.
 *
 * @param weight the 32-bit weight
 * @param length the 1-based position of the byte to decrement
 * @return the decremented weight
 */
static inline uint32_t
decWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift = 8 * (4 - length);
    return static_cast<uint32_t>(weight - (1UL << shift));
}
97  |  |  | 
98  |  | CollationWeights::CollationWeights()  | 
99  | 0  |         : middleLength(0), rangeIndex(0), rangeCount(0) { | 
100  | 0  |     for(int32_t i = 0; i < 5; ++i) { | 
101  | 0  |         minBytes[i] = maxBytes[i] = 0;  | 
102  | 0  |     }  | 
103  | 0  | }  | 
104  |  |  | 
105  |  | void  | 
106  | 0  | CollationWeights::initForPrimary(UBool compressible) { | 
107  | 0  |     middleLength=1;  | 
108  | 0  |     minBytes[1] = Collation::MERGE_SEPARATOR_BYTE + 1;  | 
109  | 0  |     maxBytes[1] = Collation::TRAIL_WEIGHT_BYTE;  | 
110  | 0  |     if(compressible) { | 
111  | 0  |         minBytes[2] = Collation::PRIMARY_COMPRESSION_LOW_BYTE + 1;  | 
112  | 0  |         maxBytes[2] = Collation::PRIMARY_COMPRESSION_HIGH_BYTE - 1;  | 
113  | 0  |     } else { | 
114  | 0  |         minBytes[2] = 2;  | 
115  | 0  |         maxBytes[2] = 0xff;  | 
116  | 0  |     }  | 
117  | 0  |     minBytes[3] = 2;  | 
118  | 0  |     maxBytes[3] = 0xff;  | 
119  | 0  |     minBytes[4] = 2;  | 
120  | 0  |     maxBytes[4] = 0xff;  | 
121  | 0  | }  | 
122  |  |  | 
123  |  | void  | 
124  | 0  | CollationWeights::initForSecondary() { | 
125  |  |     // We use only the lower 16 bits for secondary weights.  | 
126  | 0  |     middleLength=3;  | 
127  | 0  |     minBytes[1] = 0;  | 
128  | 0  |     maxBytes[1] = 0;  | 
129  | 0  |     minBytes[2] = 0;  | 
130  | 0  |     maxBytes[2] = 0;  | 
131  | 0  |     minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;  | 
132  | 0  |     maxBytes[3] = 0xff;  | 
133  | 0  |     minBytes[4] = 2;  | 
134  | 0  |     maxBytes[4] = 0xff;  | 
135  | 0  | }  | 
136  |  |  | 
137  |  | void  | 
138  | 0  | CollationWeights::initForTertiary() { | 
139  |  |     // We use only the lower 16 bits for tertiary weights.  | 
140  | 0  |     middleLength=3;  | 
141  | 0  |     minBytes[1] = 0;  | 
142  | 0  |     maxBytes[1] = 0;  | 
143  | 0  |     minBytes[2] = 0;  | 
144  | 0  |     maxBytes[2] = 0;  | 
145  |  |     // We use only 6 bits per byte.  | 
146  |  |     // The other bits are used for case & quaternary weights.  | 
147  | 0  |     minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;  | 
148  | 0  |     maxBytes[3] = 0x3f;  | 
149  | 0  |     minBytes[4] = 2;  | 
150  | 0  |     maxBytes[4] = 0x3f;  | 
151  | 0  | }  | 
152  |  |  | 
153  |  | uint32_t  | 
154  | 0  | CollationWeights::incWeight(uint32_t weight, int32_t length) const { | 
155  | 0  |     for(;;) { | 
156  | 0  |         uint32_t byte=getWeightByte(weight, length);  | 
157  | 0  |         if(byte<maxBytes[length]) { | 
158  | 0  |             return setWeightByte(weight, length, byte+1);  | 
159  | 0  |         } else { | 
160  |  |             // Roll over, set this byte to the minimum and increment the previous one.  | 
161  | 0  |             weight=setWeightByte(weight, length, minBytes[length]);  | 
162  | 0  |             --length;  | 
163  | 0  |             U_ASSERT(length > 0);  | 
164  | 0  |         }  | 
165  | 0  |     }  | 
166  | 0  | }  | 
167  |  |  | 
168  |  | uint32_t  | 
169  | 0  | CollationWeights::incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const { | 
170  | 0  |     for(;;) { | 
171  | 0  |         offset += getWeightByte(weight, length);  | 
172  | 0  |         if((uint32_t)offset <= maxBytes[length]) { | 
173  | 0  |             return setWeightByte(weight, length, offset);  | 
174  | 0  |         } else { | 
175  |  |             // Split the offset between this byte and the previous one.  | 
176  | 0  |             offset -= minBytes[length];  | 
177  | 0  |             weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length));  | 
178  | 0  |             offset /= countBytes(length);  | 
179  | 0  |             --length;  | 
180  | 0  |             U_ASSERT(length > 0);  | 
181  | 0  |         }  | 
182  | 0  |     }  | 
183  | 0  | }  | 
184  |  |  | 
185  |  | void  | 
186  | 0  | CollationWeights::lengthenRange(WeightRange &range) const { | 
187  | 0  |     int32_t length=range.length+1;  | 
188  | 0  |     range.start=setWeightTrail(range.start, length, minBytes[length]);  | 
189  | 0  |     range.end=setWeightTrail(range.end, length, maxBytes[length]);  | 
190  | 0  |     range.count*=countBytes(length);  | 
191  | 0  |     range.length=length;  | 
192  | 0  | }  | 
193  |  |  | 
194  |  | /* for uprv_sortArray: sort ranges in weight order */  | 
195  |  | static int32_t U_CALLCONV  | 
196  | 0  | compareRanges(const void * /*context*/, const void *left, const void *right) { | 
197  | 0  |     uint32_t l, r;  | 
198  |  | 
  | 
199  | 0  |     l=((const CollationWeights::WeightRange *)left)->start;  | 
200  | 0  |     r=((const CollationWeights::WeightRange *)right)->start;  | 
201  | 0  |     if(l<r) { | 
202  | 0  |         return -1;  | 
203  | 0  |     } else if(l>r) { | 
204  | 0  |         return 1;  | 
205  | 0  |     } else { | 
206  | 0  |         return 0;  | 
207  | 0  |     }  | 
208  | 0  | }  | 
209  |  |  | 
210  |  | UBool  | 
211  | 0  | CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) { | 
212  | 0  |     U_ASSERT(lowerLimit != 0);  | 
213  | 0  |     U_ASSERT(upperLimit != 0);  | 
214  |  |  | 
215  |  |     /* get the lengths of the limits */  | 
216  | 0  |     int32_t lowerLength=lengthOfWeight(lowerLimit);  | 
217  | 0  |     int32_t upperLength=lengthOfWeight(upperLimit);  | 
218  |  | 
  | 
219  |  | #ifdef UCOL_DEBUG  | 
220  |  |     printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength); | 
221  |  |     printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength); | 
222  |  | #endif  | 
223  | 0  |     U_ASSERT(lowerLength>=middleLength);  | 
224  |  |     // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000.  | 
225  |  | 
  | 
226  | 0  |     if(lowerLimit>=upperLimit) { | 
227  |  | #ifdef UCOL_DEBUG  | 
228  |  |         printf("error: no space between lower & upper limits\n"); | 
229  |  | #endif  | 
230  | 0  |         return FALSE;  | 
231  | 0  |     }  | 
232  |  |  | 
233  |  |     /* check that neither is a prefix of the other */  | 
234  | 0  |     if(lowerLength<upperLength) { | 
235  | 0  |         if(lowerLimit==truncateWeight(upperLimit, lowerLength)) { | 
236  |  | #ifdef UCOL_DEBUG  | 
237  |  |             printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit); | 
238  |  | #endif  | 
239  | 0  |             return FALSE;  | 
240  | 0  |         }  | 
241  | 0  |     }  | 
242  |  |     /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */  | 
243  |  |  | 
244  | 0  |     WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */  | 
245  | 0  |     uprv_memset(lower, 0, sizeof(lower));  | 
246  | 0  |     uprv_memset(&middle, 0, sizeof(middle));  | 
247  | 0  |     uprv_memset(upper, 0, sizeof(upper));  | 
248  |  |  | 
249  |  |     /*  | 
250  |  |      * With the limit lengths of 1..4, there are up to 7 ranges for allocation:  | 
251  |  |      * range     minimum length  | 
252  |  |      * lower[4]  4  | 
253  |  |      * lower[3]  3  | 
254  |  |      * lower[2]  2  | 
255  |  |      * middle    1  | 
256  |  |      * upper[2]  2  | 
257  |  |      * upper[3]  3  | 
258  |  |      * upper[4]  4  | 
259  |  |      *  | 
260  |  |      * We are now going to calculate up to 7 ranges.  | 
261  |  |      * Some of them will typically overlap, so we will then have to merge and eliminate ranges.  | 
262  |  |      */  | 
263  | 0  |     uint32_t weight=lowerLimit;  | 
264  | 0  |     for(int32_t length=lowerLength; length>middleLength; --length) { | 
265  | 0  |         uint32_t trail=getWeightTrail(weight, length);  | 
266  | 0  |         if(trail<maxBytes[length]) { | 
267  | 0  |             lower[length].start=incWeightTrail(weight, length);  | 
268  | 0  |             lower[length].end=setWeightTrail(weight, length, maxBytes[length]);  | 
269  | 0  |             lower[length].length=length;  | 
270  | 0  |             lower[length].count=maxBytes[length]-trail;  | 
271  | 0  |         }  | 
272  | 0  |         weight=truncateWeight(weight, length-1);  | 
273  | 0  |     }  | 
274  | 0  |     if(weight<0xff000000) { | 
275  | 0  |         middle.start=incWeightTrail(weight, middleLength);  | 
276  | 0  |     } else { | 
277  |  |         // Prevent overflow for primary lead byte FF  | 
278  |  |         // which would yield a middle range starting at 0.  | 
279  | 0  |         middle.start=0xffffffff;  // no middle range  | 
280  | 0  |     }  | 
281  |  | 
  | 
282  | 0  |     weight=upperLimit;  | 
283  | 0  |     for(int32_t length=upperLength; length>middleLength; --length) { | 
284  | 0  |         uint32_t trail=getWeightTrail(weight, length);  | 
285  | 0  |         if(trail>minBytes[length]) { | 
286  | 0  |             upper[length].start=setWeightTrail(weight, length, minBytes[length]);  | 
287  | 0  |             upper[length].end=decWeightTrail(weight, length);  | 
288  | 0  |             upper[length].length=length;  | 
289  | 0  |             upper[length].count=trail-minBytes[length];  | 
290  | 0  |         }  | 
291  | 0  |         weight=truncateWeight(weight, length-1);  | 
292  | 0  |     }  | 
293  | 0  |     middle.end=decWeightTrail(weight, middleLength);  | 
294  |  |  | 
295  |  |     /* set the middle range */  | 
296  | 0  |     middle.length=middleLength;  | 
297  | 0  |     if(middle.end>=middle.start) { | 
298  | 0  |         middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;  | 
299  | 0  |     } else { | 
300  |  |         /* no middle range, eliminate overlaps */  | 
301  | 0  |         for(int32_t length=4; length>middleLength; --length) { | 
302  | 0  |             if(lower[length].count>0 && upper[length].count>0) { | 
303  |  |                 // Note: The lowerEnd and upperStart weights are versions of  | 
304  |  |                 // lowerLimit and upperLimit (which are lowerLimit<upperLimit),  | 
305  |  |                 // truncated (still less-or-equal)  | 
306  |  |                 // and then with their last bytes changed to the  | 
307  |  |                 // maxByte (for lowerEnd) or minByte (for upperStart).  | 
308  | 0  |                 const uint32_t lowerEnd=lower[length].end;  | 
309  | 0  |                 const uint32_t upperStart=upper[length].start;  | 
310  | 0  |                 UBool merged=FALSE;  | 
311  |  | 
  | 
312  | 0  |                 if(lowerEnd>upperStart) { | 
313  |  |                     // These two lower and upper ranges collide.  | 
314  |  |                     // Since lowerLimit<upperLimit and lowerEnd and upperStart  | 
315  |  |                     // are versions with only their last bytes modified  | 
316  |  |                     // (and following ones removed/reset to 0),  | 
317  |  |                     // lowerEnd>upperStart is only possible  | 
318  |  |                     // if the leading bytes are equal  | 
319  |  |                     // and lastByte(lowerEnd)>lastByte(upperStart).  | 
320  | 0  |                     U_ASSERT(truncateWeight(lowerEnd, length-1)==  | 
321  | 0  |                             truncateWeight(upperStart, length-1));  | 
322  |  |                     // Intersect these two ranges.  | 
323  | 0  |                     lower[length].end=upper[length].end;  | 
324  | 0  |                     lower[length].count=  | 
325  | 0  |                             (int32_t)getWeightTrail(lower[length].end, length)-  | 
326  | 0  |                             (int32_t)getWeightTrail(lower[length].start, length)+1;  | 
327  |  |                     // count might be <=0 in which case there is no room,  | 
328  |  |                     // and the range-collecting code below will ignore this range.  | 
329  | 0  |                     merged=TRUE;  | 
330  | 0  |                 } else if(lowerEnd==upperStart) { | 
331  |  |                     // Not possible, unless minByte==maxByte which is not allowed.  | 
332  | 0  |                     U_ASSERT(minBytes[length]<maxBytes[length]);  | 
333  | 0  |                 } else /* lowerEnd<upperStart */ { | 
334  | 0  |                     if(incWeight(lowerEnd, length)==upperStart) { | 
335  |  |                         // Merge adjacent ranges.  | 
336  | 0  |                         lower[length].end=upper[length].end;  | 
337  | 0  |                         lower[length].count+=upper[length].count;  // might be >countBytes  | 
338  | 0  |                         merged=TRUE;  | 
339  | 0  |                     }  | 
340  | 0  |                 }  | 
341  | 0  |                 if(merged) { | 
342  |  |                     // Remove all shorter ranges.  | 
343  |  |                     // There was no room available for them between the ranges we just merged.  | 
344  | 0  |                     upper[length].count=0;  | 
345  | 0  |                     while(--length>middleLength) { | 
346  | 0  |                         lower[length].count=upper[length].count=0;  | 
347  | 0  |                     }  | 
348  | 0  |                     break;  | 
349  | 0  |                 }  | 
350  | 0  |             }  | 
351  | 0  |         }  | 
352  | 0  |     }  | 
353  |  | 
  | 
354  |  | #ifdef UCOL_DEBUG  | 
355  |  |     /* print ranges */  | 
356  |  |     for(int32_t length=4; length>=2; --length) { | 
357  |  |         if(lower[length].count>0) { | 
358  |  |             printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count); | 
359  |  |         }  | 
360  |  |     }  | 
361  |  |     if(middle.count>0) { | 
362  |  |         printf("middle   .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count); | 
363  |  |     }  | 
364  |  |     for(int32_t length=2; length<=4; ++length) { | 
365  |  |         if(upper[length].count>0) { | 
366  |  |             printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count); | 
367  |  |         }  | 
368  |  |     }  | 
369  |  | #endif  | 
370  |  |  | 
371  |  |     /* copy the ranges, shortest first, into the result array */  | 
372  | 0  |     rangeCount=0;  | 
373  | 0  |     if(middle.count>0) { | 
374  | 0  |         uprv_memcpy(ranges, &middle, sizeof(WeightRange));  | 
375  | 0  |         rangeCount=1;  | 
376  | 0  |     }  | 
377  | 0  |     for(int32_t length=middleLength+1; length<=4; ++length) { | 
378  |  |         /* copy upper first so that later the middle range is more likely the first one to use */  | 
379  | 0  |         if(upper[length].count>0) { | 
380  | 0  |             uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange));  | 
381  | 0  |             ++rangeCount;  | 
382  | 0  |         }  | 
383  | 0  |         if(lower[length].count>0) { | 
384  | 0  |             uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange));  | 
385  | 0  |             ++rangeCount;  | 
386  | 0  |         }  | 
387  | 0  |     }  | 
388  | 0  |     return rangeCount>0;  | 
389  | 0  | }  | 
390  |  |  | 
391  |  | UBool  | 
392  | 0  | CollationWeights::allocWeightsInShortRanges(int32_t n, int32_t minLength) { | 
393  |  |     // See if the first few minLength and minLength+1 ranges have enough weights.  | 
394  | 0  |     for(int32_t i = 0; i < rangeCount && ranges[i].length <= (minLength + 1); ++i) { | 
395  | 0  |         if(n <= ranges[i].count) { | 
396  |  |             // Use the first few minLength and minLength+1 ranges.  | 
397  | 0  |             if(ranges[i].length > minLength) { | 
398  |  |                 // Reduce the number of weights from the last minLength+1 range  | 
399  |  |                 // which might sort before some minLength ranges,  | 
400  |  |                 // so that we use all weights in the minLength ranges.  | 
401  | 0  |                 ranges[i].count = n;  | 
402  | 0  |             }  | 
403  | 0  |             rangeCount = i + 1;  | 
404  |  | #ifdef UCOL_DEBUG  | 
405  |  |             printf("take first %ld ranges\n", rangeCount); | 
406  |  | #endif  | 
407  |  | 
  | 
408  | 0  |             if(rangeCount>1) { | 
409  |  |                 /* sort the ranges by weight values */  | 
410  | 0  |                 UErrorCode errorCode=U_ZERO_ERROR;  | 
411  | 0  |                 uprv_sortArray(ranges, rangeCount, sizeof(WeightRange),  | 
412  | 0  |                                compareRanges, NULL, FALSE, &errorCode);  | 
413  |  |                 /* ignore error code: we know that the internal sort function will not fail here */  | 
414  | 0  |             }  | 
415  | 0  |             return TRUE;  | 
416  | 0  |         }  | 
417  | 0  |         n -= ranges[i].count;  // still >0  | 
418  | 0  |     }  | 
419  | 0  |     return FALSE;  | 
420  | 0  | }  | 
421  |  |  | 
422  |  | UBool  | 
423  | 0  | CollationWeights::allocWeightsInMinLengthRanges(int32_t n, int32_t minLength) { | 
424  |  |     // See if the minLength ranges have enough weights  | 
425  |  |     // when we split one and lengthen the following ones.  | 
426  | 0  |     int32_t count = 0;  | 
427  | 0  |     int32_t minLengthRangeCount;  | 
428  | 0  |     for(minLengthRangeCount = 0;  | 
429  | 0  |             minLengthRangeCount < rangeCount &&  | 
430  | 0  |                 ranges[minLengthRangeCount].length == minLength;  | 
431  | 0  |             ++minLengthRangeCount) { | 
432  | 0  |         count += ranges[minLengthRangeCount].count;  | 
433  | 0  |     }  | 
434  |  | 
  | 
435  | 0  |     int32_t nextCountBytes = countBytes(minLength + 1);  | 
436  | 0  |     if(n > count * nextCountBytes) { return FALSE; } | 
437  |  |  | 
438  |  |     // Use the minLength ranges. Merge them, and then split again as necessary.  | 
439  | 0  |     uint32_t start = ranges[0].start;  | 
440  | 0  |     uint32_t end = ranges[0].end;  | 
441  | 0  |     for(int32_t i = 1; i < minLengthRangeCount; ++i) { | 
442  | 0  |         if(ranges[i].start < start) { start = ranges[i].start; } | 
443  | 0  |         if(ranges[i].end > end) { end = ranges[i].end; } | 
444  | 0  |     }  | 
445  |  |  | 
446  |  |     // Calculate how to split the range between minLength (count1) and minLength+1 (count2).  | 
447  |  |     // Goal:  | 
448  |  |     //   count1 + count2 * nextCountBytes = n  | 
449  |  |     //   count1 + count2 = count  | 
450  |  |     // These turn into  | 
451  |  |     //   (count - count2) + count2 * nextCountBytes = n  | 
452  |  |     // and then into the following count1 & count2 computations.  | 
453  | 0  |     int32_t count2 = (n - count) / (nextCountBytes - 1);  // number of weights to be lengthened  | 
454  | 0  |     int32_t count1 = count - count2;  // number of minLength weights  | 
455  | 0  |     if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) { | 
456  |  |         // round up  | 
457  | 0  |         ++count2;  | 
458  | 0  |         --count1;  | 
459  | 0  |         U_ASSERT((count1 + count2 * nextCountBytes) >= n);  | 
460  | 0  |     }  | 
461  |  | 
  | 
462  | 0  |     ranges[0].start = start;  | 
463  |  | 
  | 
464  | 0  |     if(count1 == 0) { | 
465  |  |         // Make one long range.  | 
466  | 0  |         ranges[0].end = end;  | 
467  | 0  |         ranges[0].count = count;  | 
468  | 0  |         lengthenRange(ranges[0]);  | 
469  | 0  |         rangeCount = 1;  | 
470  | 0  |     } else { | 
471  |  |         // Split the range, lengthen the second part.  | 
472  |  | #ifdef UCOL_DEBUG  | 
473  |  |         printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n", | 
474  |  |                splitRange, rangeCount, count1, count2);  | 
475  |  | #endif  | 
476  |  |  | 
477  |  |         // Next start = start + count1. First end = 1 before that.  | 
478  | 0  |         ranges[0].end = incWeightByOffset(start, minLength, count1 - 1);  | 
479  | 0  |         ranges[0].count = count1;  | 
480  |  | 
  | 
481  | 0  |         ranges[1].start = incWeight(ranges[0].end, minLength);  | 
482  | 0  |         ranges[1].end = end;  | 
483  | 0  |         ranges[1].length = minLength;  // +1 when lengthened  | 
484  | 0  |         ranges[1].count = count2;  // *countBytes when lengthened  | 
485  | 0  |         lengthenRange(ranges[1]);  | 
486  | 0  |         rangeCount = 2;  | 
487  | 0  |     }  | 
488  | 0  |     return TRUE;  | 
489  | 0  | }  | 
490  |  |  | 
491  |  | /*  | 
492  |  |  * call getWeightRanges and then determine heuristically  | 
493  |  |  * which ranges to use for a given number of weights between (excluding)  | 
494  |  |  * two limits  | 
495  |  |  */  | 
496  |  | UBool  | 
497  | 0  | CollationWeights::allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n) { | 
498  |  | #ifdef UCOL_DEBUG  | 
499  |  |     puts(""); | 
500  |  | #endif  | 
501  |  | 
  | 
502  | 0  |     if(!getWeightRanges(lowerLimit, upperLimit)) { | 
503  |  | #ifdef UCOL_DEBUG  | 
504  |  |         printf("error: unable to get Weight ranges\n"); | 
505  |  | #endif  | 
506  | 0  |         return FALSE;  | 
507  | 0  |     }  | 
508  |  |  | 
509  |  |     /* try until we find suitably large ranges */  | 
510  | 0  |     for(;;) { | 
511  |  |         /* get the smallest number of bytes in a range */  | 
512  | 0  |         int32_t minLength=ranges[0].length;  | 
513  |  | 
  | 
514  | 0  |         if(allocWeightsInShortRanges(n, minLength)) { break; } | 
515  |  |  | 
516  | 0  |         if(minLength == 4) { | 
517  |  | #ifdef UCOL_DEBUG  | 
518  |  |             printf("error: the maximum number of %ld weights is insufficient for n=%ld\n", | 
519  |  |                    minLengthCount, n);  | 
520  |  | #endif  | 
521  | 0  |             return FALSE;  | 
522  | 0  |         }  | 
523  |  |  | 
524  | 0  |         if(allocWeightsInMinLengthRanges(n, minLength)) { break; } | 
525  |  |  | 
526  |  |         /* no good match, lengthen all minLength ranges and iterate */  | 
527  |  | #ifdef UCOL_DEBUG  | 
528  |  |         printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1); | 
529  |  | #endif  | 
530  | 0  |         for(int32_t i=0; i<rangeCount && ranges[i].length==minLength; ++i) { | 
531  | 0  |             lengthenRange(ranges[i]);  | 
532  | 0  |         }  | 
533  | 0  |     }  | 
534  |  |  | 
535  |  | #ifdef UCOL_DEBUG  | 
536  |  |     puts("final ranges:"); | 
537  |  |     for(int32_t i=0; i<rangeCount; ++i) { | 
538  |  |         printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n", | 
539  |  |                i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count);  | 
540  |  |     }  | 
541  |  | #endif  | 
542  |  |  | 
543  | 0  |     rangeIndex = 0;  | 
544  | 0  |     return TRUE;  | 
545  | 0  | }  | 
546  |  |  | 
547  |  | uint32_t  | 
548  | 0  | CollationWeights::nextWeight() { | 
549  | 0  |     if(rangeIndex >= rangeCount) { | 
550  | 0  |         return 0xffffffff;  | 
551  | 0  |     } else { | 
552  |  |         /* get the next weight */  | 
553  | 0  |         WeightRange &range = ranges[rangeIndex];  | 
554  | 0  |         uint32_t weight = range.start;  | 
555  | 0  |         if(--range.count == 0) { | 
556  |  |             /* this range is finished */  | 
557  | 0  |             ++rangeIndex;  | 
558  | 0  |         } else { | 
559  |  |             /* increment the weight for the next value */  | 
560  | 0  |             range.start = incWeight(weight, range.length);  | 
561  | 0  |             U_ASSERT(range.start <= range.end);  | 
562  | 0  |         }  | 
563  |  | 
  | 
564  | 0  |         return weight;  | 
565  | 0  |     }  | 
566  | 0  | }  | 
567  |  |  | 
568  |  | U_NAMESPACE_END  | 
569  |  |  | 
570  |  | #endif /* #if !UCONFIG_NO_COLLATION */  |