/src/glib/glib/gunicollate.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* gunicollate.c - Collation |
2 | | * |
3 | | * Copyright 2001,2005 Red Hat, Inc. |
4 | | * |
5 | | * This library is free software; you can redistribute it and/or |
6 | | * modify it under the terms of the GNU Lesser General Public |
7 | | * License as published by the Free Software Foundation; either |
8 | | * version 2.1 of the License, or (at your option) any later version. |
9 | | * |
10 | | * This library is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | | * Lesser General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU Lesser General Public License |
16 | | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
17 | | */ |
18 | | |
19 | | #include "config.h" |
20 | | |
21 | | #include <locale.h> |
22 | | #include <string.h> |
23 | | #ifdef HAVE_WCHAR_H |
24 | | #include <wchar.h> |
25 | | #endif |
26 | | |
27 | | #ifdef HAVE_CARBON |
28 | | #include <CoreServices/CoreServices.h> |
29 | | #endif |
30 | | |
31 | | #include "gmem.h" |
32 | | #include "gunicode.h" |
33 | | #include "gunicodeprivate.h" |
34 | | #include "gstring.h" |
35 | | #include "gstrfuncs.h" |
36 | | #include "gtestutils.h" |
37 | | #include "gcharset.h" |
38 | | #include "gconvert.h" |
39 | | |
40 | | #if SIZEOF_WCHAR_T == 4 && defined(__STDC_ISO_10646__) |
41 | | #define GUNICHAR_EQUALS_WCHAR_T 1 |
42 | | #endif |
43 | | |
44 | | #ifdef _MSC_VER |
45 | | /* Workaround for bug in MSVCR80.DLL */ |
46 | | static gsize |
47 | | msc_strxfrm_wrapper (char *string1, |
48 | | const char *string2, |
49 | | gsize count) |
50 | | { |
51 | | if (!string1 || count <= 0) |
52 | | { |
53 | | char tmp; |
54 | | |
55 | | return strxfrm (&tmp, string2, 1); |
56 | | } |
57 | | return strxfrm (string1, string2, count); |
58 | | } |
59 | | #define strxfrm msc_strxfrm_wrapper |
60 | | #endif |
61 | | |
62 | | /** |
63 | | * g_utf8_collate: |
64 | | * @str1: a UTF-8 encoded string |
65 | | * @str2: a UTF-8 encoded string |
66 | | * |
67 | | * Compares two strings for ordering using the linguistically |
68 | | * correct rules for the [current locale][setlocale]. |
69 | | * When sorting a large number of strings, it will be significantly |
70 | | * faster to obtain collation keys with g_utf8_collate_key() and |
71 | | * compare the keys with strcmp() when sorting instead of sorting |
72 | | * the original strings. |
73 | | * |
74 | | * Returns: < 0 if @str1 compares before @str2, |
75 | | * 0 if they compare equal, > 0 if @str1 compares after @str2. |
76 | | **/ |
77 | | gint |
78 | | g_utf8_collate (const gchar *str1, |
79 | | const gchar *str2) |
80 | 0 | { |
81 | 0 | gint result; |
82 | |
|
83 | | #ifdef HAVE_CARBON |
84 | | |
85 | | UniChar *str1_utf16; |
86 | | UniChar *str2_utf16; |
87 | | glong len1; |
88 | | glong len2; |
89 | | SInt32 retval = 0; |
90 | | |
91 | | g_return_val_if_fail (str1 != NULL, 0); |
92 | | g_return_val_if_fail (str2 != NULL, 0); |
93 | | |
94 | | str1_utf16 = g_utf8_to_utf16 (str1, -1, NULL, &len1, NULL); |
95 | | str2_utf16 = g_utf8_to_utf16 (str2, -1, NULL, &len2, NULL); |
96 | | |
97 | | UCCompareTextDefault (kUCCollateStandardOptions, |
98 | | str1_utf16, len1, str2_utf16, len2, |
99 | | NULL, &retval); |
100 | | result = retval; |
101 | | |
102 | | g_free (str2_utf16); |
103 | | g_free (str1_utf16); |
104 | | |
105 | | #elif defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T) |
106 | |
|
107 | 0 | gunichar *str1_norm; |
108 | 0 | gunichar *str2_norm; |
109 | |
|
110 | 0 | g_return_val_if_fail (str1 != NULL, 0); |
111 | 0 | g_return_val_if_fail (str2 != NULL, 0); |
112 | | |
113 | 0 | str1_norm = _g_utf8_normalize_wc (str1, -1, G_NORMALIZE_ALL_COMPOSE); |
114 | 0 | str2_norm = _g_utf8_normalize_wc (str2, -1, G_NORMALIZE_ALL_COMPOSE); |
115 | |
|
116 | 0 | result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm); |
117 | |
|
118 | 0 | g_free (str1_norm); |
119 | 0 | g_free (str2_norm); |
120 | |
|
121 | | #else |
122 | | |
123 | | const gchar *charset; |
124 | | gchar *str1_norm; |
125 | | gchar *str2_norm; |
126 | | |
127 | | g_return_val_if_fail (str1 != NULL, 0); |
128 | | g_return_val_if_fail (str2 != NULL, 0); |
129 | | |
130 | | str1_norm = g_utf8_normalize (str1, -1, G_NORMALIZE_ALL_COMPOSE); |
131 | | str2_norm = g_utf8_normalize (str2, -1, G_NORMALIZE_ALL_COMPOSE); |
132 | | |
133 | | if (g_get_charset (&charset)) |
134 | | { |
135 | | result = strcoll (str1_norm, str2_norm); |
136 | | } |
137 | | else |
138 | | { |
139 | | gchar *str1_locale = g_convert (str1_norm, -1, charset, "UTF-8", NULL, NULL, NULL); |
140 | | gchar *str2_locale = g_convert (str2_norm, -1, charset, "UTF-8", NULL, NULL, NULL); |
141 | | |
142 | | if (str1_locale && str2_locale) |
143 | | result = strcoll (str1_locale, str2_locale); |
144 | | else if (str1_locale) |
145 | | result = -1; |
146 | | else if (str2_locale) |
147 | | result = 1; |
148 | | else |
149 | | result = strcmp (str1_norm, str2_norm); |
150 | | |
151 | | g_free (str1_locale); |
152 | | g_free (str2_locale); |
153 | | } |
154 | | |
155 | | g_free (str1_norm); |
156 | | g_free (str2_norm); |
157 | | |
158 | | #endif |
159 | |
|
160 | 0 | return result; |
161 | 0 | } |
162 | | |
163 | | #if defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T) |
164 | | /* We need UTF-8 encoding of numbers to encode the weights if |
165 | | * we are using wcsxfrm. However, we aren't encoding Unicode |
166 | | * characters, so we can't simply use g_unichar_to_utf8. |
167 | | * |
168 | | * The following routine is taken (with modification) from GNU |
169 | | * libc's strxfrm routine: |
170 | | * |
171 | | * Copyright (C) 1995-1999,2000,2001 Free Software Foundation, Inc. |
172 | | * Written by Ulrich Drepper <drepper@cygnus.com>, 1995. |
173 | | */ |
174 | | static inline int |
175 | | utf8_encode (char *buf, wchar_t val) |
176 | 0 | { |
177 | 0 | int retval; |
178 | |
|
179 | 0 | if (val < 0x80) |
180 | 0 | { |
181 | 0 | if (buf) |
182 | 0 | *buf++ = (char) val; |
183 | 0 | retval = 1; |
184 | 0 | } |
185 | 0 | else |
186 | 0 | { |
187 | 0 | int step; |
188 | |
|
189 | 0 | for (step = 2; step < 6; ++step) |
190 | 0 | if ((val & (~(guint32)0 << (5 * step + 1))) == 0) |
191 | 0 | break; |
192 | 0 | retval = step; |
193 | |
|
194 | 0 | if (buf) |
195 | 0 | { |
196 | 0 | *buf = (unsigned char) (~0xff >> step); |
197 | 0 | --step; |
198 | 0 | do |
199 | 0 | { |
200 | 0 | buf[step] = 0x80 | (val & 0x3f); |
201 | 0 | val >>= 6; |
202 | 0 | } |
203 | 0 | while (--step > 0); |
204 | 0 | *buf |= val; |
205 | 0 | } |
206 | 0 | } |
207 | |
|
208 | 0 | return retval; |
209 | 0 | } |
210 | | #endif |
211 | | |
212 | | #ifdef HAVE_CARBON |
213 | | |
214 | | static gchar * |
215 | | collate_key_to_string (UCCollationValue *key, |
216 | | gsize key_len) |
217 | | { |
218 | | gchar *result; |
219 | | gsize result_len; |
220 | | long *lkey = (long *) key; |
221 | | |
222 | | /* UCCollationValue format: |
223 | | * |
224 | | * UCCollateOptions (32/64 bits) |
225 | | * SizeInBytes (32/64 bits) |
226 | | * Value (8 bits arrey) |
227 | | * |
228 | | * UCCollateOptions: ordering option mask of the collator |
229 | | * used to create the key. Size changes on 32-bit / 64-bit |
230 | | * hosts. On 64-bits also the extra half-word seems to have |
231 | | * some extra (unknown) meaning. |
232 | | * SizeInBytes: size of the whole structure, in bytes |
233 | | * (including UCCollateOptions and SizeInBytes fields). Size |
234 | | * changes on 32-bit & 64-bit hosts. |
235 | | * Value: array of bytes containing the comparison weights. |
236 | | * Seems to have several sub-strings separated by \001 and \002 |
237 | | * chars. Also, experience shows this is directly strcmp-able. |
238 | | */ |
239 | | |
240 | | result_len = lkey[1]; |
241 | | result = g_malloc (result_len + 1); |
242 | | memcpy (result, &lkey[2], result_len); |
243 | | result[result_len] = '\0'; |
244 | | |
245 | | return result; |
246 | | } |
247 | | |
248 | | static gchar * |
249 | | carbon_collate_key_with_collator (const gchar *str, |
250 | | gssize len, |
251 | | CollatorRef collator) |
252 | | { |
253 | | UniChar *str_utf16 = NULL; |
254 | | glong len_utf16; |
255 | | OSStatus ret; |
256 | | UCCollationValue staticbuf[512]; |
257 | | UCCollationValue *freeme = NULL; |
258 | | UCCollationValue *buf; |
259 | | ItemCount buf_len; |
260 | | ItemCount key_len; |
261 | | ItemCount try_len; |
262 | | gchar *result = NULL; |
263 | | |
264 | | str_utf16 = g_utf8_to_utf16 (str, len, NULL, &len_utf16, NULL); |
265 | | try_len = len_utf16 * 5 + 2; |
266 | | |
267 | | if (try_len <= sizeof staticbuf) |
268 | | { |
269 | | buf = staticbuf; |
270 | | buf_len = sizeof staticbuf; |
271 | | } |
272 | | else |
273 | | { |
274 | | freeme = g_new (UCCollationValue, try_len); |
275 | | buf = freeme; |
276 | | buf_len = try_len; |
277 | | } |
278 | | |
279 | | ret = UCGetCollationKey (collator, str_utf16, len_utf16, |
280 | | buf_len, &key_len, buf); |
281 | | |
282 | | if (ret == kCollateBufferTooSmall) |
283 | | { |
284 | | freeme = g_renew (UCCollationValue, freeme, try_len * 2); |
285 | | buf = freeme; |
286 | | buf_len = try_len * 2; |
287 | | ret = UCGetCollationKey (collator, str_utf16, len_utf16, |
288 | | buf_len, &key_len, buf); |
289 | | } |
290 | | |
291 | | if (ret == 0) |
292 | | result = collate_key_to_string (buf, key_len); |
293 | | else |
294 | | result = g_strdup (""); |
295 | | |
296 | | g_free (freeme); |
297 | | g_free (str_utf16); |
298 | | return result; |
299 | | } |
300 | | |
301 | | static gchar * |
302 | | carbon_collate_key (const gchar *str, |
303 | | gssize len) |
304 | | { |
305 | | static CollatorRef collator; |
306 | | |
307 | | if (G_UNLIKELY (!collator)) |
308 | | { |
309 | | UCCreateCollator (NULL, 0, kUCCollateStandardOptions, &collator); |
310 | | |
311 | | if (!collator) |
312 | | { |
313 | | static gboolean been_here; |
314 | | if (!been_here) |
315 | | g_warning ("%s: UCCreateCollator failed", G_STRLOC); |
316 | | been_here = TRUE; |
317 | | return g_strdup (""); |
318 | | } |
319 | | } |
320 | | |
321 | | return carbon_collate_key_with_collator (str, len, collator); |
322 | | } |
323 | | |
324 | | static gchar * |
325 | | carbon_collate_key_for_filename (const gchar *str, |
326 | | gssize len) |
327 | | { |
328 | | static CollatorRef collator; |
329 | | |
330 | | if (G_UNLIKELY (!collator)) |
331 | | { |
332 | | /* http://developer.apple.com/qa/qa2004/qa1159.html */ |
333 | | UCCreateCollator (NULL, 0, |
334 | | kUCCollateComposeInsensitiveMask |
335 | | | kUCCollateWidthInsensitiveMask |
336 | | | kUCCollateCaseInsensitiveMask |
337 | | | kUCCollateDigitsOverrideMask |
338 | | | kUCCollateDigitsAsNumberMask |
339 | | | kUCCollatePunctuationSignificantMask, |
340 | | &collator); |
341 | | |
342 | | if (!collator) |
343 | | { |
344 | | static gboolean been_here; |
345 | | if (!been_here) |
346 | | g_warning ("%s: UCCreateCollator failed", G_STRLOC); |
347 | | been_here = TRUE; |
348 | | return g_strdup (""); |
349 | | } |
350 | | } |
351 | | |
352 | | return carbon_collate_key_with_collator (str, len, collator); |
353 | | } |
354 | | |
355 | | #endif /* HAVE_CARBON */ |
356 | | |
357 | | /** |
358 | | * g_utf8_collate_key: |
359 | | * @str: a UTF-8 encoded string. |
360 | | * @len: length of @str, in bytes, or -1 if @str is nul-terminated. |
361 | | * |
362 | | * Converts a string into a collation key that can be compared |
363 | | * with other collation keys produced by the same function using |
364 | | * strcmp(). |
365 | | * |
366 | | * The results of comparing the collation keys of two strings |
367 | | * with strcmp() will always be the same as comparing the two |
368 | | * original keys with g_utf8_collate(). |
369 | | * |
370 | | * Note that this function depends on the [current locale][setlocale]. |
371 | | * |
372 | | * Returns: a newly allocated string. This string should |
373 | | * be freed with g_free() when you are done with it. |
374 | | **/ |
375 | | gchar * |
376 | | g_utf8_collate_key (const gchar *str, |
377 | | gssize len) |
378 | 0 | { |
379 | 0 | gchar *result; |
380 | |
|
381 | | #ifdef HAVE_CARBON |
382 | | |
383 | | g_return_val_if_fail (str != NULL, NULL); |
384 | | result = carbon_collate_key (str, len); |
385 | | |
386 | | #elif defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T) |
387 | |
|
388 | 0 | gsize xfrm_len; |
389 | 0 | gunichar *str_norm; |
390 | 0 | wchar_t *result_wc; |
391 | 0 | gsize i; |
392 | 0 | gsize result_len = 0; |
393 | |
|
394 | 0 | g_return_val_if_fail (str != NULL, NULL); |
395 | | |
396 | 0 | str_norm = _g_utf8_normalize_wc (str, len, G_NORMALIZE_ALL_COMPOSE); |
397 | |
|
398 | 0 | xfrm_len = wcsxfrm (NULL, (wchar_t *)str_norm, 0); |
399 | 0 | result_wc = g_new (wchar_t, xfrm_len + 1); |
400 | 0 | wcsxfrm (result_wc, (wchar_t *)str_norm, xfrm_len + 1); |
401 | |
|
402 | 0 | for (i = 0; i < xfrm_len; i++) |
403 | 0 | result_len += utf8_encode (NULL, result_wc[i]); |
404 | |
|
405 | 0 | result = g_malloc (result_len + 1); |
406 | 0 | result_len = 0; |
407 | 0 | for (i = 0; i < xfrm_len; i++) |
408 | 0 | result_len += utf8_encode (result + result_len, result_wc[i]); |
409 | |
|
410 | 0 | result[result_len] = '\0'; |
411 | |
|
412 | 0 | g_free (result_wc); |
413 | 0 | g_free (str_norm); |
414 | |
|
415 | 0 | return result; |
416 | | #else |
417 | | |
418 | | gsize xfrm_len; |
419 | | const gchar *charset; |
420 | | gchar *str_norm; |
421 | | |
422 | | g_return_val_if_fail (str != NULL, NULL); |
423 | | |
424 | | str_norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL_COMPOSE); |
425 | | |
426 | | result = NULL; |
427 | | |
428 | | if (g_get_charset (&charset)) |
429 | | { |
430 | | xfrm_len = strxfrm (NULL, str_norm, 0); |
431 | | if (xfrm_len < G_MAXINT - 2) |
432 | | { |
433 | | result = g_malloc (xfrm_len + 1); |
434 | | strxfrm (result, str_norm, xfrm_len + 1); |
435 | | } |
436 | | } |
437 | | else |
438 | | { |
439 | | gchar *str_locale = g_convert (str_norm, -1, charset, "UTF-8", NULL, NULL, NULL); |
440 | | |
441 | | if (str_locale) |
442 | | { |
443 | | xfrm_len = strxfrm (NULL, str_locale, 0); |
444 | | if (xfrm_len < 0 || xfrm_len >= G_MAXINT - 2) |
445 | | { |
446 | | g_free (str_locale); |
447 | | str_locale = NULL; |
448 | | } |
449 | | } |
450 | | if (str_locale) |
451 | | { |
452 | | result = g_malloc (xfrm_len + 2); |
453 | | result[0] = 'A'; |
454 | | strxfrm (result + 1, str_locale, xfrm_len + 1); |
455 | | |
456 | | g_free (str_locale); |
457 | | } |
458 | | } |
459 | | |
460 | | if (!result) |
461 | | { |
462 | | xfrm_len = strlen (str_norm); |
463 | | result = g_malloc (xfrm_len + 2); |
464 | | result[0] = 'B'; |
465 | | memcpy (result + 1, str_norm, xfrm_len); |
466 | | result[xfrm_len+1] = '\0'; |
467 | | } |
468 | | |
469 | | g_free (str_norm); |
470 | | #endif |
471 | | |
472 | 0 | return result; |
473 | 0 | } |
474 | | |
475 | | /* This is a collation key that is very very likely to sort before any |
476 | | * collation key that libc strxfrm generates. We use this before any |
477 | | * special case (dot or number) to make sure that its sorted before |
478 | | * anything else. |
479 | | */ |
480 | 0 | #define COLLATION_SENTINEL "\1\1\1" |
481 | | |
482 | | /** |
483 | | * g_utf8_collate_key_for_filename: |
484 | | * @str: a UTF-8 encoded string. |
485 | | * @len: length of @str, in bytes, or -1 if @str is nul-terminated. |
486 | | * |
487 | | * Converts a string into a collation key that can be compared |
488 | | * with other collation keys produced by the same function using strcmp(). |
489 | | * |
490 | | * In order to sort filenames correctly, this function treats the dot '.' |
491 | | * as a special case. Most dictionary orderings seem to consider it |
492 | | * insignificant, thus producing the ordering "event.c" "eventgenerator.c" |
493 | | * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we |
494 | | * would like to treat numbers intelligently so that "file1" "file10" "file5" |
495 | | * is sorted as "file1" "file5" "file10". |
496 | | * |
497 | | * Note that this function depends on the [current locale][setlocale]. |
498 | | * |
499 | | * Returns: a newly allocated string. This string should |
500 | | * be freed with g_free() when you are done with it. |
501 | | * |
502 | | * Since: 2.8 |
503 | | */ |
504 | | gchar * |
505 | | g_utf8_collate_key_for_filename (const gchar *str, |
506 | | gssize len) |
507 | 0 | { |
508 | 0 | #ifndef HAVE_CARBON |
509 | 0 | GString *result; |
510 | 0 | GString *append; |
511 | 0 | const gchar *p; |
512 | 0 | const gchar *prev; |
513 | 0 | const gchar *end; |
514 | 0 | gchar *collate_key; |
515 | 0 | gint digits; |
516 | 0 | gint leading_zeros; |
517 | | |
518 | | /* |
519 | | * How it works: |
520 | | * |
521 | | * Split the filename into collatable substrings which do |
522 | | * not contain [.0-9] and special-cased substrings. The collatable |
523 | | * substrings are run through the normal g_utf8_collate_key() and the |
524 | | * resulting keys are concatenated with keys generated from the |
525 | | * special-cased substrings. |
526 | | * |
527 | | * Special cases: Dots are handled by replacing them with '\1' which |
528 | | * implies that short dot-delimited substrings are before long ones, |
529 | | * e.g. |
530 | | * |
531 | | * a\1a (a.a) |
532 | | * a-\1a (a-.a) |
533 | | * aa\1a (aa.a) |
534 | | * |
535 | | * Numbers are handled by prepending to each number d-1 superdigits |
536 | | * where d = number of digits in the number and SUPERDIGIT is a |
537 | | * character with an integer value higher than any digit (for instance |
538 | | * ':'). This ensures that single-digit numbers are sorted before |
539 | | * double-digit numbers which in turn are sorted separately from |
540 | | * triple-digit numbers, etc. To avoid strange side-effects when |
541 | | * sorting strings that already contain SUPERDIGITs, a '\2' |
542 | | * is also prepended, like this |
543 | | * |
544 | | * file\21 (file1) |
545 | | * file\25 (file5) |
546 | | * file\2:10 (file10) |
547 | | * file\2:26 (file26) |
548 | | * file\2::100 (file100) |
549 | | * file:foo (file:foo) |
550 | | * |
551 | | * This has the side-effect of sorting numbers before everything else (except |
552 | | * dots), but this is probably OK. |
553 | | * |
554 | | * Leading digits are ignored when doing the above. To discriminate |
555 | | * numbers which differ only in the number of leading digits, we append |
556 | | * the number of leading digits as a byte at the very end of the collation |
557 | | * key. |
558 | | * |
559 | | * To try avoid conflict with any collation key sequence generated by libc we |
560 | | * start each switch to a special cased part with a sentinel that hopefully |
561 | | * will sort before anything libc will generate. |
562 | | */ |
563 | |
|
564 | 0 | if (len < 0) |
565 | 0 | len = strlen (str); |
566 | |
|
567 | 0 | result = g_string_sized_new (len * 2); |
568 | 0 | append = g_string_sized_new (0); |
569 | |
|
570 | 0 | end = str + len; |
571 | | |
572 | | /* No need to use utf8 functions, since we're only looking for ascii chars */ |
573 | 0 | for (prev = p = str; p < end; p++) |
574 | 0 | { |
575 | 0 | switch (*p) |
576 | 0 | { |
577 | 0 | case '.': |
578 | 0 | if (prev != p) |
579 | 0 | { |
580 | 0 | collate_key = g_utf8_collate_key (prev, p - prev); |
581 | 0 | g_string_append (result, collate_key); |
582 | 0 | g_free (collate_key); |
583 | 0 | } |
584 | | |
585 | 0 | g_string_append (result, COLLATION_SENTINEL "\1"); |
586 | | |
587 | | /* skip the dot */ |
588 | 0 | prev = p + 1; |
589 | 0 | break; |
590 | | |
591 | 0 | case '0': |
592 | 0 | case '1': |
593 | 0 | case '2': |
594 | 0 | case '3': |
595 | 0 | case '4': |
596 | 0 | case '5': |
597 | 0 | case '6': |
598 | 0 | case '7': |
599 | 0 | case '8': |
600 | 0 | case '9': |
601 | 0 | if (prev != p) |
602 | 0 | { |
603 | 0 | collate_key = g_utf8_collate_key (prev, p - prev); |
604 | 0 | g_string_append (result, collate_key); |
605 | 0 | g_free (collate_key); |
606 | 0 | } |
607 | | |
608 | 0 | g_string_append (result, COLLATION_SENTINEL "\2"); |
609 | | |
610 | 0 | prev = p; |
611 | | |
612 | | /* write d-1 colons */ |
613 | 0 | if (*p == '0') |
614 | 0 | { |
615 | 0 | leading_zeros = 1; |
616 | 0 | digits = 0; |
617 | 0 | } |
618 | 0 | else |
619 | 0 | { |
620 | 0 | leading_zeros = 0; |
621 | 0 | digits = 1; |
622 | 0 | } |
623 | | |
624 | 0 | while (++p < end) |
625 | 0 | { |
626 | 0 | if (*p == '0' && !digits) |
627 | 0 | ++leading_zeros; |
628 | 0 | else if (g_ascii_isdigit(*p)) |
629 | 0 | ++digits; |
630 | 0 | else |
631 | 0 | { |
632 | | /* count an all-zero sequence as |
633 | | * one digit plus leading zeros |
634 | | */ |
635 | 0 | if (!digits) |
636 | 0 | { |
637 | 0 | ++digits; |
638 | 0 | --leading_zeros; |
639 | 0 | } |
640 | 0 | break; |
641 | 0 | } |
642 | 0 | } |
643 | |
|
644 | 0 | while (digits > 1) |
645 | 0 | { |
646 | 0 | g_string_append_c (result, ':'); |
647 | 0 | --digits; |
648 | 0 | } |
649 | |
|
650 | 0 | if (leading_zeros > 0) |
651 | 0 | { |
652 | 0 | g_string_append_c (append, (char)leading_zeros); |
653 | 0 | prev += leading_zeros; |
654 | 0 | } |
655 | | |
656 | | /* write the number itself */ |
657 | 0 | g_string_append_len (result, prev, p - prev); |
658 | | |
659 | 0 | prev = p; |
660 | 0 | --p; /* go one step back to avoid disturbing outer loop */ |
661 | 0 | break; |
662 | | |
663 | 0 | default: |
664 | | /* other characters just accumulate */ |
665 | 0 | break; |
666 | 0 | } |
667 | 0 | } |
668 | | |
669 | 0 | if (prev != p) |
670 | 0 | { |
671 | 0 | collate_key = g_utf8_collate_key (prev, p - prev); |
672 | 0 | g_string_append (result, collate_key); |
673 | 0 | g_free (collate_key); |
674 | 0 | } |
675 | | |
676 | 0 | g_string_append (result, append->str); |
677 | 0 | g_string_free (append, TRUE); |
678 | |
|
679 | 0 | return g_string_free (result, FALSE); |
680 | | #else /* HAVE_CARBON */ |
681 | | return carbon_collate_key_for_filename (str, len); |
682 | | #endif |
683 | 0 | } |