/src/icu/source/common/cstring.cpp

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author       Helena Shih
*
* Modification History:
*
*   Date        Name        Description
*   6/18/98     hshih       Created
*   09/08/98    stephen     Added include for ctype, for Mac Port
*   11/15/99    helena      Integrated S/390 IEEE changes. 
******************************************************************************
*/



#include <stdlib.h>
#include <stdio.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"

/*
 * We hardcode case conversion for invariant characters to match our expectation
 * and the compiler execution charset.
 * This prevents problems on systems
 * - with non-default casing behavior, like Turkish system locales where
 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
 * - where there are no lowercase Latin characters at all, or using different
 *   codes (some old EBCDIC codepages)
 *
 * This works because the compiler usually runs on a platform where the execution
 * charset includes all of the invariant characters at their expected
 * code positions, so that the char * string literals in ICU code match
 * the char literals here.
 *
 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
 * and the set of uppercase Latin letters is discontiguous as well.
 */

/*
 * Tests whether c is a basic Latin letter ('a'..'z' or 'A'..'Z'),
 * judged against the char literals of the compiler's execution charset.
 * Returns nonzero for a letter and zero for anything else.
 */
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC letters are not contiguous: test the three runs of each case. */
    const UBool isLower =
        (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');
    const UBool isUpper =
        (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
#else
    const UBool isLower = (c>='a' && c<='z');
    const UBool isUpper = (c>='A' && c<='Z');
#endif
    return isLower || isUpper;
}

/*
 * Maps a lowercase Latin letter of the execution charset to its uppercase
 * counterpart by adding the ('A'-'a') offset; any other char is returned
 * unchanged.
 */
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC lowercase letters occupy three separate runs of codes. */
    const int isLower =
        (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');
#else
    const int isLower = (c>='a' && c<='z');
#endif
    return isLower ? (char)(c+('A'-'a')) : c;
}


#if 0
/*
 * Disabled with #if 0 (not compiled) because cstring.h defines
 * uprv_tolower() to be the same as either uprv_asciitolower() or
 * uprv_ebcdictolower() to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 *
 * Behavior of this reference version: an uppercase Latin letter of the
 * execution charset is shifted by ('a'-'A') to its lowercase counterpart;
 * any other char is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC uppercase letters occupy three separate runs of codes. */
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif

/*
 * Lowercases a char by its ASCII code value: codes 0x41..0x5a ('A'..'Z' in
 * ASCII) are shifted up by 0x20; every other value is returned unchanged.
 * Uses numeric codes rather than char literals, so the result does not
 * depend on the compiler's execution charset.
 */
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    const int isUpper = (c>=0x41 && c<=0x5a);

    return isUpper ? (char)(c+0x20) : c;
}

/*
 * Lowercases a char by its EBCDIC code value: bytes in 0xc1..0xc9,
 * 0xd1..0xd9 or 0xe2..0xe9 are shifted down by 0x40; every other value is
 * returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    /* Compare as an unsigned byte so codes >= 0x80 are not seen as negative. */
    const uint8_t code = (uint8_t)c;
    const int isUpper =
        (code>=0xc1 && code<=0xc9) ||
        (code>=0xd1 && code<=0xd9) ||
        (code>=0xe2 && code<=0xe9);

    return isUpper ? (char)(c-0x40) : c;
}


/*
 * Lowercases a NUL-terminated string in place with uprv_tolower().
 * A null str is accepted and simply returned.
 * Returns the str pointer that was passed in.
 */
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
    char* cursor;

    if (!str) {
        return str;
    }

    /* Every char is rewritten, including the terminator, which ends the loop. */
    for (cursor = str; ; ++cursor) {
        *cursor = (char)uprv_tolower(*cursor);
        if (*cursor == 0) {
            break;
        }
    }

    return str;
}

/*
 * Uppercases a NUL-terminated string in place with uprv_toupper().
 * A null str is accepted and simply returned.
 * Returns the str pointer that was passed in.
 */
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
    char* cursor;

    if (!str) {
        return str;
    }

    /* Every char is rewritten, including the terminator, which ends the loop. */
    for (cursor = str; ; ++cursor) {
        *cursor = (char)uprv_toupper(*cursor);
        if (*cursor == 0) {
            break;
        }
    }

    return str;
}

/*
 * Takes an int32_t and fills in a char* string with that number "radix"-based.
 *
 * Only in radix 10 is the number treated as signed: a negative value gets a
 * leading '-'. In any other radix the value is formatted as the unsigned
 * 32-bit reinterpretation of v. Digit characters come from
 * T_CString_itosOffset().
 *
 * The caller must supply a buffer that is large enough for the result.
 * Radix 10 needs at most 12 chars ("-2147483648" plus NUL); the longest
 * possible output is radix 2 with 33 chars (32 digits plus NUL).
 *
 * radix must be in the range 2..16; this is only checked via U_ASSERT.
 *
 * Returns the length of the string (not including the NUL).
 */
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
    /* Sized for the worst case, radix 2: 32 digits plus the terminating NUL. */
    char      tbuf[33];
    int32_t   tbx;
    uint8_t   digit;
    int32_t   length = 0;
    uint32_t  uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint32_t) v;
    if(v<0 && radix == 10) {
        /*
         * Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT32_MIN.
         */
        uval = (uint32_t)(0u - uval);
        buffer[length++] = '-';
    }

    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % (uint32_t)radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / (uint32_t)radix;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}



/*
 * Takes an int64_t and fills in a char* string with that number "radix"-based.
 *
 * Only in radix 10 is the number treated as signed: a negative value gets a
 * leading '-'. In any other radix the value is formatted as the unsigned
 * 64-bit reinterpretation of v. Digit characters come from
 * T_CString_itosOffset().
 *
 * The caller must supply a buffer that is large enough for the result.
 * Radix 10 needs at most 21 chars ("-9223372036854775808" plus NUL); the
 * longest possible output is radix 2 with 65 chars (64 digits plus NUL).
 *
 * radix must be in the range 2..16; this is only checked via U_ASSERT.
 *
 * Returns the length of the string, not including the terminating NUL.
 */
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
    /* Sized for the worst case, radix 2: 64 digits plus the terminating NUL. */
    char      tbuf[65];
    int32_t   tbx;
    uint8_t   digit;
    int32_t   length = 0;
    uint64_t  uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint64_t) v;
    if(v<0 && radix == 10) {
        /*
         * Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT64_MIN.
         */
        uval = (uint64_t)0 - uval;
        buffer[length++] = '-';
    }

    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}


/*
 * Parses integerString in the given radix by delegating to uprv_strtoul()
 * and returns the result converted to int32_t.
 * The end-of-parse position reported through the out-parameter is discarded,
 * so callers cannot tell how much of the string was consumed.
 */
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *parseEnd;   /* out-parameter for uprv_strtoul(); value is not used */

    return (int32_t)uprv_strtoul(integerString, &parseEnd, radix);
}

/*
 * Compares two NUL-terminated strings, lowercasing each char with
 * uprv_tolower() before comparing.
 *
 * A NULL pointer orders before any non-NULL string; two NULLs are equal.
 * A string that ends first orders before the longer one.
 *
 * Returns 0 if the strings match, otherwise a negative or positive value:
 * -1/1 when one side is NULL or ends first, or the difference of the first
 * pair of lowercased chars (as unsigned char values) that differ.
 */
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2) {
    if(str1==NULL) {
        return (str2==NULL) ? 0 : -1;
    }
    if(str2==NULL) {
        return 1;
    }

    /* Both non-NULL: walk the strings in lockstep until a decision is made. */
    for(;; ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0) {
            return (right==0) ? 0 : -1;
        }
        if(right==0) {
            return 1;
        }

        /* compare non-zero characters with lowercase */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }
}

/*
 * Compares at most n chars of two NUL-terminated strings, lowercasing each
 * char with uprv_tolower() before comparing.
 *
 * A NULL pointer orders before any non-NULL string; two NULLs are equal.
 * A string that ends within the first n chars orders before the longer one.
 *
 * Returns 0 if the first n chars match (or both strings end earlier),
 * otherwise a negative or positive value: -1/1 when one side is NULL or ends
 * first, or the difference of the first pair of lowercased chars (as
 * unsigned char values) that differ.
 */
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
    if(str1==NULL) {
        return (str2==NULL) ? 0 : -1;
    }
    if(str2==NULL) {
        return 1;
    }

    /* Both non-NULL: walk the strings in lockstep for up to n chars. */
    for(; n!=0; --n, ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0) {
            return (right==0) ? 0 : -1;
        }
        if(right==0) {
            return 1;
        }

        /* compare non-zero characters with lowercase */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }

    /* n chars compared without finding a difference */
    return 0;
}

/*
 * Duplicates a NUL-terminated string into a buffer obtained from
 * uprv_malloc(), copying the terminator as well.
 * src is passed to uprv_strlen() unconditionally, so it must be a valid string.
 * Returns the copy, or a null pointer if the allocation failed.
 */
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
    const size_t byteCount = uprv_strlen(src) + 1;   /* +1 for the NUL */
    char *copy = (char *) uprv_malloc(byteCount);

    if (!copy) {
        return copy;
    }

    uprv_memcpy(copy, src, byteCount);
    return copy;
}

/*
 * Duplicates the first n chars of src into a buffer obtained from
 * uprv_malloc() and NUL-terminates the copy.
 * If n is negative, the whole string is duplicated with uprv_strdup().
 * Exactly n chars are copied with uprv_memcpy() without looking for a NUL,
 * so src must provide at least n readable chars.
 * Returns the copy, or a null pointer if the allocation failed.
 */
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
    char *dup;

    if(n < 0) {
        dup = uprv_strdup(src);
    } else {
        /* Widen before adding 1: n+1 overflows int32_t when n is INT32_MAX. */
        const size_t length = (size_t)n;

        dup = (char*)uprv_malloc(length+1);
        if (dup) {
            uprv_memcpy(dup, src, length);
            dup[length] = 0;
        }
    }

    return dup;
}

Coverage Report

Created: 2025-01-28 06:38

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		******************************************************************************
5		*
6		* Copyright (C) 1997-2011, International Business Machines
7		* Corporation and others. All Rights Reserved.
8		*
9		******************************************************************************
10		*
11		* File CSTRING.C
12		*
13		* @author Helena Shih
14		*
15		* Modification History:
16		*
17		* Date Name Description
18		* 6/18/98 hshih Created
19		* 09/08/98 stephen Added include for ctype, for Mac Port
20		* 11/15/99 helena Integrated S/390 IEEE changes.
21		******************************************************************************
22		*/
23
24
25
26		#include <stdlib.h>
27		#include <stdio.h>
28		#include "unicode/utypes.h"
29		#include "cmemory.h"
30		#include "cstring.h"
31		#include "uassert.h"
32
33		/*
34		* We hardcode case conversion for invariant characters to match our expectation
35		* and the compiler execution charset.
36		* This prevents problems on systems
37		* - with non-default casing behavior, like Turkish system locales where
38		* tolower('I') maps to dotless i and toupper('i') maps to dotted I
39		* - where there are no lowercase Latin characters at all, or using different
40		* codes (some old EBCDIC codepages)
41		*
42		* This works because the compiler usually runs on a platform where the execution
43		* charset includes all of the invariant characters at their expected
44		* code positions, so that the char * string literals in ICU code match
45		* the char literals here.
46		*
47		* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48		* and the set of uppercase Latin letters is discontiguous as well.
49		*/
50
51		U_CAPI UBool U_EXPORT2
52	0	uprv_isASCIILetter(char c) {
53		#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
54		return
55		('a'<=c && c<='i') \|\| ('j'<=c && c<='r') \|\| ('s'<=c && c<='z') \|\|
56		('A'<=c && c<='I') \|\| ('J'<=c && c<='R') \|\| ('S'<=c && c<='Z');
57		#else
58	0	return ('a'<=c && c<='z') \|\| ('A'<=c && c<='Z');
59	0	#endif
60	0	}
61
62		U_CAPI char U_EXPORT2
63	0	uprv_toupper(char c) {
64		#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
65		if(('a'<=c && c<='i') \|\| ('j'<=c && c<='r') \|\| ('s'<=c && c<='z')) {
66		c=(char)(c+('A'-'a'));
67		}
68		#else
69	0	if('a'<=c && c<='z') {
70	0	c=(char)(c+('A'-'a'));
71	0	}
72	0	#endif
73	0	return c;
74	0	}
75
76
77		#if 0
78		/*
79		* Commented out because cstring.h defines uprv_tolower() to be
80		* the same as either uprv_asciitolower() or uprv_ebcdictolower()
81		* to reduce the amount of code to cover with tests.
82		*
83		* Note that this uprv_tolower() definition is likely to work for most
84		* charset families, not just ASCII and EBCDIC, because its #else branch
85		* is written generically.
86		*/
87		U_CAPI char U_EXPORT2
88		uprv_tolower(char c) {
89		#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
90		if(('A'<=c && c<='I') \|\| ('J'<=c && c<='R') \|\| ('S'<=c && c<='Z')) {
91		c=(char)(c+('a'-'A'));
92		}
93		#else
94		if('A'<=c && c<='Z') {
95		c=(char)(c+('a'-'A'));
96		}
97		#endif
98		return c;
99		}
100		#endif
101
102		U_CAPI char U_EXPORT2
103	0	uprv_asciitolower(char c) {
104	0	if(0x41<=c && c<=0x5a) {
105	0	c=(char)(c+0x20);
106	0	}
107	0	return c;
108	0	}
109
110		U_CAPI char U_EXPORT2
111	0	uprv_ebcdictolower(char c) {
112	0	if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) \|\|
113	0	(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) \|\|
114	0	(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
115	0	) {
116	0	c=(char)(c-0x40);
117	0	}
118	0	return c;
119	0	}
120
121
122		U_CAPI char* U_EXPORT2
123		T_CString_toLowerCase(char* str)
124	0	{
125	0	char* origPtr = str;
126
127	0	if (str) {
128	0	do
129	0	str = (char)uprv_tolower(str);
130	0	while (*(str++));
131	0	}
132
133	0	return origPtr;
134	0	}
135
136		U_CAPI char* U_EXPORT2
137		T_CString_toUpperCase(char* str)
138	0	{
139	0	char* origPtr = str;
140
141	0	if (str) {
142	0	do
143	0	str = (char)uprv_toupper(str);
144	0	while (*(str++));
145	0	}
146
147	0	return origPtr;
148	0	}
149
150		/*
151		* Takes a int32_t and fills in a char* string with that number "radix"-based.
152		* Does not handle negative values (makes an empty string for them).
153		* Writes at most 12 chars ("-2147483647" plus NUL).
154		* Returns the length of the string (not including the NUL).
155		*/
156		U_CAPI int32_t U_EXPORT2
157		T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158	0	{
159	0	char tbuf[30];
160	0	int32_t tbx = sizeof(tbuf);
161	0	uint8_t digit;
162	0	int32_t length = 0;
163	0	uint32_t uval;
164
165	0	U_ASSERT(radix>=2 && radix<=16);
166	0	uval = (uint32_t) v;
167	0	if(v<0 && radix == 10) {
168		/* Only in base 10 do we conside numbers to be signed. */
169	0	uval = (uint32_t)(-v);
170	0	buffer[length++] = '-';
171	0	}
172
173	0	tbx = sizeof(tbuf)-1;
174	0	tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
175	0	do {
176	0	digit = (uint8_t)(uval % radix);
177	0	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178	0	uval = uval / radix;
179	0	} while (uval != 0);
180
181		/* copy converted number into user buffer */
182	0	uprv_strcpy(buffer+length, tbuf+tbx);
183	0	length += sizeof(tbuf) - tbx -1;
184	0	return length;
185	0	}
186
187
188
189		/*
190		* Takes a int64_t and fills in a char* string with that number "radix"-based.
191		* Writes at most 21: chars ("-9223372036854775807" plus NUL).
192		* Returns the length of the string, not including the terminating NULL.
193		*/
194		U_CAPI int32_t U_EXPORT2
195		T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196	0	{
197	0	char tbuf[30];
198	0	int32_t tbx = sizeof(tbuf);
199	0	uint8_t digit;
200	0	int32_t length = 0;
201	0	uint64_t uval;
202
203	0	U_ASSERT(radix>=2 && radix<=16);
204	0	uval = (uint64_t) v;
205	0	if(v<0 && radix == 10) {
206		/* Only in base 10 do we conside numbers to be signed. */
207	0	uval = (uint64_t)(-v);
208	0	buffer[length++] = '-';
209	0	}
210
211	0	tbx = sizeof(tbuf)-1;
212	0	tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
213	0	do {
214	0	digit = (uint8_t)(uval % radix);
215	0	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216	0	uval = uval / radix;
217	0	} while (uval != 0);
218
219		/* copy converted number into user buffer */
220	0	uprv_strcpy(buffer+length, tbuf+tbx);
221	0	length += sizeof(tbuf) - tbx -1;
222	0	return length;
223	0	}
224
225
226		U_CAPI int32_t U_EXPORT2
227		T_CString_stringToInteger(const char *integerString, int32_t radix)
228	0	{
229	0	char *end;
230	0	return uprv_strtoul(integerString, &end, radix);
231
232	0	}
233
234		U_CAPI int U_EXPORT2
235	0	uprv_stricmp(const char str1, const char str2) {
236	0	if(str1==NULL) {
237	0	if(str2==NULL) {
238	0	return 0;
239	0	} else {
240	0	return -1;
241	0	}
242	0	} else if(str2==NULL) {
243	0	return 1;
244	0	} else {
245		/* compare non-NULL strings lexically with lowercase */
246	0	int rc;
247	0	unsigned char c1, c2;
248
249	0	for(;;) {
250	0	c1=(unsigned char)*str1;
251	0	c2=(unsigned char)*str2;
252	0	if(c1==0) {
253	0	if(c2==0) {
254	0	return 0;
255	0	} else {
256	0	return -1;
257	0	}
258	0	} else if(c2==0) {
259	0	return 1;
260	0	} else {
261		/* compare non-zero characters with lowercase */
262	0	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263	0	if(rc!=0) {
264	0	return rc;
265	0	}
266	0	}
267	0	++str1;
268	0	++str2;
269	0	}
270	0	}
271	0	}
272
273		U_CAPI int U_EXPORT2
274	0	uprv_strnicmp(const char str1, const char str2, uint32_t n) {
275	0	if(str1==NULL) {
276	0	if(str2==NULL) {
277	0	return 0;
278	0	} else {
279	0	return -1;
280	0	}
281	0	} else if(str2==NULL) {
282	0	return 1;
283	0	} else {
284		/* compare non-NULL strings lexically with lowercase */
285	0	int rc;
286	0	unsigned char c1, c2;
287
288	0	for(; n--;) {
289	0	c1=(unsigned char)*str1;
290	0	c2=(unsigned char)*str2;
291	0	if(c1==0) {
292	0	if(c2==0) {
293	0	return 0;
294	0	} else {
295	0	return -1;
296	0	}
297	0	} else if(c2==0) {
298	0	return 1;
299	0	} else {
300		/* compare non-zero characters with lowercase */
301	0	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302	0	if(rc!=0) {
303	0	return rc;
304	0	}
305	0	}
306	0	++str1;
307	0	++str2;
308	0	}
309	0	}
310
311	0	return 0;
312	0	}
313
314		U_CAPI char* U_EXPORT2
315	0	uprv_strdup(const char *src) {
316	0	size_t len = uprv_strlen(src) + 1;
317	0	char dup = (char ) uprv_malloc(len);
318
319	0	if (dup) {
320	0	uprv_memcpy(dup, src, len);
321	0	}
322
323	0	return dup;
324	0	}
325
326		U_CAPI char* U_EXPORT2
327	0	uprv_strndup(const char *src, int32_t n) {
328	0	char *dup;
329
330	0	if(n < 0) {
331	0	dup = uprv_strdup(src);
332	0	} else {
333	0	dup = (char*)uprv_malloc(n+1);
334	0	if (dup) {
335	0	uprv_memcpy(dup, src, n);
336	0	dup[n] = 0;
337	0	}
338	0	}
339
340	0	return dup;
341	0	}