/work/workdir/UnpackedTarball/libexttextcat/src/utf8misc.c

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/***************************************************************************
 *   Copyright (C) 2006 by Jocelyn Merand                                  *
 *   joc.mer@gmail.com                                                     *
 *                                                                         *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the
 * distribution.
 *
 * - Neither the name of the WiseGuys Internet B.V. nor the names of
 * its contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

#include "utf8misc.h"
/* #include <stdio.h> */

/**
 * Bit masks used by the character processing functions in this file to
 * recognise multi-byte UTF-8 characters and to count the bytes they occupy.
 *
 * _UTF8_ is defined right here whenever it is not already defined, so the
 * UTF-8 branch of the conditional below is always the one selected.
 */
#ifndef _UTF8_
#define _UTF8_
#endif

#ifdef _UTF8_
/* High bit of a byte; the functions below test it on the first byte of a
 * character to decide whether a multi-byte sequence starts there, and on the
 * shifted weight to decide whether more bytes remain. */
#define ESCAPE_MASK 0x80
/* Upper four bits of the first byte; after a one-bit left shift, each
 * remaining leading 1 bit accounts for one more byte of the sequence. */
#define WEIGHT_MASK 0xF0
#else
/* Single-byte fallback: a weight mask of 0 means the byte-counting loops
 * never run, so every character is treated as exactly one byte long. */
#define ESCAPE_MASK 0xFF
#define WEIGHT_MASK 0x00
#endif

/**
 * Step over the character that starts at str.
 *
 * When the first byte has its high bit set, its upper four bits tell how many
 * bytes belong to the character: they are shifted left one bit at a time and
 * one byte is skipped for every 1 bit that reaches the top position. The walk
 * stops early if the terminating NUL is met.
 *
 * @param str  NUL-terminated string, positioned at the start of a character.
 * @return     pointer to the byte following the character, or to the
 *             terminating NUL when the string ends first.
 */
const char *utf8_next_char(const char *str)
{
    const char *cursor = str;

    if (*cursor & ESCAPE_MASK)
    {
        unsigned char pending;

        /* Each leading 1 bit still held in pending stands for one byte to
         * skip; never walk across the end of the string. */
        for (pending = ((*cursor & WEIGHT_MASK) << 1);
             (pending & ESCAPE_MASK) && *cursor;
             pending = pending << 1)
        {
            ++cursor;
        }
    }

    /* Consume the last (or only) byte of the character unless we are already
     * sitting on the terminator. */
    if (*cursor != '\0')
    {
        ++cursor;
    }

    return cursor;
}

/**
 * Copy the single character that starts at str into dest.
 *
 * The number of bytes is derived from the upper four bits of the first byte
 * when its high bit is set; otherwise exactly one byte is copied. Copying
 * stops at the terminating NUL of str, which is never copied, and no NUL is
 * appended to dest.
 *
 * @param str   NUL-terminated string, positioned at the start of a character.
 * @param dest  buffer receiving the bytes of that character.
 * @return      number of bytes written to dest (0 when str is empty).
 */
int utf8_charcopy(const char *str, char *dest)
{
    int copied = 0;

    if (str[0] & ESCAPE_MASK)
    {
        unsigned char pending = ((str[0] & WEIGHT_MASK) << 1);

        /* One byte is copied for every leading 1 bit left in pending, as
         * long as the source string has not ended. */
        for (; (pending & ESCAPE_MASK) && str[copied];
             pending = pending << 1)
        {
            dest[copied] = str[copied];
            ++copied;
        }
    }

    /* Final (or only) byte of the character. */
    if (str[copied] != '\0')
    {
        dest[copied] = str[copied];
        ++copied;
    }

    return copied;
}


/**
 * Check whether lex is exactly the first len characters of key.
 *
 * Characters are measured the same way utf8_next_char() measures them: a
 * first byte with its high bit set announces, through its upper four bits,
 * how many bytes follow, and a sequence cut short by the terminating NUL
 * counts as one character.
 *
 * @param lex  NUL-terminated candidate; it must end right after the compared
 *             characters for the result to be 1.
 * @param key  NUL-terminated string whose leading characters are compared.
 * @param len  number of characters (not bytes) to compare.
 * @return     1 when the first len characters of key equal lex and lex ends
 *             there; 0 otherwise, including when key holds fewer than len
 *             characters.
 *
 * Neither string is read past its terminating NUL. Without the guards below,
 * two strings that both ended before len characters (or inside a truncated
 * multi-byte sequence) would compare equal at the NUL and the index would
 * keep advancing beyond both buffers.
 */
int utf8_issame(char *lex, char *key, int len)
{
    int char_counter = 0;
    int pointer = 0;

    while (char_counter < len)
    {
        if (key[pointer] == '\0')
        {
            /* key has run out before len characters were seen: there is no
             * prefix of that length, and going on would read out of bounds */
            return 0;
        }

        /* if the first bit of the current char is 1 */
        if (key[pointer] & ESCAPE_MASK)
        {
            /*
             * then key[pointer] starts a multi-byte character and its upper
             * bits are used to count the bytes of that character
             */
            unsigned char escape_char = ((key[pointer] & WEIGHT_MASK) << 1);

            /* stop on a mismatch, when the count is exhausted, or when key
             * ends inside the sequence */
            while ((escape_char & ESCAPE_MASK) && key[pointer]
                   && key[pointer] == lex[pointer])
            {
                escape_char = escape_char << 1;
                ++pointer;
            }
        }
        ++char_counter;         /* and we are on a new utf8 character */
        if (key[pointer] != lex[pointer])
        {
            return 0;
        }
        if (key[pointer])
        {
            /* only step over a real byte, never over the terminator */
            ++pointer;
        }
    }

    /* lex must not continue beyond the compared prefix */
    if (lex[pointer] != '\0')
    {
        return 0;
    }

    return 1;
}

/**
 * Count the characters (not bytes) in a NUL-terminated string.
 *
 * The string is walked one character at a time with utf8_next_char() until
 * the terminating NUL is reached.
 *
 * @param str  NUL-terminated string to measure.
 * @return     number of characters found before the terminator.
 */
extern int utf8_strlen(const char *str)
{
    const char *cursor;
    int total = 0;

    for (cursor = str; *cursor != '\0'; cursor = utf8_next_char(cursor))
    {
        ++total;
    }

    return total;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2		/***************************************************************************
3		* Copyright (C) 2006 by Jocelyn Merand *
4		* joc.mer@gmail.com *
5		* *
6		* THE BSD LICENSE
7		*
8		* Redistribution and use in source and binary forms, with or without
9		* modification, are permitted provided that the following conditions
10		* are met:
11		*
12		* - Redistributions of source code must retain the above copyright
13		* notice, this list of conditions and the following disclaimer.
14		*
15		* - Redistributions in binary form must reproduce the above copyright
16		* notice, this list of conditions and the following disclaimer in the
17		* documentation and/or other materials provided with the
18		* distribution.
19		*
20		* - Neither the name of the WiseGuys Internet B.V. nor the names of
21		* its contributors may be used to endorse or promote products derived
22		* from this software without specific prior written permission.
23		*
24		* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25		* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26		* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27		* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28		* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29		* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30		* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31		* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32		* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33		* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34		* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35		***************************************************************************/
36
37		#include "utf8misc.h"
38		/* #include <stdio.h> */
39
40		/**
41		* These variables are used in character processing functions
42		* These have been added to manage utf-8 symbols, particularly escape chars
43		*/
44		#ifndef _UTF8_
45		#define _UTF8_
46		#endif
47
48		#ifdef _UTF8_
49	0	#define ESCAPE_MASK 0x80
50	0	#define WEIGHT_MASK 0xF0
51		#else
52		#define ESCAPE_MASK 0xFF
53		#define WEIGHT_MASK 0x00
54		#endif
55
56		const char *utf8_next_char(const char *str)
57	0	{
58	0	if (*str & ESCAPE_MASK)
59	0	{
60		/*
61		* if the first bit of the current char is 1 then *str is an escape
62		* character
63		*/
64	0	unsigned char escape_char = ((*str & WEIGHT_MASK) << 1);
65
66		/*
67		* and we use it to count (by bit translation) following characters
68		* (only the weightest part)
69		*/
70	0	while (escape_char & ESCAPE_MASK && *str)
71	0	{
72		/*
73		* every step, we move the byte of 1 bit left, when first bit is 0,
74		* it's finished
75		*/
76	0	escape_char = escape_char << 1;
77	0	++str;
78	0	}
79	0	}
80	0	if (*str)
81	0	{
82		/*
83		* finally, if we are not on the \0 character, we jump to the next
84		* character
85		*/
86	0	++str;
87	0	}
88	0	return str;
89	0	}
90
91		int utf8_charcopy(const char *str, char *dest)
92	0	{
93
94	0	int pointer = 0;
95		/* if the first bit of the current char is 1 */
96	0	if (str[pointer] & ESCAPE_MASK)
97	0	{
98		/*
99		* then str[pointer] is an escape character and we use it to count
100		* following characters (only the weightest part)
101		*/
102	0	unsigned char escape_char = ((str[pointer] & WEIGHT_MASK) << 1);
103
104		/*
105		* every step, we move the byte of 1 bit left, when first bit is 0,
106		* it's finished
107		*/
108	0	while (escape_char & ESCAPE_MASK && str[pointer])
109	0	{
110	0	dest[pointer] = str[pointer];
111	0	escape_char = escape_char << 1;
112	0	++pointer;
113	0	}
114	0	}
115	0	if (str[pointer])
116	0	{
117	0	dest[pointer] = str[pointer];
118	0	++pointer;
119	0	}
120
121	0	return pointer;
122	0	}
123
124
125		int utf8_issame(char *lex, char *key, int len)
126	0	{
127		/* printf("[%s] prefix of [%s] with length %i", lex, key, len); */
128	0	int char_counter = 0;
129	0	int pointer = 0;
130	0	while (char_counter < len)
131	0	{
132
133		/* if the first bit of the current char is 1 */
134	0	if (key[pointer] & ESCAPE_MASK)
135	0	{
136		/*
137		* then key[pointer] is an escape character and we use it to count
138		* (only the weightest part)
139		*/
140
141	0	unsigned char escape_char = ((key[pointer] & WEIGHT_MASK) << 1);
142
143	0	while (escape_char & ESCAPE_MASK && key[pointer] == lex[pointer])
144	0	{
145	0	escape_char = escape_char << 1;
146	0	++pointer;
147	0	}
148	0	}
149	0	++char_counter; /* and we are on a new utf8 character */
150	0	if (key[pointer] != lex[pointer])
151	0	{
152	0	return 0;
153		/* printf(" NO\n", lex, key, len); */
154	0	}
155	0	++pointer;
156	0	}
157	0	if (lex[pointer] != '\0')
158	0	{
159	0	return 0;
160		/* printf(" NO\n"); */
161	0	}
162
163		/* printf(" YES\n"); */
164
165	0	return 1;
166	0	}
167
168		extern int utf8_strlen(const char *str)
169	0	{
170	0	int char_counter = 0;
171	0	while (*str)
172	0	{
173	0	str = utf8_next_char(str);
174	0	++char_counter; /* and we are on a new utf8 character */
175	0	}
176	0	return char_counter;
177	0	}
178
179		/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Coverage Report

Created: 2025-07-07 10:01