/src/libyang/src/common.c

Source (jump to first uncovered line)
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */

#define _GNU_SOURCE

#include "common.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include "compat.h"
#include "tree_schema_internal.h"

/**
 * @brief realloc(3) wrapper that never leaks the original block.
 *
 * On allocation failure the original memory is freed, so callers may write
 * `p = ly_realloc(p, n)` without keeping a temporary copy of the pointer.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size New size in bytes. Resizing an existing block to 0 bytes frees it and returns NULL.
 * @return Pointer to the resized memory, NULL on failure (or for @p ptr with @p size 0);
 * in both NULL cases @p ptr is no longer valid.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (ptr && !size) {
        /* realloc(ptr, 0) is implementation-defined: some libc implementations free ptr and return NULL,
         * which made the free() below a double free; release the block exactly once ourselves */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        /* the original block is untouched on failure, release it so it does not leak */
        free(ptr);
    }

    return new_mem;
}

/**
 * @brief Find the first occurrence of a character in the first @p len bytes of @p s.
 *
 * The scan is bounded only by @p len; a NUL byte inside the range does not stop it.
 *
 * @param[in] s Buffer to search.
 * @param[in] c Character to look for (converted to char).
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none in the range.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}

/**
 * @brief Compare a NUL-terminated reference string with a length-limited string.
 *
 * The strings are equal only if the first @p str_len bytes match AND @p refstr
 * ends right after them.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if equal, the non-zero strncmp(3) result if the compared bytes differ,
 * 1 if they match but @p refstr is longer than @p str_len.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff = strncmp(refstr, str, str_len);

    if (diff) {
        /* the compared parts differ */
        return diff;
    }

    /* same beginning, refstr must not continue */
    return (refstr[str_len] == '\0') ? 0 : 1;
}

/* evaluates to 1 if X + Y would exceed MAX, 0 otherwise; intended for unsigned operands with Y <= MAX */
#define LY_OVERFLOW_ADD(MAX, X, Y) (((X) > (MAX) - (Y)) ? 1 : 0)

/* evaluates to 1 if X * Y would exceed MAX, 0 otherwise; Y must not be zero (it is used as a divisor) */
#define LY_OVERFLOW_MUL(MAX, X, Y) (((X) > (MAX) / (Y)) ? 1 : 0)

/**
 * @brief Convert a length-limited string of decimal digits into uint8_t.
 *
 * The digits are processed from the last one to the first one.
 *
 * @param[in] nptr String to convert, does not need to be NUL-terminated.
 * @param[in] len Number of bytes of @p nptr to convert. With @p len 0 the result is 0.
 * @param[out] ret Converted number, written only on success.
 * @return LY_SUCCESS on success,
 * @return LY_EDENIED if @p len is larger than 3 or the value does not fit into uint8_t,
 * @return LY_EVALID if a non-digit character is found.
 */
LY_ERR
ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
{
    uint8_t num = 0, dig, dec_pow;

    if (len > 3) {
        /* overflow for sure */
        return LY_EDENIED;
    }

    dec_pow = 1;
    /* cast to unsigned char, passing a negative char value to isdigit() is undefined behavior */
    for ( ; len && isdigit((unsigned char)nptr[len - 1]); --len) {
        dig = nptr[len - 1] - '0';

        if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
            return LY_EDENIED;
        }
        dig *= dec_pow;

        if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
            return LY_EDENIED;
        }
        num += dig;

        /* may wrap after the third digit but is never used again because len <= 3 */
        dec_pow *= 10;
    }

    if (len) {
        /* the loop stopped on a non-digit character */
        return LY_EVALID;
    }
    *ret = num;
    return LY_SUCCESS;
}

/**
 * @brief Get the next piece of a value string: either a leading prefix (text followed by ':')
 * or the text preceding the next prefix.
 *
 * All output parameters are reset (NULL / 0) before anything else is done, so they hold
 * these values whenever the function returns early.
 *
 * @param[in] str_begin Start of the string to examine. If NULL, empty, or equal to @p str_end,
 * the function returns LY_SUCCESS with the outputs reset.
 * @param[in] str_end End of the string; if NULL, the string is read up to its terminating NUL.
 * @param[out] len Number of bytes of the found piece, counted from @p str_begin.
 * @param[out] is_prefix Set to 1 if the piece starting at @p str_begin is a prefix, 0 otherwise.
 * @param[out] str_next Where to continue with the next call, NULL if the end of the string was reached.
 * @return LY_SUCCESS on success, otherwise the error returned by ly_getutf8().
 */
LY_ERR
ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
{
    const char *stop, *prefix;
    size_t bytes_read;
    uint32_t c;
    ly_bool prefix_found;
    LY_ERR ret = LY_SUCCESS;

    assert(len && is_prefix && str_next);

/* with an explicit end pointer the end is reached on pointer equality, otherwise on the NUL byte */
#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))

    *str_next = NULL;
    *is_prefix = 0;
    *len = 0;

    if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
        /* nothing to examine */
        return ret;
    }

    stop = str_begin;
    prefix = NULL;
    prefix_found = 0;

    do {
        /* look for the beginning of the YANG value */
        do {
            /* stop is moved behind the read character, bytes_read is its size in bytes */
            LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
        } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
        /* NOTE(review): with str_end set, the end is detected only by pointer equality; presumably
         * str_end always lies on a character boundary - confirm against callers */

        if (IS_AT_END(stop, str_end)) {
            break;
        }

        /* maybe the prefix was found, remember where the last read character starts */
        prefix = stop - bytes_read;

        /* look for the end of the prefix */
        do {
            LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
        } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));

        /* it is a prefix only if the last read character is a colon */
        prefix_found = c == ':' ? 1 : 0;

        /* if it wasn't the prefix, keep looking */
    } while (!IS_AT_END(stop, str_end) && !prefix_found);

    if ((str_begin == prefix) && prefix_found) {
        /* prefix found at the beginning of the input string */
        *is_prefix = 1;
        *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
        /* the length does not include the last read character (the colon) */
        *len = (stop - bytes_read) - str_begin;
    } else if ((str_begin != prefix) && (prefix_found)) {
        /* there is some string before the prefix, return it and continue with the prefix next time */
        *str_next = prefix;
        *len = prefix - str_begin;
    } else {
        /* no prefix found, everything read so far is returned and str_next stays NULL */
        *len = stop - str_begin;
    }

#undef IS_AT_END

    return ret;
}

/**
 * @brief Decode one UTF-8 encoded character from the input and move the input behind it.
 *
 * Rejected are: the NUL byte, control characters below 0x20 except for tab, line feed and
 * carriage return, malformed continuation bytes, overlong encodings, surrogates (U+D800 - U+DFFF),
 * U+FFFE, U+FFFF and code points above U+10FFFF.
 *
 * @param[in,out] input Pointer to the input string; moved behind the decoded character on success,
 * unchanged on error.
 * @param[out] utf8_char Decoded code point, written only on success.
 * @param[out] bytes_read Optional number of bytes the character occupied; set to 0 on error.
 * @return LY_SUCCESS on success, LY_EINVAL on an invalid character.
 */
LY_ERR
ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
{
    uint32_t c, aux;
    size_t len;

    if (bytes_read) {
        (*bytes_read) = 0;
    }

    /* read the bytes as unsigned so that no sign extension leaks into the code point */
    c = (unsigned char)(*input)[0];
    LY_CHECK_RET(!c, LY_EINVAL);

    if (!(c & 0x80)) {
        /* one byte character */
        len = 1;

        if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
            return LY_EINVAL;
        }
    } else if ((c & 0xe0) == 0xc0) {
        /* two bytes character */
        len = 2;

        /* a terminating NUL is not a continuation byte, so the string end is never passed */
        aux = (unsigned char)(*input)[1];
        if ((aux & 0xc0) != 0x80) {
            return LY_EINVAL;
        }
        c = ((c & 0x1f) << 6) | (aux & 0x3f);

        if (c < 0x80) {
            /* overlong encoding */
            return LY_EINVAL;
        }
    } else if ((c & 0xf0) == 0xe0) {
        /* three bytes character */
        len = 3;

        c &= 0x0f;
        for (size_t i = 1; i <= 2; i++) {
            aux = (unsigned char)(*input)[i];
            if ((aux & 0xc0) != 0x80) {
                return LY_EINVAL;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* overlong encoding, surrogates, U+FFFE and U+FFFF */
        if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
            return LY_EINVAL;
        }
    } else if ((c & 0xf8) == 0xf0) {
        /* four bytes character */
        len = 4;

        c &= 0x07;
        for (size_t i = 1; i <= 3; i++) {
            aux = (unsigned char)(*input)[i];
            if ((aux & 0xc0) != 0x80) {
                return LY_EINVAL;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* everything below U+10000 fits into 3 bytes, so a 4-byte form of it is an overlong encoding */
        if ((c < 0x10000) || (c > 0x10ffff)) {
            return LY_EINVAL;
        }
    } else {
        /* stray continuation byte or an invalid leading byte */
        return LY_EINVAL;
    }

    (*utf8_char) = c;
    (*input) += len;
    if (bytes_read) {
        (*bytes_read) = len;
    }
    return LY_SUCCESS;
}

/**
 * @brief Encode a Unicode code point as UTF-8.
 *
 * Rejected are: control characters below 0x20 except for tab, line feed and carriage return,
 * surrogates (U+D800 - U+DFFF), the noncharacters U+FDD0 - U+FDEF, the last two code points
 * of every plane (U+xFFFE, U+xFFFF) and values above U+10FFFF.
 *
 * @param[out] dst Buffer for the encoded character, up to 4 bytes are written, no NUL is appended.
 * @param[in] value Code point to encode.
 * @param[out] bytes_written Number of bytes stored into @p dst, written only on success.
 * @return LY_SUCCESS on success, LY_EINVAL if @p value must not be encoded.
 */
LY_ERR
ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
{
    if (value < 0x80) {
        /* one byte character */
        if ((value < 0x20) &&
                (value != 0x09) &&
                (value != 0x0a) &&
                (value != 0x0d)) {
            return LY_EINVAL;
        }

        dst[0] = value;
        (*bytes_written) = 1;
    } else if (value < 0x800) {
        /* two bytes character */
        dst[0] = 0xc0 | (value >> 6);
        dst[1] = 0x80 | (value & 0x3f);
        (*bytes_written) = 2;
    } else if (value < 0xfffe) {
        /* three bytes character */
        if (((value & 0xf800) == 0xd800) ||
                ((value >= 0xfdd0) && (value <= 0xfdef))) {
            /* exclude surrogate blocks %xD800-DFFF */
            /* exclude noncharacters %xFDD0-FDEF */
            return LY_EINVAL;
        }

        dst[0] = 0xe0 | (value >> 12);
        dst[1] = 0x80 | ((value >> 6) & 0x3f);
        dst[2] = 0x80 | (value & 0x3f);

        (*bytes_written) = 3;
    } else if (value < 0x10fffe) {
        /* all the low 16 bits (except the last one) must be set for a noncharacter; the mask has to cover
         * the whole plane offset, otherwise valid characters such as U+10FFE would be refused as well */
        if ((value & 0xfffe) == 0xfffe) {
            /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
             * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
             * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF;
             * %x10FFFE-10FFFF never get here because of the branch condition */
            return LY_EINVAL;
        }
        /* four bytes character */
        dst[0] = 0xf0 | (value >> 18);
        dst[1] = 0x80 | ((value >> 12) & 0x3f);
        dst[2] = 0x80 | ((value >> 6) & 0x3f);
        dst[3] = 0x80 | (value & 0x3f);

        (*bytes_written) = 4;
    } else {
        /* out of the Unicode range or one of the last two noncharacters */
        return LY_EINVAL;
    }
    return LY_SUCCESS;
}

/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80 - 0xBF) and the invalid bytes 0xFE and 0xFF are counted as
 * characters of length 1. Leading bytes 0xF8 - 0xFD map to the legacy lengths 5 and 6.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, their length is derived only from the leading byte.
 * Counting stops at the first NUL byte or after @p bytes bytes, whichever comes first.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine.
 * @return Number of characters (started in the examined range).
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0, left;

    while ((pos < bytes) && str[pos]) {
        ++len;

        /* skip the character byte by byte so that a truncated multi-byte sequence
         * can never carry us over the terminating NUL byte and out of the string */
        left = utf8_char_length_table[(unsigned char)str[pos]];
        do {
            ++pos;
            --left;
        } while (left && (pos < bytes) && str[pos]);
    }
    return len;
}

/**
 * @brief Get the length of a string limited to ::LY_VCODE_INSTREXP_MAXLEN.
 *
 * @param[in] str String to measure, may be NULL.
 * @return 0 for NULL @p str, otherwise at least 1 (even for an empty string) and
 * the string length capped by ::LY_VCODE_INSTREXP_MAXLEN for longer strings.
 */
size_t
LY_VCODE_INSTREXP_len(const char *str)
{
    size_t count;

    if (!str) {
        return 0;
    }

    /* the first byte is always counted, even if it is the terminating NUL */
    count = 1;
    if (str[0]) {
        while ((count < LY_VCODE_INSTREXP_MAXLEN) && str[count]) {
            ++count;
        }
    }

    return count;
}

/**
 * @brief Map the content of a regular file into memory (read-only) so that
 * at least one zeroed byte follows the file content.
 *
 * @param[in] ctx Context used for logging.
 * @param[in] fd Opened file descriptor of a regular file.
 * @param[out] length Size of the created mapping to be passed to ly_munmap(); 0 for an empty file.
 * @param[out] addr Address of the mapping; NULL (with LY_SUCCESS returned) for an empty file.
 * @return LY_SUCCESS on success, LY_ESYS on failure (including @p fd not being a regular file).
 */
LY_ERR
ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
{
    struct stat sb;
    long pagesize;
    size_t m;
    void *anon;
    int saved_errno;

    assert(length);
    assert(addr);
    assert(fd >= 0);

    if (fstat(fd, &sb) == -1) {
        LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
        return LY_ESYS;
    }
    if (!S_ISREG(sb.st_mode)) {
        LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
        return LY_ESYS;
    }
    if (!sb.st_size) {
        /* nothing to map, do not leave the outputs uninitialized */
        *length = 0;
        *addr = NULL;
        return LY_SUCCESS;
    }
    pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize <= 0) {
        /* the page size is used as a divisor and a mapping size below */
        LOGERR(ctx, LY_ESYS, "Failed to get the system page size.");
        return LY_ESYS;
    }

    m = sb.st_size % pagesize;
    if (m) {
        /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
        *length = sb.st_size + 1;
        *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
    } else {
        /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
         * would overflow into another page that would not be zeroed and any access into it would generate SIGBUS.
         * Therefore we have to do the following hack with double mapping. First, the required number of bytes
         * (including the additional bytes) is requested as anonymous and thus they will be really provided (actually
         * more because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same
         * address where the anonymous mapping starts. */
        *length = sb.st_size + pagesize;
        anon = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (anon == MAP_FAILED) {
            /* never use MAP_FAILED as an address for MAP_FIXED */
            *addr = MAP_FAILED;
        } else {
            *addr = mmap(anon, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
            if (*addr == MAP_FAILED) {
                /* do not leak the anonymous mapping, keep errno of the failed mmap() for the message */
                saved_errno = errno;
                munmap(anon, *length);
                errno = saved_errno;
            }
        }
    }
    if (*addr == MAP_FAILED) {
        LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
        return LY_ESYS;
    }

    return LY_SUCCESS;
}

/**
 * @brief Remove a memory mapping, thin wrapper around munmap(2).
 *
 * @param[in] addr Address of the mapping.
 * @param[in] length Length of the mapping.
 * @return LY_SUCCESS on success, LY_ESYS if munmap() fails.
 */
LY_ERR
ly_munmap(void *addr, size_t length)
{
    return munmap(addr, length) ? LY_ESYS : LY_SUCCESS;
}

/**
 * @brief Append a printf-like formatted text to a dynamically allocated string.
 *
 * @param[in,out] dest Pointer to the string to extend. If it points to NULL, the formatted text
 * becomes the new string. On a reallocation failure the original string is freed and
 * @p dest is set to NULL; if the formatting itself fails, @p dest is left untouched.
 * @param[in] format printf(3)-like format string followed by its arguments.
 * @return LY_SUCCESS on success, LY_EMEM on memory allocation failure.
 */
LY_ERR
ly_strcat(char **dest, const char *format, ...)
{
    va_list fp;
    char *addition = NULL;
    int add_len;
    size_t dest_len;

    va_start(fp, format);
    add_len = vasprintf(&addition, format, fp);
    va_end(fp);
    if (add_len < 0) {
        /* the content of addition is not defined on failure, so it must not be used nor freed */
        return LY_EMEM;
    }

    if (!*dest) {
        /* nothing to append to, just hand over the formatted text */
        *dest = addition;
        return LY_SUCCESS;
    }

    dest_len = strlen(*dest);
    *dest = ly_realloc(*dest, dest_len + (size_t)add_len + 1);
    if (!*dest) {
        /* the original string was freed by ly_realloc(), free also the text that was not appended */
        free(addition);
        return LY_EMEM;
    }

    /* append including the terminating NUL byte */
    memcpy(*dest + dest_len, addition, (size_t)add_len + 1);
    free(addition);

    return LY_SUCCESS;
}

/**
 * @brief Parse a signed integer from a length-limited string and check its range.
 *
 * Whitespace before the number (skipped by strtoll(3)) and after it is accepted.
 *
 * @param[in] val_str String with the number, does not need to be NUL-terminated after @p val_len bytes.
 * @param[in] val_len Number of bytes of @p val_str to parse.
 * @param[in] min Minimum allowed value.
 * @param[in] max Maximum allowed value.
 * @param[in] base Numeric base passed to strtoll(3).
 * @param[out] ret Parsed number, written only on success.
 * @return LY_SUCCESS on success,
 * @return LY_EVALID if the string is not a number (including a value strtoll(3) cannot represent),
 * @return LY_EDENIED if the number is out of the @p min - @p max range,
 * @return LY_EMEM on memory allocation failure, LY_EINVAL on invalid arguments.
 */
LY_ERR
ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
{
    LY_ERR rc = LY_SUCCESS;
    char *ptr, *str;
    int64_t i;

    LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);

    /* duplicate the value to avoid accessing following bytes */
    str = strndup(val_str, val_len);
    LY_CHECK_RET(!str, LY_EMEM);

    /* parse the value */
    errno = 0;
    i = strtoll(str, &ptr, base);
    if (errno || (ptr == str)) {
        /* invalid string */
        rc = LY_EVALID;
    } else if ((i < min) || (i > max)) {
        /* invalid number */
        rc = LY_EDENIED;
    } else if (*ptr) {
        /* cast to unsigned char, passing a negative char value (byte >= 0x80) to isspace() is undefined behavior */
        while (isspace((unsigned char)*ptr)) {
            ++ptr;
        }
        if (*ptr) {
            /* invalid characters after some number */
            rc = LY_EVALID;
        }
    }

    /* cleanup */
    free(str);
    if (!rc) {
        *ret = i;
    }
    return rc;
}

/**
 * @brief Parse an unsigned integer from a length-limited string and check its range.
 *
 * Whitespace before the number (skipped by strtoull(3)) and after it is accepted.
 * Negative numbers are refused, only a "negative" zero is accepted.
 *
 * @param[in] val_str String with the number, does not need to be NUL-terminated after @p val_len bytes.
 * @param[in] val_len Number of bytes of @p val_str to parse.
 * @param[in] max Maximum allowed value.
 * @param[in] base Numeric base passed to strtoull(3).
 * @param[out] ret Parsed number, written only on success.
 * @return LY_SUCCESS on success,
 * @return LY_EVALID if the string is not a number (including a value strtoull(3) cannot represent),
 * @return LY_EDENIED if the number is negative or larger than @p max,
 * @return LY_EMEM on memory allocation failure, LY_EINVAL on invalid arguments.
 */
LY_ERR
ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
{
    LY_ERR rc = LY_SUCCESS;
    char *ptr, *str;
    const char *sign;
    uint64_t u;

    LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);

    /* duplicate the value to avoid accessing following bytes */
    str = strndup(val_str, val_len);
    LY_CHECK_RET(!str, LY_EMEM);

    /* strtoull() skips leading whitespace and silently negates a number with '-', so find
     * the first significant character to be able to detect the sign even after whitespace */
    for (sign = str; isspace((unsigned char)*sign); ++sign) {}

    /* parse the value */
    errno = 0;
    u = strtoull(str, &ptr, base);
    if (errno || (ptr == str)) {
        /* invalid string */
        rc = LY_EVALID;
    } else if ((u > max) || (u && (*sign == '-'))) {
        /* invalid number */
        rc = LY_EDENIED;
    } else if (*ptr) {
        /* cast to unsigned char, passing a negative char value (byte >= 0x80) to isspace() is undefined behavior */
        while (isspace((unsigned char)*ptr)) {
            ++ptr;
        }
        if (*ptr) {
            /* invalid characters after some number */
            rc = LY_EVALID;
        }
    }

    /* cleanup */
    free(str);
    if (!rc) {
        *ret = u;
    }
    return rc;
}

/**
 * @brief Skip one YANG identifier.
 *
 * identifier          = (ALPHA / "_")
 *                       *(ALPHA / DIGIT / "_" / "-" / ".")
 *
 * The characters are classified by is_yangidentstartchar() and is_yangidentchar().
 * No check for the reserved "xml" beginning is performed here.
 *
 * @param[in,out] id Identifier to parse. On success it is moved to the first character which is not
 * part of the identifier, on error it is left unchanged.
 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
 */
static LY_ERR
lys_parse_id(const char **id)
{
    const char *cur;

    assert(id && *id);

    cur = *id;
    if (!is_yangidentstartchar(*cur)) {
        return LY_EINVAL;
    }

    /* the first character is fine, skip all the following identifier characters */
    for (++cur; is_yangidentchar(*cur); ++cur) {}

    *id = cur;
    return LY_SUCCESS;
}

/**
 * @brief Parse a node identifier in the form [prefix:]name.
 *
 * @param[in,out] id Identifier to parse, moved behind the parsed part.
 * @param[out] prefix Start of the prefix, NULL if there is none.
 * @param[out] prefix_len Length of the prefix, 0 if there is none.
 * @param[out] name Start of the node name.
 * @param[out] name_len Length of the node name.
 * @return LY_SUCCESS on success, otherwise the error returned by lys_parse_id().
 */
LY_ERR
ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
{
    assert(id && *id);
    assert(prefix && prefix_len);
    assert(name && name_len);

    /* until a colon is found, the first identifier is considered to be a prefix */
    *prefix = *id;
    *prefix_len = 0;
    *name = NULL;
    *name_len = 0;

    LY_CHECK_RET(lys_parse_id(id));

    if (**id != ':') {
        /* no colon follows, so the parsed identifier is the name and there is no prefix */
        *name = *prefix;
        *name_len = *id - *name;
        *prefix = NULL;
        return LY_SUCCESS;
    }

    /* the parsed identifier is the prefix, skip the colon and parse the name */
    *prefix_len = *id - *prefix;
    ++(*id);
    *name = *id;

    LY_CHECK_RET(lys_parse_id(id));
    *name_len = *id - *name;

    return LY_SUCCESS;
}

/**
 * @brief Parse one instance-identifier predicate starting with '['.
 *
 * Three forms are recognized:
 *  - pos:                 "[" *WSP positive-integer-value *WSP "]"
 *  - leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]"
 *  - key-predicate:       "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]"
 *
 * @param[in,out] pred Predicate to parse, must point to '['. On success it is moved behind the closing ']',
 * on error it is moved to the position where the parsing stopped.
 * @param[in] limit Number of bytes of @p pred that may be used by the predicate.
 * @param[in] format Data format; for LYD_XML the node name in a key-predicate must have a prefix.
 * @param[out] prefix Prefix of the node-identifier (key-predicate only), NULL otherwise.
 * @param[out] prefix_len Length of @p prefix.
 * @param[out] id Node name (key-predicate) or "." (leaf-list-predicate), NULL for a position predicate.
 * @param[out] id_len Length of @p id.
 * @param[out] value Position number or the content of the quoted string (without the quotes).
 * @param[out] value_len Length of @p value.
 * @param[out] errmsg Static error message, set only when an error is returned.
 * @return LY_SUCCESS on success,
 * @return LY_EVALID on a syntax error,
 * @return LY_EINVAL if the predicate is not complete within @p limit bytes.
 */
LY_ERR
ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
        const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
        const char **errmsg)
{
    LY_ERR ret = LY_EVALID;
    const char *in = *pred;
    size_t offset = 1;
    uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
    char quot;

    assert(in[0] == '[');

    *prefix = *id = *value = NULL;
    *prefix_len = *id_len = *value_len = 0;

    /* the ctype functions below always get an unsigned char value,
     * passing a negative char (byte >= 0x80) to them is undefined behavior */

    /* leading *WSP */
    for ( ; isspace((unsigned char)in[offset]); offset++) {}

    if (isdigit((unsigned char)in[offset])) {
        /* pos: "[" *WSP positive-integer-value *WSP "]" */
        if (in[offset] == '0') {
            /* zero */
            *errmsg = "The position predicate cannot be zero.";
            goto error;
        }

        /* positive-integer-value */
        *value = &in[offset++];
        for ( ; isdigit((unsigned char)in[offset]); offset++) {}
        *value_len = &in[offset] - *value;

    } else if (in[offset] == '.') {
        /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
        *id = &in[offset];
        *id_len = 1;
        offset++;
        expr = 1;
    } else if (in[offset] == '-') {
        /* typically negative value */
        *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
        goto error;
    } else {
        /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
        in = &in[offset];
        if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
            *errmsg = "Invalid node-identifier.";
            goto error;
        }
        if ((format == LYD_XML) && !(*prefix)) {
            /* all node names MUST be qualified with explicit namespace prefix */
            *errmsg = "Missing prefix of a node name.";
            goto error;
        }
        /* switch back from the moved pointer to the offset from the predicate beginning */
        offset = in - *pred;
        in = *pred;
        expr = 2;
    }

    if (expr) {
        /*  *WSP "=" *WSP quoted-string *WSP "]" */
        for ( ; isspace((unsigned char)in[offset]); offset++) {}

        if (in[offset] != '=') {
            if (expr == 1) {
                *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
            } else { /* 2 */
                *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
            }
            goto error;
        }
        offset++;
        for ( ; isspace((unsigned char)in[offset]); offset++) {}

        /* quoted-string */
        quot = in[offset++];
        if ((quot != '\'') && (quot != '\"')) {
            *errmsg = "String value is not quoted.";
            goto error;
        }
        *value = &in[offset];
        /* find the closing quote, a quote preceded by a backslash does not end the string */
        for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
        if (in[offset] == quot) {
            *value_len = &in[offset] - *value;
            offset++;
        } else {
            *errmsg = "Value is not terminated quoted-string.";
            goto error;
        }
    }

    /* *WSP "]" */
    for ( ; isspace((unsigned char)in[offset]); offset++) {}
    if (in[offset] != ']') {
        if (expr == 0) {
            *errmsg = "Predicate (pos) is not terminated by \']\' character.";
        } else if (expr == 1) {
            *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
        } else { /* 2 */
            *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
        }
        goto error;
    }
    offset++;

    if (offset <= limit) {
        *pred = &in[offset];
        return LY_SUCCESS;
    }

    /* we read after the limit */
    *errmsg = "Predicate is incomplete.";
    *prefix = *id = *value = NULL;
    *prefix_len = *id_len = *value_len = 0;
    offset = limit;
    ret = LY_EINVAL;

error:
    *pred = &in[offset];
    return ret;
}

Coverage Report

Created: 2025-08-26 06:20

Line	Count	Source (jump to first uncovered line)
1		/**
2		* @file common.c
3		* @author Michal Vasko <mvasko@cesnet.cz>
4		* @brief common internal definitions for libyang
5		*
6		* Copyright (c) 2018 CESNET, z.s.p.o.
7		*
8		* This source code is licensed under BSD 3-Clause License (the "License").
9		* You may not use this file except in compliance with the License.
10		* You may obtain a copy of the License at
11		*
12		* https://opensource.org/licenses/BSD-3-Clause
13		*/
14
15		#define _GNU_SOURCE
16
17		#include "common.h"
18
19		#include <assert.h>
20		#include <ctype.h>
21		#include <errno.h>
22		#include <inttypes.h>
23		#include <stdarg.h>
24		#include <stdio.h>
25		#include <stdlib.h>
26		#include <string.h>
27		#include <sys/mman.h>
28		#include <sys/stat.h>
29		#include <unistd.h>
30
31		#include "compat.h"
32		#include "tree_schema_internal.h"
33
34		void *
35		ly_realloc(void *ptr, size_t size)
36	0	{
37	0	void *new_mem;
38
39	0	new_mem = realloc(ptr, size);
40	0	if (!new_mem) {
41	0	free(ptr);
42	0	}
43
44	0	return new_mem;
45	0	}
46
47		char *
48		ly_strnchr(const char *s, int c, size_t len)
49	0	{
50	0	for ( ; len && (*s != (char)c); ++s, --len) {}
51	0	return len ? (char *)s : NULL;
52	0	}
53
54		int
55		ly_strncmp(const char refstr, const char str, size_t str_len)
56	0	{
57	0	int rc = strncmp(refstr, str, str_len);
58
59	0	if (!rc && (refstr[str_len] == '\0')) {
60	0	return 0;
61	0	} else {
62	0	return rc ? rc : 1;
63	0	}
64	0	}
65
66	0	#define LY_OVERFLOW_ADD(MAX, X, Y) ((X > MAX - Y) ? 1 : 0)
67
68	0	#define LY_OVERFLOW_MUL(MAX, X, Y) ((X > MAX / Y) ? 1 : 0)
69
70		LY_ERR
71		ly_strntou8(const char nptr, size_t len, uint8_t ret)
72	0	{
73	0	uint8_t num = 0, dig, dec_pow;
74
75	0	if (len > 3) {
76		/* overflow for sure */
77	0	return LY_EDENIED;
78	0	}
79
80	0	dec_pow = 1;
81	0	for ( ; len && isdigit(nptr[len - 1]); --len) {
82	0	dig = nptr[len - 1] - 48;
83
84	0	if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
85	0	return LY_EDENIED;
86	0	}
87	0	dig *= dec_pow;
88
89	0	if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
90	0	return LY_EDENIED;
91	0	}
92	0	num += dig;
93
94	0	dec_pow *= 10;
95	0	}
96
97	0	if (len) {
98	0	return LY_EVALID;
99	0	}
100	0	*ret = num;
101	0	return LY_SUCCESS;
102	0	}
103
104		LY_ERR
105		ly_value_prefix_next(const char str_begin, const char str_end, uint32_t len, ly_bool is_prefix, const char **str_next)
106	0	{
107	0	const char stop, prefix;
108	0	size_t bytes_read;
109	0	uint32_t c;
110	0	ly_bool prefix_found;
111	0	LY_ERR ret = LY_SUCCESS;
112
113	0	assert(len && is_prefix && str_next);
114
115	0	#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
116
117	0	*str_next = NULL;
118	0	*is_prefix = 0;
119	0	*len = 0;
120
121	0	if (!str_begin \|\| !(*str_begin) \|\| (str_begin == str_end)) {
122	0	return ret;
123	0	}
124
125	0	stop = str_begin;
126	0	prefix = NULL;
127	0	prefix_found = 0;
128
129	0	do {
130		/* look for the beginning of the YANG value */
131	0	do {
132	0	LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
133	0	} while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
134
135	0	if (IS_AT_END(stop, str_end)) {
136	0	break;
137	0	}
138
139		/* maybe the prefix was found */
140	0	prefix = stop - bytes_read;
141
142		/* look for the the end of the prefix */
143	0	do {
144	0	LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
145	0	} while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
146
147	0	prefix_found = c == ':' ? 1 : 0;
148
149		/* if it wasn't the prefix, keep looking */
150	0	} while (!IS_AT_END(stop, str_end) && !prefix_found);
151
152	0	if ((str_begin == prefix) && prefix_found) {
153		/* prefix found at the beginning of the input string */
154	0	*is_prefix = 1;
155	0	*str_next = IS_AT_END(stop, str_end) ? NULL : stop;
156	0	*len = (stop - bytes_read) - str_begin;
157	0	} else if ((str_begin != prefix) && (prefix_found)) {
158		/* there is a some string before prefix */
159	0	*str_next = prefix;
160	0	*len = prefix - str_begin;
161	0	} else {
162		/* no prefix found */
163	0	*len = stop - str_begin;
164	0	}
165
166	0	#undef IS_AT_END
167
168	0	return ret;
169	0	}
170
171		LY_ERR
172		ly_getutf8(const char *input, uint32_t utf8_char, size_t *bytes_read)
173	0	{
174	0	uint32_t c, aux;
175	0	size_t len;
176
177	0	if (bytes_read) {
178	0	(*bytes_read) = 0;
179	0	}
180
181	0	c = (*input)[0];
182	0	LY_CHECK_RET(!c, LY_EINVAL);
183
184	0	if (!(c & 0x80)) {
185		/* one byte character */
186	0	len = 1;
187
188	0	if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
189	0	return LY_EINVAL;
190	0	}
191	0	} else if ((c & 0xe0) == 0xc0) {
192		/* two bytes character */
193	0	len = 2;
194
195	0	aux = (*input)[1];
196	0	if ((aux & 0xc0) != 0x80) {
197	0	return LY_EINVAL;
198	0	}
199	0	c = ((c & 0x1f) << 6) \| (aux & 0x3f);
200
201	0	if (c < 0x80) {
202	0	return LY_EINVAL;
203	0	}
204	0	} else if ((c & 0xf0) == 0xe0) {
205		/* three bytes character */
206	0	len = 3;
207
208	0	c &= 0x0f;
209	0	for (uint64_t i = 1; i <= 2; i++) {
210	0	aux = (*input)[i];
211	0	if ((aux & 0xc0) != 0x80) {
212	0	return LY_EINVAL;
213	0	}
214
215	0	c = (c << 6) \| (aux & 0x3f);
216	0	}
217
218	0	if ((c < 0x800) \|\| ((c > 0xd7ff) && (c < 0xe000)) \|\| (c > 0xfffd)) {
219	0	return LY_EINVAL;
220	0	}
221	0	} else if ((c & 0xf8) == 0xf0) {
222		/* four bytes character */
223	0	len = 4;
224
225	0	c &= 0x07;
226	0	for (uint64_t i = 1; i <= 3; i++) {
227	0	aux = (*input)[i];
228	0	if ((aux & 0xc0) != 0x80) {
229	0	return LY_EINVAL;
230	0	}
231
232	0	c = (c << 6) \| (aux & 0x3f);
233	0	}
234
235	0	if ((c < 0x1000) \|\| (c > 0x10ffff)) {
236	0	return LY_EINVAL;
237	0	}
238	0	} else {
239	0	return LY_EINVAL;
240	0	}
241
242	0	(*utf8_char) = c;
243	0	(*input) += len;
244	0	if (bytes_read) {
245	0	(*bytes_read) = len;
246	0	}
247	0	return LY_SUCCESS;
248	0	}
249
250		LY_ERR
251		ly_pututf8(char dst, uint32_t value, size_t bytes_written)
252	0	{
253	0	if (value < 0x80) {
254		/* one byte character */
255	0	if ((value < 0x20) &&
256	0	(value != 0x09) &&
257	0	(value != 0x0a) &&
258	0	(value != 0x0d)) {
259	0	return LY_EINVAL;
260	0	}
261
262	0	dst[0] = value;
263	0	(*bytes_written) = 1;
264	0	} else if (value < 0x800) {
265		/* two bytes character */
266	0	dst[0] = 0xc0 \| (value >> 6);
267	0	dst[1] = 0x80 \| (value & 0x3f);
268	0	(*bytes_written) = 2;
269	0	} else if (value < 0xfffe) {
270		/* three bytes character */
271	0	if (((value & 0xf800) == 0xd800) \|\|
272	0	((value >= 0xfdd0) && (value <= 0xfdef))) {
273		/* exclude surrogate blocks %xD800-DFFF */
274		/* exclude noncharacters %xFDD0-FDEF */
275	0	return LY_EINVAL;
276	0	}
277
278	0	dst[0] = 0xe0 \| (value >> 12);
279	0	dst[1] = 0x80 \| ((value >> 6) & 0x3f);
280	0	dst[2] = 0x80 \| (value & 0x3f);
281
282	0	(*bytes_written) = 3;
283	0	} else if (value < 0x10fffe) {
284	0	if ((value & 0xffe) == 0xffe) {
285		/* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
286		* %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
287		* %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
288	0	return LY_EINVAL;
289	0	}
290		/* four bytes character */
291	0	dst[0] = 0xf0 \| (value >> 18);
292	0	dst[1] = 0x80 \| ((value >> 12) & 0x3f);
293	0	dst[2] = 0x80 \| ((value >> 6) & 0x3f);
294	0	dst[3] = 0x80 \| (value & 0x3f);
295
296	0	(*bytes_written) = 4;
297	0	} else {
298	0	return LY_EINVAL;
299	0	}
300	0	return LY_SUCCESS;
301	0	}
302
303		/**
304		* @brief Static table of the UTF8 characters lengths according to their first byte.
305		*/
306		static const unsigned char utf8_char_length_table[] = {
307		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
308		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
309		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
310		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
311		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
312		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
313		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
314		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
315		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
316		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
317		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
318		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
319		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
320		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
321		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
322		4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
323		};
324
325		size_t
326		ly_utf8len(const char *str, size_t bytes)
327	0	{
328	0	size_t len = 0;
329	0	const char *ptr = str;
330
331	0	while (((size_t)(ptr - str) < bytes) && *ptr) {
332	0	++len;
333	0	ptr += utf8_char_length_table[((unsigned char)(*ptr))];
334	0	}
335	0	return len;
336	0	}
337
338		size_t
339		LY_VCODE_INSTREXP_len(const char *str)
340	0	{
341	0	size_t len = 0;
342
343	0	if (!str) {
344	0	return len;
345	0	} else if (!str[0]) {
346	0	return 1;
347	0	}
348	0	for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
349	0	return len;
350	0	}
351
352		LY_ERR
353		ly_mmap(struct ly_ctx ctx, int fd, size_t length, void **addr)
354	0	{
355	0	struct stat sb;
356	0	long pagesize;
357	0	size_t m;
358
359	0	assert(length);
360	0	assert(addr);
361	0	assert(fd >= 0);
362
363	0	if (fstat(fd, &sb) == -1) {
364	0	LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
365	0	return LY_ESYS;
366	0	}
367	0	if (!S_ISREG(sb.st_mode)) {
368	0	LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
369	0	return LY_ESYS;
370	0	}
371	0	if (!sb.st_size) {
372	0	*addr = NULL;
373	0	return LY_SUCCESS;
374	0	}
375	0	pagesize = sysconf(_SC_PAGESIZE);
376
377	0	m = sb.st_size % pagesize;
378	0	if (m && (pagesize - m >= 1)) {
379		/* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
380	0	*length = sb.st_size + 1;
381	0	addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
382	0	} else {
383		/* there will not be enough bytes after the file content mapping for the additional bytes and some of them
384		* would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
385		* Therefore we have to do the following hack with double mapping. First, the required number of bytes
386		* (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
387		* because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
388		* where the anonymous mapping starts. */
389	0	*length = sb.st_size + pagesize;
390	0	addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE \| MAP_ANONYMOUS, -1, 0);
391	0	addr = mmap(addr, sb.st_size, PROT_READ, MAP_PRIVATE \| MAP_FIXED, fd, 0);
392	0	}
393	0	if (*addr == MAP_FAILED) {
394	0	LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
395	0	return LY_ESYS;
396	0	}
397
398	0	return LY_SUCCESS;
399	0	}
400
401		LY_ERR
402		ly_munmap(void *addr, size_t length)
403	0	{
404	0	if (munmap(addr, length)) {
405	0	return LY_ESYS;
406	0	}
407	0	return LY_SUCCESS;
408	0	}
409
410		LY_ERR
411		ly_strcat(char *dest, const char format, ...)
412	0	{
413	0	va_list fp;
414	0	char *addition = NULL;
415	0	size_t len;
416
417	0	va_start(fp, format);
418	0	len = vasprintf(&addition, format, fp);
419	0	len += (dest ? strlen(dest) : 0) + 1;
420
421	0	if (*dest) {
422	0	dest = ly_realloc(dest, len);
423	0	if (!*dest) {
424	0	va_end(fp);
425	0	return LY_EMEM;
426	0	}
427	0	dest = strcat(dest, addition);
428	0	free(addition);
429	0	} else {
430	0	*dest = addition;
431	0	}
432
433	0	va_end(fp);
434	0	return LY_SUCCESS;
435	0	}
436
437		LY_ERR
438		ly_parse_int(const char val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t ret)
439	0	{
440	0	LY_ERR rc = LY_SUCCESS;
441	0	char ptr, str;
442	0	int64_t i;
443
444	0	LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
445
446		/* duplicate the value */
447	0	str = strndup(val_str, val_len);
448	0	LY_CHECK_RET(!str, LY_EMEM);
449
450		/* parse the value to avoid accessing following bytes */
451	0	errno = 0;
452	0	i = strtoll(str, &ptr, base);
453	0	if (errno \|\| (ptr == str)) {
454		/* invalid string */
455	0	rc = LY_EVALID;
456	0	} else if ((i < min) \|\| (i > max)) {
457		/* invalid number */
458	0	rc = LY_EDENIED;
459	0	} else if (*ptr) {
460	0	while (isspace(*ptr)) {
461	0	++ptr;
462	0	}
463	0	if (*ptr) {
464		/* invalid characters after some number */
465	0	rc = LY_EVALID;
466	0	}
467	0	}
468
469		/* cleanup */
470	0	free(str);
471	0	if (!rc) {
472	0	*ret = i;
473	0	}
474	0	return rc;
475	0	}
476
477		LY_ERR
478		ly_parse_uint(const char val_str, size_t val_len, uint64_t max, int base, uint64_t ret)
479	0	{
480	0	LY_ERR rc = LY_SUCCESS;
481	0	char ptr, str;
482	0	uint64_t u;
483
484	0	LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
485
486		/* duplicate the value to avoid accessing following bytes */
487	0	str = strndup(val_str, val_len);
488	0	LY_CHECK_RET(!str, LY_EMEM);
489
490		/* parse the value */
491	0	errno = 0;
492	0	u = strtoull(str, &ptr, base);
493	0	if (errno \|\| (ptr == str)) {
494		/* invalid string */
495	0	rc = LY_EVALID;
496	0	} else if ((u > max) \|\| (u && (str[0] == '-'))) {
497		/* invalid number */
498	0	rc = LY_EDENIED;
499	0	} else if (*ptr) {
500	0	while (isspace(*ptr)) {
501	0	++ptr;
502	0	}
503	0	if (*ptr) {
504		/* invalid characters after some number */
505	0	rc = LY_EVALID;
506	0	}
507	0	}
508
509		/* cleanup */
510	0	free(str);
511	0	if (!rc) {
512	0	*ret = u;
513	0	}
514	0	return rc;
515	0	}
516
517		/**
518		* @brief Parse an identifier.
519		*
520		* ;; An identifier MUST NOT start with (('X'\|'x') ('M'\|'m') ('L'\|'l'))
521		* identifier = (ALPHA / "_")
522		* *(ALPHA / DIGIT / "_" / "-" / ".")
523		*
524		* @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
525		* @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
526		*/
527		static LY_ERR
528		lys_parse_id(const char **id)
529	0	{
530	0	assert(id && *id);
531
532	0	if (!is_yangidentstartchar(**id)) {
533	0	return LY_EINVAL;
534	0	}
535	0	++(*id);
536
537	0	while (is_yangidentchar(**id)) {
538	0	++(*id);
539	0	}
540	0	return LY_SUCCESS;
541	0	}
542
543		LY_ERR
544		ly_parse_nodeid(const char id, const char prefix, size_t prefix_len, const char name, size_t name_len)
545	0	{
546	0	assert(id && *id);
547	0	assert(prefix && prefix_len);
548	0	assert(name && name_len);
549
550	0	prefix = id;
551	0	*prefix_len = 0;
552	0	*name = NULL;
553	0	*name_len = 0;
554
555	0	LY_CHECK_RET(lys_parse_id(id));
556	0	if (**id == ':') {
557		/* there is prefix */
558	0	prefix_len = id - *prefix;
559	0	++(*id);
560	0	name = id;
561
562	0	LY_CHECK_RET(lys_parse_id(id));
563	0	name_len = id - *name;
564	0	} else {
565		/* there is no prefix, so what we have as prefix now is actually the name */
566	0	name = prefix;
567	0	name_len = id - *name;
568	0	*prefix = NULL;
569	0	}
570
571	0	return LY_SUCCESS;
572	0	}
573
574		LY_ERR
575		ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
576		const char *prefix, size_t prefix_len, const char *id, size_t id_len, const char *value, size_t value_len,
577		const char **errmsg)
578	0	{
579	0	LY_ERR ret = LY_EVALID;
580	0	const char in = pred;
581	0	size_t offset = 1;
582	0	uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
583	0	char quot;
584
585	0	assert(in[0] == '[');
586
587	0	prefix = id = *value = NULL;
588	0	prefix_len = id_len = *value_len = 0;
589
590		/* leading WSP /
591	0	for ( ; isspace(in[offset]); offset++) {}
592
593	0	if (isdigit(in[offset])) {
594		/* pos: "[" WSP positive-integer-value WSP "]" */
595	0	if (in[offset] == '0') {
596		/* zero */
597	0	*errmsg = "The position predicate cannot be zero.";
598	0	goto error;
599	0	}
600
601		/* positive-integer-value */
602	0	*value = &in[offset++];
603	0	for ( ; isdigit(in[offset]); offset++) {}
604	0	value_len = &in[offset] - value;
605
606	0	} else if (in[offset] == '.') {
607		/* leaf-list-predicate: "[" WSP "." WSP "=" WSP quoted-string WSP "]" */
608	0	*id = &in[offset];
609	0	*id_len = 1;
610	0	offset++;
611	0	expr = 1;
612	0	} else if (in[offset] == '-') {
613		/* typically negative value */
614	0	*errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
615	0	goto error;
616	0	} else {
617		/* key-predicate: "[" WSP node-identifier WSP "=" WSP quoted-string WSP "]" */
618	0	in = &in[offset];
619	0	if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
620	0	*errmsg = "Invalid node-identifier.";
621	0	goto error;
622	0	}
623	0	if ((format == LYD_XML) && !(*prefix)) {
624		/* all node names MUST be qualified with explicit namespace prefix */
625	0	*errmsg = "Missing prefix of a node name.";
626	0	goto error;
627	0	}
628	0	offset = in - *pred;
629	0	in = *pred;
630	0	expr = 2;
631	0	}
632
633	0	if (expr) {
634		/* WSP "=" WSP quoted-string WSP "]" /
635	0	for ( ; isspace(in[offset]); offset++) {}
636
637	0	if (in[offset] != '=') {
638	0	if (expr == 1) {
639	0	*errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
640	0	} else { /* 2 */
641	0	*errmsg = "Unexpected character instead of \'=\' in key-predicate.";
642	0	}
643	0	goto error;
644	0	}
645	0	offset++;
646	0	for ( ; isspace(in[offset]); offset++) {}
647
648		/* quoted-string */
649	0	quot = in[offset++];
650	0	if ((quot != '\'') && (quot != '\"')) {
651	0	*errmsg = "String value is not quoted.";
652	0	goto error;
653	0	}
654	0	*value = &in[offset];
655	0	for ( ; offset < limit && (in[offset] != quot \|\| (offset && in[offset - 1] == '\\')); offset++) {}
656	0	if (in[offset] == quot) {
657	0	value_len = &in[offset] - value;
658	0	offset++;
659	0	} else {
660	0	*errmsg = "Value is not terminated quoted-string.";
661	0	goto error;
662	0	}
663	0	}
664
665		/* WSP "]" /
666	0	for ( ; isspace(in[offset]); offset++) {}
667	0	if (in[offset] != ']') {
668	0	if (expr == 0) {
669	0	*errmsg = "Predicate (pos) is not terminated by \']\' character.";
670	0	} else if (expr == 1) {
671	0	*errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
672	0	} else { /* 2 */
673	0	*errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
674	0	}
675	0	goto error;
676	0	}
677	0	offset++;
678
679	0	if (offset <= limit) {
680	0	*pred = &in[offset];
681	0	return LY_SUCCESS;
682	0	}
683
684		/* we read after the limit */
685	0	*errmsg = "Predicate is incomplete.";
686	0	prefix = id = *value = NULL;
687	0	prefix_len = id_len = *value_len = 0;
688	0	offset = limit;
689	0	ret = LY_EINVAL;
690
691	0	error:
692	0	*pred = &in[offset];
693	0	return ret;
694	0	}