Coverage Report

Created: 2026-03-21 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/common/unicode.h
Line
Count
Source
1
#pragma once
2
3
#include <cstdint>
4
#include <string_view>
5
#include <vector>
6
#include <string>
7
8
// UTF-8 parsing utilities for streaming-aware unicode support
9
10
// Outcome of decoding one UTF-8 codepoint: the decoded value, how many input
// bytes it occupied, and a status saying whether decoding worked.
struct utf8_parse_result {
11
    uint32_t codepoint;      // Decoded codepoint (only valid if status == SUCCESS)
12
    size_t bytes_consumed;   // How many bytes this codepoint uses (1-4); 0 if left at the constructor default
13
    // The unscoped enum and the data member share the name `status`; the member
    // hides the type name inside the struct, so the type is spelled `enum status`.
    // NOTE(review): presumably INCOMPLETE = input ended mid-sequence and
    // INVALID = malformed bytes — confirm against the parser implementation.
    enum status { SUCCESS, INCOMPLETE, INVALID } status;
14
15
    // Not `explicit`: a bare status value converts implicitly to a result, with
    // codepoint and bytes_consumed both defaulting to 0.
    utf8_parse_result(enum status s, uint32_t cp = 0, size_t bytes = 0)
16
0
        : codepoint(cp), bytes_consumed(bytes), status(s) {}
17
};
18
19
// Determine the expected length of a UTF-8 sequence from its first byte
20
// Returns 0 for invalid first bytes
21
// Only the lead byte is passed in, so this call cannot validate any
// continuation bytes that follow it.
size_t common_utf8_sequence_length(unsigned char first_byte);
22
23
// Check if a string ends with a complete UTF-8 sequence.
24
// NOTE(review): the result for an empty string, or for invalid bytes earlier
// in `s`, is not visible from this declaration — confirm in the definition.
bool common_utf8_is_complete(const std::string & s);
25
26
// Parse a single UTF-8 codepoint from input
27
// Returns a utf8_parse_result; its `codepoint` field is only valid when
// `status == SUCCESS`.
// NOTE(review): `offset` is presumably the byte index in `input` where
// decoding starts — confirm in the definition.
utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset);
28
29
// NOTE(review): judging by the name and signature, encodes a sequence of
// codepoints into a single UTF-8 string — the definition is not visible here;
// confirm how out-of-range codepoints are handled.
std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps);
30
// NOTE(review): judging by the name and signature, encodes one codepoint as a
// UTF-8 string — the definition is not visible here; confirm how out-of-range
// codepoints are handled.
std::string common_unicode_cpt_to_utf8(uint32_t cpt);