Coverage Report

Created: 2026-04-12 07:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/demux/dvb-text.h
Line
Count
Source
1
/*****************************************************************************
2
 * dvb-text.h:
3
 *****************************************************************************
4
 * Copyright (C) 2007-2011 VLC authors and VideoLAN
5
 *
6
 * This program is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as published by
8
 * the Free Software Foundation; either version 2.1 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public License
17
 * along with this program; if not, write to the Free Software Foundation,
18
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19
 *****************************************************************************/
20
21
/**
 * Converts a DVB SI text item to UTF-8.
 *
 * Refer to ETSI EN 300 468 annex A. The first byte of the item selects the
 * source character table:
 *  - 0x20 and above: no selector, the whole item is ISO 6937 text;
 *  - 0x01-0x07, 0x09-0x0B: ISO 8859-(selector + 4);
 *  - 0x10: followed by 0x00 and a byte N (1-11, 13-15) for ISO 8859-N;
 *  - 0x11, 0x14: UCS-2 big endian; 0x12: EUC-KR; 0x13: GB2312; 0x15: UTF-8.
 * Any other selector is rejected.
 *
 * After conversion, the DVB CR/LF control code is replaced with an actual
 * CR/LF sequence and the character emphasis on/off codes are removed.
 *
 * @param buf    start of the raw text item
 * @param length size of the text item in bytes
 * @return a heap-allocated nul-terminated UTF-8 string, or NULL on error
 *         (empty item, unsupported or malformed selector, or allocation
 *         failure).
 */
static char *vlc_from_EIT (const void *buf, size_t length)
{
    if (unlikely(length == 0))
        return NULL;

    char encbuf[12];
    const char *encoding = encbuf;

    const char *in = buf;
    size_t offset = 1;
    unsigned char c = *in;

    if (c >= 0x20)
    {   /* No selector byte: the text starts immediately */
        offset = 0;
        encoding = "ISO_6937";
    }
    else if ((1u << c) & 0x0EFEu) /* 1-7, 9-11 -> ISO 8859-(c+4) */
    {
        /* c < 0x20 here, so the unsigned shift above is well defined */
        snprintf (encbuf, sizeof (encbuf), "ISO_8859-%u", 4u + c);
    }
    else switch (c)
    {
        case 0x10: /* two more bytes */
            offset = 3;
            if (length < 3 || in[1] != 0x00)
                return NULL;

            c = in[2];
            /* c is an arbitrary byte: bound it before shifting, as a shift
             * count of 32 or more would be undefined behaviour. */
            if (c >= 16 || !((1u << c) & 0xEFFEu))
                return NULL;
            /* 1-11, 13-15 -> ISO 8859-(c) */
            snprintf (encbuf, sizeof (encbuf), "ISO_8859-%hhu", c);
            break;
        case 0x11: /* the BMP */
        case 0x14: /* Big5 subset of the BMP */
            encoding = "UCS-2BE";
            break;
        case 0x12:
            /* DVB has no clue about Korean. KS X 1001 (a.k.a. KS C 5601) is a
             * character set, not a character encoding... So we assume EUC-KR.
             * It is an encoding of KS X 1001. In practice, I guess nobody uses
             * this in any real DVB system. */
            encoding = "EUC-KR";
            break;
        case 0x13: /* GB-2312-1980 */
            encoding = "GB2312";
            break;
        case 0x15:
            encoding = "UTF-8";
            break;
        /* 0x1F (operator-specific?) would carry one extra selector byte;
         * it is not supported and falls through to the default case. */
        default:
            return NULL;
    }

    in += offset;
    length -= offset;

    char *out = FromCharset (encoding, in, length);
    if (out == NULL)
    {   /* Fallback: keep the raw bytes and sanitize them */
        out = strndup (in, length);
        if (unlikely(out == NULL))
            return NULL;
        EnsureUTF8 (out);
    }

    /* From here on, length tracks the size of the converted string */
    length = strlen(out);

    /* Convert control codes */
    for (char *p = strchr (out, '\xC2'); p != NULL; )
    {
        /* We have valid UTF-8, so 0xC2 is followed by a continuation byte. */
        /* 0x80-0x85,0x88-0x89 are reserved.
         * 0x86-0x87 are character emphasis on/off.
         * 0x8A is CR/LF.
         * 0x8B-0x9F are unspecified. */
        if (p[1] == '\x86' || p[1] == '\x87')
        {   /* Strip character emphasis */
            const size_t tail = length - (size_t)(p - out) - 2;

            /* Move the remainder and its nul terminator, nothing more */
            memmove (p, p + 2, tail + 1);
            length -= 2;
            /* p now points at the next character: rescan from p itself so
             * that a directly following control code is not skipped. */
            p = strchr (p, '\xC2');
            continue;
        }

        if (p[1] == '\x8A')
            memcpy (p, "\r\n", 2);

        p = strchr (p + 1, '\xC2');
    }

    /* Private use area */
    for (char *p = strchr (out, '\xEE'); p != NULL; )
    {
        /* Within UTF-8, 0xEE is followed by two continuation bytes.
         * 0xEE 0x82 0x8x encodes U+E08x; other sequences are left alone. */
        if (p[1] == '\x82')
        {
            if (p[2] == '\x86' || p[2] == '\x87')
            {   /* Strip character emphasis */
                const size_t tail = length - (size_t)(p - out) - 3;

                memmove (p, p + 3, tail + 1);
                length -= 3;
                p = strchr (p, '\xEE'); /* rescan from the same position */
                continue;
            }

            if (p[2] == '\x8A')
                memcpy (p, "\r\r\n", 3); /* we need three bytes, so two CRs ;) */
        }

        p = strchr (p + 1, '\xEE');
    }

    return out;
}