Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/unotools/source/misc/wincodepage.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#include <sal/config.h>
11
12
#include <string_view>
13
14
#include <unotools/wincodepage.hxx>
15
#include <rtl/textenc.h>
16
17
namespace{
18
19
struct LangEncodingDef
20
{
21
    const std::u16string_view msLangStr;
22
    rtl_TextEncoding meTextEncoding;
23
};
24
25
// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
26
/**
 * Looks up the Windows ANSI code page for a language tag.
 *
 * The beginning of sLanguage is compared (ASCII case-insensitively) with the
 * keys of a fixed table; the first row that matches wins, which is why the
 * more specific "zh-cn" row has to come before the generic "zh" row.
 *
 * A key only counts as a match when it ends on a subtag boundary of
 * sLanguage, i.e. at the end of the string or right before '-' or '_'.
 * A bare prefix test would let unrelated three-letter language codes hit a
 * two-letter row (e.g. "kok-IN" would be treated as Korean "ko", "arn-CL" as
 * Arabic "ar").
 *
 * @param sLanguage  language tag, e.g. "en-US", "zh-CN" or "ja"
 * @return the encoding of the first matching row, or RTL_TEXTENCODING_MS_1252
 *         when no row matches (including an empty sLanguage)
 */
rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const OUString& sLanguage)
{
    static constexpr LangEncodingDef aLanguageTab[] =
    {
        { u"en",    RTL_TEXTENCODING_MS_1252 }, // Most used -> first in list
        { u"th",    RTL_TEXTENCODING_MS_874 },
        { u"ja",    RTL_TEXTENCODING_MS_932 },
        { u"zh-cn", RTL_TEXTENCODING_MS_936 },  // Chinese (simplified) - must go before "zh"
        { u"ko",    RTL_TEXTENCODING_MS_949 },
        { u"zh",    RTL_TEXTENCODING_MS_950 },  // Chinese (traditional)
        { u"bs",    RTL_TEXTENCODING_MS_1250 },
        { u"cs",    RTL_TEXTENCODING_MS_1250 },
        { u"hr",    RTL_TEXTENCODING_MS_1250 },
        { u"hu",    RTL_TEXTENCODING_MS_1250 },
        { u"pl",    RTL_TEXTENCODING_MS_1250 },
        { u"ro",    RTL_TEXTENCODING_MS_1250 },
        { u"sk",    RTL_TEXTENCODING_MS_1250 },
        { u"sl",    RTL_TEXTENCODING_MS_1250 },
        // Disabled alternative for "sr"; the active row below maps it to MS_1251:
//        { "sr",    RTL_TEXTENCODING_MS_1250 },
        { u"sq",    RTL_TEXTENCODING_MS_1250 },
        { u"be",    RTL_TEXTENCODING_MS_1251 },
        { u"bg",    RTL_TEXTENCODING_MS_1251 },
        { u"mk",    RTL_TEXTENCODING_MS_1251 },
        { u"ru",    RTL_TEXTENCODING_MS_1251 },
        { u"sr",    RTL_TEXTENCODING_MS_1251 },
        { u"uk",    RTL_TEXTENCODING_MS_1251 },
        { u"es",    RTL_TEXTENCODING_MS_1252 },
        { u"el",    RTL_TEXTENCODING_MS_1253 },
        { u"tr",    RTL_TEXTENCODING_MS_1254 },
        { u"he",    RTL_TEXTENCODING_MS_1255 },
        { u"ar",    RTL_TEXTENCODING_MS_1256 },
        { u"et",    RTL_TEXTENCODING_MS_1257 },
        { u"lt",    RTL_TEXTENCODING_MS_1257 },
        { u"lv",    RTL_TEXTENCODING_MS_1257 },
        { u"vi",    RTL_TEXTENCODING_MS_1258 },
    };

    const auto nTagLen = sLanguage.getLength();
    for (const LangEncodingDef& rEntry : aLanguageTab)
    {
        if (!sLanguage.startsWithIgnoreAsciiCase(rEntry.msLangStr))
            continue;

        // The prefix test succeeded, so nKeyLen <= nTagLen and indexing at
        // nKeyLen is in range whenever the two lengths differ.
        const auto nKeyLen
            = static_cast<decltype(sLanguage.getLength())>(rEntry.msLangStr.size());
        if (nKeyLen == nTagLen || sLanguage[nKeyLen] == u'-' || sLanguage[nKeyLen] == u'_')
            return rEntry.meTextEncoding;
    }

    // Unknown or empty language: fall back to Western European.
    return RTL_TEXTENCODING_MS_1252;
}
71
72
/* ----------------------------------------------------------------------- */
73
74
// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
75
// See http://shapelib.maptools.org/codepage.html
76
/**
 * Looks up the Windows OEM (DOS) code page for a language tag.
 *
 * The beginning of sLanguage is compared (ASCII case-insensitively) with the
 * keys of a fixed table; the first row that matches wins, so region-specific
 * rows ("fr-ca", "en-us", "zh-cn") are listed before their generic language
 * row.
 *
 * A key only counts as a match when it ends on a subtag boundary of
 * sLanguage, i.e. at the end of the string or right before '-' or '_'.
 * A bare prefix test would let unrelated three-letter language codes hit a
 * two-letter row (e.g. "fil-PH" would be treated as Finnish "fi", "kok-IN"
 * as Korean "ko").
 *
 * @param sLanguage  language tag, e.g. "en-US", "fr-CA" or "ru"
 * @return the encoding of the first matching row, or RTL_TEXTENCODING_IBM_850
 *         when no row matches (including an empty sLanguage)
 */
rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const OUString& sLanguage)
{
    static constexpr LangEncodingDef aLanguageTab[] =
    {
        { u"de",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"en-us", RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"fi",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"fr-ca", RTL_TEXTENCODING_IBM_863 }, // OEM French Canadian; French Canadian (DOS)
        { u"fr",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"it",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"nl",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"sv",    RTL_TEXTENCODING_IBM_437 }, // OEM United States
        { u"el",    RTL_TEXTENCODING_IBM_737 }, // OEM Greek (formerly 437G); Greek (DOS)
        { u"et",    RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
        { u"lt",    RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
        { u"lv",    RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
        { u"en",    RTL_TEXTENCODING_IBM_850 }, // OEM Multilingual Latin 1; Western European (DOS)
        { u"bs",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"cs",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"hr",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"hu",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"pl",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"ro",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"sk",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"sl",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        // Disabled alternative for "sr"; the active row below maps it to IBM_855:
//        { "sr",    RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
        { u"bg",    RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
        { u"mk",    RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
        { u"sr",    RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
        { u"tr",    RTL_TEXTENCODING_IBM_857 }, // OEM Turkish; Turkish (DOS)
        { u"pt",    RTL_TEXTENCODING_IBM_860 }, // OEM Portuguese; Portuguese (DOS)
        { u"is",    RTL_TEXTENCODING_IBM_861 }, // OEM Icelandic; Icelandic (DOS)
        { u"he",    RTL_TEXTENCODING_IBM_862 }, // OEM Hebrew; Hebrew (DOS)
        { u"ar",    RTL_TEXTENCODING_IBM_864 }, // OEM Arabic; Arabic (864)
        { u"da",    RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS)
        { u"nn",    RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS)
        { u"be",    RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
        { u"ru",    RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
        { u"uk",    RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
        { u"th",    RTL_TEXTENCODING_MS_874 },  // ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
        { u"ja",    RTL_TEXTENCODING_MS_932 },  // ANSI/OEM Japanese; Japanese (Shift-JIS)
        { u"zh-cn", RTL_TEXTENCODING_MS_936 },  // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
        { u"ko",    RTL_TEXTENCODING_MS_949 },  // ANSI/OEM Korean (Unified Hangul Code)
        { u"zh",    RTL_TEXTENCODING_MS_950 },  // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
        { u"vi",    RTL_TEXTENCODING_MS_1258 }, // ANSI/OEM Vietnamese; Vietnamese (Windows)
    };

    const auto nTagLen = sLanguage.getLength();
    for (const LangEncodingDef& rEntry : aLanguageTab)
    {
        if (!sLanguage.startsWithIgnoreAsciiCase(rEntry.msLangStr))
            continue;

        // The prefix test succeeded, so nKeyLen <= nTagLen and indexing at
        // nKeyLen is in range whenever the two lengths differ.
        const auto nKeyLen
            = static_cast<decltype(sLanguage.getLength())>(rEntry.msLangStr.size());
        if (nKeyLen == nTagLen || sLanguage[nKeyLen] == u'-' || sLanguage[nKeyLen] == u'_')
            return rEntry.meTextEncoding;
    }

    // Unknown or empty language: fall back to the multilingual Latin 1 DOS code page.
    return RTL_TEXTENCODING_IBM_850;
}
131
132
} // namespace
133
134
/**
 * Returns the Windows text encoding associated with a language tag.
 *
 * @param sLanguage  language tag handed on unchanged to the table lookup
 * @param bOEM       true to consult the OEM (DOS) code page table,
 *                   false to consult the ANSI code page table
 * @return the encoding chosen by the selected lookup
 */
rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const OUString& sLanguage, bool bOEM)
{
    if (bOEM)
        return impl_getWinTextEncodingFromLangStrOEM(sLanguage);

    return impl_getWinTextEncodingFromLangStrANSI(sLanguage);
}
140
141
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */