/src/libreoffice/include/i18nutil/unicode.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | #ifndef INCLUDED_I18NUTIL_UNICODE_HXX |
20 | | #define INCLUDED_I18NUTIL_UNICODE_HXX |
21 | | |
22 | | #include <com/sun/star/i18n/UnicodeScript.hpp> |
23 | | #include <sal/types.h> |
24 | | #include <rtl/ustrbuf.hxx> |
25 | | #include <unicode/uchar.h> |
26 | | #include <unicode/uscript.h> |
27 | | #include <i18nutil/i18nutildllapi.h> |
28 | | |
29 | | class LanguageTag; |
30 | | |
31 | | /* Element type of the table handed to unicode::getUnicodeScriptType() through its typeList parameter. */ struct ScriptTypeList |
32 | | { |
33 | | css::i18n::UnicodeScript from; /* presumably the first script of the covered range -- TODO confirm in the implementation */ |
34 | | css::i18n::UnicodeScript to; /* presumably the last script of the covered range -- TODO confirm in the implementation */ |
35 | | sal_Int16 value; /* presumably the type reported for scripts between from and to -- TODO confirm */ |
36 | | }; |
37 | | |
38 | | /* Collection of static helpers around Unicode character properties and script classification. Every member declared here is static and the class declares no data members. */ class I18NUTIL_DLLPUBLIC unicode |
39 | | { |
40 | | public: |
41 | | static sal_Int16 getUnicodeType(const sal_uInt32 ch); |
42 | | static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList* typeList, /* NOTE(review): ch is a sal_Unicode here, not a sal_uInt32 as in getUnicodeType(); presumably only a single UTF-16 unit can be classified -- confirm */ |
43 | | sal_Int16 unknownType = 0); /* unknownType defaults to 0; presumably it is the result when no typeList entry matches ch -- confirm in the implementation */ |
44 | | static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type); |
45 | | static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type); |
46 | | static sal_uInt8 getUnicodeDirection(const sal_Unicode ch); |
47 | | static sal_uInt32 GetMirroredChar(sal_uInt32); /* the parameter is unnamed in this declaration; presumably the code point whose mirrored counterpart is wanted -- confirm */ |
48 | | static bool isControl(const sal_uInt32 ch); |
49 | | static bool isAlpha(const sal_uInt32 ch); |
50 | | static bool isSpace(const sal_uInt32 ch); /* NOTE(review): how this differs from isWhiteSpace() is not visible in this header -- see the implementation */ |
51 | | static bool isWhiteSpace(const sal_uInt32 ch); |
52 | | |
53 | | /** Check for Unicode variation sequence selectors |
54 | | (defined inline; queries ICU's UCHAR_VARIATION_SELECTOR property via u_getIntPropertyValue()). |
55 | | @param nCode A Unicode code point. |
56 | | |
57 | | @return True if nCode is a Unicode variation sequence selector. |
58 | | */ |
59 | | static bool isVariationSelector(sal_uInt32 nCode) |
60 | 0 | { |
61 | 0 | return u_getIntPropertyValue(nCode, UCHAR_VARIATION_SELECTOR) != 0; /* any non-zero property value counts as "is a selector" */ |
62 | 0 | } Unexecuted instantiation: unicode::isVariationSelector(unsigned int) Unexecuted instantiation: unicode::isVariationSelector(unsigned int) |
63 | | |
64 | | //Map an ISO 15924 script code to Latin/Asian/Complex/Weak |
65 | | static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript); |
66 | | |
67 | | //Return a language that can be written in a given ISO 15924 script code |
68 | | static OString getExemplarLanguageForUScriptCode(UScriptCode eScript); |
69 | | |
70 | | //Format a number as a percentage according to the rules of the given |
71 | | //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE |
72 | | static OUString formatPercent(double dNumber, const LanguageTag& rLangTag); |
73 | | |
74 | | /** Map a LanguageTag's language ISO 639 code or script ISO 15924 code or |
75 | | language-script or locale to Latin/Asian/Complex/Weak. If more than one |
76 | | script is used with a language(-country) tag then the first (default) |
77 | | script is mapped for that language. |
78 | | |
79 | | @return a css::i18n::ScriptType value. |
80 | | */ |
81 | | static sal_Int16 getScriptClassFromLanguageTag(const LanguageTag& rLanguageTag); |
82 | | }; |
83 | | |
84 | | /* |
85 | | Toggle between a character and its Unicode Notation. |
86 | | -implements the concept found in Microsoft Word's Alt-X |
87 | | -accepts sequences of up to 8 hex characters and converts into the corresponding Unicode Character |
88 | | -example: 0000A78c or 2bc |
89 | | -accepts sequences of up to 256 characters in Unicode notation |
90 | | -example: U+00000065u+0331u+308 |
91 | | -handles complex characters (with combining elements) and all of the Unicode planes. |
92 | | */ |
93 | | /* Stateful helper: input is fed in through AllowMoreInput(), then StringToReplace() and ReplacementString() are queried. */ class I18NUTIL_DLLPUBLIC ToggleUnicodeCodepoint |
94 | | { |
95 | | private: |
96 | | OUStringBuffer maInput; /* presumably the text accumulated by AllowMoreInput() -- TODO confirm in the implementation */ |
97 | | OUStringBuffer maUtf16; |
98 | | OUStringBuffer maCombining; |
99 | | bool mbRequiresU = false; |
100 | | bool mbIsHexString = false; |
101 | | #ifndef NDEBUG |
102 | | bool mbInputEnded = false; /* only present when NDEBUG is not defined. NOTE(review): this makes the layout of this exported class depend on NDEBUG, so every translation unit using it must agree on that macro */ |
103 | | #endif |
104 | | |
105 | | public: |
106 | | /** |
107 | | Build an input string of valid UTF16/UCS4 units to toggle. |
108 | | -do not call the other functions until the input process is complete |
109 | | -build string from Right to Left. (Start from the character to the left of the cursor: move left.) |
110 | | - accepted input: |
111 | | - a sequence of 2 to 8 hex characters not preceded by U+, to convert to Unicode; |
112 | | - a sequence of up to 256 concatenated U+ notation - like u+xxxxU+yyyy, where xxxx and |
113 | | yyyy are sequences of 2 to 8 hexadecimal digits - to convert it all to Unicode; |
114 | | - a single (maybe combined) "symbol" - i.e., one or several codepoints that constitute |
115 | | one glyph - to convert from Unicode to U+ notation. |
116 | | NOTE(review): the meaning of the bool result is not stated here; presumably false means no further input can be accepted -- confirm in the implementation. */ |
117 | | bool AllowMoreInput(sal_uInt32 uChar); |
118 | | |
119 | | /** |
120 | | Validates (and potentially modifies) the input string. |
121 | | -all non-input functions must call this function first to validate the input string |
122 | | -additional input may be prevented after this function is called |
123 | | */ |
124 | | OUString StringToReplace(); |
125 | | OUString ReplacementString(); /* NOTE(review): undocumented in this header; presumably yields the text that takes the place of StringToReplace() -- confirm in the implementation */ |
126 | | }; |
127 | | |
128 | | #endif |
129 | | |
130 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |