/src/libreoffice/sal/textenc/unichars.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | #include <sal/types.h> |
22 | | #include "unichars.hxx" |
23 | | |
24 | | bool ImplIsControlOrFormat(sal_uInt32 nUtf32) |
25 | 0 | { |
26 | | // All code points of |
27 | | // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, |
28 | | // that have a General Category of Cc (Other, Control) or Cf (Other, |
29 | | // Format): |
30 | 0 | return nUtf32 <= 0x001F || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) |
31 | 0 | || nUtf32 == 0x070F // SYRIAC ABBREVIATION MARK |
32 | 0 | || nUtf32 == 0x180B // MONGOLIAN FREE VARIATION SELECTOR ONE |
33 | 0 | || nUtf32 == 0x180C // MONGOLIAN FREE VARIATION SELECTOR TWO |
34 | 0 | || nUtf32 == 0x180D // MONGOLIAN FREE VARIATION SELECTOR THREE |
35 | 0 | || nUtf32 == 0x180E // MONGOLIAN VOWEL SEPARATOR |
36 | 0 | || nUtf32 == 0x200C // ZERO WIDTH NON-JOINER |
37 | 0 | || nUtf32 == 0x200D // ZERO WIDTH JOINER |
38 | 0 | || nUtf32 == 0x200E // LEFT-TO-RIGHT MARK |
39 | 0 | || nUtf32 == 0x200F // RIGHT-TO-LEFT MARK |
40 | 0 | || nUtf32 == 0x202A // LEFT-TO-RIGHT EMBEDDING |
41 | 0 | || nUtf32 == 0x202B // RIGHT-TO-LEFT EMBEDDING |
42 | 0 | || nUtf32 == 0x202C // POP DIRECTIONAL FORMATTING |
43 | 0 | || nUtf32 == 0x202D // LEFT-TO-RIGHT OVERRIDE |
44 | 0 | || nUtf32 == 0x202E // RIGHT-TO-LEFT OVERRIDE |
45 | 0 | || nUtf32 == 0x206A // INHIBIT SYMMETRIC SWAPPING |
46 | 0 | || nUtf32 == 0x206B // ACTIVATE SYMMETRIC SWAPPING |
47 | 0 | || nUtf32 == 0x206C // INHIBIT ARABIC FORM SHAPING |
48 | 0 | || nUtf32 == 0x206D // ACTIVATE ARABIC FORM SHAPING |
49 | 0 | || nUtf32 == 0x206E // NATIONAL DIGIT SHAPES |
50 | 0 | || nUtf32 == 0x206F // NOMINAL DIGIT SHAPES |
51 | 0 | || nUtf32 == 0xFEFF // ZERO WIDTH NO-BREAK SPACE |
52 | 0 | || nUtf32 == 0xFFF9 // INTERLINEAR ANNOTATION ANCHOR |
53 | 0 | || nUtf32 == 0xFFFA // INTERLINEAR ANNOTATION SEPARATOR |
54 | 0 | || nUtf32 == 0xFFFB // INTERLINEAR ANNOTATION TERMINATOR |
55 | 0 | || nUtf32 == 0x1D173 // MUSICAL SYMBOL BEGIN BEAM |
56 | 0 | || nUtf32 == 0x1D174 // MUSICAL SYMBOL END BEAM |
57 | 0 | || nUtf32 == 0x1D175 // MUSICAL SYMBOL BEGIN TIE |
58 | 0 | || nUtf32 == 0x1D176 // MUSICAL SYMBOL END TIE |
59 | 0 | || nUtf32 == 0x1D177 // MUSICAL SYMBOL BEGIN SLUR |
60 | 0 | || nUtf32 == 0x1D178 // MUSICAL SYMBOL END SLUR |
61 | 0 | || nUtf32 == 0x1D179 // MUSICAL SYMBOL BEGIN PHRASE |
62 | 0 | || nUtf32 == 0x1D17A // MUSICAL SYMBOL END PHRASE |
63 | 0 | || nUtf32 == 0xE0001 // LANGUAGE TAG |
64 | 0 | || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); |
65 | 0 | } |
66 | | |
67 | | bool ImplIsPrivateUse(sal_uInt32 nUtf32) |
68 | 0 | { |
69 | | // All code points of |
70 | | // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, |
71 | | // that have a General Category of Co (Other, Private Use): |
72 | 0 | return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) |
73 | 0 | || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); |
74 | 0 | } |
75 | | |
76 | | bool ImplIsZeroWidth(sal_uInt32 nUtf32) |
77 | 0 | { |
78 | | // All code points of |
79 | | // <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1, |
80 | | // that have "ZERO WIDTH" in their Character name: |
81 | 0 | return nUtf32 == 0x200B // ZERO WIDTH SPACE |
82 | 0 | || nUtf32 == 0x200C // ZERO WIDTH NON-JOINER |
83 | 0 | || nUtf32 == 0x200D // ZERO WIDTH JOINER |
84 | 0 | || nUtf32 == 0xFEFF; // ZERO WIDTH NO-BREAK SPACE |
85 | 0 | } |
86 | | |
87 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |