Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/unicharutil/util/GreekCasing.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "GreekCasing.h"
7
#include "nsUnicharUtils.h"
8
#include "nsUnicodeProperties.h"
9
10
// Custom uppercase mapping for Greek; see bug 307039 for details
//
// The macros below name the code points that GreekCasing::UpperCase() handles
// specially. For each vowel the *_TONOS form has a value in the U+03xx range
// and the *_OXIA form has a value in the U+1Fxx range.

// Lowercase vowels: plain, with tonos, with oxia (iota and upsilon also have
// dialytika variants).
11
0
#define GREEK_LOWER_ALPHA                      0x03B1
12
0
#define GREEK_LOWER_ALPHA_TONOS                0x03AC
13
0
#define GREEK_LOWER_ALPHA_OXIA                 0x1F71
14
0
#define GREEK_LOWER_EPSILON                    0x03B5
15
0
#define GREEK_LOWER_EPSILON_TONOS              0x03AD
16
0
#define GREEK_LOWER_EPSILON_OXIA               0x1F73
17
0
#define GREEK_LOWER_ETA                        0x03B7
18
0
#define GREEK_LOWER_ETA_TONOS                  0x03AE
19
0
#define GREEK_LOWER_ETA_OXIA                   0x1F75
20
0
#define GREEK_LOWER_IOTA                       0x03B9
21
0
#define GREEK_LOWER_IOTA_TONOS                 0x03AF
22
0
#define GREEK_LOWER_IOTA_OXIA                  0x1F77
23
0
#define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
24
0
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
25
0
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
26
0
#define GREEK_LOWER_OMICRON                    0x03BF
27
0
#define GREEK_LOWER_OMICRON_TONOS              0x03CC
28
0
#define GREEK_LOWER_OMICRON_OXIA               0x1F79
29
0
#define GREEK_LOWER_UPSILON                    0x03C5
30
0
#define GREEK_LOWER_UPSILON_TONOS              0x03CD
31
0
#define GREEK_LOWER_UPSILON_OXIA               0x1F7B
32
0
#define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
33
0
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
34
0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
35
0
#define GREEK_LOWER_OMEGA                      0x03C9
36
0
#define GREEK_LOWER_OMEGA_TONOS                0x03CE
37
0
#define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
38
0
// Uppercase vowels without accent; these are the values UpperCase() returns
// for the corresponding plain and accented vowels.
#define GREEK_UPPER_ALPHA                      0x0391
39
0
#define GREEK_UPPER_EPSILON                    0x0395
40
0
#define GREEK_UPPER_ETA                        0x0397
41
0
#define GREEK_UPPER_IOTA                       0x0399
42
0
#define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
43
0
#define GREEK_UPPER_OMICRON                    0x039F
44
0
#define GREEK_UPPER_UPSILON                    0x03A5
45
0
#define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
46
0
#define GREEK_UPPER_OMEGA                      0x03A9
47
0
// Uppercase vowels that already carry a tonos or oxia.
#define GREEK_UPPER_ALPHA_TONOS                0x0386
48
0
#define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
49
0
#define GREEK_UPPER_EPSILON_TONOS              0x0388
50
0
#define GREEK_UPPER_EPSILON_OXIA               0x1FC9
51
0
#define GREEK_UPPER_ETA_TONOS                  0x0389
52
0
#define GREEK_UPPER_ETA_OXIA                   0x1FCB
53
0
#define GREEK_UPPER_IOTA_TONOS                 0x038A
54
0
#define GREEK_UPPER_IOTA_OXIA                  0x1FDB
55
0
#define GREEK_UPPER_OMICRON_TONOS              0x038C
56
0
#define GREEK_UPPER_OMICRON_OXIA               0x1FF9
57
0
#define GREEK_UPPER_UPSILON_TONOS              0x038E
58
0
#define GREEK_UPPER_UPSILON_OXIA               0x1FEB
59
0
#define GREEK_UPPER_OMEGA_TONOS                0x038F
60
0
#define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
61
0
// Combining marks (values in the U+03xx combining range) that UpperCase()
// treats as accents or diaeresis.
#define COMBINING_ACUTE_ACCENT                 0x0301
62
0
#define COMBINING_DIAERESIS                    0x0308
63
0
#define COMBINING_ACUTE_TONE_MARK              0x0341
64
0
#define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344
65
66
namespace mozilla {
67
68
// Maps one code point to its uppercase form using Greek-specific rules.
// aState is read on entry and updated before returning, so that the handling
// of a character can depend on the vowel/accent/diaeresis context recorded
// while handling the preceding one.
//
// aCh              - the code point to convert.
// aState           - in/out: context left by the previous character; updated
//                    here to describe the character just handled.
// aMarkEtaPos      - out: reset to false on entry; set to true only when an
//                    eta-with-tonos is seen in kStart state and is returned
//                    with its tonos kept.
// aUpdateMarkedEta - out: reset to false on entry; set to true when the state
//                    on entry is kEtaAccMarked and aCh is a letter or a mark.
//                    NOTE(review): presumably the caller then removes the
//                    tonos from the eta it marked earlier -- confirm against
//                    the caller.
//
// Returns the uppercase code point, or uint32_t(-1) when aCh is a combining
// acute accent that follows a vowel or diaeresis and is to be omitted from
// the result string.
uint32_t
69
GreekCasing::UpperCase(uint32_t aCh, GreekCasing::State& aState,
70
                       bool& aMarkEtaPos, bool& aUpdateMarkedEta)
71
0
{
72
0
  aMarkEtaPos = false;
73
0
  aUpdateMarkedEta = false;
74
0
75
0
  uint8_t category = unicode::GetGeneralCategory(aCh);
76
0
77
0
  // The state on entry is kEtaAccMarked when the previous character was an
  // eta-with-tonos seen in kStart state (returned with its tonos kept). If
  // the current character is a letter or a mark, report that through
  // aUpdateMarkedEta; in either case carry on as an ordinary accented eta.
  if (aState == kEtaAccMarked) {
78
0
    switch (category) {
79
0
      case HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER:
80
0
      case HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER:
81
0
      case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
82
0
      case HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER:
83
0
      case HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER:
84
0
      case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
85
0
      case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
86
0
      case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
87
0
        aUpdateMarkedEta = true;
88
0
        break;
89
0
      default:
90
0
        break;
91
0
    }
92
0
    aState = kEtaAcc;
93
0
  }
94
0
95
0
  // Greek vowels, diaeresis and acute accents get special handling. Every
  // case below returns directly, except an acute accent in an untracked
  // state, which breaks out to the generic handling after this switch.
  switch (aCh) {
96
0
  // unaccented vowels: record which vowel was seen
  case GREEK_UPPER_ALPHA:
97
0
  case GREEK_LOWER_ALPHA:
98
0
    aState = kAlpha;
99
0
    return GREEK_UPPER_ALPHA;
100
0
101
0
  case GREEK_UPPER_EPSILON:
102
0
  case GREEK_LOWER_EPSILON:
103
0
    aState = kEpsilon;
104
0
    return GREEK_UPPER_EPSILON;
105
0
106
0
  case GREEK_UPPER_ETA:
107
0
  case GREEK_LOWER_ETA:
108
0
    aState = kEta;
109
0
    return GREEK_UPPER_ETA;
110
0
111
0
  // an already-uppercase iota is never given a dialytika here, unlike the
  // lowercase iota handled further down
  case GREEK_UPPER_IOTA:
112
0
    aState = kIota;
113
0
    return GREEK_UPPER_IOTA;
114
0
115
0
  case GREEK_UPPER_OMICRON:
116
0
  case GREEK_LOWER_OMICRON:
117
0
    aState = kOmicron;
118
0
    return GREEK_UPPER_OMICRON;
119
0
120
0
  // an already-uppercase upsilon is never given a dialytika here; an upsilon
  // directly after an unaccented omicron is tracked as kOmicronUpsilon
  case GREEK_UPPER_UPSILON:
121
0
    switch (aState) {
122
0
    case kOmicron:
123
0
      aState = kOmicronUpsilon;
124
0
      break;
125
0
    default:
126
0
      aState = kUpsilon;
127
0
      break;
128
0
    }
129
0
    return GREEK_UPPER_UPSILON;
130
0
131
0
  case GREEK_UPPER_OMEGA:
132
0
  case GREEK_LOWER_OMEGA:
133
0
    aState = kOmega;
134
0
    return GREEK_UPPER_OMEGA;
135
0
136
0
  // iota and upsilon may be the second vowel of a diphthong
137
0
  case GREEK_LOWER_IOTA:
138
0
    switch (aState) {
139
0
    // directly after an accented alpha/epsilon/omicron/upsilon, the iota is
    // returned with a dialytika
    case kAlphaAcc:
140
0
    case kEpsilonAcc:
141
0
    case kOmicronAcc:
142
0
    case kUpsilonAcc:
143
0
      aState = kInWord;
144
0
      return GREEK_UPPER_IOTA_DIALYTIKA;
145
0
    default:
146
0
      break;
147
0
    }
148
0
    aState = kIota;
149
0
    return GREEK_UPPER_IOTA;
150
0
151
0
  case GREEK_LOWER_UPSILON:
152
0
    switch (aState) {
153
0
    // directly after an accented alpha/epsilon/eta/omicron, the upsilon is
    // returned with a dialytika
    case kAlphaAcc:
154
0
    case kEpsilonAcc:
155
0
    case kEtaAcc:
156
0
    case kOmicronAcc:
157
0
      aState = kInWord;
158
0
      return GREEK_UPPER_UPSILON_DIALYTIKA;
159
0
    case kOmicron:
160
0
      aState = kOmicronUpsilon;
161
0
      break;
162
0
    default:
163
0
      aState = kUpsilon;
164
0
      break;
165
0
    }
166
0
    return GREEK_UPPER_UPSILON;
167
0
168
0
  // precomposed or combining diaeresis: remember it (so a following acute
  // accent can be dropped) and use the standard uppercase mapping
  case GREEK_UPPER_IOTA_DIALYTIKA:
169
0
  case GREEK_LOWER_IOTA_DIALYTIKA:
170
0
  case GREEK_UPPER_UPSILON_DIALYTIKA:
171
0
  case GREEK_LOWER_UPSILON_DIALYTIKA:
172
0
  case COMBINING_DIAERESIS:
173
0
    aState = kDiaeresis;
174
0
    return ToUpperCase(aCh);
175
0
176
0
  // remove accent if it follows a vowel or diaeresis,
177
0
  // and set appropriate state for diphthong detection
178
0
  case COMBINING_ACUTE_ACCENT:
179
0
  case COMBINING_ACUTE_TONE_MARK:
180
0
    switch (aState) {
181
0
    case kAlpha:
182
0
      aState = kAlphaAcc;
183
0
      return uint32_t(-1); // omit this char from result string
184
0
    case kEpsilon:
185
0
      aState = kEpsilonAcc;
186
0
      return uint32_t(-1);
187
0
    case kEta:
188
0
      aState = kEtaAcc;
189
0
      return uint32_t(-1);
190
0
    case kIota:
191
0
      aState = kIotaAcc;
192
0
      return uint32_t(-1);
193
0
    case kOmicron:
194
0
      aState = kOmicronAcc;
195
0
      return uint32_t(-1);
196
0
    case kUpsilon:
197
0
      aState = kUpsilonAcc;
198
0
      return uint32_t(-1);
199
0
    case kOmicronUpsilon:
200
0
      aState = kInWord; // this completed a diphthong
201
0
      return uint32_t(-1);
202
0
    case kOmega:
203
0
      aState = kOmegaAcc;
204
0
      return uint32_t(-1);
205
0
    case kDiaeresis:
206
0
      aState = kInWord;
207
0
      return uint32_t(-1);
208
0
    // accent in any other state: keep it and let the generic handling
    // after the outer switch deal with it
    default:
209
0
      break;
210
0
    }
211
0
    break;
212
0
213
0
  // combinations with dieresis+accent just strip the accent,
214
0
  // and reset to start state (don't form diphthong with following vowel)
215
0
  case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
216
0
  case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
217
0
    aState = kInWord;
218
0
    return GREEK_UPPER_IOTA_DIALYTIKA;
219
0
220
0
  case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
221
0
  case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
222
0
    aState = kInWord;
223
0
    return GREEK_UPPER_UPSILON_DIALYTIKA;
224
0
225
0
  // the combining dialytika+tonos mark is replaced by a plain combining
  // diaeresis
  case COMBINING_GREEK_DIALYTIKA_TONOS:
226
0
    aState = kInWord;
227
0
    return COMBINING_DIAERESIS;
228
0
229
0
  // strip accents from vowels, and note the vowel seen so that we can detect
230
0
  // diphthongs where diaeresis needs to be added
231
0
  case GREEK_LOWER_ALPHA_TONOS:
232
0
  case GREEK_LOWER_ALPHA_OXIA:
233
0
  case GREEK_UPPER_ALPHA_TONOS:
234
0
  case GREEK_UPPER_ALPHA_OXIA:
235
0
    aState = kAlphaAcc;
236
0
    return GREEK_UPPER_ALPHA;
237
0
238
0
  case GREEK_LOWER_EPSILON_TONOS:
239
0
  case GREEK_LOWER_EPSILON_OXIA:
240
0
  case GREEK_UPPER_EPSILON_TONOS:
241
0
  case GREEK_UPPER_EPSILON_OXIA:
242
0
    aState = kEpsilonAcc;
243
0
    return GREEK_UPPER_EPSILON;
244
0
245
0
  // eta-with-tonos is the one accented vowel whose accent may be kept: when
  // seen in kStart state it is returned with its tonos and flagged through
  // aMarkEtaPos. Only the TONOS code points take this path; the OXIA ones
  // below always lose the accent.
  // NOTE(review): presumably this covers eta-tonos standing alone as a word
  // -- confirm against bug 307039.
  case GREEK_LOWER_ETA_TONOS:
246
0
  case GREEK_UPPER_ETA_TONOS:
247
0
    if (aState == kStart) {
248
0
      aState = kEtaAccMarked;
249
0
      aMarkEtaPos = true; // mark in case we need to remove the tonos later
250
0
      return GREEK_UPPER_ETA_TONOS; // treat as disjunctive eta for now
251
0
    }
252
0
    // if not in initial state, fall through to strip the accent
253
0
    MOZ_FALLTHROUGH;
254
0
255
0
  case GREEK_LOWER_ETA_OXIA:
256
0
  case GREEK_UPPER_ETA_OXIA:
257
0
    aState = kEtaAcc;
258
0
    return GREEK_UPPER_ETA;
259
0
260
0
  case GREEK_LOWER_IOTA_TONOS:
261
0
  case GREEK_LOWER_IOTA_OXIA:
262
0
  case GREEK_UPPER_IOTA_TONOS:
263
0
  case GREEK_UPPER_IOTA_OXIA:
264
0
    aState = kIotaAcc;
265
0
    return GREEK_UPPER_IOTA;
266
0
267
0
  case GREEK_LOWER_OMICRON_TONOS:
268
0
  case GREEK_LOWER_OMICRON_OXIA:
269
0
  case GREEK_UPPER_OMICRON_TONOS:
270
0
  case GREEK_UPPER_OMICRON_OXIA:
271
0
    aState = kOmicronAcc;
272
0
    return GREEK_UPPER_OMICRON;
273
0
274
0
  case GREEK_LOWER_UPSILON_TONOS:
275
0
  case GREEK_LOWER_UPSILON_OXIA:
276
0
  case GREEK_UPPER_UPSILON_TONOS:
277
0
  case GREEK_UPPER_UPSILON_OXIA:
278
0
    switch (aState) {
279
0
    case kOmicron:
280
0
      aState = kInWord; // this completed a diphthong
281
0
      break;
282
0
    default:
283
0
      aState = kUpsilonAcc;
284
0
      break;
285
0
    }
286
0
    return GREEK_UPPER_UPSILON;
287
0
288
0
  case GREEK_LOWER_OMEGA_TONOS:
289
0
  case GREEK_LOWER_OMEGA_OXIA:
290
0
  case GREEK_UPPER_OMEGA_TONOS:
291
0
  case GREEK_UPPER_OMEGA_OXIA:
292
0
    aState = kOmegaAcc;
293
0
    return GREEK_UPPER_OMEGA;
294
0
  }
295
0
296
0
  // all other characters just reset the state to either kStart or kInWord,
297
0
  // and use standard mappings
298
0
  // (letters and marks give kInWord; every other category gives kStart)
  switch (category) {
299
0
    case HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER:
300
0
    case HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER:
301
0
    case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
302
0
    case HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER:
303
0
    case HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER:
304
0
    case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
305
0
    case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
306
0
    case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
307
0
      aState = kInWord;
308
0
      break;
309
0
    default:
310
0
      aState = kStart;
311
0
      break;
312
0
  }
313
0
314
0
  return ToUpperCase(aCh);
315
0
}
316
317
} // namespace mozilla