Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/lwbrk/gtest/TestLineBreak.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include <stdio.h>
8
#include "nsXPCOM.h"
9
#include "nsIComponentManager.h"
10
#include "nsISupports.h"
11
#include "nsServiceManagerUtils.h"
12
#include "nsString.h"
13
#include "gtest/gtest.h"
14
15
#include "mozilla/intl/LineBreaker.h"
16
#include "mozilla/intl/WordBreaker.h"
17
18
// Fixture 1: plain English text containing parentheses, a run of spaces and
// numbers. The comment rulers give the column of each character.
static const char teng1[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "This is a test to test(reasonable) line    break. This 0.01123 = 45 x 48.";

// Offsets TestASCIILB is expected to collect for teng1. The last entry is the
// text length, which TestASCIILB records once the breaker asks for more text.
static const uint32_t lexp1[] = {
  4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73
};

// Offsets TestASCIIWB is expected to collect for teng1.
static const uint32_t wexp1[] = {
  4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63,
  64,65,67,68,69,70,72
};
31
32
// Fixture 2: nested parentheses, doubled spaces and punctuation-led numbers.
// The comment rulers give the column of each character.
static const char teng2[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "()((reasonab(l)e) line  break. .01123=45x48.";

// Offsets TestASCIILB is expected to collect for teng2. The last entry is the
// text length, which TestASCIILB records once the breaker asks for more text.
static const uint32_t lexp2[] = {
  17,22,23,30,44
};

// Offsets TestASCIIWB is expected to collect for teng2.
static const uint32_t wexp2[] = {
  4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43
};
44
45
// Fixture 3: an apostrophe, a space before a closing parenthesis and a run of
// trailing periods. The comment rulers give the column of each character.
static const char teng3[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "It's a test to test(ronae ) line break....";

// Offsets TestASCIILB is expected to collect for teng3. The last entry is the
// text length, which TestASCIILB records once the breaker asks for more text.
static const uint32_t lexp3[] = {
  4,6,11,14,25,27,32,42
};

// Offsets TestASCIIWB is expected to collect for teng3.
static const uint32_t wexp3[] = {
  2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38
};
57
58
// Column rulers printed under the input string when Check() reports a
// mismatch: ruler1 carries the tens digit, ruler2 the units digit.
static const char ruler1[] =
"          1         2         3         4         5         6         7  ";
static const char ruler2[] =
"0123456789012345678901234567890123456789012345678901234567890123456789012";

// Compares the break offsets a test collected against the expected ones.
//
//   in     - the text that was broken; only printed on failure.
//   out    - expected offsets, |outlen| entries.
//   outlen - number of expected offsets.
//   i      - number of offsets actually collected in |res|.
//   res    - collected offsets; only the first |i| entries are read.
//
// Returns true when the counts match and every collected offset equals the
// expected one at the same index. On any mismatch a diagnostic dump (each
// differing entry, the input with column rulers, and both full lists) is
// written to stdout and false is returned.
bool
Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i,
      const uint32_t res[256])
{
  bool ok = true;

  // All values printed below are uint32_t, so they are formatted with %u;
  // %d would misreport anything above INT_MAX.
  if (i != outlen) {
    ok = false;
    printf("WARNING!!! return size wrong, expect %u but got %u \n",
           outlen, i);
  }

  for (uint32_t j = 0; j < i; j++) {
    if (j >= outlen) {
      // More offsets were collected than expected.
      ok = false;
      printf("[%u] additional %u\n", j, res[j]);
    } else if (res[j] != out[j]) {
      ok = false;
      printf("[%u] expect %u but got %u\n", j, out[j], res[j]);
    }
  }

  if (ok) {
    return true;
  }

  printf("string  = \n%s\n", in);
  printf("%s\n", ruler1);
  printf("%s\n", ruler2);

  printf("Expect = \n");
  for (uint32_t j = 0; j < outlen; j++) {
    printf("%u,", out[j]);
  }

  printf("\nResult = \n");
  for (uint32_t j = 0; j < i; j++) {
    printf("%u,", res[j]);
  }
  printf("\n");

  return false;
}
106
107
bool
108
TestASCIILB(mozilla::intl::LineBreaker *lb,
109
            const char* in,
110
            const uint32_t* out, uint32_t outlen)
111
0
{
112
0
  NS_ConvertASCIItoUTF16 eng1(in);
113
0
  uint32_t i;
114
0
  uint32_t res[256];
115
0
  int32_t curr;
116
0
117
0
  for (i = 0, curr = 0;
118
0
       curr != NS_LINEBREAKER_NEED_MORE_TEXT && i < 256;
119
0
       i++) {
120
0
    curr = lb->Next(eng1.get(), eng1.Length(), curr);
121
0
    res[i] = curr != NS_LINEBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
122
0
  }
123
0
124
0
  return Check(in, out, outlen, i, res);
125
0
}
126
127
bool
128
TestASCIIWB(mozilla::intl::WordBreaker *lb,
129
            const char* in,
130
            const uint32_t* out, uint32_t outlen)
131
0
{
132
0
  NS_ConvertASCIItoUTF16 eng1(in);
133
0
134
0
  uint32_t i;
135
0
  uint32_t res[256];
136
0
  int32_t curr = 0;
137
0
138
0
  for (i = 0, curr = lb->NextWord(eng1.get(), eng1.Length(), curr);
139
0
       curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256;
140
0
       curr = lb->NextWord(eng1.get(), eng1.Length(), curr), i++) {
141
0
    res [i] = curr != NS_WORDBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
142
0
  }
143
0
144
0
  return Check(in, out, outlen, i, res);
145
0
}
146
147
// Creates a LineBreaker and checks the offsets it produces for each of the
// three ASCII fixtures against the matching lexpN table.
TEST(LineBreak, LineBreaker)
{
  using mozilla::intl::LineBreaker;

  RefPtr<LineBreaker> breaker = LineBreaker::Create();
  ASSERT_TRUE(breaker);

  ASSERT_TRUE(TestASCIILB(breaker, teng1, lexp1,
                          sizeof(lexp1) / sizeof(lexp1[0])));
  ASSERT_TRUE(TestASCIILB(breaker, teng2, lexp2,
                          sizeof(lexp2) / sizeof(lexp2[0])));
  ASSERT_TRUE(TestASCIILB(breaker, teng3, lexp3,
                          sizeof(lexp3) / sizeof(lexp3[0])));
}
157
158
// Creates a WordBreaker and checks the offsets it produces for each of the
// three ASCII fixtures against the matching wexpN table.
TEST(LineBreak, WordBreaker)
{
  using mozilla::intl::WordBreaker;

  RefPtr<WordBreaker> breaker = WordBreaker::Create();
  ASSERT_TRUE(breaker);

  ASSERT_TRUE(TestASCIIWB(breaker, teng1, wexp1,
                          sizeof(wexp1) / sizeof(wexp1[0])));
  ASSERT_TRUE(TestASCIIWB(breaker, teng2, wexp2,
                          sizeof(wexp2) / sizeof(wexp2[0])));
  ASSERT_TRUE(TestASCIIWB(breaker, teng3, wexp3,
                          sizeof(wexp3) / sizeof(wexp3[0])));
}
167
168
// One sentence, "This   is a internationalization work.", cut into five
// fragments so that some words straddle a fragment boundary. The ruler below
// gives the column of each character inside a fragment.
//                         012345678901234
static const char wb0[] = "T";
static const char wb1[] = "h";
static const char wb2[] = "is   is a int";
static const char wb3[] = "ernationali";
static const char wb4[] = "zation work.";

// Fragments in sentence order. The table is read-only, so the pointers are
// const as well as the characters they point to.
static const char* const wb[] = { wb0, wb1, wb2, wb3, wb4 };
176
177
void
178
TestPrintWordWithBreak()
179
0
{
180
0
  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
181
0
  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
182
0
183
0
  nsAutoString result;
184
0
185
0
  for (uint32_t i = 0; i < numOfFragment; i++) {
186
0
    NS_ConvertASCIItoUTF16 fragText(wb[i]);
187
0
188
0
    int32_t cur = 0;
189
0
    cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
190
0
    uint32_t start = 0;
191
0
    for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) {
192
0
      result.Append(Substring(fragText, start, cur - start));
193
0
      result.Append('^');
194
0
      start = (cur >= 0 ? cur : cur - start);
195
0
      cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
196
0
    }
197
0
198
0
    result.Append(Substring(fragText, fragText.Length() - start));
199
0
200
0
    if (i != numOfFragment - 1) {
201
0
      NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]);
202
0
203
0
      bool canBreak = true;
204
0
      canBreak = wbk->BreakInBetween(fragText.get(),
205
0
                                     fragText.Length(),
206
0
                                     nextFragText.get(),
207
0
                                     nextFragText.Length());
208
0
      if (canBreak) {
209
0
        result.Append('^');
210
0
      }
211
0
      fragText.Assign(nextFragText);
212
0
    }
213
0
  }
214
0
  ASSERT_STREQ("is^   ^is^ ^a^ ^  is a intzation^ ^work^ation work.",
215
0
               NS_ConvertUTF16toUTF8(result).get());
216
0
}
217
218
void
219
TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
220
                              const char* expected)
221
0
{
222
0
  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
223
0
  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
224
0
225
0
  NS_ConvertASCIItoUTF16 fragText(wb[fragN]);
226
0
227
0
  mozilla::intl::WordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset);
228
0
229
0
  bool canBreak;
230
0
  nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin));
231
0
232
0
  if ((uint32_t)fragText.Length() == res.mEnd) {
233
0
    // if we hit the end of the fragment
234
0
    nsAutoString curFragText = fragText;
235
0
    for(uint32_t  p = fragN +1; p < numOfFragment ;p++)
236
0
    {
237
0
      NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
238
0
      canBreak = wbk->BreakInBetween(curFragText.get(),
239
0
                                     curFragText.Length(),
240
0
                                     nextFragText.get(),
241
0
                                     nextFragText.Length());
242
0
      if (canBreak) {
243
0
        break;
244
0
      }
245
0
      mozilla::intl::WordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(),
246
0
                                    0);
247
0
248
0
      result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin));
249
0
250
0
      if ((uint32_t)nextFragText.Length() != r.mEnd) {
251
0
        break;
252
0
      }
253
0
      nextFragText.Assign(curFragText);
254
0
    }
255
0
  }
256
0
257
0
  if (0 == res.mBegin) {
258
0
    // if we hit the beginning of the fragment
259
0
    nsAutoString curFragText = fragText;
260
0
    for (uint32_t p = fragN; p > 0; p--) {
261
0
      NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]);
262
0
      canBreak = wbk->BreakInBetween(prevFragText.get(),
263
0
                                     prevFragText.Length(),
264
0
                                     curFragText.get(),
265
0
                                     curFragText.Length());
266
0
      if (canBreak) {
267
0
        break;
268
0
      }
269
0
      mozilla::intl::WordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(),
270
0
                                    prevFragText.Length());
271
0
272
0
      result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
273
0
274
0
      if (0 != r.mBegin) {
275
0
        break;
276
0
      }
277
0
      prevFragText.Assign(curFragText);
278
0
    }
279
0
  }
280
0
281
0
  ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get())
282
0
    << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")";
283
0
}
284
285
// Exercises WordBreaker on text split across fragments: first the '^' markup
// check, then a series of word lookups starting from a given fragment and
// offset.
TEST(LineBreak, WordBreakUsage)
{
  TestPrintWordWithBreak();

  // { fragment index, offset inside the fragment, expected word }
  static const struct {
    uint32_t mFragment;
    uint32_t mOffset;
    const char* mExpected;
  } kLookups[] = {
    { 0, 0,  "This" },
    { 1, 0,  "his" },
    { 2, 0,  "is" },
    { 2, 1,  "is" },
    { 2, 9,  " " },
    { 2, 10, "internationalization" },
    { 3, 4,  "ernationalization" },
    { 3, 8,  "ernationalization" },
    { 4, 6,  " " },
    { 4, 7,  "work" },
  };

  for (const auto& lookup : kLookups) {
    TestFindWordBreakFromPosition(lookup.mFragment, lookup.mOffset,
                                  lookup.mExpected);
  }
}
299