/src/mozilla-central/intl/lwbrk/gtest/TestLineBreak.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include <stdio.h> |
8 | | #include "nsXPCOM.h" |
9 | | #include "nsIComponentManager.h" |
10 | | #include "nsISupports.h" |
11 | | #include "nsServiceManagerUtils.h" |
12 | | #include "nsString.h" |
13 | | #include "gtest/gtest.h" |
14 | | |
15 | | #include "mozilla/intl/LineBreaker.h" |
16 | | #include "mozilla/intl/WordBreaker.h" |
17 | | |
// Test case 1: ASCII input plus the break offsets expected from the line
// breaker (lexp1) and the word breaker (wexp1). The tables are only ever read
// through const pointers, so they are declared const.
//
// NOTE(review): TestASCIILB stores the text length as its final result, and
// lexp1 ends at 73, but the literal as written here is 70 characters long --
// confirm the spacing inside the literal against the expected offsets.
static const char teng1[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "This is a test to test(reasonable) line break. This 0.01123 = 45 x 48.";

static const uint32_t lexp1[] = {
  4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73
};

static const uint32_t wexp1[] = {
  4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63,
  64,65,67,68,69,70,72
};
31 | | |
// Test case 2: punctuation-heavy ASCII input plus the break offsets expected
// from the line breaker (lexp2) and the word breaker (wexp2). Read-only data,
// hence const.
//
// NOTE(review): lexp2 ends at 44 while the literal as written here is 43
// characters long -- confirm the spacing inside the literal.
static const char teng2[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "()((reasonab(l)e) line break. .01123=45x48.";

static const uint32_t lexp2[] = {
  17,22,23,30,44
};

static const uint32_t wexp2[] = {
  4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43
};
44 | | |
// Test case 3: ASCII input with an apostrophe and trailing dots, plus the break
// offsets expected from the line breaker (lexp3) and the word breaker (wexp3).
// Read-only data, hence const.
static const char teng3[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "It's a test to test(ronae ) line break....";

static const uint32_t lexp3[] = {
  4,6,11,14,25,27,32,42
};

static const uint32_t wexp3[] = {
  2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38
};
57 | | |
// Column rulers that Check() prints directly beneath a mismatching input
// string so that reported offsets can be located by eye. They are never
// written to, hence const.
static const char ruler1[] =
  " 1 2 3 4 5 6 7 ";
static const char ruler2[] =
  "0123456789012345678901234567890123456789012345678901234567890123456789012";
62 | | |
63 | | bool |
64 | | Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i, |
65 | | uint32_t res[256]) |
66 | 0 | { |
67 | 0 | bool ok = true; |
68 | 0 |
|
69 | 0 | if (i != outlen) { |
70 | 0 | ok = false; |
71 | 0 | printf("WARNING!!! return size wrong, expect %d but got %d \n", |
72 | 0 | outlen, i); |
73 | 0 | } |
74 | 0 |
|
75 | 0 | for (uint32_t j = 0; j < i; j++) { |
76 | 0 | if (j < outlen) { |
77 | 0 | if (res[j] != out[j]) { |
78 | 0 | ok = false; |
79 | 0 | printf("[%d] expect %d but got %d\n", j, out[j], res[j]); |
80 | 0 | } |
81 | 0 | } else { |
82 | 0 | ok = false; |
83 | 0 | printf("[%d] additional %d\n", j, res[j]); |
84 | 0 | } |
85 | 0 | } |
86 | 0 |
|
87 | 0 | if (!ok) { |
88 | 0 | printf("string = \n%s\n", in); |
89 | 0 | printf("%s\n", ruler1); |
90 | 0 | printf("%s\n", ruler2); |
91 | 0 |
|
92 | 0 | printf("Expect = \n"); |
93 | 0 | for (uint32_t j = 0; j < outlen; j++) { |
94 | 0 | printf("%d,", out[j]); |
95 | 0 | } |
96 | 0 |
|
97 | 0 | printf("\nResult = \n"); |
98 | 0 | for (uint32_t j = 0; j < i; j++) { |
99 | 0 | printf("%d,", res[j]); |
100 | 0 | } |
101 | 0 | printf("\n"); |
102 | 0 | } |
103 | 0 |
|
104 | 0 | return ok; |
105 | 0 | } |
106 | | |
107 | | bool |
108 | | TestASCIILB(mozilla::intl::LineBreaker *lb, |
109 | | const char* in, |
110 | | const uint32_t* out, uint32_t outlen) |
111 | 0 | { |
112 | 0 | NS_ConvertASCIItoUTF16 eng1(in); |
113 | 0 | uint32_t i; |
114 | 0 | uint32_t res[256]; |
115 | 0 | int32_t curr; |
116 | 0 |
|
117 | 0 | for (i = 0, curr = 0; |
118 | 0 | curr != NS_LINEBREAKER_NEED_MORE_TEXT && i < 256; |
119 | 0 | i++) { |
120 | 0 | curr = lb->Next(eng1.get(), eng1.Length(), curr); |
121 | 0 | res[i] = curr != NS_LINEBREAKER_NEED_MORE_TEXT ? curr : eng1.Length(); |
122 | 0 | } |
123 | 0 |
|
124 | 0 | return Check(in, out, outlen, i, res); |
125 | 0 | } |
126 | | |
127 | | bool |
128 | | TestASCIIWB(mozilla::intl::WordBreaker *lb, |
129 | | const char* in, |
130 | | const uint32_t* out, uint32_t outlen) |
131 | 0 | { |
132 | 0 | NS_ConvertASCIItoUTF16 eng1(in); |
133 | 0 |
|
134 | 0 | uint32_t i; |
135 | 0 | uint32_t res[256]; |
136 | 0 | int32_t curr = 0; |
137 | 0 |
|
138 | 0 | for (i = 0, curr = lb->NextWord(eng1.get(), eng1.Length(), curr); |
139 | 0 | curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256; |
140 | 0 | curr = lb->NextWord(eng1.get(), eng1.Length(), curr), i++) { |
141 | 0 | res [i] = curr != NS_WORDBREAKER_NEED_MORE_TEXT ? curr : eng1.Length(); |
142 | 0 | } |
143 | 0 |
|
144 | 0 | return Check(in, out, outlen, i, res); |
145 | 0 | } |
146 | | |
147 | | TEST(LineBreak, LineBreaker) |
148 | 0 | { |
149 | 0 | RefPtr<mozilla::intl::LineBreaker> t = mozilla::intl::LineBreaker::Create(); |
150 | 0 |
|
151 | 0 | ASSERT_TRUE(t); |
152 | 0 |
|
153 | 0 | ASSERT_TRUE(TestASCIILB(t, teng1, lexp1, sizeof(lexp1) / sizeof(uint32_t))); |
154 | 0 | ASSERT_TRUE(TestASCIILB(t, teng2, lexp2, sizeof(lexp2) / sizeof(uint32_t))); |
155 | 0 | ASSERT_TRUE(TestASCIILB(t, teng3, lexp3, sizeof(lexp3) / sizeof(uint32_t))); |
156 | 0 | } |
157 | | |
158 | | TEST(LineBreak, WordBreaker) |
159 | 0 | { |
160 | 0 | RefPtr<mozilla::intl::WordBreaker> t = mozilla::intl::WordBreaker::Create(); |
161 | 0 | ASSERT_TRUE(t); |
162 | 0 |
|
163 | 0 | ASSERT_TRUE(TestASCIIWB(t, teng1, wexp1, sizeof(wexp1) / sizeof(uint32_t))); |
164 | 0 | ASSERT_TRUE(TestASCIIWB(t, teng2, wexp2, sizeof(wexp2) / sizeof(uint32_t))); |
165 | 0 | ASSERT_TRUE(TestASCIIWB(t, teng3, wexp3, sizeof(wexp3) / sizeof(uint32_t))); |
166 | 0 | } |
167 | | |
// Text fragments used by the word-break usage tests below; consecutive
// fragments are treated as adjacent pieces of one text. Both the characters
// and the pointer table itself are read-only.
//
// NOTE(review): WordBreakUsage expects a blank at offset 9 of fragment 2, but
// in the literal as written here offset 9 is 'n' -- confirm the spacing inside
// the fragments against the offsets the tests use.
//                         012345678901234
static const char wb0[] = "T";
static const char wb1[] = "h";
static const char wb2[] = "is is a int";
static const char wb3[] = "ernationali";
static const char wb4[] = "zation work.";

static const char* const wb[] = { wb0, wb1, wb2, wb3, wb4 };
176 | | |
177 | | void |
178 | | TestPrintWordWithBreak() |
179 | 0 | { |
180 | 0 | uint32_t numOfFragment = sizeof(wb) / sizeof(char*); |
181 | 0 | RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create(); |
182 | 0 |
|
183 | 0 | nsAutoString result; |
184 | 0 |
|
185 | 0 | for (uint32_t i = 0; i < numOfFragment; i++) { |
186 | 0 | NS_ConvertASCIItoUTF16 fragText(wb[i]); |
187 | 0 |
|
188 | 0 | int32_t cur = 0; |
189 | 0 | cur = wbk->NextWord(fragText.get(), fragText.Length(), cur); |
190 | 0 | uint32_t start = 0; |
191 | 0 | for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) { |
192 | 0 | result.Append(Substring(fragText, start, cur - start)); |
193 | 0 | result.Append('^'); |
194 | 0 | start = (cur >= 0 ? cur : cur - start); |
195 | 0 | cur = wbk->NextWord(fragText.get(), fragText.Length(), cur); |
196 | 0 | } |
197 | 0 |
|
198 | 0 | result.Append(Substring(fragText, fragText.Length() - start)); |
199 | 0 |
|
200 | 0 | if (i != numOfFragment - 1) { |
201 | 0 | NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]); |
202 | 0 |
|
203 | 0 | bool canBreak = true; |
204 | 0 | canBreak = wbk->BreakInBetween(fragText.get(), |
205 | 0 | fragText.Length(), |
206 | 0 | nextFragText.get(), |
207 | 0 | nextFragText.Length()); |
208 | 0 | if (canBreak) { |
209 | 0 | result.Append('^'); |
210 | 0 | } |
211 | 0 | fragText.Assign(nextFragText); |
212 | 0 | } |
213 | 0 | } |
214 | 0 | ASSERT_STREQ("is^ ^is^ ^a^ ^ is a intzation^ ^work^ation work.", |
215 | 0 | NS_ConvertUTF16toUTF8(result).get()); |
216 | 0 | } |
217 | | |
218 | | void |
219 | | TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset, |
220 | | const char* expected) |
221 | 0 | { |
222 | 0 | uint32_t numOfFragment = sizeof(wb) / sizeof(char*); |
223 | 0 | RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create(); |
224 | 0 |
|
225 | 0 | NS_ConvertASCIItoUTF16 fragText(wb[fragN]); |
226 | 0 |
|
227 | 0 | mozilla::intl::WordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset); |
228 | 0 |
|
229 | 0 | bool canBreak; |
230 | 0 | nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin)); |
231 | 0 |
|
232 | 0 | if ((uint32_t)fragText.Length() == res.mEnd) { |
233 | 0 | // if we hit the end of the fragment |
234 | 0 | nsAutoString curFragText = fragText; |
235 | 0 | for(uint32_t p = fragN +1; p < numOfFragment ;p++) |
236 | 0 | { |
237 | 0 | NS_ConvertASCIItoUTF16 nextFragText(wb[p]); |
238 | 0 | canBreak = wbk->BreakInBetween(curFragText.get(), |
239 | 0 | curFragText.Length(), |
240 | 0 | nextFragText.get(), |
241 | 0 | nextFragText.Length()); |
242 | 0 | if (canBreak) { |
243 | 0 | break; |
244 | 0 | } |
245 | 0 | mozilla::intl::WordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(), |
246 | 0 | 0); |
247 | 0 |
|
248 | 0 | result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin)); |
249 | 0 |
|
250 | 0 | if ((uint32_t)nextFragText.Length() != r.mEnd) { |
251 | 0 | break; |
252 | 0 | } |
253 | 0 | nextFragText.Assign(curFragText); |
254 | 0 | } |
255 | 0 | } |
256 | 0 |
|
257 | 0 | if (0 == res.mBegin) { |
258 | 0 | // if we hit the beginning of the fragment |
259 | 0 | nsAutoString curFragText = fragText; |
260 | 0 | for (uint32_t p = fragN; p > 0; p--) { |
261 | 0 | NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]); |
262 | 0 | canBreak = wbk->BreakInBetween(prevFragText.get(), |
263 | 0 | prevFragText.Length(), |
264 | 0 | curFragText.get(), |
265 | 0 | curFragText.Length()); |
266 | 0 | if (canBreak) { |
267 | 0 | break; |
268 | 0 | } |
269 | 0 | mozilla::intl::WordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(), |
270 | 0 | prevFragText.Length()); |
271 | 0 |
|
272 | 0 | result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0); |
273 | 0 |
|
274 | 0 | if (0 != r.mBegin) { |
275 | 0 | break; |
276 | 0 | } |
277 | 0 | prevFragText.Assign(curFragText); |
278 | 0 | } |
279 | 0 | } |
280 | 0 |
|
281 | 0 | ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get()) |
282 | 0 | << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")"; |
283 | 0 | } |
284 | | |
285 | | TEST(LineBreak, WordBreakUsage) |
286 | 0 | { |
287 | 0 | TestPrintWordWithBreak(); |
288 | 0 | TestFindWordBreakFromPosition(0, 0, "This"); |
289 | 0 | TestFindWordBreakFromPosition(1, 0, "his"); |
290 | 0 | TestFindWordBreakFromPosition(2, 0, "is"); |
291 | 0 | TestFindWordBreakFromPosition(2, 1, "is"); |
292 | 0 | TestFindWordBreakFromPosition(2, 9, " "); |
293 | 0 | TestFindWordBreakFromPosition(2, 10, "internationalization"); |
294 | 0 | TestFindWordBreakFromPosition(3, 4, "ernationalization"); |
295 | 0 | TestFindWordBreakFromPosition(3, 8, "ernationalization"); |
296 | 0 | TestFindWordBreakFromPosition(4, 6, " "); |
297 | 0 | TestFindWordBreakFromPosition(4, 7, "work"); |
298 | 0 | } |
299 | | |