/src/mozilla-central/intl/lwbrk/gtest/TestLineBreak.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <inttypes.h>
#include <stdio.h>
#include "nsXPCOM.h"
#include "nsIComponentManager.h"
#include "nsISupports.h"
#include "nsServiceManagerUtils.h"
#include "nsString.h"
#include "gtest/gtest.h"

#include "mozilla/intl/LineBreaker.h"
#include "mozilla/intl/WordBreaker.h"

// First ASCII sample sentence. It is fed to both TestASCIILB (with lexp1) and
// TestASCIIWB (with wexp1). The two comment lines inside the declaration are a
// column ruler (tens digit above, units digit below) for reading offsets.
static char teng1[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "This is a test to test(reasonable) line    break. This 0.01123 = 45 x 48.";

// Offsets TestASCIILB is expected to collect for teng1. TestASCIILB stores
// the text length in place of the end-of-text sentinel, so the final entry
// (73) is the length of teng1.
static uint32_t lexp1[] = {
  4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73
};

// Offsets TestASCIIWB is expected to collect for teng1. TestASCIIWB stops
// before storing anything for the end-of-text sentinel, so no text-length
// entry is appended here.
static uint32_t wexp1[] = {
  4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63,
  64,65,67,68,69,70,72
};

// Second ASCII sample: nested parentheses and digits/punctuation with no
// surrounding spaces. Used with lexp2 (TestASCIILB) and wexp2 (TestASCIIWB).
// The comment lines inside the declaration are a column ruler for offsets.
static char teng2[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "()((reasonab(l)e) line  break. .01123=45x48.";

// Offsets TestASCIILB is expected to collect for teng2; the final entry (44)
// is the text length that TestASCIILB records at end of text.
static uint32_t lexp2[] = {
  17,22,23,30,44
};

// Offsets TestASCIIWB is expected to collect for teng2 (no end-of-text entry).
static uint32_t wexp2[] = {
  4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43
};

// Third ASCII sample: contains an apostrophe, a space before a closing
// parenthesis and a run of trailing periods. Used with lexp3 (TestASCIILB)
// and wexp3 (TestASCIIWB). The comment lines inside the declaration are a
// column ruler for offsets.
static char teng3[] =
//          1         2         3         4         5         6         7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
 "It's a test to test(ronae ) line break....";

// Offsets TestASCIILB is expected to collect for teng3; the final entry (42)
// is the text length that TestASCIILB records at end of text.
static uint32_t lexp3[] = {
  4,6,11,14,25,27,32,42
};

// Offsets TestASCIIWB is expected to collect for teng3 (no end-of-text entry).
static uint32_t wexp3[] = {
  2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38
};

// Column rulers that Check() prints directly under the input string when a
// comparison fails: ruler1 carries the tens digits, ruler2 the units digits.
static char ruler1[] =
"          1         2         3         4         5         6         7  ";
static char ruler2[] =
"0123456789012345678901234567890123456789012345678901234567890123456789012";

/**
 * Compares the offsets produced by a breaker with the expected offsets and
 * prints a diagnostic report to stdout when they differ.
 *
 * @param in      The ASCII input text; only printed in the failure report.
 * @param out     Expected offsets.
 * @param outlen  Number of entries in |out|.
 * @param i       Number of valid entries in |res|.
 * @param res     Offsets actually produced; only the first |i| are read.
 * @return true when |i| equals |outlen| and every produced offset equals the
 *         expected offset at the same index, false otherwise.
 */
bool
Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i,
      uint32_t res[256])
{
  bool ok = true;

  // All values printed here are uint32_t, so use the matching PRIu32
  // conversion instead of the signed "%d".
  if (i != outlen) {
    ok = false;
    printf("WARNING!!! return size wrong, expect %" PRIu32
           " but got %" PRIu32 " \n",
           outlen, i);
  }

  for (uint32_t j = 0; j < i; j++) {
    if (j >= outlen) {
      // More results than expectations: every surplus entry is an error.
      ok = false;
      printf("[%" PRIu32 "] additional %" PRIu32 "\n", j, res[j]);
      continue;
    }
    if (res[j] != out[j]) {
      ok = false;
      printf("[%" PRIu32 "] expect %" PRIu32 " but got %" PRIu32 "\n",
             j, out[j], res[j]);
    }
  }

  if (!ok) {
    // Dump the input above the column rulers so offsets can be read off by
    // eye, followed by both offset lists.
    printf("string  = \n%s\n", in);
    printf("%s\n", ruler1);
    printf("%s\n", ruler2);

    printf("Expect = \n");
    for (uint32_t j = 0; j < outlen; j++) {
      printf("%" PRIu32 ",", out[j]);
    }

    printf("\nResult = \n");
    for (uint32_t j = 0; j < i; j++) {
      printf("%" PRIu32 ",", res[j]);
    }
    printf("\n");
  }

  return ok;
}

/**
 * Walks |in| with the given line breaker, collecting every offset returned by
 * Next() (at most 256), and compares the collected offsets with |out|.
 *
 * When Next() returns NS_LINEBREAKER_NEED_MORE_TEXT the text length is stored
 * in its place, so the collected list ends with the length of the text.
 *
 * @param lb      Line breaker under test.
 * @param in      ASCII input text.
 * @param out     Expected offsets.
 * @param outlen  Number of entries in |out|.
 * @return The result of Check() on the collected offsets.
 */
bool
TestASCIILB(mozilla::intl::LineBreaker *lb,
            const char* in,
            const uint32_t* out, uint32_t outlen)
{
  NS_ConvertASCIItoUTF16 text(in);
  uint32_t found[256];
  uint32_t count = 0;
  int32_t pos = 0;

  while (pos != NS_LINEBREAKER_NEED_MORE_TEXT && count < 256) {
    pos = lb->Next(text.get(), text.Length(), pos);
    if (pos == NS_LINEBREAKER_NEED_MORE_TEXT) {
      // End of text: record the text length instead of the sentinel.
      found[count] = text.Length();
    } else {
      found[count] = pos;
    }
    ++count;
  }

  return Check(in, out, outlen, count, found);
}

/**
 * Walks |in| with the given word breaker, collecting every offset returned by
 * NextWord() (at most 256), and compares the collected offsets with |out|.
 *
 * The loop stops as soon as NextWord() returns NS_WORDBREAKER_NEED_MORE_TEXT
 * and stores nothing for that call, so the collected list holds only real
 * break offsets and has no end-of-text entry.
 *
 * @param lb      Word breaker under test.
 * @param in      ASCII input text.
 * @param out     Expected offsets.
 * @param outlen  Number of entries in |out|.
 * @return The result of Check() on the collected offsets.
 */
bool
TestASCIIWB(mozilla::intl::WordBreaker *lb,
            const char* in,
            const uint32_t* out, uint32_t outlen)
{
  NS_ConvertASCIItoUTF16 eng1(in);

  uint32_t i = 0;
  uint32_t res[256];
  int32_t curr = lb->NextWord(eng1.get(), eng1.Length(), 0);

  while (curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256) {
    // The loop condition guarantees |curr| is a real offset here, so it is
    // stored directly; the previous fallback to the text length could never
    // be taken.
    res[i] = static_cast<uint32_t>(curr);
    curr = lb->NextWord(eng1.get(), eng1.Length(), curr);
    i++;
  }

  return Check(in, out, outlen, i, res);
}

// Runs the line breaker over the three ASCII samples and checks the reported
// offsets against the lexp* tables.
TEST(LineBreak, LineBreaker)
{
  RefPtr<mozilla::intl::LineBreaker> breaker =
    mozilla::intl::LineBreaker::Create();

  ASSERT_TRUE(breaker);

  ASSERT_TRUE(TestASCIILB(breaker, teng1, lexp1,
                          sizeof(lexp1) / sizeof(lexp1[0])));
  ASSERT_TRUE(TestASCIILB(breaker, teng2, lexp2,
                          sizeof(lexp2) / sizeof(lexp2[0])));
  ASSERT_TRUE(TestASCIILB(breaker, teng3, lexp3,
                          sizeof(lexp3) / sizeof(lexp3[0])));
}

// Runs the word breaker over the three ASCII samples and checks the reported
// offsets against the wexp* tables.
TEST(LineBreak, WordBreaker)
{
  RefPtr<mozilla::intl::WordBreaker> breaker =
    mozilla::intl::WordBreaker::Create();
  ASSERT_TRUE(breaker);

  ASSERT_TRUE(TestASCIIWB(breaker, teng1, wexp1,
                          sizeof(wexp1) / sizeof(wexp1[0])));
  ASSERT_TRUE(TestASCIIWB(breaker, teng2, wexp2,
                          sizeof(wexp2) / sizeof(wexp2[0])));
  ASSERT_TRUE(TestASCIIWB(breaker, teng3, wexp3,
                          sizeof(wexp3) / sizeof(wexp3[0])));
}

// Text fragments used by the WordBreakUsage test. Concatenated in order they
// read "This   is a internationalization work."; the fragment boundaries fall
// inside words ("T|h|is", "int|ernationali|zation"), so the tests have to
// join pieces across fragments. The comment line below is a column ruler for
// offsets within a fragment.
//                         012345678901234
static const char wb0[] = "T";
static const char wb1[] = "h";
static const char wb2[] = "is   is a int";
static const char wb3[] = "ernationali";
static const char wb4[] = "zation work.";

// The fragments in order; indexed by fragment number in the tests.
static const char* wb[] = { wb0, wb1, wb2, wb3, wb4 };

void
TestPrintWordWithBreak()
{
  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();

  nsAutoString result;

  for (uint32_t i = 0; i < numOfFragment; i++) {
    NS_ConvertASCIItoUTF16 fragText(wb[i]);

    int32_t cur = 0;
    cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
    uint32_t start = 0;
    for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) {
      result.Append(Substring(fragText, start, cur - start));
      result.Append('^');
      start = (cur >= 0 ? cur : cur - start);
      cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
    }

    result.Append(Substring(fragText, fragText.Length() - start));

    if (i != numOfFragment - 1) {
      NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]);

      bool canBreak = true;
      canBreak = wbk->BreakInBetween(fragText.get(),
                                     fragText.Length(),
                                     nextFragText.get(),
                                     nextFragText.Length());
      if (canBreak) {
        result.Append('^');
      }
      fragText.Assign(nextFragText);
    }
  }
  ASSERT_STREQ("is^   ^is^ ^a^ ^  is a intzation^ ^work^ation work.",
               NS_ConvertUTF16toUTF8(result).get());
}

void
TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
                              const char* expected)
{
  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();

  NS_ConvertASCIItoUTF16 fragText(wb[fragN]);

  mozilla::intl::WordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset);

  bool canBreak;
  nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin));

  if ((uint32_t)fragText.Length() == res.mEnd) {
    // if we hit the end of the fragment
    nsAutoString curFragText = fragText;
    for(uint32_t  p = fragN +1; p < numOfFragment ;p++)
    {
      NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
      canBreak = wbk->BreakInBetween(curFragText.get(),
                                     curFragText.Length(),
                                     nextFragText.get(),
                                     nextFragText.Length());
      if (canBreak) {
        break;
      }
      mozilla::intl::WordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(),
                                    0);

      result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin));

      if ((uint32_t)nextFragText.Length() != r.mEnd) {
        break;
      }
      nextFragText.Assign(curFragText);
    }
  }

  if (0 == res.mBegin) {
    // if we hit the beginning of the fragment
    nsAutoString curFragText = fragText;
    for (uint32_t p = fragN; p > 0; p--) {
      NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]);
      canBreak = wbk->BreakInBetween(prevFragText.get(),
                                     prevFragText.Length(),
                                     curFragText.get(),
                                     curFragText.Length());
      if (canBreak) {
        break;
      }
      mozilla::intl::WordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(),
                                    prevFragText.Length());

      result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);

      if (0 != r.mBegin) {
        break;
      }
      prevFragText.Assign(curFragText);
    }
  }

  ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get())
    << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")";
}

// Exercises the fragment-based helpers: first the '^'-marked dump of all
// fragments, then a series of word lookups at given (fragment, offset) pairs.
TEST(LineBreak, WordBreakUsage)
{
  TestPrintWordWithBreak();

  // One lookup per row, run in the order listed.
  static const struct {
    uint32_t fragN;
    uint32_t offset;
    const char* expected;
  } kLookups[] = {
    { 0, 0, "This" },
    { 1, 0, "his" },
    { 2, 0, "is" },
    { 2, 1, "is" },
    { 2, 9, " " },
    { 2, 10, "internationalization" },
    { 3, 4, "ernationalization" },
    { 3, 8, "ernationalization" },
    { 4, 6, " " },
    { 4, 7, "work" },
  };

  for (const auto& lookup : kLookups) {
    TestFindWordBreakFromPosition(lookup.fragN, lookup.offset,
                                  lookup.expected);
  }
}


Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -- */
2		/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#include <stdio.h>
8		#include "nsXPCOM.h"
9		#include "nsIComponentManager.h"
10		#include "nsISupports.h"
11		#include "nsServiceManagerUtils.h"
12		#include "nsString.h"
13		#include "gtest/gtest.h"
14
15		#include "mozilla/intl/LineBreaker.h"
16		#include "mozilla/intl/WordBreaker.h"
17
18		static char teng1[] =
19		// 1 2 3 4 5 6 7
20		//01234567890123456789012345678901234567890123456789012345678901234567890123456789
21		"This is a test to test(reasonable) line break. This 0.01123 = 45 x 48.";
22
23		static uint32_t lexp1[] = {
24		4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73
25		};
26
27		static uint32_t wexp1[] = {
28		4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63,
29		64,65,67,68,69,70,72
30		};
31
32		static char teng2[] =
33		// 1 2 3 4 5 6 7
34		//01234567890123456789012345678901234567890123456789012345678901234567890123456789
35		"()((reasonab(l)e) line break. .01123=45x48.";
36
37		static uint32_t lexp2[] = {
38		17,22,23,30,44
39		};
40
41		static uint32_t wexp2[] = {
42		4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43
43		};
44
45		static char teng3[] =
46		// 1 2 3 4 5 6 7
47		//01234567890123456789012345678901234567890123456789012345678901234567890123456789
48		"It's a test to test(ronae ) line break....";
49
50		static uint32_t lexp3[] = {
51		4,6,11,14,25,27,32,42
52		};
53
54		static uint32_t wexp3[] = {
55		2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38
56		};
57
58		static char ruler1[] =
59		" 1 2 3 4 5 6 7 ";
60		static char ruler2[] =
61		"0123456789012345678901234567890123456789012345678901234567890123456789012";
62
63		bool
64		Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i,
65		uint32_t res[256])
66	0	{
67	0	bool ok = true;
68	0
69	0	if (i != outlen) {
70	0	ok = false;
71	0	printf("WARNING!!! return size wrong, expect %d but got %d \n",
72	0	outlen, i);
73	0	}
74	0
75	0	for (uint32_t j = 0; j < i; j++) {
76	0	if (j < outlen) {
77	0	if (res[j] != out[j]) {
78	0	ok = false;
79	0	printf("[%d] expect %d but got %d\n", j, out[j], res[j]);
80	0	}
81	0	} else {
82	0	ok = false;
83	0	printf("[%d] additional %d\n", j, res[j]);
84	0	}
85	0	}
86	0
87	0	if (!ok) {
88	0	printf("string = \n%s\n", in);
89	0	printf("%s\n", ruler1);
90	0	printf("%s\n", ruler2);
91	0
92	0	printf("Expect = \n");
93	0	for (uint32_t j = 0; j < outlen; j++) {
94	0	printf("%d,", out[j]);
95	0	}
96	0
97	0	printf("\nResult = \n");
98	0	for (uint32_t j = 0; j < i; j++) {
99	0	printf("%d,", res[j]);
100	0	}
101	0	printf("\n");
102	0	}
103	0
104	0	return ok;
105	0	}
106
107		bool
108		TestASCIILB(mozilla::intl::LineBreaker *lb,
109		const char* in,
110		const uint32_t* out, uint32_t outlen)
111	0	{
112	0	NS_ConvertASCIItoUTF16 eng1(in);
113	0	uint32_t i;
114	0	uint32_t res[256];
115	0	int32_t curr;
116	0
117	0	for (i = 0, curr = 0;
118	0	curr != NS_LINEBREAKER_NEED_MORE_TEXT && i < 256;
119	0	i++) {
120	0	curr = lb->Next(eng1.get(), eng1.Length(), curr);
121	0	res[i] = curr != NS_LINEBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
122	0	}
123	0
124	0	return Check(in, out, outlen, i, res);
125	0	}
126
127		bool
128		TestASCIIWB(mozilla::intl::WordBreaker *lb,
129		const char* in,
130		const uint32_t* out, uint32_t outlen)
131	0	{
132	0	NS_ConvertASCIItoUTF16 eng1(in);
133	0
134	0	uint32_t i;
135	0	uint32_t res[256];
136	0	int32_t curr = 0;
137	0
138	0	for (i = 0, curr = lb->NextWord(eng1.get(), eng1.Length(), curr);
139	0	curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256;
140	0	curr = lb->NextWord(eng1.get(), eng1.Length(), curr), i++) {
141	0	res [i] = curr != NS_WORDBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
142	0	}
143	0
144	0	return Check(in, out, outlen, i, res);
145	0	}
146
147		TEST(LineBreak, LineBreaker)
148	0	{
149	0	RefPtr<mozilla::intl::LineBreaker> t = mozilla::intl::LineBreaker::Create();
150	0
151	0	ASSERT_TRUE(t);
152	0
153	0	ASSERT_TRUE(TestASCIILB(t, teng1, lexp1, sizeof(lexp1) / sizeof(uint32_t)));
154	0	ASSERT_TRUE(TestASCIILB(t, teng2, lexp2, sizeof(lexp2) / sizeof(uint32_t)));
155	0	ASSERT_TRUE(TestASCIILB(t, teng3, lexp3, sizeof(lexp3) / sizeof(uint32_t)));
156	0	}
157
158		TEST(LineBreak, WordBreaker)
159	0	{
160	0	RefPtr<mozilla::intl::WordBreaker> t = mozilla::intl::WordBreaker::Create();
161	0	ASSERT_TRUE(t);
162	0
163	0	ASSERT_TRUE(TestASCIIWB(t, teng1, wexp1, sizeof(wexp1) / sizeof(uint32_t)));
164	0	ASSERT_TRUE(TestASCIIWB(t, teng2, wexp2, sizeof(wexp2) / sizeof(uint32_t)));
165	0	ASSERT_TRUE(TestASCIIWB(t, teng3, wexp3, sizeof(wexp3) / sizeof(uint32_t)));
166	0	}
167
168		// 012345678901234
169		static const char wb0[] = "T";
170		static const char wb1[] = "h";
171		static const char wb2[] = "is is a int";
172		static const char wb3[] = "ernationali";
173		static const char wb4[] = "zation work.";
174
175		static const char* wb[] = { wb0, wb1, wb2, wb3, wb4 };
176
177		void
178		TestPrintWordWithBreak()
179	0	{
180	0	uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
181	0	RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
182	0
183	0	nsAutoString result;
184	0
185	0	for (uint32_t i = 0; i < numOfFragment; i++) {
186	0	NS_ConvertASCIItoUTF16 fragText(wb[i]);
187	0
188	0	int32_t cur = 0;
189	0	cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
190	0	uint32_t start = 0;
191	0	for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) {
192	0	result.Append(Substring(fragText, start, cur - start));
193	0	result.Append('^');
194	0	start = (cur >= 0 ? cur : cur - start);
195	0	cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
196	0	}
197	0
198	0	result.Append(Substring(fragText, fragText.Length() - start));
199	0
200	0	if (i != numOfFragment - 1) {
201	0	NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]);
202	0
203	0	bool canBreak = true;
204	0	canBreak = wbk->BreakInBetween(fragText.get(),
205	0	fragText.Length(),
206	0	nextFragText.get(),
207	0	nextFragText.Length());
208	0	if (canBreak) {
209	0	result.Append('^');
210	0	}
211	0	fragText.Assign(nextFragText);
212	0	}
213	0	}
214	0	ASSERT_STREQ("is^ ^is^ ^a^ ^ is a intzation^ ^work^ation work.",
215	0	NS_ConvertUTF16toUTF8(result).get());
216	0	}
217
218		void
219		TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
220		const char* expected)
221	0	{
222	0	uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
223	0	RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
224	0
225	0	NS_ConvertASCIItoUTF16 fragText(wb[fragN]);
226	0
227	0	mozilla::intl::WordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset);
228	0
229	0	bool canBreak;
230	0	nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin));
231	0
232	0	if ((uint32_t)fragText.Length() == res.mEnd) {
233	0	// if we hit the end of the fragment
234	0	nsAutoString curFragText = fragText;
235	0	for(uint32_t p = fragN +1; p < numOfFragment ;p++)
236	0	{
237	0	NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
238	0	canBreak = wbk->BreakInBetween(curFragText.get(),
239	0	curFragText.Length(),
240	0	nextFragText.get(),
241	0	nextFragText.Length());
242	0	if (canBreak) {
243	0	break;
244	0	}
245	0	mozilla::intl::WordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(),
246	0	0);
247	0
248	0	result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin));
249	0
250	0	if ((uint32_t)nextFragText.Length() != r.mEnd) {
251	0	break;
252	0	}
253	0	nextFragText.Assign(curFragText);
254	0	}
255	0	}
256	0
257	0	if (0 == res.mBegin) {
258	0	// if we hit the beginning of the fragment
259	0	nsAutoString curFragText = fragText;
260	0	for (uint32_t p = fragN; p > 0; p--) {
261	0	NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]);
262	0	canBreak = wbk->BreakInBetween(prevFragText.get(),
263	0	prevFragText.Length(),
264	0	curFragText.get(),
265	0	curFragText.Length());
266	0	if (canBreak) {
267	0	break;
268	0	}
269	0	mozilla::intl::WordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(),
270	0	prevFragText.Length());
271	0
272	0	result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
273	0
274	0	if (0 != r.mBegin) {
275	0	break;
276	0	}
277	0	prevFragText.Assign(curFragText);
278	0	}
279	0	}
280	0
281	0	ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get())
282	0	<< "FindWordBreakFromPosition(" << fragN << ", " << offset << ")";
283	0	}
284
285		TEST(LineBreak, WordBreakUsage)
286	0	{
287	0	TestPrintWordWithBreak();
288	0	TestFindWordBreakFromPosition(0, 0, "This");
289	0	TestFindWordBreakFromPosition(1, 0, "his");
290	0	TestFindWordBreakFromPosition(2, 0, "is");
291	0	TestFindWordBreakFromPosition(2, 1, "is");
292	0	TestFindWordBreakFromPosition(2, 9, " ");
293	0	TestFindWordBreakFromPosition(2, 10, "internationalization");
294	0	TestFindWordBreakFromPosition(3, 4, "ernationalization");
295	0	TestFindWordBreakFromPosition(3, 8, "ernationalization");
296	0	TestFindWordBreakFromPosition(4, 6, " ");
297	0	TestFindWordBreakFromPosition(4, 7, "work");
298	0	}
299