/src/aspell/modules/tokenizer/basic.cpp

Source

// This file is part of The New Aspell
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
// version 2.0 or 2.1.  You should have received a copy of the LGPL
// license along with this library if you did not you can find
// it at http://www.gnu.org/.

#include "tokenizer.hpp"
#include "convert.hpp"
#include "speller.hpp"


namespace acommon {

  // Tokenizer implementation that supplies advance(), the routine that
  // locates the next word in the buffer (see TokenizerBasic::advance in
  // this file).  All state it works on (word_begin, word_end, begin_pos,
  // end_pos, word) and the character classifiers (is_word, is_begin,
  // is_middle, is_end) are not declared here; presumably they are
  // inherited from Tokenizer -- confirm against tokenizer.hpp.
  class TokenizerBasic : public Tokenizer
  {
  public:
    // Move to the next token; returns false when no further word exists.
    bool advance();
  };

  bool TokenizerBasic::advance() {
    word_begin = word_end;
    begin_pos = end_pos;
    FilterChar * cur = word_begin;
    unsigned int cur_pos = begin_pos;
    word.clear();

    // skip spaces (non-word characters)
    while (*cur != 0 &&
     !(is_word(*cur)
       || (is_begin(*cur) && is_word(cur[1])))) 
    {
      cur_pos += cur->width;
      ++cur;
    }

    if (*cur == 0) return false;

    word_begin = cur;
    begin_pos = cur_pos;

    if (is_begin(*cur) && is_word(cur[1]))
    {
      cur_pos += cur->width;
      ++cur;
    }

    while (is_word(*cur) || 
     (is_middle(*cur) && 
      cur > word_begin && is_word(cur[-1]) &&
      is_word(cur[1]) )) 
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    if (is_end(*cur))
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    word.append('\0');
    word_end = cur;
    end_pos = cur_pos;

    return true;
  }
#undef increment__

  // Factory for the basic tokenizer.
  //
  // Allocates a TokenizerBasic with new, passes it to the given speller's
  // setup_tokenizer(), and returns the pointer.  Nothing here frees the
  // object, so the caller presumably takes ownership -- confirm against
  // the call sites.  `speller` is dereferenced unconditionally and must
  // not be null.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * basic = new TokenizerBasic();

    // Let the speller prepare the tokenizer before it is handed out.
    speller->setup_tokenizer(basic);

    return basic;
  }

}

Line	Count	Source
1
2		// This file is part of The New Aspell
3		// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4		// version 2.0 or 2.1. You should have received a copy of the LGPL
5		// license along with this library if you did not you can find
6		// it at http://www.gnu.org/.
7
8		#include "tokenizer.hpp"
9		#include "convert.hpp"
10		#include "speller.hpp"
11
12
13		namespace acommon {
14
15		class TokenizerBasic : public Tokenizer
16		{
17		public:
18		bool advance();
19		};
20
21	25.4k	bool TokenizerBasic::advance() {
22	25.4k	word_begin = word_end;
23	25.4k	begin_pos = end_pos;
24	25.4k	FilterChar * cur = word_begin;
25	25.4k	unsigned int cur_pos = begin_pos;
26	25.4k	word.clear();
27
28		// skip spaces (non-word characters)
29	1.68M	while (*cur != 0 &&
30	1.68M	!(is_word(*cur)
31	1.65M	|| (is_begin(*cur) && is_word(cur[1]))))
32	1.65M	{
33	1.65M	cur_pos += cur->width;
34	1.65M	++cur;
35	1.65M	}
36
37	25.4k	if (*cur == 0) return false;
38
39	24.6k	word_begin = cur;
40	24.6k	begin_pos = cur_pos;
41
42	24.6k	if (is_begin(*cur) && is_word(cur[1]))
43	63	{
44	63	cur_pos += cur->width;
45	63	++cur;
46	63	}
47
48	9.56M	while (is_word(*cur) ||
49	150k	(is_middle(*cur) &&
50	126k	cur > word_begin && is_word(cur[-1]) &&
51	126k	is_word(cur[1]) ))
52	9.54M	{
53	9.54M	word.append(*cur);
54	9.54M	cur_pos += cur->width;
55	9.54M	++cur;
56	9.54M	}
57
58	24.6k	if (is_end(*cur))
59	36	{
60	36	word.append(*cur);
61	36	cur_pos += cur->width;
62	36	++cur;
63	36	}
64
65	24.6k	word.append('\0');
66	24.6k	word_end = cur;
67	24.6k	end_pos = cur_pos;
68
69	24.6k	return true;
70	25.4k	}
71		#undef increment__
72
73		PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
74	789	{
75	789	Tokenizer * tok = new TokenizerBasic();
76	789	speller->setup_tokenizer(tok);
77	789	return tok;
78	789	}
79
80		}

Coverage Report

Created: 2026-03-26 06:12