/src/aspell/modules/tokenizer/basic.cpp

Source

// This file is part of The New Aspell
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
// version 2.0 or 2.1.  You should have received a copy of the LGPL
// license along with this library if you did not you can find
// it at http://www.gnu.org/.

#include "tokenizer.hpp"
#include "convert.hpp"
#include "speller.hpp"


namespace acommon {

  // Tokenizer subclass provided by this file.  It declares no data members
  // of its own; the only member it declares is advance(), which is defined
  // out of line further down in this file.
  class TokenizerBasic : public Tokenizer
  {
  public:
    // Moves on to the next word after the current one.  Returns false when
    // the zero terminator of the buffer is reached before another word
    // starts, true otherwise.
    // NOTE(review): presumably overrides a virtual declared in
    // tokenizer.hpp -- confirm against the base class.
    bool advance();
  };

  bool TokenizerBasic::advance() {
    word_begin = word_end;
    begin_pos = end_pos;
    FilterChar * cur = word_begin;
    unsigned int cur_pos = begin_pos;
    word.clear();

    // skip spaces (non-word characters)
    while (*cur != 0 &&
     !(is_word(*cur)
       || (is_begin(*cur) && is_word(cur[1])))) 
    {
      cur_pos += cur->width;
      ++cur;
    }

    if (*cur == 0) return false;

    word_begin = cur;
    begin_pos = cur_pos;

    if (is_begin(*cur) && is_word(cur[1]))
    {
      cur_pos += cur->width;
      ++cur;
    }

    while (is_word(*cur) || 
     (is_middle(*cur) && 
      cur > word_begin && is_word(cur[-1]) &&
      is_word(cur[1]) )) 
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    if (is_end(*cur))
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    word.append('\0');
    word_end = cur;
    end_pos = cur_pos;

    return true;
  }
#undef increment__

  // Factory for the tokenizer defined in this file.
  //
  // Allocates a TokenizerBasic, passes it to speller->setup_tokenizer() and
  // returns the pointer wrapped in a PosibErr.  speller is dereferenced
  // without a null check, so it must be non-null.
  // NOTE(review): nothing here frees the object, so ownership presumably
  // passes to the caller -- confirm at the call sites.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * tokenizer = new TokenizerBasic();

    // Let the speller configure the freshly created tokenizer.
    speller->setup_tokenizer(tokenizer);

    return tokenizer;
  }

}

Line	Count	Source
1
2		// This file is part of The New Aspell
3		// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4		// version 2.0 or 2.1. You should have received a copy of the LGPL
5		// license along with this library if you did not you can find
6		// it at http://www.gnu.org/.
7
8		#include "tokenizer.hpp"
9		#include "convert.hpp"
10		#include "speller.hpp"
11
12
13		namespace acommon {
14
15		class TokenizerBasic : public Tokenizer
16		{
17		public:
18		bool advance();
19		};
20
21	37.6k	bool TokenizerBasic::advance() {
22	37.6k	word_begin = word_end;
23	37.6k	begin_pos = end_pos;
24	37.6k	FilterChar * cur = word_begin;
25	37.6k	unsigned int cur_pos = begin_pos;
26	37.6k	word.clear();
27
28		// skip spaces (non-word characters)
29	2.46M	while (*cur != 0 &&
30	2.46M	!(is_word(*cur)
31	2.42M	|| (is_begin(*cur) && is_word(cur[1]))))
32	2.42M	{
33	2.42M	cur_pos += cur->width;
34	2.42M	++cur;
35	2.42M	}
36
37	37.6k	if (*cur == 0) return false;
38
39	36.5k	word_begin = cur;
40	36.5k	begin_pos = cur_pos;
41
42	36.5k	if (is_begin(*cur) && is_word(cur[1]))
43	14	{
44	14	cur_pos += cur->width;
45	14	++cur;
46	14	}
47
48	14.3M	while (is_word(*cur) ||
49	380k	(is_middle(*cur) &&
50	344k	cur > word_begin && is_word(cur[-1]) &&
51	344k	is_word(cur[1]) ))
52	14.3M	{
53	14.3M	word.append(*cur);
54	14.3M	cur_pos += cur->width;
55	14.3M	++cur;
56	14.3M	}
57
58	36.5k	if (is_end(*cur))
59	1.02k	{
60	1.02k	word.append(*cur);
61	1.02k	cur_pos += cur->width;
62	1.02k	++cur;
63	1.02k	}
64
65	36.5k	word.append('\0');
66	36.5k	word_end = cur;
67	36.5k	end_pos = cur_pos;
68
69	36.5k	return true;
70	37.6k	}
71		#undef increment__
72
73		PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
74	1.09k	{
75	1.09k	Tokenizer * tok = new TokenizerBasic();
76	1.09k	speller->setup_tokenizer(tok);
77	1.09k	return tok;
78	1.09k	}
79
80		}

Coverage Report

Created: 2025-12-14 06:42