/src/aspell/modules/tokenizer/basic.cpp

Source

// This file is part of The New Aspell
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
// version 2.0 or 2.1.  You should have received a copy of the LGPL
// license along with this library if you did not you can find
// it at http://www.gnu.org/.

#include "tokenizer.hpp"
#include "convert.hpp"
#include "speller.hpp"


namespace acommon {

  // Basic word tokenizer.  Publicly derives from Tokenizer and adds no data
  // members of its own; the only member declared here is advance(), whose
  // definition follows later in this file.
  class TokenizerBasic : public Tokenizer
  {
  public:
    // Moves on to the next word.  Returns false when the zero terminator is
    // reached before another word starts, true otherwise.
    // NOTE(review): presumably overrides a virtual declared in Tokenizer --
    // confirm against tokenizer.hpp.
    bool advance();
  };

  bool TokenizerBasic::advance() {
    word_begin = word_end;
    begin_pos = end_pos;
    FilterChar * cur = word_begin;
    unsigned int cur_pos = begin_pos;
    word.clear();

    // skip spaces (non-word characters)
    while (*cur != 0 &&
     !(is_word(*cur)
       || (is_begin(*cur) && is_word(cur[1])))) 
    {
      cur_pos += cur->width;
      ++cur;
    }

    if (*cur == 0) return false;

    word_begin = cur;
    begin_pos = cur_pos;

    if (is_begin(*cur) && is_word(cur[1]))
    {
      cur_pos += cur->width;
      ++cur;
    }

    while (is_word(*cur) || 
     (is_middle(*cur) && 
      cur > word_begin && is_word(cur[-1]) &&
      is_word(cur[1]) )) 
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    if (is_end(*cur))
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    word.append('\0');
    word_end = cur;
    end_pos = cur_pos;

    return true;
  }
// NOTE(review): no increment__ macro is defined anywhere in this file as
// shown, so this #undef looks like a leftover.  Confirm that none of the
// included headers define it before removing the line.
#undef increment__

  // Factory for the basic tokenizer.
  //
  // Allocates a TokenizerBasic with new, hands it to
  // speller->setup_tokenizer() and then returns the pointer.  speller is
  // dereferenced unconditionally, so it must not be null.  Nothing in this
  // function frees the object; the raw pointer is passed back to the caller.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * created = new TokenizerBasic();

    speller->setup_tokenizer(created);

    return created;
  }

}

Line	Count	Source
1
2		// This file is part of The New Aspell
3		// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4		// version 2.0 or 2.1. You should have received a copy of the LGPL
5		// license along with this library if you did not you can find
6		// it at http://www.gnu.org/.
7
8		#include "tokenizer.hpp"
9		#include "convert.hpp"
10		#include "speller.hpp"
11
12
13		namespace acommon {
14
15		class TokenizerBasic : public Tokenizer
16		{
17		public:
18		bool advance();
19		};
20
21	31.7k	bool TokenizerBasic::advance() {
22	31.7k	word_begin = word_end;
23	31.7k	begin_pos = end_pos;
24	31.7k	FilterChar * cur = word_begin;
25	31.7k	unsigned int cur_pos = begin_pos;
26	31.7k	word.clear();
27
28		// skip spaces (non-word characters)
29	2.80M	while (*cur != 0 &&
30	2.80M	!(is_word(*cur)
31	2.77M	|| (is_begin(*cur) && is_word(cur[1]))))
32	2.77M	{
33	2.77M	cur_pos += cur->width;
34	2.77M	++cur;
35	2.77M	}
36
37	31.7k	if (*cur == 0) return false;
38
39	30.8k	word_begin = cur;
40	30.8k	begin_pos = cur_pos;
41
42	30.8k	if (is_begin(*cur) && is_word(cur[1]))
43	92	{
44	92	cur_pos += cur->width;
45	92	++cur;
46	92	}
47
48	8.80M	while (is_word(*cur) ||
49	576k	(is_middle(*cur) &&
50	545k	cur > word_begin && is_word(cur[-1]) &&
51	545k	is_word(cur[1]) ))
52	8.77M	{
53	8.77M	word.append(*cur);
54	8.77M	cur_pos += cur->width;
55	8.77M	++cur;
56	8.77M	}
57
58	30.8k	if (is_end(*cur))
59	440	{
60	440	word.append(*cur);
61	440	cur_pos += cur->width;
62	440	++cur;
63	440	}
64
65	30.8k	word.append('\0');
66	30.8k	word_end = cur;
67	30.8k	end_pos = cur_pos;
68
69	30.8k	return true;
70	31.7k	}
71		#undef increment__
72
73		PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
74	855	{
75	855	Tokenizer * tok = new TokenizerBasic();
76	855	speller->setup_tokenizer(tok);
77	855	return tok;
78	855	}
79
80		}

Coverage Report

Created: 2026-06-15 06:20