/src/aspell/modules/tokenizer/basic.cpp

Source

// This file is part of The New Aspell
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
// version 2.0 or 2.1.  You should have received a copy of the LGPL
// license along with this library if you did not you can find
// it at http://www.gnu.org/.

#include "tokenizer.hpp"
#include "convert.hpp"
#include "speller.hpp"


namespace acommon {

  // Tokenizer subclass that declares a single member of its own,
  // advance(); its definition appears later in this file.
  class TokenizerBasic : public Tokenizer {
  public:
    // Steps to the next word.  Returns false when no further word is
    // found, true otherwise.
    bool advance();
  };

  bool TokenizerBasic::advance() {
    word_begin = word_end;
    begin_pos = end_pos;
    FilterChar * cur = word_begin;
    unsigned int cur_pos = begin_pos;
    word.clear();

    // skip spaces (non-word characters)
    while (*cur != 0 &&
     !(is_word(*cur)
       || (is_begin(*cur) && is_word(cur[1])))) 
    {
      cur_pos += cur->width;
      ++cur;
    }

    if (*cur == 0) return false;

    word_begin = cur;
    begin_pos = cur_pos;

    if (is_begin(*cur) && is_word(cur[1]))
    {
      cur_pos += cur->width;
      ++cur;
    }

    while (is_word(*cur) || 
     (is_middle(*cur) && 
      cur > word_begin && is_word(cur[-1]) &&
      is_word(cur[1]) )) 
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    if (is_end(*cur))
    {
      word.append(*cur);
      cur_pos += cur->width;
      ++cur;
    }

    word.append('\0');
    word_end = cur;
    end_pos = cur_pos;

    return true;
  }
#undef increment__

  // Allocates a TokenizerBasic on the heap, passes it to
  // speller->setup_tokenizer(), and returns the pointer wrapped in a
  // PosibErr.  Nothing here deletes the object.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * const created = new TokenizerBasic();

    speller->setup_tokenizer(created);

    return created;
  }

}

Line	Count	Source
1
2		// This file is part of The New Aspell
3		// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4		// version 2.0 or 2.1. You should have received a copy of the LGPL
5		// license along with this library if you did not you can find
6		// it at http://www.gnu.org/.
7
8		#include "tokenizer.hpp"
9		#include "convert.hpp"
10		#include "speller.hpp"
11
12
13		namespace acommon {
14
15		class TokenizerBasic : public Tokenizer
16		{
17		public:
18		bool advance();
19		};
20
21	34.0k	bool TokenizerBasic::advance() {
22	34.0k	word_begin = word_end;
23	34.0k	begin_pos = end_pos;
24	34.0k	FilterChar * cur = word_begin;
25	34.0k	unsigned int cur_pos = begin_pos;
26	34.0k	word.clear();
27
28		// skip spaces (non-word characters)
29	1.62M	while (*cur != 0 &&
30	1.62M	!(is_word(*cur)
31	1.59M	\|\| (is_begin(*cur) && is_word(cur[1]))))
32	1.59M	{
33	1.59M	cur_pos += cur->width;
34	1.59M	++cur;
35	1.59M	}
36
37	34.0k	if (*cur == 0) return false;
38
39	33.1k	word_begin = cur;
40	33.1k	begin_pos = cur_pos;
41
42	33.1k	if (is_begin(*cur) && is_word(cur[1]))
43	25	{
44	25	cur_pos += cur->width;
45	25	++cur;
46	25	}
47
48	11.0M	while (is_word(*cur) \|\|
49	568k	(is_middle(*cur) &&
50	534k	cur > word_begin && is_word(cur[-1]) &&
51	534k	is_word(cur[1]) ))
52	11.0M	{
53	11.0M	word.append(*cur);
54	11.0M	cur_pos += cur->width;
55	11.0M	++cur;
56	11.0M	}
57
58	33.1k	if (is_end(*cur))
59	267	{
60	267	word.append(*cur);
61	267	cur_pos += cur->width;
62	267	++cur;
63	267	}
64
65	33.1k	word.append('\0');
66	33.1k	word_end = cur;
67	33.1k	end_pos = cur_pos;
68
69	33.1k	return true;
70	34.0k	}
71		#undef increment__
72
73		PosibErr<Tokenizer > new_tokenizer(Speller speller)
74	909	{
75	909	Tokenizer * tok = new TokenizerBasic();
76	909	speller->setup_tokenizer(tok);
77	909	return tok;
78	909	}
79
80		}

Coverage Report

Created: 2026-04-29 06:41