Coverage Report

Created: 2026-06-15 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aspell/modules/tokenizer/basic.cpp
Line
Count
Source
1
2
// This file is part of The New Aspell
3
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4
// version 2.0 or 2.1.  You should have received a copy of the LGPL
5
// license along with this library if you did not you can find
6
// it at http://www.gnu.org/.
7
8
#include "tokenizer.hpp"
9
#include "convert.hpp"
10
#include "speller.hpp"
11
12
13
namespace acommon {
14
15
  class TokenizerBasic : public Tokenizer
16
  {
17
  public:
18
    bool advance();
19
  };
20
21
31.7k
  bool TokenizerBasic::advance() {
22
31.7k
    word_begin = word_end;
23
31.7k
    begin_pos = end_pos;
24
31.7k
    FilterChar * cur = word_begin;
25
31.7k
    unsigned int cur_pos = begin_pos;
26
31.7k
    word.clear();
27
28
    // skip spaces (non-word characters)
29
2.80M
    while (*cur != 0 &&
30
2.80M
     !(is_word(*cur)
31
2.77M
       || (is_begin(*cur) && is_word(cur[1])))) 
32
2.77M
    {
33
2.77M
      cur_pos += cur->width;
34
2.77M
      ++cur;
35
2.77M
    }
36
37
31.7k
    if (*cur == 0) return false;
38
39
30.8k
    word_begin = cur;
40
30.8k
    begin_pos = cur_pos;
41
42
30.8k
    if (is_begin(*cur) && is_word(cur[1]))
43
92
    {
44
92
      cur_pos += cur->width;
45
92
      ++cur;
46
92
    }
47
48
8.80M
    while (is_word(*cur) || 
49
576k
     (is_middle(*cur) && 
50
545k
      cur > word_begin && is_word(cur[-1]) &&
51
545k
      is_word(cur[1]) )) 
52
8.77M
    {
53
8.77M
      word.append(*cur);
54
8.77M
      cur_pos += cur->width;
55
8.77M
      ++cur;
56
8.77M
    }
57
58
30.8k
    if (is_end(*cur))
59
440
    {
60
440
      word.append(*cur);
61
440
      cur_pos += cur->width;
62
440
      ++cur;
63
440
    }
64
65
30.8k
    word.append('\0');
66
30.8k
    word_end = cur;
67
30.8k
    end_pos = cur_pos;
68
69
30.8k
    return true;
70
31.7k
  }
71
#undef increment__
72
73
  // Allocates a TokenizerBasic, passes it to speller->setup_tokenizer(),
  // and returns the new object wrapped in a PosibErr.
  //
  // speller is dereferenced unconditionally, so it must not be null.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * created = new TokenizerBasic();

    speller->setup_tokenizer(created);

    return created;
  }
79
80
}