Coverage Report

Created: 2026-03-26 06:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aspell/modules/tokenizer/basic.cpp
Line
Count
Source
1
2
// This file is part of The New Aspell
3
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4
// version 2.0 or 2.1.  You should have received a copy of the LGPL
5
// license along with this library if you did not you can find
6
// it at http://www.gnu.org/.
7
8
#include "tokenizer.hpp"
9
#include "convert.hpp"
10
#include "speller.hpp"
11
12
13
namespace acommon {
14
15
  class TokenizerBasic : public Tokenizer
16
  {
17
  public:
18
    bool advance();
19
  };
20
21
25.4k
  bool TokenizerBasic::advance() {
22
25.4k
    word_begin = word_end;
23
25.4k
    begin_pos = end_pos;
24
25.4k
    FilterChar * cur = word_begin;
25
25.4k
    unsigned int cur_pos = begin_pos;
26
25.4k
    word.clear();
27
28
    // skip spaces (non-word characters)
29
1.68M
    while (*cur != 0 &&
30
1.68M
     !(is_word(*cur)
31
1.65M
       || (is_begin(*cur) && is_word(cur[1])))) 
32
1.65M
    {
33
1.65M
      cur_pos += cur->width;
34
1.65M
      ++cur;
35
1.65M
    }
36
37
25.4k
    if (*cur == 0) return false;
38
39
24.6k
    word_begin = cur;
40
24.6k
    begin_pos = cur_pos;
41
42
24.6k
    if (is_begin(*cur) && is_word(cur[1]))
43
63
    {
44
63
      cur_pos += cur->width;
45
63
      ++cur;
46
63
    }
47
48
9.56M
    while (is_word(*cur) || 
49
150k
     (is_middle(*cur) && 
50
126k
      cur > word_begin && is_word(cur[-1]) &&
51
126k
      is_word(cur[1]) )) 
52
9.54M
    {
53
9.54M
      word.append(*cur);
54
9.54M
      cur_pos += cur->width;
55
9.54M
      ++cur;
56
9.54M
    }
57
58
24.6k
    if (is_end(*cur))
59
36
    {
60
36
      word.append(*cur);
61
36
      cur_pos += cur->width;
62
36
      ++cur;
63
36
    }
64
65
24.6k
    word.append('\0');
66
24.6k
    word_end = cur;
67
24.6k
    end_pos = cur_pos;
68
69
24.6k
    return true;
70
25.4k
  }
71
#undef increment__
72
73
  // Factory for the basic tokenizer.
  //
  // Heap-allocates a TokenizerBasic, hands it to speller->setup_tokenizer()
  // for configuration, and returns the pointer wrapped in a PosibErr.
  // speller is dereferenced unconditionally, so it must not be null.
  // NOTE(review): the returned object is allocated with new and not freed
  // here; presumably the caller takes ownership -- confirm at call sites.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
  {
    Tokenizer * tokenizer = new TokenizerBasic();

    speller->setup_tokenizer(tokenizer);

    return tokenizer;
  }
79
80
}