Coverage Report

Created: 2026-04-29 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aspell/modules/tokenizer/basic.cpp
Line
Count
Source
1
2
// This file is part of The New Aspell
3
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
4
// version 2.0 or 2.1.  You should have received a copy of the LGPL
5
// license along with this library if you did not you can find
6
// it at http://www.gnu.org/.
7
8
#include "tokenizer.hpp"
9
#include "convert.hpp"
10
#include "speller.hpp"
11
12
13
namespace acommon {
14
15
  // Tokenizer subclass whose only declared member is advance(); the body of
  // advance() is defined out of line in this file.
  class TokenizerBasic : public Tokenizer
16
  {
17
  public:
18
    // Moves to the next token; returns false when no further token is found.
    bool advance();
19
  };
20
21
34.0k
  // Scans forward from the end of the previous token to find the next one.
  //
  // On success the token's character range is stored in word_begin/word_end,
  // its positions in begin_pos/end_pos (accumulated from each FilterChar's
  // width), and a '\0'-terminated copy of its characters in word; the
  // function then returns true.
  //
  // Returns false when the terminating zero character is reached before any
  // token starts.  In that case word has been cleared, word_begin/begin_pos
  // have been set to the previous word_end/end_pos, and word_end/end_pos are
  // left unmodified.
  //
  // NOTE(review): word_begin, word_end, begin_pos, end_pos, word and the
  // is_word/is_begin/is_middle/is_end predicates are not declared in this
  // file; presumably they come from the Tokenizer base class -- confirm.
  bool TokenizerBasic::advance() {
22
34.0k
    // Resume exactly where the previous call stopped.
    word_begin = word_end;
23
34.0k
    begin_pos = end_pos;
24
34.0k
    FilterChar * cur = word_begin;
25
34.0k
    // cur_pos tracks the position of cur by summing the width of every
    // character that is stepped over.
    unsigned int cur_pos = begin_pos;
26
34.0k
    word.clear();
27
28
    // skip spaces (non-word characters): stop at the zero terminator, at a
    // word character, or at a begin character directly followed by a word
    // character
29
1.62M
    while (*cur != 0 &&
30
1.62M
     !(is_word(*cur)
31
1.59M
       || (is_begin(*cur) && is_word(cur[1])))) 
32
1.59M
    {
33
1.59M
      cur_pos += cur->width;
34
1.59M
      ++cur;
35
1.59M
    }
36
37
34.0k
    // Reached the terminator without finding the start of a token.
    if (*cur == 0) return false;
38
39
33.1k
    // The token's range starts here, before any leading begin character.
    word_begin = cur;
40
33.1k
    begin_pos = cur_pos;
41
42
33.1k
    // A begin character directly followed by a word character is stepped
    // over: it lies inside [word_begin, word_end) but is not copied into word.
    if (is_begin(*cur) && is_word(cur[1]))
43
25
    {
44
25
      cur_pos += cur->width;
45
25
      ++cur;
46
25
    }
47
48
11.0M
    // Copy word characters into word.  A middle character is accepted only
    // when it sits between two word characters; the cur > word_begin test is
    // evaluated before cur[-1] is read.
    while (is_word(*cur) || 
49
568k
     (is_middle(*cur) && 
50
534k
      cur > word_begin && is_word(cur[-1]) &&
51
534k
      is_word(cur[1]) )) 
52
11.0M
    {
53
11.0M
      word.append(*cur);
54
11.0M
      cur_pos += cur->width;
55
11.0M
      ++cur;
56
11.0M
    }
57
58
33.1k
    // At most one end character directly after the word is taken as well;
    // unlike the leading begin character it is copied into word.
    if (is_end(*cur))
59
267
    {
60
267
      word.append(*cur);
61
267
      cur_pos += cur->width;
62
267
      ++cur;
63
267
    }
64
65
33.1k
    // Terminate the copied word and record where this token ended, which is
    // also where the next call will start.
    word.append('\0');
66
33.1k
    word_end = cur;
67
33.1k
    end_pos = cur_pos;
68
69
33.1k
    return true;
70
34.0k
  }
71
// NOTE(review): no #define of increment__ is visible in this file, so this
// #undef is presumably a leftover -- confirm that none of the included
// headers defines the macro before removing it.
#undef increment__
72
73
  // Creates a TokenizerBasic, hands it to speller->setup_tokenizer(), and
  // returns the pointer as a PosibErr<Tokenizer *>.
  //
  // speller is dereferenced without a null check.  The tokenizer is
  // allocated with new and is not freed here.
  // NOTE(review): presumably the caller takes ownership of the returned
  // object -- confirm.  Any value returned by setup_tokenizer() is not
  // inspected.
  PosibErr<Tokenizer *> new_tokenizer(Speller * speller)
74
909
  {
75
909
    Tokenizer * tok = new TokenizerBasic();
76
909
    speller->setup_tokenizer(tok);
77
909
    return tok;
78
909
  }
79
80
}