Coverage Report

Created: 2023-12-08 06:59

/src/aspell/common/tokenizer.hpp
Line
Count
Source (jump to first uncovered line)
1
// This file is part of The New Aspell
2
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
3
// version 2.0 or 2.1.  You should have received a copy of the LGPL
4
// license along with this library if you did not you can find
5
// it at http://www.gnu.org/.
6
7
#ifndef ACOMMON_TOKENIZER__HPP
8
#define ACOMMON_TOKENIZER__HPP
9
10
#include "char_vector.hpp"
11
#include "filter_char.hpp"
12
#include "filter_char_vector.hpp"
13
14
namespace acommon {
15
16
  class Convert;
17
  class Speller;
18
  class Config;
19
20
  class Tokenizer { // abstract base class: advance() below is pure virtual
21
22
  public:
23
    Tokenizer();
24
    virtual ~Tokenizer();
25
26
    FilterChar * word_begin; // NOTE(review): presumably the start of the current word -- confirm
27
    FilterChar * word_end; // NOTE(review): presumably the end of the current word -- confirm
28
    FilterChar * end; // NOTE(review): presumably the end of the input given to reset() -- confirm
29
    
30
    CharVector word; // this word is in the final encoded form
31
    unsigned int begin_pos; // positions (unsigned integers, not actual pointers) back to the original word
32
    unsigned int end_pos;
33
    
34
    // The string passed in _must_ have a null character
35
    // at stop - 1 (i.e. stop must be one past the end).
36
    void reset (FilterChar * in, FilterChar * stop);
37
0
    bool at_end() const {return word_begin == word_end;} // true when word_begin and word_end are equal
38
    
39
    virtual bool advance() = 0; // returns false if there is nothing left
40
41
    bool is_begin(unsigned char c) const // reads the 'begin' flag of char_type_[c]
42
2.58M
      {return char_type_[c].begin;}
43
    bool is_middle(unsigned char c) const // reads the 'middle' flag of char_type_[c]
44
37.3k
      {return char_type_[c].middle;}
45
    bool is_end(unsigned char c) const // reads the 'end' flag of char_type_[c]
46
27.6k
      {return char_type_[c].end;}
47
    bool is_word(unsigned char c) const // reads the 'word' flag of char_type_[c]
48
19.0M
      {return char_type_[c].word;}
49
50
  public: // but don't use
51
    // The speller class is expected to fill these members in
52
    struct CharType { // per-character flags returned by is_begin/is_middle/is_end/is_word
53
      bool begin;
54
      bool middle;
55
      bool end;
56
      bool word;
57
178k
      CharType() : begin(false), middle(false), end(false), word(false) {} // every flag starts out false
58
    };
59
    
60
    CharType char_type_[256]; // indexed by the unsigned char passed to the is_*() accessors
61
    Convert * conv_; // NOTE(review): not used in this header; presumably the converter producing 'word' -- confirm
62
    FilterCharVector buf_; // NOTE(review): not used in this header; purpose not visible here
63
  };
64
65
  // Returns a new tokenizer and sets it up with the given speller
66
  // class.  NOTE(review): the result is wrapped in PosibErr, presumably so that a failure can be reported; ownership of the returned pointer is not stated here -- confirm.
67
68
  PosibErr<Tokenizer *> new_tokenizer(Speller *);
69
70
}
71
72
#endif