Coverage Report

Created: 2025-12-31 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccutil/bitvector.h
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        bitvector.h
3
// Description: Class replacement for BITVECTOR.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2011, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20
#define TESSERACT_CCUTIL_BITVECTOR_H_
21
22
#include <tesseract/export.h>
23
24
#include <cassert>
25
#include <cstdint> // for uint8_t
26
#include <cstdio>
27
#include <vector>  // for std::vector
28
29
namespace tesseract {
30
31
// Trivial class to encapsulate a fixed-length array of bits, with
32
// Serialize/DeSerialize. Replaces the old macros.
33
class TESS_API BitVector {
34
public:
35
  // Fast lookup table to get the first least significant set bit in a byte.
36
  // For zero, the table has 255, but since it is a special case, most code
37
  // that uses this table will check for zero before looking up lsb_index_.
38
  static const uint8_t lsb_index_[256];
39
  // Fast lookup table to get the residual bits after zeroing the least
40
  // significant set bit in a byte.
41
  static const uint8_t lsb_eroded_[256];
42
  // Fast lookup table to give the number of set bits in a byte.
43
  static const int hamming_table_[256];
44
45
0
  BitVector() = default;
46
  // Initializes the array to length * false.
47
0
  explicit BitVector(int length) : bit_size_(length), array_(WordLength()) {
48
0
  }
49
0
  BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) {
50
0
  }
51
  BitVector &operator=(const BitVector &src);
52
0
  ~BitVector() = default;
53
54
  // Initializes the array to length * false.
55
  void Init(int length);
56
57
0
  int empty() const {
58
0
    return bit_size_ == 0;
59
0
  }
60
61
  // Returns the number of bits that are accessible in the vector.
62
0
  int size() const {
63
0
    return bit_size_;
64
0
  }
65
66
  // Writes to the given file. Returns false in case of error.
67
  bool Serialize(FILE *fp) const;
68
  // Reads from the given file. Returns false in case of error.
69
  // If swap is true, assumes a big/little-endian swap is needed.
70
  bool DeSerialize(bool swap, FILE *fp);
71
72
  void SetAllFalse();
73
  void SetAllTrue();
74
75
  // Accessors to set/reset/get bits.
76
  // The range of index is [0, size()-1].
77
  // There is debug-only bounds checking.
78
0
  void SetBit(int index) {
79
0
    array_[WordIndex(index)] |= BitMask(index);
80
0
  }
81
0
  void ResetBit(int index) {
82
0
    array_[WordIndex(index)] &= ~BitMask(index);
83
0
  }
84
0
  void SetValue(int index, bool value) {
85
0
    if (value) {
86
0
      SetBit(index);
87
0
    } else {
88
0
      ResetBit(index);
89
0
    }
90
0
  }
91
0
  bool At(int index) const {
92
0
    return (array_[WordIndex(index)] & BitMask(index)) != 0;
93
0
  }
94
0
  bool operator[](int index) const {
95
0
    return (array_[WordIndex(index)] & BitMask(index)) != 0;
96
0
  }
97
98
  // Returns the index of the next set bit after the given index.
99
  // Useful for quickly iterating through the set bits in a sparse vector.
100
  int NextSetBit(int prev_bit) const;
101
102
  // Returns the number of set bits in the vector.
103
  int NumSetBits() const;
104
105
  // Logical in-place operations on whole bit vectors. Tries to do something
106
  // sensible if they aren't the same size, but they should be really.
107
  void operator|=(const BitVector &other);
108
  void operator&=(const BitVector &other);
109
  void operator^=(const BitVector &other);
110
  // Set subtraction *this = v1 - v2.
111
  void SetSubtract(const BitVector &v1, const BitVector &v2);
112
113
private:
114
  // Allocates memory for a vector of the given length.
115
  void Alloc(int length);
116
117
  // Computes the index to array_ for the given index, with debug range
118
  // checking.
119
0
  int WordIndex(int index) const {
120
0
    assert(0 <= index && index < bit_size_);
121
0
    return index / kBitFactor;
122
0
  }
123
  // Returns a mask to select the appropriate bit for the given index.
124
0
  uint32_t BitMask(int index) const {
125
0
    return 1 << (index & (kBitFactor - 1));
126
0
  }
127
  // Returns the number of array elements needed to represent the current
128
  // bit_size_.
129
0
  int WordLength() const {
130
0
    return (bit_size_ + kBitFactor - 1) / kBitFactor;
131
0
  }
132
  // Returns the number of bytes consumed by the array_.
133
0
  int ByteLength() const {
134
0
    return WordLength() * sizeof(array_[0]);
135
0
  }
136
137
  // Number of bits in this BitVector.
138
  int32_t bit_size_ = 0;
139
  // Array of words used to pack the bits.
140
  // Bits are stored little-endian by uint32_t word, ie by word first and then
141
  // starting with the least significant bit in each word.
142
  std::vector<uint32_t> array_;
143
  // Number of bits in an array_ element.
144
  static const int kBitFactor = sizeof(array_[0]) * 8;
145
};
146
147
} // namespace tesseract.
148
149
#endif // TESSERACT_CCUTIL_BITVECTOR_H_