/src/tesseract/src/ccutil/bitvector.h
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: bitvector.h |
3 | | // Description: Class replacement for BITVECTOR. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2011, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_CCUTIL_BITVECTOR_H_ |
20 | | #define TESSERACT_CCUTIL_BITVECTOR_H_ |
21 | | |
22 | | #include <tesseract/export.h> |
23 | | |
24 | | #include <cassert> |
25 | | #include <cstdint> // for uint8_t |
26 | | #include <cstdio> |
27 | | #include <vector> // for std::vector |
28 | | |
29 | | namespace tesseract { |
30 | | |
31 | | // Trivial class to encapsulate a fixed-length array of bits, with |
32 | | // Serialize/DeSerialize. Replaces the old macros. |
33 | | class TESS_API BitVector { |
34 | | public: |
35 | | // Fast lookup table to get the first least significant set bit in a byte. |
36 | | // For zero, the table has 255, but since it is a special case, most code |
37 | | // that uses this table will check for zero before looking up lsb_index_. |
38 | | static const uint8_t lsb_index_[256]; |
39 | | // Fast lookup table to get the residual bits after zeroing the least |
40 | | // significant set bit in a byte. |
41 | | static const uint8_t lsb_eroded_[256]; |
42 | | // Fast lookup table to give the number of set bits in a byte. |
43 | | static const int hamming_table_[256]; |
44 | | |
45 | 0 | BitVector() = default; |
46 | | // Initializes the array to length * false. |
47 | 0 | explicit BitVector(int length) : bit_size_(length), array_(WordLength()) { |
48 | 0 | } |
49 | 0 | BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) { |
50 | 0 | } |
51 | | BitVector &operator=(const BitVector &src); |
52 | 0 | ~BitVector() = default; |
53 | | |
54 | | // Initializes the array to length * false. |
55 | | void Init(int length); |
56 | | |
57 | 0 | int empty() const { |
58 | 0 | return bit_size_ == 0; |
59 | 0 | } |
60 | | |
61 | | // Returns the number of bits that are accessible in the vector. |
62 | 0 | int size() const { |
63 | 0 | return bit_size_; |
64 | 0 | } |
65 | | |
66 | | // Writes to the given file. Returns false in case of error. |
67 | | bool Serialize(FILE *fp) const; |
68 | | // Reads from the given file. Returns false in case of error. |
69 | | // If swap is true, assumes a big/little-endian swap is needed. |
70 | | bool DeSerialize(bool swap, FILE *fp); |
71 | | |
72 | | void SetAllFalse(); |
73 | | void SetAllTrue(); |
74 | | |
75 | | // Accessors to set/reset/get bits. |
76 | | // The range of index is [0, size()-1]. |
77 | | // There is debug-only bounds checking. |
78 | 0 | void SetBit(int index) { |
79 | 0 | array_[WordIndex(index)] |= BitMask(index); |
80 | 0 | } |
81 | 0 | void ResetBit(int index) { |
82 | 0 | array_[WordIndex(index)] &= ~BitMask(index); |
83 | 0 | } |
84 | 0 | void SetValue(int index, bool value) { |
85 | 0 | if (value) { |
86 | 0 | SetBit(index); |
87 | 0 | } else { |
88 | 0 | ResetBit(index); |
89 | 0 | } |
90 | 0 | } |
91 | 0 | bool At(int index) const { |
92 | 0 | return (array_[WordIndex(index)] & BitMask(index)) != 0; |
93 | 0 | } |
94 | 0 | bool operator[](int index) const { |
95 | 0 | return (array_[WordIndex(index)] & BitMask(index)) != 0; |
96 | 0 | } |
97 | | |
98 | | // Returns the index of the next set bit after the given index. |
99 | | // Useful for quickly iterating through the set bits in a sparse vector. |
100 | | int NextSetBit(int prev_bit) const; |
101 | | |
102 | | // Returns the number of set bits in the vector. |
103 | | int NumSetBits() const; |
104 | | |
105 | | // Logical in-place operations on whole bit vectors. Tries to do something |
106 | | // sensible if they aren't the same size, but they should be really. |
107 | | void operator|=(const BitVector &other); |
108 | | void operator&=(const BitVector &other); |
109 | | void operator^=(const BitVector &other); |
110 | | // Set subtraction *this = v1 - v2. |
111 | | void SetSubtract(const BitVector &v1, const BitVector &v2); |
112 | | |
113 | | private: |
114 | | // Allocates memory for a vector of the given length. |
115 | | void Alloc(int length); |
116 | | |
117 | | // Computes the index to array_ for the given index, with debug range |
118 | | // checking. |
119 | 0 | int WordIndex(int index) const { |
120 | 0 | assert(0 <= index && index < bit_size_); |
121 | 0 | return index / kBitFactor; |
122 | 0 | } |
123 | | // Returns a mask to select the appropriate bit for the given index. |
124 | 0 | uint32_t BitMask(int index) const { |
125 | 0 | return 1 << (index & (kBitFactor - 1)); |
126 | 0 | } |
127 | | // Returns the number of array elements needed to represent the current |
128 | | // bit_size_. |
129 | 0 | int WordLength() const { |
130 | 0 | return (bit_size_ + kBitFactor - 1) / kBitFactor; |
131 | 0 | } |
132 | | // Returns the number of bytes consumed by the array_. |
133 | 0 | int ByteLength() const { |
134 | 0 | return WordLength() * sizeof(array_[0]); |
135 | 0 | } |
136 | | |
137 | | // Number of bits in this BitVector. |
138 | | int32_t bit_size_ = 0; |
139 | | // Array of words used to pack the bits. |
140 | | // Bits are stored little-endian by uint32_t word, ie by word first and then |
141 | | // starting with the least significant bit in each word. |
142 | | std::vector<uint32_t> array_; |
143 | | // Number of bits in an array_ element. |
144 | | static const int kBitFactor = sizeof(array_[0]) * 8; |
145 | | }; |
146 | | |
147 | | } // namespace tesseract. |
148 | | |
149 | | #endif // TESSERACT_CCUTIL_BITVECTOR_H_ |