/src/tesseract/src/ccutil/bitvector.h

Source
///////////////////////////////////////////////////////////////////////
// File:        bitvector.h
// Description: Class replacement for BITVECTOR.
// Author:      Ray Smith
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCUTIL_BITVECTOR_H_
#define TESSERACT_CCUTIL_BITVECTOR_H_

#include <tesseract/export.h>

#include <cassert>
#include <cstdint> // for uint8_t
#include <cstdio>
#include <vector>  // for std::vector

namespace tesseract {

// Trivial class to encapsulate a fixed-length array of bits, with
// Serialize/DeSerialize. Replaces the old macros.
class TESS_API BitVector {
public:
  // Fast lookup table to get the first least significant set bit in a byte.
  // For zero, the table has 255, but since it is a special case, most code
  // that uses this table will check for zero before looking up lsb_index_.
  static const uint8_t lsb_index_[256];
  // Fast lookup table to get the residual bits after zeroing the least
  // significant set bit in a byte.
  static const uint8_t lsb_eroded_[256];
  // Fast lookup table to give the number of set bits in a byte.
  static const int hamming_table_[256];

  BitVector() = default;
  // Initializes the array to length * false.
  explicit BitVector(int length) : bit_size_(length), array_(WordLength()) {
  }
  BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) {
  }
  BitVector &operator=(const BitVector &src);
  ~BitVector() = default;

  // Initializes the array to length * false.
  void Init(int length);

  int empty() const {
    return bit_size_ == 0;
  }

  // Returns the number of bits that are accessible in the vector.
  int size() const {
    return bit_size_;
  }

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE *fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE *fp);

  void SetAllFalse();
  void SetAllTrue();

  // Accessors to set/reset/get bits.
  // The range of index is [0, size()-1].
  // There is debug-only bounds checking.
  void SetBit(int index) {
    array_[WordIndex(index)] |= BitMask(index);
  }
  void ResetBit(int index) {
    array_[WordIndex(index)] &= ~BitMask(index);
  }
  void SetValue(int index, bool value) {
    if (value) {
      SetBit(index);
    } else {
      ResetBit(index);
    }
  }
  bool At(int index) const {
    return (array_[WordIndex(index)] & BitMask(index)) != 0;
  }
  bool operator[](int index) const {
    return (array_[WordIndex(index)] & BitMask(index)) != 0;
  }

  // Returns the index of the next set bit after the given index.
  // Useful for quickly iterating through the set bits in a sparse vector.
  int NextSetBit(int prev_bit) const;

  // Returns the number of set bits in the vector.
  int NumSetBits() const;

  // Logical in-place operations on whole bit vectors. Tries to do something
  // sensible if they aren't the same size, but they should be really.
  void operator|=(const BitVector &other);
  void operator&=(const BitVector &other);
  void operator^=(const BitVector &other);
  // Set subtraction *this = v1 - v2.
  void SetSubtract(const BitVector &v1, const BitVector &v2);

private:
  // Allocates memory for a vector of the given length.
  void Alloc(int length);

  // Computes the index to array_ for the given index, with debug range
  // checking.
  int WordIndex(int index) const {
    assert(0 <= index && index < bit_size_);
    return index / kBitFactor;
  }
  // Returns a mask to select the appropriate bit for the given index.
  uint32_t BitMask(int index) const {
    return 1 << (index & (kBitFactor - 1));
  }
  // Returns the number of array elements needed to represent the current
  // bit_size_.
  int WordLength() const {
    return (bit_size_ + kBitFactor - 1) / kBitFactor;
  }
  // Returns the number of bytes consumed by the array_.
  int ByteLength() const {
    return WordLength() * sizeof(array_[0]);
  }

  // Number of bits in this BitVector.
  int32_t bit_size_ = 0;
  // Array of words used to pack the bits.
  // Bits are stored little-endian by uint32_t word, ie by word first and then
  // starting with the least significant bit in each word.
  std::vector<uint32_t> array_;
  // Number of bits in an array_ element.
  static const int kBitFactor = sizeof(array_[0]) * 8;
};

} // namespace tesseract.

#endif // TESSERACT_CCUTIL_BITVECTOR_H_

Line	Count	Source
1		///////////////////////////////////////////////////////////////////////
2		// File: bitvector.h
3		// Description: Class replacement for BITVECTOR.
4		// Author: Ray Smith
5		//
6		// (C) Copyright 2011, Google Inc.
7		// Licensed under the Apache License, Version 2.0 (the "License");
8		// you may not use this file except in compliance with the License.
9		// You may obtain a copy of the License at
10		// http://www.apache.org/licenses/LICENSE-2.0
11		// Unless required by applicable law or agreed to in writing, software
12		// distributed under the License is distributed on an "AS IS" BASIS,
13		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		// See the License for the specific language governing permissions and
15		// limitations under the License.
16		//
17		///////////////////////////////////////////////////////////////////////
18
19		#ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20		#define TESSERACT_CCUTIL_BITVECTOR_H_
21
22		#include <tesseract/export.h>
23
24		#include <cassert>
25		#include <cstdint> // for uint8_t
26		#include <cstdio>
27		#include <vector> // for std::vector
28
29		namespace tesseract {
30
31		// Trivial class to encapsulate a fixed-length array of bits, with
32		// Serialize/DeSerialize. Replaces the old macros.
33		class TESS_API BitVector {
34		public:
35		// Fast lookup table to get the first least significant set bit in a byte.
36		// For zero, the table has 255, but since it is a special case, most code
37		// that uses this table will check for zero before looking up lsb_index_.
38		static const uint8_t lsb_index_[256];
39		// Fast lookup table to get the residual bits after zeroing the least
40		// significant set bit in a byte.
41		static const uint8_t lsb_eroded_[256];
42		// Fast lookup table to give the number of set bits in a byte.
43		static const int hamming_table_[256];
44
45	0	BitVector() = default;
46		// Initializes the array to length * false.
47	0	explicit BitVector(int length) : bit_size_(length), array_(WordLength()) {
48	0	}
49	0	BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) {
50	0	}
51		BitVector &operator=(const BitVector &src);
52	0	~BitVector() = default;
53
54		// Initializes the array to length * false.
55		void Init(int length);
56
57	0	int empty() const {
58	0	return bit_size_ == 0;
59	0	}
60
61		// Returns the number of bits that are accessible in the vector.
62	0	int size() const {
63	0	return bit_size_;
64	0	}
65
66		// Writes to the given file. Returns false in case of error.
67		bool Serialize(FILE *fp) const;
68		// Reads from the given file. Returns false in case of error.
69		// If swap is true, assumes a big/little-endian swap is needed.
70		bool DeSerialize(bool swap, FILE *fp);
71
72		void SetAllFalse();
73		void SetAllTrue();
74
75		// Accessors to set/reset/get bits.
76		// The range of index is [0, size()-1].
77		// There is debug-only bounds checking.
78	0	void SetBit(int index) {
79	0	array_[WordIndex(index)] \|= BitMask(index);
80	0	}
81	0	void ResetBit(int index) {
82	0	array_[WordIndex(index)] &= ~BitMask(index);
83	0	}
84	0	void SetValue(int index, bool value) {
85	0	if (value) {
86	0	SetBit(index);
87	0	} else {
88	0	ResetBit(index);
89	0	}
90	0	}
91	0	bool At(int index) const {
92	0	return (array_[WordIndex(index)] & BitMask(index)) != 0;
93	0	}
94	0	bool operator[](int index) const {
95	0	return (array_[WordIndex(index)] & BitMask(index)) != 0;
96	0	}
97
98		// Returns the index of the next set bit after the given index.
99		// Useful for quickly iterating through the set bits in a sparse vector.
100		int NextSetBit(int prev_bit) const;
101
102		// Returns the number of set bits in the vector.
103		int NumSetBits() const;
104
105		// Logical in-place operations on whole bit vectors. Tries to do something
106		// sensible if they aren't the same size, but they should be really.
107		void operator\|=(const BitVector &other);
108		void operator&=(const BitVector &other);
109		void operator^=(const BitVector &other);
110		// Set subtraction *this = v1 - v2.
111		void SetSubtract(const BitVector &v1, const BitVector &v2);
112
113		private:
114		// Allocates memory for a vector of the given length.
115		void Alloc(int length);
116
117		// Computes the index to array_ for the given index, with debug range
118		// checking.
119	0	int WordIndex(int index) const {
120	0	assert(0 <= index && index < bit_size_);
121	0	return index / kBitFactor;
122	0	}
123		// Returns a mask to select the appropriate bit for the given index.
124	0	uint32_t BitMask(int index) const {
125	0	return 1 << (index & (kBitFactor - 1));
126	0	}
127		// Returns the number of array elements needed to represent the current
128		// bit_size_.
129	0	int WordLength() const {
130	0	return (bit_size_ + kBitFactor - 1) / kBitFactor;
131	0	}
132		// Returns the number of bytes consumed by the array_.
133	0	int ByteLength() const {
134	0	return WordLength() * sizeof(array_[0]);
135	0	}
136
137		// Number of bits in this BitVector.
138		int32_t bit_size_ = 0;
139		// Array of words used to pack the bits.
140		// Bits are stored little-endian by uint32_t word, ie by word first and then
141		// starting with the least significant bit in each word.
142		std::vector<uint32_t> array_;
143		// Number of bits in an array_ element.
144		static const int kBitFactor = sizeof(array_[0]) * 8;
145		};
146
147		} // namespace tesseract.
148
149		#endif // TESSERACT_CCUTIL_BITVECTOR_H_

Coverage Report

Created: 2025-12-31 06:25