/src/openbabel/include/openbabel/bitvec.h
Line | Count | Source |
1 | | /********************************************************************** |
2 | | bitvec.h - Vector of bits. |
3 | | |
4 | | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 | | Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison |
6 | | |
7 | | This file is part of the Open Babel project. |
8 | | For more information, see <http://openbabel.org/> |
9 | | |
10 | | This program is free software; you can redistribute it and/or modify |
11 | | it under the terms of the GNU General Public License as published by |
12 | | the Free Software Foundation version 2 of the License. |
13 | | |
14 | | This program is distributed in the hope that it will be useful, |
15 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | GNU General Public License for more details. |
18 | | ***********************************************************************/ |
19 | | |
20 | | #ifndef OB_BITVEC_H |
21 | | #define OB_BITVEC_H |
22 | | |
23 | | #include <openbabel/babelconfig.h> |
24 | | |
25 | | #include <vector> |
26 | | #include <string> |
27 | | |
28 | | #if defined(_MSC_VER) && _MSC_VER <= 1600 |
29 | | // Assuming 32bit integer |
30 | | typedef unsigned uint32_t; |
31 | | #else |
32 | | #include <inttypes.h> |
33 | | #endif |
34 | | |
35 | | // Use uint32_t |
36 | 10.6k | #define SETWORD 32 |
37 | | // SETWORD = 2 ^ WORDROLL |
38 | 727M | #define WORDROLL 5 |
39 | | // WORDMASK = SETWORD - 1 |
40 | 691M | #define WORDMASK 31 |
41 | | |
42 | 5.77M | #define WORDSIZE_OF_BITSIZE( bit_size ) ( ( bit_size >> WORDROLL ) + (( bit_size & WORDMASK ) ? 1 : 0) ) |
43 | | |
44 | | #ifndef STARTWORDS |
45 | 6.67M | #define STARTWORDS 10 |
46 | | #endif // STARTWORDS |
47 | | |
48 | | namespace OpenBabel |
49 | | { |
50 | | /// A speed-optimized vector of bits |
51 | | /** This class implements a fast vector of bits |
52 | | using internally a vector of fixed size unsigned ints (uint32_t). |
53 | | Any bits which are out of reach of the current size |
54 | | are considered to be zero. |
55 | | Streamlined, corrected and documented by kshepherd1@users.sourceforge.net |
56 | | */ |
57 | | class OBERROR OBBitVec |
58 | | { |
59 | | public: |
60 | | typedef std::vector<uint32_t> word_vector; |
61 | | |
62 | | private: |
63 | | /// The number of <b>words</b> currently stored ( NOT bit count ) |
64 | | size_t _size; //was unsigned |
65 | | /// A vector of words used to store the bit values |
66 | | word_vector _set; |
67 | | |
68 | | public: |
69 | | /// Construct a bit vector of the default size |
70 | | /** Construct a bit vector of STARTWORDS size, |
71 | | cleared to all zero bits. |
72 | | */ |
73 | | OBBitVec() |
74 | 6.67M | :_set(STARTWORDS, 0) |
75 | 6.67M | { _size = _set.size(); } |
76 | | /// Construct a bit vector of maxbits bits |
77 | | /** Construct a bit vector with a size in bits |
78 | | of \p size_in_bits rounded up to the nearest word |
79 | | and cleared to all zero bits. |
80 | | \param[in] size_in_bits The number of bits for which to reserve space |
81 | | */ |
82 | | OBBitVec(unsigned size_in_bits) |
83 | 5.62M | :_set(WORDSIZE_OF_BITSIZE(size_in_bits), 0) |
84 | 5.62M | { _size = _set.size(); } |
85 | | /// Copy constructor (result has same number of bits) |
86 | | /** Construct a bit vector which is an exact |
87 | | duplicate of \p bv. |
88 | | \param[in] bv The other bit vector to copy to this |
89 | | */ |
90 | | OBBitVec(const OBBitVec & bv) |
91 | 1.46M | :_size(0) |
92 | 1.46M | { (*this) = bv; } |
93 | | /// Set the \p bit_offset 'th bit to 1 |
94 | | void SetBitOn(unsigned bit_offset); |
95 | | /// Set the \p bit_offset 'th bit to 0 |
96 | | void SetBitOff(unsigned bit_offset); |
97 | | /// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 1 |
98 | | void SetRangeOn(unsigned lo_bit_offset, unsigned hi_bit_offset); |
99 | | /// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 0 |
100 | | void SetRangeOff(unsigned lo_bit_offset, unsigned hi_bit_offset); |
101 | | /// Reduce the size of the vector by or-ing the excess bits over the start |
102 | | void Fold(unsigned new_bit_size); |
103 | | /// Find the first true bit at or after \p bit_offset |
104 | | /** Searches the vector for the first true value, starting at the \p bit_offset 'th bit |
105 | | \param[in] bit_offset the first bit to consider |
106 | | \return the bit offset of the first true bit at or after \p bit_offset, or -1 if there is none |
107 | | */ |
108 | | int FirstBit(unsigned bit_offset = 0) const |
109 | 23.7k | { |
110 | 23.7k | return (BitIsSet(bit_offset) ? 0 : NextBit(bit_offset)); |
111 | 23.7k | } |
112 | | /// Find the next true bit after \p last_bit_offset |
113 | | int NextBit(int last_bit_offset) const; |
114 | | /// Return the bit offset of the last bit (for iterating) i.e. -1 |
115 | 35.0M | int EndBit() const { return -1; } |
116 | | /// Return the number of words ( NOT the number of bits ). |
117 | 42.1G | size_t GetSize() const { return(_size); } |
118 | | /// Return the number of bits which are set to 1 in the vector |
119 | | unsigned CountBits() const; |
120 | | |
121 | | /// Are there no bits set to 1 in this vector? |
122 | | bool IsEmpty() const; |
123 | | /// Reserve space for \p size_in_bits bits |
124 | | /** Reserve space for \p size_in_bits bits rounded up |
125 | | \param[in] size_in_bits the number of bits |
126 | | \return true if enlargement was necessary, false otherwise |
127 | | */ |
128 | | bool Resize(unsigned size_in_bits) |
129 | 158k | { |
130 | 158k | return ResizeWords( WORDSIZE_OF_BITSIZE(size_in_bits) ); |
131 | 158k | } |
132 | | /// Reserve space for \p size_in_words words |
133 | | /** Reserve space for \p size_in_words words |
134 | | \param[in] size_in_words the number of words |
135 | | \return true if enlargement was necessary, false otherwise |
136 | | */ |
137 | | bool ResizeWords(unsigned size_in_words) |
138 | 2.17M | { |
139 | 2.17M | if (size_in_words <= _size) |
140 | 60.8k | return false; |
141 | 2.11M | _set.resize(size_in_words, 0); // increase the vector with zeroed bits |
142 | 2.11M | _size = _set.size(); |
143 | 2.11M | return true; |
144 | 2.17M | } |
145 | | /// Asks if the \p bit_offset 'th bit is set |
146 | | /** Is the \p bit_offset 'th bit set ? |
147 | | \param[in] bit_offset a zero based offset into the bit vector |
148 | | \return true if it is set, false otherwise |
149 | | */ |
150 | | bool BitIsSet(unsigned bit_offset) const |
151 | 526M | { |
152 | 526M | bool rtn = false; |
153 | 526M | unsigned word_offset = bit_offset >> WORDROLL; |
154 | 526M | if (word_offset < GetSize()) |
155 | 525M | { |
156 | 525M | bit_offset &= WORDMASK; |
157 | 525M | rtn = (( _set[word_offset] >> bit_offset ) & 1); |
158 | 525M | } |
159 | 526M | return rtn; |
160 | 526M | } |
161 | | /// Sets the bits listed as bit offsets |
162 | | void FromVecInt(const std::vector<int> & bit_offsets); |
163 | | /// Sets the bits listed as a string of integers |
164 | | void FromString(const std::string & line, int bits); |
165 | | /// List the offsets of the bits which are set |
166 | | void ToVecInt(std::vector<int> & bit_offsets) const; |
167 | | /// Set all bits to zero |
168 | | void Clear(); |
169 | | /// Inverts every bit in the vector |
170 | | /** Inverts the entire vector. |
171 | | Note that this may give unexpected results, as the vector |
172 | | can be considered to end in an arbitrary number of zero bits. |
173 | | */ |
174 | | void Negate() |
175 | 0 | { |
176 | 0 | for (word_vector::iterator wx = _set.begin(), wy = _set.end(); wx != wy; ++wx) |
177 | 0 | * wx = ~(* wx); |
178 | 0 | } |
179 | | /// Return a copy of the internal vector of words, at the end of \p vec |
180 | | /** Copy the internal word vector. |
181 | | The copy is appended to \p vec. |
182 | | \param[out] vec a vector of words to which to append the data |
183 | | */ |
184 | | void GetWords(word_vector & vec) |
185 | 0 | { |
186 | 0 | vec.insert(vec.end(), _set.begin(),_set.end()); |
187 | 0 | } |
188 | | |
189 | | /// Assignment operator |
190 | | OBBitVec & operator= (const OBBitVec & bv); |
191 | | /// And-equals operator |
192 | | OBBitVec & operator&= (const OBBitVec & bv); |
193 | | /// Or-equals operator |
194 | | OBBitVec & operator|= (const OBBitVec & bv); |
195 | | /// Or-equals operator for integer |
196 | | /** Or the bit at offset \p bit_offset with 1 |
197 | | */ |
198 | | OBBitVec & operator|= (int bit_offset) |
199 | 44.4M | { |
200 | 44.4M | SetBitOn(bit_offset); |
201 | 44.4M | return(*this); |
202 | 44.4M | } |
203 | | /// Exclusive-or-equals operator |
204 | | OBBitVec & operator^= (const OBBitVec & bv); |
205 | | /// Minus-equals operator |
206 | | OBBitVec & operator-= (const OBBitVec & bv); |
207 | | /// Plus-equals operator |
208 | | OBBitVec & operator+= (const OBBitVec & bv); |
209 | | /// Asks if the \p bit_offset 'th bit is set |
210 | | /** Is the \p bit_offset 'th bit set ? |
211 | | \param[in] bit_offset a zero based offset into the bit vector |
212 | | \return true if it is set, false otherwise |
213 | | */ |
214 | | bool operator[] (int bit_offset) const |
215 | 82.5M | { return BitIsSet(bit_offset); } |
216 | | |
217 | | /// Or operator |
218 | | friend OBERROR OBBitVec operator| (const OBBitVec & bv1, const OBBitVec & bv2); |
219 | | /// And operator |
220 | | friend OBERROR OBBitVec operator& (const OBBitVec & bv1,const OBBitVec & bv2); |
221 | | /// Exclusive-or operator |
222 | | friend OBERROR OBBitVec operator^ (const OBBitVec & bv1,const OBBitVec & bv2); |
223 | | /// Minus operator |
224 | | friend OBERROR OBBitVec operator- (const OBBitVec & bv1,const OBBitVec & bv2); |
225 | | /// Equivalency operator |
226 | | friend OBERROR bool operator== (const OBBitVec & bv1,const OBBitVec & bv2); |
227 | | /// Smaller-than operator |
228 | | friend OBERROR bool operator< (const OBBitVec & bv1, const OBBitVec & bv2); |
229 | | |
230 | | /// Input from a stream |
231 | | friend OBERROR std::istream& operator>> ( std::istream & is, OBBitVec & bv ); |
232 | | /// Output to a stream |
233 | | friend OBERROR std::ostream& operator<< ( std::ostream & os, const OBBitVec & bv ) ; |
234 | | }; |
235 | | |
236 | | /// The Tanimoto coefficient, which may be regarded as the proportion of the "on-bits" which are shared. |
237 | | OBERROR double Tanimoto(const OBBitVec & bv1, const OBBitVec & bv2); |
238 | | |
239 | | } // end namespace OpenBabel |
240 | | |
241 | | #endif // OB_BITVEC_H |
242 | | |
243 | | //! \file bitvec.h |
244 | | //! \brief Fast and efficient bitstring class |