/src/openbabel/include/openbabel/bitvec.h

Source
/**********************************************************************
bitvec.h - Vector of bits.

Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison

This file is part of the Open Babel project.
For more information, see <http://openbabel.org/>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/

#ifndef OB_BITVEC_H
#define OB_BITVEC_H

#include <openbabel/babelconfig.h>

#include <vector>
#include <string>

#if defined(_MSC_VER) && _MSC_VER <= 1600
  // Assuming 32bit integer
  typedef unsigned uint32_t;
#else
  #include <inttypes.h>
#endif

// Use uint32_t
#define SETWORD 32
// SETWORD = 2 ^ WORDROLL
#define WORDROLL 5
// WORDMASK = SETWORD - 1
#define WORDMASK 31

#define WORDSIZE_OF_BITSIZE( bit_size ) ( ( bit_size >> WORDROLL ) + (( bit_size & WORDMASK ) ? 1 : 0) )

#ifndef STARTWORDS
#define STARTWORDS 10
#endif // STARTWORDS

namespace OpenBabel
  {
  /// A speed-optimized vector of bits
  /** This class implements a fast vector of bits
      using internally a vector of fixed size unsigned ints (uint32_t).
      Any bits which are out of reach of the current size
      are considered to be zero.
      Streamlined, corrected and documented by kshepherd1@users.sourceforge.net
  */
  class OBERROR OBBitVec
    {
    public:
      typedef std::vector<uint32_t> word_vector;

  private:
    /// The number of <b>words</b> currently stored ( NOT bit count )
      size_t _size; //was unsigned
    /// A vector of words used to store the bit values
      word_vector _set;

    public:
    /// Construct a bit vector of the default size
    /** Construct a bit vector of STARTWORDS size,
        cleared to all zero bits.
    */
      OBBitVec()
    :_set(STARTWORDS, 0)
        { _size = _set.size(); }
    /// Construct a bit vector of maxbits bits
    /** Construct a bit vector with a size in bits
        of \p size_in_bits rounded up to the nearest word
      and cleared to all zero bits.
      \param[in]  size_in_bits The number of bits for which to reserve space
    */
      OBBitVec(unsigned size_in_bits)
    :_set(WORDSIZE_OF_BITSIZE(size_in_bits), 0)
        { _size = _set.size(); }
      /// Copy constructor (result has same number of bits)
    /** Construct a bit vector which is an exact
        duplicate of \p bv.
      \param[in]  bv The other bit vector to copy to this
    */
      OBBitVec(const OBBitVec & bv)
    :_size(0)
      { (*this) = bv; }
    /// Set the \p bit_offset 'th bit to 1
      void SetBitOn(unsigned bit_offset);
    /// Set the \p bit_offset 'th bit to 0
      void SetBitOff(unsigned bit_offset);
    /// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 1
      void SetRangeOn(unsigned lo_bit_offset, unsigned hi_bit_offset);
    /// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 0
      void SetRangeOff(unsigned lo_bit_offset, unsigned hi_bit_offset);
    /// Reduce the size of the vector by or-ing the excess bits over the start
      void Fold(unsigned new_bit_size);
      /// Find the first true bit at or after \p bit_offset
      /** Searches the vector for the first true value, starting at the \p bit_offset 'th bit
          \param[in] bit_offset the first bit to consider
      \return the bit offset of the first true bit at or after \p bit_offset, or -1 if there is none
    */
      int FirstBit(unsigned bit_offset = 0) const
        {
          return (BitIsSet(bit_offset) ? 0  : NextBit(bit_offset));
        }
      /// Find the next true bit after \p last_bit_offset
      int NextBit(int last_bit_offset) const;
      /// Return the bit offset of the last bit (for iterating) i.e. -1
      int EndBit() const {  return -1; }
      /// Return the number of words ( NOT the number of bits ).
      size_t GetSize() const    { return(_size);    }
      /// Return the number of bits which are set to 1 in the vector
      unsigned CountBits() const;

    /// Are there no bits set to 1 in this vector?
      bool IsEmpty() const;
      /// Reserve space for \p size_in_bits bits
    /** Reserve space for \p size_in_bits bits rounded up
        \param[in] size_in_bits the number of bits
        \return true if enlargement was necessary, false otherwise
    */
      bool Resize(unsigned size_in_bits)
      {
    return ResizeWords( WORDSIZE_OF_BITSIZE(size_in_bits) );
    }
      /// Reserve space for \p size_in_words words
    /** Reserve space for \p size_in_words words
        \param[in] size_in_words the number of words
        \return true if enlargement was necessary, false otherwise
    */
    bool ResizeWords(unsigned size_in_words)
      {
    if (size_in_words <= _size)
      return false;
    _set.resize(size_in_words, 0); // increase the vector with zeroed bits
    _size = _set.size();
    return true;
    }
      /// Asks if the \p bit_offset 'th bit is set
    /** Is the \p bit_offset 'th bit set ?
          \param[in] bit_offset a zero based offset into the bit vector
      \return true if it is set, false otherwise
    */
      bool BitIsSet(unsigned bit_offset) const
        {
      bool rtn = false;
      unsigned word_offset = bit_offset >> WORDROLL;
      if (word_offset < GetSize())
        {
        bit_offset &= WORDMASK;
        rtn = (( _set[word_offset] >> bit_offset ) & 1);
      }
          return rtn;
        }
      /// Sets the bits listed as bit offsets
    void FromVecInt(const std::vector<int> & bit_offsets);
      /// Sets the bits listed as a string of integers
    void FromString(const std::string & line, int bits);
      /// List the offsets of the bits which are set
    void ToVecInt(std::vector<int> & bit_offsets) const;
    /// Set all bits to zero
      void Clear();
      /// Inverts every bit in the vector
    /** Inverts the entire vector.
        Note that this may give unexpected results, as the vector
      can be considered to end in an arbitrary number of zero bits.
    */
      void Negate()
        {
      for (word_vector::iterator wx = _set.begin(), wy = _set.end(); wx != wy; ++wx)
        * wx = ~(* wx);
        }
      /// Return a copy of the internal vector of words, at the end of \p vec
    /** Copy the internal word vector.
        The copy is appended to \p vec.
      \param[out] vec a vector of words to which to append the data
    */
      void GetWords(word_vector & vec)
        {
    vec.insert(vec.end(), _set.begin(),_set.end());
        }

      /// Assignment operator
      OBBitVec & operator= (const OBBitVec & bv);
      /// And-equals operator
      OBBitVec & operator&= (const OBBitVec & bv);
      /// Or-equals operator
      OBBitVec & operator|= (const OBBitVec & bv);
      /// Or-equals operator for integer
    /** Or the bit at offset \p bit_offset with 1
    */
      OBBitVec & operator|= (int bit_offset)
        {
          SetBitOn(bit_offset);
          return(*this);
        }
      /// Exclusive-or-equals operator
      OBBitVec & operator^= (const OBBitVec & bv);
      /// Minus-equals operator
      OBBitVec & operator-= (const OBBitVec & bv);
      /// Plus-equals operator
      OBBitVec & operator+= (const OBBitVec & bv);
      /// Asks if the \p bit_offset 'th bit is set
    /** Is the \p bit_offset 'th bit set ?
          \param[in] bit_offset a zero based offset into the bit vector
      \return true if it is set, false otherwise
    */
      bool operator[] (int bit_offset) const
        { return BitIsSet(bit_offset); }

      /// Or operator
      friend OBERROR OBBitVec operator| (const OBBitVec & bv1, const OBBitVec & bv2);
      /// And operator
      friend OBERROR OBBitVec operator& (const OBBitVec & bv1,const OBBitVec & bv2);
      /// Exclusive-or operator
      friend OBERROR OBBitVec operator^ (const OBBitVec & bv1,const OBBitVec & bv2);
      /// Minus operator
      friend OBERROR OBBitVec operator- (const OBBitVec & bv1,const OBBitVec & bv2);
      /// Equivalency operator
      friend OBERROR bool operator== (const OBBitVec & bv1,const OBBitVec & bv2);
      /// Smaller-than operator
      friend OBERROR bool operator< (const OBBitVec & bv1, const OBBitVec & bv2);

      /// Input from a stream
      friend OBERROR std::istream& operator>> ( std::istream & is, OBBitVec & bv );
      /// Output to a stream
      friend OBERROR std::ostream& operator<< ( std::ostream & os, const OBBitVec & bv ) ;
    };

  /// The Tanimoto coefficient, which may be regarded as the proportion of the "on-bits" which are shared.
  OBERROR double Tanimoto(const OBBitVec & bv1, const OBBitVec & bv2);

  } // end namespace OpenBabel

#endif // OB_BITVEC_H

//! \file bitvec.h
//! \brief Fast and efficient bitstring class

Coverage Report

Created: 2025-12-14 06:46

Line	Count	Source
1		/**********************************************************************
2		bitvec.h - Vector of bits.
3
4		Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5		Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
6
7		This file is part of the Open Babel project.
8		For more information, see <http://openbabel.org/>
9
10		This program is free software; you can redistribute it and/or modify
11		it under the terms of the GNU General Public License as published by
12		the Free Software Foundation version 2 of the License.
13
14		This program is distributed in the hope that it will be useful,
15		but WITHOUT ANY WARRANTY; without even the implied warranty of
16		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17		GNU General Public License for more details.
18		***********************************************************************/
19
20		#ifndef OB_BITVEC_H
21		#define OB_BITVEC_H
22
23		#include <openbabel/babelconfig.h>
24
25		#include <vector>
26		#include <string>
27
28		#if defined(_MSC_VER) && _MSC_VER <= 1600
29		// Assuming 32bit integer
30		typedef unsigned uint32_t;
31		#else
32		#include <inttypes.h>
33		#endif
34
35		// Use uint32_t
36	10.6k	#define SETWORD 32
37		// SETWORD = 2 ^ WORDROLL
38	727M	#define WORDROLL 5
39		// WORDMASK = SETWORD - 1
40	691M	#define WORDMASK 31
41
42	5.77M	#define WORDSIZE_OF_BITSIZE( bit_size ) ( ( bit_size >> WORDROLL ) + (( bit_size & WORDMASK ) ? 1 : 0) )
43
44		#ifndef STARTWORDS
45	6.67M	#define STARTWORDS 10
46		#endif // STARTWORDS
47
48		namespace OpenBabel
49		{
50		/// A speed-optimized vector of bits
51		/** This class implements a fast vector of bits
52		using internally a vector of fixed size unsigned ints (uint32_t).
53		Any bits which are out of reach of the current size
54		are considered to be zero.
55		Streamlined, corrected and documented by kshepherd1@users.sourceforge.net
56		*/
57		class OBERROR OBBitVec
58		{
59		public:
60		typedef std::vector<uint32_t> word_vector;
61
62		private:
63		/// The number of <b>words</b> currently stored ( NOT bit count )
64		size_t _size; //was unsigned
65		/// A vector of words used to store the bit values
66		word_vector _set;
67
68		public:
69		/// Construct a bit vector of the default size
70		/** Construct a bit vector of STARTWORDS size,
71		cleared to all zero bits.
72		*/
73		OBBitVec()
74	6.67M	:_set(STARTWORDS, 0)
75	6.67M	{ _size = _set.size(); }
76		/// Construct a bit vector of maxbits bits
77		/** Construct a bit vector with a size in bits
78		of \p size_in_bits rounded up to the nearest word
79		and cleared to all zero bits.
80		\param[in] size_in_bits The number of bits for which to reserve space
81		*/
82		OBBitVec(unsigned size_in_bits)
83	5.62M	:_set(WORDSIZE_OF_BITSIZE(size_in_bits), 0)
84	5.62M	{ _size = _set.size(); }
85		/// Copy constructor (result has same number of bits)
86		/** Construct a bit vector which is an exact
87		duplicate of \p bv.
88		\param[in] bv The other bit vector to copy to this
89		*/
90		OBBitVec(const OBBitVec & bv)
91	1.46M	:_size(0)
92	1.46M	{ (*this) = bv; }
93		/// Set the \p bit_offset 'th bit to 1
94		void SetBitOn(unsigned bit_offset);
95		/// Set the \p bit_offset 'th bit to 0
96		void SetBitOff(unsigned bit_offset);
97		/// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 1
98		void SetRangeOn(unsigned lo_bit_offset, unsigned hi_bit_offset);
99		/// Set the range of bits from \p lo_bit_offset to \p hi_bit_offset to 0
100		void SetRangeOff(unsigned lo_bit_offset, unsigned hi_bit_offset);
101		/// Reduce the size of the vector by or-ing the excess bits over the start
102		void Fold(unsigned new_bit_size);
103		/// Find the first true bit at or after \p bit_offset
104		/** Searches the vector for the first true value, starting at the \p bit_offset 'th bit
105		\param[in] bit_offset the first bit to consider
106		\return the bit offset of the first true bit at or after \p bit_offset, or -1 if there is none
107		*/
108		int FirstBit(unsigned bit_offset = 0) const
109	23.7k	{
110	23.7k	return (BitIsSet(bit_offset) ? 0 : NextBit(bit_offset));
111	23.7k	}
112		/// Find the next true bit after \p last_bit_offset
113		int NextBit(int last_bit_offset) const;
114		/// Return the bit offset of the last bit (for iterating) i.e. -1
115	35.0M	int EndBit() const { return -1; }
116		/// Return the number of words ( NOT the number of bits ).
117	42.1G	size_t GetSize() const { return(_size); }
118		/// Return the number of bits which are set to 1 in the vector
119		unsigned CountBits() const;
120
121		/// Are there no bits set to 1 in this vector?
122		bool IsEmpty() const;
123		/// Reserve space for \p size_in_bits bits
124		/** Reserve space for \p size_in_bits bits rounded up
125		\param[in] size_in_bits the number of bits
126		\return true if enlargement was necessary, false otherwise
127		*/
128		bool Resize(unsigned size_in_bits)
129	158k	{
130	158k	return ResizeWords( WORDSIZE_OF_BITSIZE(size_in_bits) );
131	158k	}
132		/// Reserve space for \p size_in_words words
133		/** Reserve space for \p size_in_words words
134		\param[in] size_in_words the number of words
135		\return true if enlargement was necessary, false otherwise
136		*/
137		bool ResizeWords(unsigned size_in_words)
138	2.17M	{
139	2.17M	if (size_in_words <= _size)
140	60.8k	return false;
141	2.11M	_set.resize(size_in_words, 0); // increase the vector with zeroed bits
142	2.11M	_size = _set.size();
143	2.11M	return true;
144	2.17M	}
145		/// Asks if the \p bit_offset 'th bit is set
146		/** Is the \p bit_offset 'th bit set ?
147		\param[in] bit_offset a zero based offset into the bit vector
148		\return true if it is set, false otherwise
149		*/
150		bool BitIsSet(unsigned bit_offset) const
151	526M	{
152	526M	bool rtn = false;
153	526M	unsigned word_offset = bit_offset >> WORDROLL;
154	526M	if (word_offset < GetSize())
155	525M	{
156	525M	bit_offset &= WORDMASK;
157	525M	rtn = (( _set[word_offset] >> bit_offset ) & 1);
158	525M	}
159	526M	return rtn;
160	526M	}
161		/// Sets the bits listed as bit offsets
162		void FromVecInt(const std::vector<int> & bit_offsets);
163		/// Sets the bits listed as a string of integers
164		void FromString(const std::string & line, int bits);
165		/// List the offsets of the bits which are set
166		void ToVecInt(std::vector<int> & bit_offsets) const;
167		/// Set all bits to zero
168		void Clear();
169		/// Inverts every bit in the vector
170		/** Inverts the entire vector.
171		Note that this may give unexpected results, as the vector
172		can be considered to end in an arbitrary number of zero bits.
173		*/
174		void Negate()
175	0	{
176	0	for (word_vector::iterator wx = _set.begin(), wy = _set.end(); wx != wy; ++wx)
177	0	* wx = ~(* wx);
178	0	}
179		/// Return a copy of the internal vector of words, at the end of \p vec
180		/** Copy the internal word vector.
181		The copy is appended to \p vec.
182		\param[out] vec a vector of words to which to append the data
183		*/
184		void GetWords(word_vector & vec)
185	0	{
186	0	vec.insert(vec.end(), _set.begin(),_set.end());
187	0	}
188
189		/// Assignment operator
190		OBBitVec & operator= (const OBBitVec & bv);
191		/// And-equals operator
192		OBBitVec & operator&= (const OBBitVec & bv);
193		/// Or-equals operator
194		OBBitVec & operator\|= (const OBBitVec & bv);
195		/// Or-equals operator for integer
196		/** Or the bit at offset \p bit_offset with 1
197		*/
198		OBBitVec & operator\|= (int bit_offset)
199	44.4M	{
200	44.4M	SetBitOn(bit_offset);
201	44.4M	return(*this);
202	44.4M	}
203		/// Exclusive-or-equals operator
204		OBBitVec & operator^= (const OBBitVec & bv);
205		/// Minus-equals operator
206		OBBitVec & operator-= (const OBBitVec & bv);
207		/// Plus-equals operator
208		OBBitVec & operator+= (const OBBitVec & bv);
209		/// Asks if the \p bit_offset 'th bit is set
210		/** Is the \p bit_offset 'th bit set ?
211		\param[in] bit_offset a zero based offset into the bit vector
212		\return true if it is set, false otherwise
213		*/
214		bool operator[] (int bit_offset) const
215	82.5M	{ return BitIsSet(bit_offset); }
216
217		/// Or operator
218		friend OBERROR OBBitVec operator\| (const OBBitVec & bv1, const OBBitVec & bv2);
219		/// And operator
220		friend OBERROR OBBitVec operator& (const OBBitVec & bv1,const OBBitVec & bv2);
221		/// Exclusive-or operator
222		friend OBERROR OBBitVec operator^ (const OBBitVec & bv1,const OBBitVec & bv2);
223		/// Minus operator
224		friend OBERROR OBBitVec operator- (const OBBitVec & bv1,const OBBitVec & bv2);
225		/// Equivalency operator
226		friend OBERROR bool operator== (const OBBitVec & bv1,const OBBitVec & bv2);
227		/// Smaller-than operator
228		friend OBERROR bool operator< (const OBBitVec & bv1, const OBBitVec & bv2);
229
230		/// Input from a stream
231		friend OBERROR std::istream& operator>> ( std::istream & is, OBBitVec & bv );
232		/// Output to a stream
233		friend OBERROR std::ostream& operator<< ( std::ostream & os, const OBBitVec & bv ) ;
234		};
235
236		/// The Tanimoto coefficient, which may be regarded as the proportion of the "on-bits" which are shared.
237		OBERROR double Tanimoto(const OBBitVec & bv1, const OBBitVec & bv2);
238
239		} // end namespace OpenBabel
240
241		#endif // OB_BITVEC_H
242
243		//! \file bitvec.h
244		//! \brief Fast and efficient bitstring class