Coverage Report

Created: 2026-01-17 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/include/openbabel/fingerprint.h
Line
Count
Source
1
/**********************************************************************
2
fingerprint.h - Base class for fingerprints and fast searching
3
4
Copyright (C) 2005 by Chris Morley
5
6
This file is part of the Open Babel project.
7
For more information, see <http://openbabel.org/>
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
***********************************************************************/
18
19
#ifndef OB_FINGERPRINT_H
20
#define OB_FINGERPRINT_H
21
22
#include <list>
23
#include <map>
24
#include <set>
25
#include <vector>
26
#include <string>
27
28
#include <openbabel/plugin.h>
29
30
#ifndef OBFPRT
31
#define OBFPRT
32
#endif
33
34
namespace OpenBabel
35
{
36
  class OBBase; //Forward declaration; used only as pointer.
37
38
/// \brief The base class for fingerprints
39
class OBFPRT OBFingerprint : public OBPlugin
40
{
41
//see end of cpp file for detailed documentation
42
43
MAKE_PLUGIN(OBFingerprint)
44
45
const char* TypeID() override
46
18
  {
47
18
    return "fingerprints";
48
18
  }
49
50
  //Rest of OBFingerprints declarations
51
public:
52
53
0
  /// Virtual destructor; it performs no work of its own.
  virtual ~OBFingerprint() = default;
54
55
  /// Sets the nth bit
56
  void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
57
58
  /// \return true if the nth bit is set
59
  bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
60
61
    /// Repeatedly ORs the top half with the bottom half until no smaller than nbits
62
  void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
63
64
  /// \return fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
65
  virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
66
67
  /// Optional flags
68
  // The two enumerators occupy distinct bits (1 and 2).
  enum FptFlag
  {
    FPT_UNIQUEBITS = 1,
    FPT_NOINFO     = 2
  };
69
0
  /// \return 0 in this base implementation; overriding classes may return
  ///         other values (presumably FptFlag combinations -- confirm)
  virtual unsigned int Flags()
  {
    return 0;
  }
70
  //// \since version 2.3
71
0
  virtual void SetFlags(unsigned int){}
72
73
  /// \return a description of each bit that is set (or unset, if bSet=false)
74
  /// \since version 2.2
75
  virtual std::string DescribeBits(const std::vector<unsigned int> /* fp */,
76
                                   bool /* bSet */ =true)
77
0
  {
78
0
    std::string txt("");
79
0
    return txt;
80
0
  }
81
82
  /// \return the Tanimoto coefficient between two vectors (vec1 and vec2)
83
  static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
84
85
  /// Inline version of Tanimoto() taking a pointer for the second vector.
  ///
  /// If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]).
  ///
  /// \param vec1 first fingerprint; its size decides how many words are compared
  /// \param p2   second fingerprint; must address at least vec1.size() readable
  ///             words (it is not dereferenced when vec1 is empty)
  /// \return the number of bits set in both fingerprints divided by the number
  ///         of bits set in either one, or 0.0 when no bit is set in either
  ///         (the plain division would be 0/0, i.e. NaN)
  static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
  {
    unsigned int andbits = 0, orbits = 0;
    for (std::vector<unsigned int>::size_type i = 0; i < vec1.size(); ++i)
    {
      // Keep the words unsigned: storing them in a signed int and shifting
      // that left (as the old fallback did) is undefined behaviour once the
      // top bit is set.
      const unsigned int andfp = vec1[i] & p2[i];
      const unsigned int orfp  = vec1[i] | p2[i];
      // Count bits
      /* GCC 3.4 supports a "population count" builtin, which on many targets is
         implemented with a single instruction.  There is a fallback definition
         in libgcc in case a target does not have one.  */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
      andbits += __builtin_popcount(andfp);
      orbits  += __builtin_popcount(orfp);
#else
      // Portable fallback: each iteration clears the lowest set bit.
      for (unsigned int w = andfp; w != 0; w &= w - 1)
        ++andbits;
      for (unsigned int w = orfp; w != 0; w &= w - 1)
        ++orbits;
#endif
    }
    if (orbits == 0)
      return 0.0; // both fingerprints empty: avoid dividing zero by zero
    return static_cast<double>(andbits) / static_cast<double>(orbits);
  }
112
113
17
  /// \return the value of the private static member bitsperint
  static unsigned int Getbitsperint()
  {
    return bitsperint;
  }
114
115
private:
116
  /// Function object returning the bitwise OR of its two arguments.
  struct bit_or
  {
    unsigned int operator()(const unsigned int a, const unsigned int b)
    {
      const unsigned int combined = a | b;
      return combined;
    }
  };
124
125
126
public:
127
/// \return a pointer to a fingerprint (the default if ID is empty), or NULL if not available
128
  ///For backward compatibility;  a synonym of OBFingerprint::FindType
129
3
static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
130
131
private:
132
  static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
133
};
134
135
//Fast search routines
136
/// \struct FptIndexHeader fingerprint.h <openbabel/fingerprint.h>
137
/// \brief Header for fastsearch index file
138
struct OBFPRT FptIndexHeader
139
{
140
  unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader)
141
  unsigned int nEntries;    ///<number of fingerprints
142
  unsigned int words;       ///<number of 32bit words per fingerprint
143
  char fpid[15];            ///<ID of the fingerprint type
144
  char seek64;              ///<if true, seek data consists of 64bit long values (only zero in legacy indices)
145
  char datafilename[256];   ///<the data that this is an index to
146
};
147
148
/// \struct FptIndex fingerprint.h <openbabel/fingerprint.h>
149
/// \brief Structure of fastsearch index files
150
struct OBFPRT FptIndex
151
{
152
  FptIndexHeader header;               ///< header record of the index
153
  std::vector<unsigned int> fptdata;   ///< fingerprint words -- presumably header.words per entry; confirm in the cpp file
154
  // NOTE(review): unsigned long is only 32 bits wide on some platforms (e.g.
  // 64-bit Windows), which cannot hold the 64bit seek values described by
  // FptIndexHeader::seek64 -- confirm how the cpp file deals with this.
  std::vector<unsigned long> seekdata; ///< seek values -- presumably one per entry; confirm in the cpp file
155
  // The three readers below all take the index stream by pointer and report
  // through a bool; what each one consumes is defined in the cpp file.
  bool Read(std::istream* pIndexstream);
156
  bool ReadIndex(std::istream* pIndexstream);
157
  bool ReadHeader(std::istream* pIndexstream);
158
159
  /// \return A pointer to FP used or NULL and an error message
160
  OBFingerprint* CheckFP();
161
};
162
163
/// \class FastSearch fingerprint.h <openbabel/fingerprint.h>
164
/// \brief Class to search fingerprint index files
165
class OBFPRT FastSearch
166
{
167
//see end of cpp file for detailed documentation
168
public:
169
  /// \brief Loads an index from a file and returns the name of the datafile
170
  std::string ReadIndexFile(std::string IndexFilename);
171
  std::string ReadIndex(std::istream* pIndexstream);
172
173
0
  virtual ~FastSearch(){};
174
175
  /// \brief Does substructure search and returns vector of the file positions of matches
176
  bool    Find(OBBase* pOb, std::vector<unsigned long>& SeekPositions, unsigned int MaxCandidates);
177
178
  /// \brief Similar to Find() but all bits of matching fingerprints have to be the same
179
  /// \since version 2.1
180
  bool    FindMatch(OBBase* pOb, std::vector<unsigned long>& SeekPositions,
181
                            unsigned int MaxCandidates);
182
183
  /// \return A multimap containing objects whose Tanimoto coefficients with the target
184
  /// is greater than the value specified.
185
  bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned long>& SeekposMap,
186
    double MinTani, double MaxTani = 1.1 );
187
188
  /// \return A multimap containing the nCandidates objects with largest Tanimoto
189
  ///  coefficients with the target.
190
  bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned long>& SeekposMap,
191
    int nCandidates=0);
192
193
  /// \return a pointer to the fingerprint type used to constuct the index
194
0
  OBFingerprint* GetFingerprint() const{ return _pFP;};
195
196
  /// \return a pointer to the index header containing size info etc.
197
0
  const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
198
199
private:
200
  FptIndex   _index;
201
  OBFingerprint* _pFP;
202
};
203
204
/// \class FastSearchIndexer fingerprint.h <openbabel/fingerprint.h>
205
/// \brief Class to prepare fingerprint index files. See FastSearch class for details.
206
class OBFPRT FastSearchIndexer
207
{
208
//see end of cpp file for detailed documentation
// NOTE(review): the class keeps raw pointers and declares a destructor but no
// copy constructor or copy assignment; confirm in the cpp file who owns
// _pindex and _indexstream before copying instances.
209
public:
210
  ///\brief Constructor with a new index
  // NOTE(review): FptBits presumably is the number of bits fingerprints are
  // folded to (0 meaning no folding) and nmols an expected object count --
  // confirm against the definition in the cpp file.
211
  FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
212
      int FptBits=0, int nmols=0);
213
214
  ///\brief Constructor using existing index
215
  FastSearchIndexer(FptIndex* pindex, std::ostream* os, int nmols=0);
216
217
  ~FastSearchIndexer();
218
219
  ///\brief Called for each object
  // Takes the object and the stream position associated with it.
220
  bool Add(OBBase* pOb, std::streampos seekpos);
221
222
private:
223
  std::ostream* _indexstream;
224
  FptIndex*   _pindex;
225
  OBFingerprint* _pFP;
226
  int _nbits;
227
};
228
229
} //namespace OpenBabel
230
#endif
231
232
//! \file fingerprint.h
233
//! \brief Declaration of OBFingerprint base class and fastsearch classes