Coverage Report

Created: 2025-08-26 06:55

/src/openbabel/include/openbabel/isomorphism.h
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  isomophism.h - OBIsomorphismMapper class for finding isomorphisms.
3
4
  Copyright (C) 2010 by Tim Vandermeersch
5
6
  This file is part of the Open Babel project.
7
  For more information, see <http://openbabel.org/>
8
9
  This program is free software; you can redistribute it and/or modify
10
  it under the terms of the GNU General Public License as published by
11
  the Free Software Foundation; either version 2 of the License, or
12
  (at your option) any later version.
13
14
  This program is distributed in the hope that it will be useful,
15
  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
  GNU General Public License for more details.
18
19
  You should have received a copy of the GNU General Public License
20
  along with this program; if not, write to the Free Software
21
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
  02110-1301, USA.
23
 **********************************************************************/
24
#ifndef OB_ISOMORPHISM_H
25
#define OB_ISOMORPHISM_H
26
27
#include <openbabel/bitvec.h>
28
29
namespace OpenBabel {
30
31
  class OBQuery;
32
  class OBMol;
33
34
  ///@addtogroup substructure Substructure Searching
35
  ///@{
36
37
  /**
38
   * @class OBIsomorphismMapper isomorphism.h <openbabel/isomorphism.h>
39
   * @since version 2.3
40
   * @brief Abstract class defining interface for isomorphism (i.e. substructure) searches.
41
   *
42
   * The OBIsomorphism class is an abstract class which defines an interface for
43
   * performing isomorphism (i.e. substructure) searches. It uses a OBQuery and
44
   * tries to map this onto a queried OBMol. A single mapping is represented by
45
   * a OBIsomorphismMapper::Mapping which is a std::map mapping query indexes to
46
   * queried indexes. Both query and queried indexes in the map start from 0.
47
   * Multiple mappings can be stored in a OBIsomorphismMapper::Mappings object
48
   * which is a std::vector of OBIsomorphismMapper objects.
49
   *
50
   * Since this is an abstract class with pure virtual methods, this class can't
51
   * be instantiated directly. To get a pointer to a subclass, the GetInstance()
52
   * method can be used which also sets the query. Once an instance is obtained,
53
   * the desired mapping function can be used to perform the mapping (i.e. MapFirst(),
54
   * MapUnique() or MapAll()).
55
   *
56
   * A typical example:
57
   * @code
58
   * OBMol *queried;
59
   * // ... initialize queried ...
60
   * OBQuery *query = CompileSmilesQuery("c1ccccc1");
61
   * OBIsomorphismMapper *mapper = OBIsomorphismMapper::GetInstance(query);
62
   * OBIsomorphismMapper::Mappings maps = mapper->MapUnique(mol);
63
   *
64
   * std::cout << "found " << maps.size() << " unique mappings" << std::endl;
65
   *
66
   * delete mapper;
67
   * delete query;
68
   * @endcode
69
   *
70
   * All mapping methods take an optional mask parameter. This can be used to
71
   * restrict the search to a part of the queried OBMol. The masked atoms in
72
   * the OBBitVec are  indexed from 1. A special case of isomorphism search is
73
   * an automorphism search where the query and queried molecule are the same.
74
   * Automorphism searches can be done using the MapAll method but an additional
75
   * FindAutomorphisms() function is provided for convenience.
76
   */
77
  class OBAPI OBIsomorphismMapper
78
  {
79
    public:
80
      /**
81
       * @typedef std::vector< std::pair<unsigned int,unsigned int> > Mapping
82
       * Type for an individual mapping.
83
       */
84
      typedef std::vector< std::pair<unsigned int,unsigned int> > Mapping;
85
      /**
86
       * @typedef std::vector<OBIsomorphismMapper::Mapping> Mappings
87
       * Type for a collection (std::vector) of Mapping objects.
88
       */
89
      typedef std::vector< Mapping > Mappings;
90
91
      /**
92
       * Constructor. OBIsomorphismMapper is an abstract class, use GetInstance() to
93
       * get an instance of a derived class.
94
       * @param query The search query.
95
       */
96
      OBIsomorphismMapper(OBQuery *query);
97
      virtual ~OBIsomorphismMapper();
98
99
      /**
100
       * Get a pointer to an instance of the specified @p algorithm. This pointer
101
       * has to be delted when the instance is no longer needed.
102
       * @param query The search query to be mapped.
103
       * @param algorithm The algorithm for the mapper.
104
       * @return OBIsomorphismMapper instance or 0 if there is no subclass implementing
105
       * the specified @p algorithm.
106
       */
107
      static OBIsomorphismMapper* GetInstance(OBQuery *query, const std::string &algorithm = std::string("VF2"));
108
109
      /**
110
       * Find a single mapping in @p queried.
111
       * @param queried The molecule to search.
112
       * @param map Reference to the object to store the result in.
113
       * @param mask A mask to restrict the search to a part of the queried molecule.
114
       * The default empty mask will result in all atoms being considered. The mask
115
       * indexes start from 1 (i.e. OBAtom::GetIdx()).
116
       */
117
      virtual void MapFirst(const OBMol *queried, Mapping &map, const OBBitVec &mask = OBBitVec()) = 0;
118
      /**
119
       * Find all unique mappings in @p queried. A mapping is unique when there is no previous
120
       * mapping covering the same queried atoms. For two mappings, some overlap is allowed but
121
       * at least one atom should be different.
122
       * @param queried The molecule to search.
123
       * @param maps Reference to the object to store the results in.
124
       * @param mask A mask to restrict the search to a part of the queried molecule.
125
       * The default empty mask will result in all atoms being considered. The mask
126
       * indexes start from 1 (i.e. OBAtom::GetIdx()).
127
       */
128
      virtual void MapUnique(const OBMol *queried, Mappings &maps, const OBBitVec &mask = OBBitVec()) = 0;
129
      /**
130
       * Find all mappings in @p queried. This function is used by FindAutomorphisms()
131
       * with a query that is a copy of the queried molecule (taking the mask into
132
       * account).
133
       * @param queried The molecule to search.
134
       * @param maps Reference to the object to store the results in.
135
       * @param mask A mask to restrict the search to a part of the queried molecule.
136
       * @param maxMemory Memory limit for the @p maps object in bytes. Default is 300MB.
137
       * The default empty mask will result in all atoms being considered. The mask
138
       * indexes start from 1 (i.e. OBAtom::GetIdx()).
139
       */
140
      virtual void MapAll(const OBMol *queried, Mappings &maps, const OBBitVec &mask = OBBitVec(), std::size_t maxMemory = 3000000) = 0;
141
142
      /**
143
       * @class Functor isomorphism.h <openbabel/isomorphism.h>
144
       * @brief Functor base class to be used in combination with MapGeneric.
145
       * @see @ref MapGeneric
146
       * @since 2.3
147
       */
148
      class Functor
149
      {
150
        public:
151
0
          virtual ~Functor() {}
152
          /**
153
           * This function is called every time an isomorphism is discovered.
154
           * Returing true, will abort the mapping process. The map is passed
155
           * as non-const reference and may be modified (e.g. swap).
156
           *
157
           * @see @ref MapGeneric
158
           */
159
          virtual bool operator()(Mapping &map) = 0;
160
      };
161
      /**
162
       * Find all mappings in @p queried. The functor will be called when a mapping is found.
163
       * @param functor The functor to handle found mappings.
164
       * @param queried The molecule to search.
165
       * @param mask A mask to restrict the search to a part of the queried molecule.
166
       * The default empty mask will result in all atoms being considered. The mask
167
       * indexes start from 1 (i.e. OBAtom::GetIdx()).
168
       */
169
      virtual void MapGeneric(Functor &functor, const OBMol *queried, const OBBitVec &mask = OBBitVec()) = 0;
170
171
172
      /**
173
       * Set the timeout in seconds.
174
       */
175
0
      void SetTimeout(unsigned int seconds) { m_timeout = seconds; }
176
177
    protected:
178
      OBQuery *m_query; //!< The search query.
179
      unsigned int m_timeout; //!< The timeout in seconds
180
  };
181
182
  inline bool MapsTo(const OBIsomorphismMapper::Mapping &map, unsigned int queryIndex, unsigned int &queriedIndex)
183
0
  {
184
0
    OBIsomorphismMapper::Mapping::const_iterator i;
185
0
    for (i = map.begin(); i != map.end(); ++i)
186
0
      if (i->first == queryIndex) {
187
0
        queriedIndex = i->second;
188
0
        return true;
189
0
      }
190
191
0
    return false;
192
0
  }
193
194
195
  /**
196
   * @typedef OBIsomorphismMapper::Mapping Automorphism
197
   * @brief A single automorphic permutation.
198
   * @since 2.3
199
   */
200
  typedef OBIsomorphismMapper::Mapping Automorphism;
201
  /**
202
   * @typedef OBIsomorphismMapper::Mappings Automorphisms
203
   * @brief A group of automorphic permutations.
204
   * @since 2.3
205
   */
206
  typedef OBIsomorphismMapper::Mappings Automorphisms;
207
208
  /**
209
   * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This
210
   * function is a wrapper for FindAutomorphisms with a functor to store all
211
   * automorphisms.
212
   *
213
   * @param mol The molecule for which to find the automorphisms.
214
   * @param aut The result will be stored here.
215
   * @param symmetry_classes The graph invariants to use. See OBGraphSym or use
216
   * the FindAutomorphisms function that computes these for you below.
217
   * @param mask A bit vector specifying which atoms to consider. An empty mask
218
   * will consider all atoms. The bits are indexed from 1 (i.e. OBAtom::GetIdx()).
219
   * @param maxMemory Maximum memory limit for @p aut. The number of automorphisms
220
   * for certain graphs can be large. The default is 300MB, consider using a functor
221
   * to process automorphisms when they are found.
222
   *
223
   * @since version 2.3
224
   */
225
  OBAPI bool FindAutomorphisms(OBMol *mol, std::vector<OBIsomorphismMapper::Mapping> &aut, const std::vector<unsigned int> &symmetry_classes,
226
      const OBBitVec &mask = OBBitVec(), std::size_t maxMemory = 3000000);
227
  /**
228
   * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This
229
   * function will first find the graph invariants (i.e. symmetry_classes) using
230
   * the mask. This function is a wrapper for FindAutomorphisms with a functor to
231
   * store all automorphisms.
232
   *
233
   * @since version 2.3
234
   */
235
  OBAPI bool FindAutomorphisms(OBMol *mol, std::vector<OBIsomorphismMapper::Mapping> &aut, const OBBitVec &mask = OBBitVec(),
236
      std::size_t maxMemory = 3000000);
237
238
  /**
239
   * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This
240
   * is the main implementation for finding automorphisms and uses an
241
   * OBIsomorphismMapper::Functor to process found isomorphisms. Wrapper functions
242
   * are provided to find and store all automorphisms but the number of automorphisms
243
   * can be large for certain graphs making it not feasible to store all automorphisms
244
   * in memory (RAM).
245
   *
246
   * @see  @ref MapGeneric
247
   * @since version 2.3
248
   */
249
  OBAPI void FindAutomorphisms(OBIsomorphismMapper::Functor &functor, OBMol *mol,
250
      const std::vector<unsigned int> &symmetry_classes, const OBBitVec &mask = OBBitVec());
251
252
  /**
253
   * @page substructure Substructure Search
254
   * @since version 2.3
255
   *
256
   * Substructure searching is finding a mapping for a query to a target molecule.
257
   * Such a mapping is also known as a graph isomorphism. A graph isomorphism maps
258
   * the vertexes (i.e. atoms) from the query to vertexes in the molecule such that
259
   * two vertexes adjacent in the query are also adjacent in the target molecule.
260
   * In other words, no bonds are broken and no new bonds are formed.
261
   *
262
   * @section smarts SMARTS Substructure Search
263
   * Smarts is an extension of smiles to create powerful queries. Smarts substructure
264
   * search has been available in OpenBabel for many years. It is also used for many
265
   * of OpenBabel's algorithms. Although smarts is only a syntax for queries, the
266
   * implementation has it's own matching algorithm. For many purposes smarts are the
267
   * easiest way to do substructure searches. See the OBSmartsPattern documentation
268
   * for details on how to use smarts.
269
   *
270
   * @section query Queries
271
   * Since OpenBabel version 2.3, there are some classes for representing generic
272
   * queries. The OBQuery, OBQueryAtom and OBQueryBond class define interfaces that
273
   * can be reimplemented to get custom behavior. The classes also contain some
274
   * methods to access topological information which are used by the mapping
275
   * algorithms. The default implementations allow very simple exact substructure
276
   * searches to be performed but subclassing allows very advanced queries to be
277
   * used (e.g. smarts).
278
   *
279
   * While it is possible to construct these queries manually, "compilers" are
280
   * provided to convert a query representation to a OBQuery object. Currently,
281
   * only two exact substructure search compilers exist. The first is
282
   * CompileMoleculeQuery which converts an OBMol object to an OBQuery object.
283
   * The second is CompileSmilesQuery and converts a smiles string to an OBQuery
284
   * object.
285
   *
286
   * @code
287
   * #include <openbabel/query.h>
288
   * using namespace OpenBabel;
289
   *
290
   * OBMol *mol = new OBMol;
291
   *
292
   * // ... read molecule ...
293
   *
294
   * OBQuery *query;
295
   * query = CompileMoleculeQuery(mol);
296
   * query = CompileSmilesQuery("c1ccccc1CC(=O)O");
297
   * @endcode
298
   *
299
   * @section mapping Mapping Isomorphisms
300
   * The OBIsomorphismMapper class defined an interface for mapping queries to
301
   * target molecules. Multiple implementations can be added but they all do the
302
   * same. The MapFirst, MapUnique and MapAll methods are used for gettings the
303
   * map(s).
304
   *
305
   * @subsection MapFirst
306
   * This method returns the first map found. The main reason for getting only
307
   * one map is improved performance since it is considerably faster than
308
   * MapUnique and MapAll. However, depending on the use case a single map is
309
   * all that is needed. For example, to check if a molecule in a database
310
   * contains a phenyl ring, a single mapping is enough.
311
   *
312
   * @subsection MapUnique
313
   * MapUnique returns all unique maps. A map is considered unique if there is
314
   * no other map covering exactly the same atoms in the target molecule. For
315
   * example, when a phenyl query is performed on a molecule with 2 phenyl rings,
316
   * MapUnique will return 2 maps. These 2 maps are selected from the 24 found
317
   * non-duplicate maps (6 atoms to start from * 2 directions (CW/CCW) * 2 rings).
318
   *
319
   * @subsection MapAll
320
   * MapAll returns all non-duplicate maps. For example, when a phenyl query is
321
   * performed on a molecule with 2 phenyl rings, MapAll will return 24 maps
322
   * (6 atoms to start from * 2 directions (CW/CCW) * 2 rings).
323
   *
324
   * @subsection MapGeneric
325
   * MapGeneric takes a functor object and calls the functor to handle found
326
   * isomorphisms. This allows for custom mapping results to be obtained by
327
   * filtering or avoid storing all results. To implement a custom functor,
328
   * a simple class that inherits OBIsomorphismMapper::Functor and implements
329
   * the required operator().
330
   *
331
   * @code
332
   * #include <openbabel/isomorphism.h>
333
   * using namespace OpenBabel;
334
   *
335
   * class MyCustomFunctor : public OBIsomorphismMapper::Functor
336
   * {
337
   *   private:
338
   *     // store all mappings in m_data
339
   *     std::vector<OBIsomorphismMapper::Mapping> &m_data;
340
   *   public:
341
   *     MyCustomFunctor(std::vector<OBIsomorphismMapper::Mapping> &data) : m_data(data) {}
342
   *     bool operator()(OBIsomorphismMapper::Mapping &map)
343
   *     {
344
   *       // code to handle found isomorphism...
345
   *       // examples: - store the mapping
346
   *       //           - filter mappings
347
   *       //           - use the found map in some way
348
   *
349
   *       m_data.push_back(map);
350
   *
351
   *       // continue mapping
352
   *       return false;
353
   *     }
354
   * }
355
   * @endcode
356
   *
357
   * @section automorphisms Automorphisms
358
   * The automorphisms of a graph or molecule are a group of isomorphism mappings
359
   * of the molecule onto itself (i.e. the query and target are the same). The
360
   * automorphisms make it easy to take symmetry into account. See FindAutomorphisms
361
   * for detials.
362
   *
363
   *
364
   */
365
366
  ///@}
367
368
}
369
370
#endif
371
372
/// @file isomorphism.h
373
/// @brief OBIsomorphismMapper class for finding isomorphisms.