/src/openbabel/include/openbabel/isomorphism.h
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | isomophism.h - OBIsomorphismMapper class for finding isomorphisms. |
3 | | |
4 | | Copyright (C) 2010 by Tim Vandermeersch |
5 | | |
6 | | This file is part of the Open Babel project. |
7 | | For more information, see <http://openbabel.org/> |
8 | | |
9 | | This program is free software; you can redistribute it and/or modify |
10 | | it under the terms of the GNU General Public License as published by |
11 | | the Free Software Foundation; either version 2 of the License, or |
12 | | (at your option) any later version. |
13 | | |
14 | | This program is distributed in the hope that it will be useful, |
15 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | GNU General Public License for more details. |
18 | | |
19 | | You should have received a copy of the GNU General Public License |
20 | | along with this program; if not, write to the Free Software |
21 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 | | 02110-1301, USA. |
23 | | **********************************************************************/ |
24 | | #ifndef OB_ISOMORPHISM_H |
25 | | #define OB_ISOMORPHISM_H |
26 | | |
27 | | #include <openbabel/bitvec.h> |
28 | | |
29 | | namespace OpenBabel { |
30 | | |
31 | | class OBQuery; |
32 | | class OBMol; |
33 | | |
34 | | ///@addtogroup substructure Substructure Searching |
35 | | ///@{ |
36 | | |
37 | | /** |
38 | | * @class OBIsomorphismMapper isomorphism.h <openbabel/isomorphism.h> |
39 | | * @since version 2.3 |
40 | | * @brief Abstract class defining interface for isomorphism (i.e. substructure) searches. |
41 | | * |
42 | | * The OBIsomorphism class is an abstract class which defines an interface for |
43 | | * performing isomorphism (i.e. substructure) searches. It uses a OBQuery and |
44 | | * tries to map this onto a queried OBMol. A single mapping is represented by |
45 | | * a OBIsomorphismMapper::Mapping which is a std::map mapping query indexes to |
46 | | * queried indexes. Both query and queried indexes in the map start from 0. |
47 | | * Multiple mappings can be stored in a OBIsomorphismMapper::Mappings object |
48 | | * which is a std::vector of OBIsomorphismMapper objects. |
49 | | * |
50 | | * Since this is an abstract class with pure virtual methods, this class can't |
51 | | * be instantiated directly. To get a pointer to a subclass, the GetInstance() |
52 | | * method can be used which also sets the query. Once an instance is obtained, |
53 | | * the desired mapping function can be used to perform the mapping (i.e. MapFirst(), |
54 | | * MapUnique() or MapAll()). |
55 | | * |
56 | | * A typical example: |
57 | | * @code |
58 | | * OBMol *queried; |
59 | | * // ... initialize queried ... |
60 | | * OBQuery *query = CompileSmilesQuery("c1ccccc1"); |
61 | | * OBIsomorphismMapper *mapper = OBIsomorphismMapper::GetInstance(query); |
62 | | * OBIsomorphismMapper::Mappings maps = mapper->MapUnique(mol); |
63 | | * |
64 | | * std::cout << "found " << maps.size() << " unique mappings" << std::endl; |
65 | | * |
66 | | * delete mapper; |
67 | | * delete query; |
68 | | * @endcode |
69 | | * |
70 | | * All mapping methods take an optional mask parameter. This can be used to |
71 | | * restrict the search to a part of the queried OBMol. The masked atoms in |
72 | | * the OBBitVec are indexed from 1. A special case of isomorphism search is |
73 | | * an automorphism search where the query and queried molecule are the same. |
74 | | * Automorphism searches can be done using the MapAll method but an additional |
75 | | * FindAutomorphisms() function is provided for convenience. |
76 | | */ |
77 | | class OBAPI OBIsomorphismMapper |
78 | | { |
79 | | public: |
80 | | /** |
81 | | * @typedef std::vector< std::pair<unsigned int,unsigned int> > Mapping |
82 | | * Type for an individual mapping. |
83 | | */ |
84 | | typedef std::vector< std::pair<unsigned int,unsigned int> > Mapping; |
85 | | /** |
86 | | * @typedef std::vector<OBIsomorphismMapper::Mapping> Mappings |
87 | | * Type for a collection (std::vector) of Mapping objects. |
88 | | */ |
89 | | typedef std::vector< Mapping > Mappings; |
90 | | |
91 | | /** |
92 | | * Constructor. OBIsomorphismMapper is an abstract class, use GetInstance() to |
93 | | * get an instance of a derived class. |
94 | | * @param query The search query. |
95 | | */ |
96 | | OBIsomorphismMapper(OBQuery *query); |
97 | | virtual ~OBIsomorphismMapper(); |
98 | | |
99 | | /** |
100 | | * Get a pointer to an instance of the specified @p algorithm. This pointer |
101 | | * has to be delted when the instance is no longer needed. |
102 | | * @param query The search query to be mapped. |
103 | | * @param algorithm The algorithm for the mapper. |
104 | | * @return OBIsomorphismMapper instance or 0 if there is no subclass implementing |
105 | | * the specified @p algorithm. |
106 | | */ |
107 | | static OBIsomorphismMapper* GetInstance(OBQuery *query, const std::string &algorithm = std::string("VF2")); |
108 | | |
109 | | /** |
110 | | * Find a single mapping in @p queried. |
111 | | * @param queried The molecule to search. |
112 | | * @param map Reference to the object to store the result in. |
113 | | * @param mask A mask to restrict the search to a part of the queried molecule. |
114 | | * The default empty mask will result in all atoms being considered. The mask |
115 | | * indexes start from 1 (i.e. OBAtom::GetIdx()). |
116 | | */ |
117 | | virtual void MapFirst(const OBMol *queried, Mapping &map, const OBBitVec &mask = OBBitVec()) = 0; |
118 | | /** |
119 | | * Find all unique mappings in @p queried. A mapping is unique when there is no previous |
120 | | * mapping covering the same queried atoms. For two mappings, some overlap is allowed but |
121 | | * at least one atom should be different. |
122 | | * @param queried The molecule to search. |
123 | | * @param maps Reference to the object to store the results in. |
124 | | * @param mask A mask to restrict the search to a part of the queried molecule. |
125 | | * The default empty mask will result in all atoms being considered. The mask |
126 | | * indexes start from 1 (i.e. OBAtom::GetIdx()). |
127 | | */ |
128 | | virtual void MapUnique(const OBMol *queried, Mappings &maps, const OBBitVec &mask = OBBitVec()) = 0; |
129 | | /** |
130 | | * Find all mappings in @p queried. This function is used by FindAutomorphisms() |
131 | | * with a query that is a copy of the queried molecule (taking the mask into |
132 | | * account). |
133 | | * @param queried The molecule to search. |
134 | | * @param maps Reference to the object to store the results in. |
135 | | * @param mask A mask to restrict the search to a part of the queried molecule. |
136 | | * @param maxMemory Memory limit for the @p maps object in bytes. Default is 300MB. |
137 | | * The default empty mask will result in all atoms being considered. The mask |
138 | | * indexes start from 1 (i.e. OBAtom::GetIdx()). |
139 | | */ |
140 | | virtual void MapAll(const OBMol *queried, Mappings &maps, const OBBitVec &mask = OBBitVec(), std::size_t maxMemory = 3000000) = 0; |
141 | | |
142 | | /** |
143 | | * @class Functor isomorphism.h <openbabel/isomorphism.h> |
144 | | * @brief Functor base class to be used in combination with MapGeneric. |
145 | | * @see @ref MapGeneric |
146 | | * @since 2.3 |
147 | | */ |
148 | | class Functor |
149 | | { |
150 | | public: |
151 | 0 | virtual ~Functor() {} |
152 | | /** |
153 | | * This function is called every time an isomorphism is discovered. |
154 | | * Returing true, will abort the mapping process. The map is passed |
155 | | * as non-const reference and may be modified (e.g. swap). |
156 | | * |
157 | | * @see @ref MapGeneric |
158 | | */ |
159 | | virtual bool operator()(Mapping &map) = 0; |
160 | | }; |
161 | | /** |
162 | | * Find all mappings in @p queried. The functor will be called when a mapping is found. |
163 | | * @param functor The functor to handle found mappings. |
164 | | * @param queried The molecule to search. |
165 | | * @param mask A mask to restrict the search to a part of the queried molecule. |
166 | | * The default empty mask will result in all atoms being considered. The mask |
167 | | * indexes start from 1 (i.e. OBAtom::GetIdx()). |
168 | | */ |
169 | | virtual void MapGeneric(Functor &functor, const OBMol *queried, const OBBitVec &mask = OBBitVec()) = 0; |
170 | | |
171 | | |
172 | | /** |
173 | | * Set the timeout in seconds. |
174 | | */ |
175 | 0 | void SetTimeout(unsigned int seconds) { m_timeout = seconds; } |
176 | | |
177 | | protected: |
178 | | OBQuery *m_query; //!< The search query. |
179 | | unsigned int m_timeout; //!< The timeout in seconds |
180 | | }; |
181 | | |
182 | | inline bool MapsTo(const OBIsomorphismMapper::Mapping &map, unsigned int queryIndex, unsigned int &queriedIndex) |
183 | 0 | { |
184 | 0 | OBIsomorphismMapper::Mapping::const_iterator i; |
185 | 0 | for (i = map.begin(); i != map.end(); ++i) |
186 | 0 | if (i->first == queryIndex) { |
187 | 0 | queriedIndex = i->second; |
188 | 0 | return true; |
189 | 0 | } |
190 | | |
191 | 0 | return false; |
192 | 0 | } |
193 | | |
194 | | |
195 | | /** |
196 | | * @typedef OBIsomorphismMapper::Mapping Automorphism |
197 | | * @brief A single automorphic permutation. |
198 | | * @since 2.3 |
199 | | */ |
200 | | typedef OBIsomorphismMapper::Mapping Automorphism; |
201 | | /** |
202 | | * @typedef OBIsomorphismMapper::Mappings Automorphisms |
203 | | * @brief A group of automorphic permutations. |
204 | | * @since 2.3 |
205 | | */ |
206 | | typedef OBIsomorphismMapper::Mappings Automorphisms; |
207 | | |
208 | | /** |
209 | | * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This |
210 | | * function is a wrapper for FindAutomorphisms with a functor to store all |
211 | | * automorphisms. |
212 | | * |
213 | | * @param mol The molecule for which to find the automorphisms. |
214 | | * @param aut The result will be stored here. |
215 | | * @param symmetry_classes The graph invariants to use. See OBGraphSym or use |
216 | | * the FindAutomorphisms function that computes these for you below. |
217 | | * @param mask A bit vector specifying which atoms to consider. An empty mask |
218 | | * will consider all atoms. The bits are indexed from 1 (i.e. OBAtom::GetIdx()). |
219 | | * @param maxMemory Maximum memory limit for @p aut. The number of automorphisms |
220 | | * for certain graphs can be large. The default is 300MB, consider using a functor |
221 | | * to process automorphisms when they are found. |
222 | | * |
223 | | * @since version 2.3 |
224 | | */ |
225 | | OBAPI bool FindAutomorphisms(OBMol *mol, std::vector<OBIsomorphismMapper::Mapping> &aut, const std::vector<unsigned int> &symmetry_classes, |
226 | | const OBBitVec &mask = OBBitVec(), std::size_t maxMemory = 3000000); |
227 | | /** |
228 | | * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This |
229 | | * function will first find the graph invariants (i.e. symmetry_classes) using |
230 | | * the mask. This function is a wrapper for FindAutomorphisms with a functor to |
231 | | * store all automorphisms. |
232 | | * |
233 | | * @since version 2.3 |
234 | | */ |
235 | | OBAPI bool FindAutomorphisms(OBMol *mol, std::vector<OBIsomorphismMapper::Mapping> &aut, const OBBitVec &mask = OBBitVec(), |
236 | | std::size_t maxMemory = 3000000); |
237 | | |
238 | | /** |
239 | | * Find the automorphisms of a molecule by using an OBIsomorphismMapper. This |
240 | | * is the main implementation for finding automorphisms and uses an |
241 | | * OBIsomorphismMapper::Functor to process found isomorphisms. Wrapper functions |
242 | | * are provided to find and store all automorphisms but the number of automorphisms |
243 | | * can be large for certain graphs making it not feasible to store all automorphisms |
244 | | * in memory (RAM). |
245 | | * |
246 | | * @see @ref MapGeneric |
247 | | * @since version 2.3 |
248 | | */ |
249 | | OBAPI void FindAutomorphisms(OBIsomorphismMapper::Functor &functor, OBMol *mol, |
250 | | const std::vector<unsigned int> &symmetry_classes, const OBBitVec &mask = OBBitVec()); |
251 | | |
252 | | /** |
253 | | * @page substructure Substructure Search |
254 | | * @since version 2.3 |
255 | | * |
256 | | * Substructure searching is finding a mapping for a query to a target molecule. |
257 | | * Such a mapping is also known as a graph isomorphism. A graph isomorphism maps |
258 | | * the vertexes (i.e. atoms) from the query to vertexes in the molecule such that |
259 | | * two vertexes adjacent in the query are also adjacent in the target molecule. |
260 | | * In other words, no bonds are broken and no new bonds are formed. |
261 | | * |
262 | | * @section smarts SMARTS Substructure Search |
263 | | * Smarts is an extension of smiles to create powerful queries. Smarts substructure |
264 | | * search has been available in OpenBabel for many years. It is also used for many |
265 | | * of OpenBabel's algorithms. Although smarts is only a syntax for queries, the |
266 | | * implementation has it's own matching algorithm. For many purposes smarts are the |
267 | | * easiest way to do substructure searches. See the OBSmartsPattern documentation |
268 | | * for details on how to use smarts. |
269 | | * |
270 | | * @section query Queries |
271 | | * Since OpenBabel version 2.3, there are some classes for representing generic |
272 | | * queries. The OBQuery, OBQueryAtom and OBQueryBond class define interfaces that |
273 | | * can be reimplemented to get custom behavior. The classes also contain some |
274 | | * methods to access topological information which are used by the mapping |
275 | | * algorithms. The default implementations allow very simple exact substructure |
276 | | * searches to be performed but subclassing allows very advanced queries to be |
277 | | * used (e.g. smarts). |
278 | | * |
279 | | * While it is possible to construct these queries manually, "compilers" are |
280 | | * provided to convert a query representation to a OBQuery object. Currently, |
281 | | * only two exact substructure search compilers exist. The first is |
282 | | * CompileMoleculeQuery which converts an OBMol object to an OBQuery object. |
283 | | * The second is CompileSmilesQuery and converts a smiles string to an OBQuery |
284 | | * object. |
285 | | * |
286 | | * @code |
287 | | * #include <openbabel/query.h> |
288 | | * using namespace OpenBabel; |
289 | | * |
290 | | * OBMol *mol = new OBMol; |
291 | | * |
292 | | * // ... read molecule ... |
293 | | * |
294 | | * OBQuery *query; |
295 | | * query = CompileMoleculeQuery(mol); |
296 | | * query = CompileSmilesQuery("c1ccccc1CC(=O)O"); |
297 | | * @endcode |
298 | | * |
299 | | * @section mapping Mapping Isomorphisms |
300 | | * The OBIsomorphismMapper class defined an interface for mapping queries to |
301 | | * target molecules. Multiple implementations can be added but they all do the |
302 | | * same. The MapFirst, MapUnique and MapAll methods are used for gettings the |
303 | | * map(s). |
304 | | * |
305 | | * @subsection MapFirst |
306 | | * This method returns the first map found. The main reason for getting only |
307 | | * one map is improved performance since it is considerably faster than |
308 | | * MapUnique and MapAll. However, depending on the use case a single map is |
309 | | * all that is needed. For example, to check if a molecule in a database |
310 | | * contains a phenyl ring, a single mapping is enough. |
311 | | * |
312 | | * @subsection MapUnique |
313 | | * MapUnique returns all unique maps. A map is considered unique if there is |
314 | | * no other map covering exactly the same atoms in the target molecule. For |
315 | | * example, when a phenyl query is performed on a molecule with 2 phenyl rings, |
316 | | * MapUnique will return 2 maps. These 2 maps are selected from the 24 found |
317 | | * non-duplicate maps (6 atoms to start from * 2 directions (CW/CCW) * 2 rings). |
318 | | * |
319 | | * @subsection MapAll |
320 | | * MapAll returns all non-duplicate maps. For example, when a phenyl query is |
321 | | * performed on a molecule with 2 phenyl rings, MapAll will return 24 maps |
322 | | * (6 atoms to start from * 2 directions (CW/CCW) * 2 rings). |
323 | | * |
324 | | * @subsection MapGeneric |
325 | | * MapGeneric takes a functor object and calls the functor to handle found |
326 | | * isomorphisms. This allows for custom mapping results to be obtained by |
327 | | * filtering or avoid storing all results. To implement a custom functor, |
328 | | * a simple class that inherits OBIsomorphismMapper::Functor and implements |
329 | | * the required operator(). |
330 | | * |
331 | | * @code |
332 | | * #include <openbabel/isomorphism.h> |
333 | | * using namespace OpenBabel; |
334 | | * |
335 | | * class MyCustomFunctor : public OBIsomorphismMapper::Functor |
336 | | * { |
337 | | * private: |
338 | | * // store all mappings in m_data |
339 | | * std::vector<OBIsomorphismMapper::Mapping> &m_data; |
340 | | * public: |
341 | | * MyCustomFunctor(std::vector<OBIsomorphismMapper::Mapping> &data) : m_data(data) {} |
342 | | * bool operator()(OBIsomorphismMapper::Mapping &map) |
343 | | * { |
344 | | * // code to handle found isomorphism... |
345 | | * // examples: - store the mapping |
346 | | * // - filter mappings |
347 | | * // - use the found map in some way |
348 | | * |
349 | | * m_data.push_back(map); |
350 | | * |
351 | | * // continue mapping |
352 | | * return false; |
353 | | * } |
354 | | * } |
355 | | * @endcode |
356 | | * |
357 | | * @section automorphisms Automorphisms |
358 | | * The automorphisms of a graph or molecule are a group of isomorphism mappings |
359 | | * of the molecule onto itself (i.e. the query and target are the same). The |
360 | | * automorphisms make it easy to take symmetry into account. See FindAutomorphisms |
361 | | * for detials. |
362 | | * |
363 | | * |
364 | | */ |
365 | | |
366 | | ///@} |
367 | | |
368 | | } |
369 | | |
370 | | #endif |
371 | | |
372 | | /// @file isomorphism.h |
373 | | /// @brief OBIsomorphismMapper class for finding isomorphisms. |