/src/openbabel/src/graphsym.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | graphsym.cpp - Symmetry detection algorithm |
3 | | |
4 | | Copyright (C) 2009-2010 by Tim Vandermeersch |
5 | | Copyright (C) 2005-2006, eMolecules, Inc. (www.emolecules.com) |
6 | | |
7 | | This file is part of the Open Babel project. |
8 | | For more information, see <http://openbabel.org/> |
9 | | |
10 | | This program is free software; you can redistribute it and/or modify |
11 | | it under the terms of the GNU General Public License as published by |
12 | | the Free Software Foundation version 2 of the License. |
13 | | |
14 | | This program is distributed in the hope that it will be useful, |
15 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | GNU General Public License for more details. |
18 | | |
19 | | You should have received a copy of the GNU General Public License |
20 | | along with this program; if not, write to the Free Software |
21 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 | | 02110-1301, USA. |
23 | | **********************************************************************/ |
24 | | |
25 | | #include <openbabel/graphsym.h> |
26 | | #include <openbabel/babelconfig.h> |
27 | | #include <openbabel/mol.h> |
28 | | #include <openbabel/atom.h> |
29 | | #include <openbabel/bond.h> |
30 | | #include <openbabel/ring.h> |
31 | | #include <openbabel/obiter.h> |
32 | | #include <openbabel/generic.h> |
33 | | |
34 | | #include <openbabel/stereo/cistrans.h> |
35 | | #include <openbabel/stereo/tetrahedral.h> |
36 | | #include <openbabel/elements.h> |
37 | | |
38 | | #include <iterator> // std::istream_iterator |
39 | | #include <cassert> |
40 | | #include <algorithm> |
41 | | #include <cmath> |
42 | | #include <limits> |
43 | | |
44 | | #include "stereo/stereoutil.h" |
45 | | |
46 | | using namespace std; |
47 | | |
48 | | |
49 | | // debug function |
50 | | template<typename T> |
51 | | void print_vector(const std::string &label, const std::vector<T> &v) |
52 | | { |
53 | | std::cout << label << ": "; |
54 | | for (std::size_t i = 0; i < v.size(); ++i) |
55 | | if (v[i] < 10) |
56 | | std::cout << " " << v[i] << " "; |
57 | | else |
58 | | std::cout << v[i] << " "; |
59 | | |
60 | | std::cout << endl; |
61 | | } |
62 | | |
63 | | // debug function |
64 | | void print_sym_classes(const std::string &label, const std::vector<std::pair<OpenBabel::OBAtom*, unsigned int> > &atom_sym_classes) |
65 | 0 | { |
66 | 0 | cout << label << ": "; |
67 | 0 | for (unsigned int i = 0; i < atom_sym_classes.size(); i++) |
68 | 0 | cout << atom_sym_classes[i].second << " "; |
69 | 0 | cout << endl; |
70 | 0 | } |
71 | | |
72 | | namespace OpenBabel { |
73 | | |
74 | | class OBGraphSymPrivate |
75 | | { |
76 | | public: |
77 | | OBBitVec _frag_atoms; |
78 | | OBMol* _pmol; |
79 | | std::vector<unsigned int> _canonLabels; |
80 | | OBStereoUnitSet _stereoUnits; |
81 | | |
82 | | unsigned int GetHvyDegree(OBAtom *atom); |
83 | | unsigned int GetHvyBondSum(OBAtom *atom); |
84 | | void FindRingAtoms(OBBitVec &ring_atoms); |
85 | | void CreateNewClassVector(std::vector<std::pair<OBAtom*,unsigned int> > &vp1, |
86 | | std::vector<std::pair<OBAtom*,unsigned int> > &vp2); |
87 | | static void CreateNewClassVector(OBMol *mol, std::vector<std::pair<OBAtom*,unsigned int> > &vp1, |
88 | | std::vector<std::pair<OBAtom*,unsigned int> > &vp2); |
89 | | void GetGIVector(std::vector<unsigned int> &vid); |
90 | | bool GetGTDVector(std::vector<int> >d); |
91 | | static void CountAndRenumberClasses(std::vector<std::pair<OBAtom*,unsigned int> > &vp, unsigned int &count); |
92 | | int ExtendInvariants(std::vector<std::pair<OBAtom*, unsigned int> > &symmetry_classes); |
93 | | int CalculateSymmetry(std::vector<unsigned int> &symmetry_classes); |
94 | | int Iterate(std::vector<unsigned int> &symmetry_classes); |
95 | | void CanonicalLabels(const std::vector<unsigned int> &symmetry_classes, std::vector<unsigned int> &canon_labels, int maxSeconds); |
96 | | }; |
97 | | |
98 | 14.0k | OBGraphSym::OBGraphSym(OBMol* pmol, const OBBitVec* frag_atoms) : d(new OBGraphSymPrivate) |
99 | 14.0k | { |
100 | 14.0k | d->_pmol = pmol; |
101 | 14.0k | if (frag_atoms) { |
102 | 12.8k | d->_frag_atoms = *frag_atoms; |
103 | 12.8k | } else { |
104 | 1.22k | d->_frag_atoms.Resize(d->_pmol->NumAtoms()); |
105 | 1.22k | FOR_ATOMS_OF_MOL(a, d->_pmol) |
106 | 818k | d->_frag_atoms.SetBitOn(a->GetIdx()); |
107 | 1.22k | } |
108 | 14.0k | } |
109 | | |
110 | | OBGraphSym::~OBGraphSym() |
111 | 14.0k | { |
112 | 14.0k | delete d; |
113 | 14.0k | } |
114 | | |
115 | | const unsigned int OBGraphSym::NoSymmetryClass = 0x7FFFFFFF; |
116 | | |
117 | | /** |
118 | | * Functions for use by the sort() method of a vector. |
119 | | */ |
120 | | inline bool CompareUnsigned(const unsigned int &a,const unsigned int &b) |
121 | 34.4k | { |
122 | 34.4k | return(a<b); |
123 | 34.4k | } |
124 | | |
125 | | inline bool ComparePairFirst(const std::pair<OBAtom*,unsigned int> &a,const std::pair<OBAtom*,unsigned int> &b) |
126 | 0 | { |
127 | 0 | return(a.first->GetIdx() < b.first->GetIdx()); |
128 | 0 | } |
129 | | |
130 | | inline bool ComparePairSecond(const std::pair<OBAtom*,unsigned int> &a,const std::pair<OBAtom*,unsigned int> &b) |
131 | 20.6M | { |
132 | 20.6M | return(a.second < b.second); |
133 | 20.6M | } |
134 | | |
135 | | |
136 | | /** |
137 | | * Like OBAtom::GetHvyDegree(): Counts the number non-hydrogen |
138 | | * neighbors, but doesn't count atoms not in the fragment. |
139 | | */ |
140 | | unsigned int OBGraphSymPrivate::GetHvyDegree(OBAtom *atom) |
141 | 832k | { |
142 | 832k | unsigned int count = 0; |
143 | 832k | OBBond *bond; |
144 | 832k | OBAtom *nbr; |
145 | | |
146 | 832k | vector<OBBond*>::iterator bi; |
147 | 837k | for (bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) { |
148 | 5.47k | nbr = bond->GetNbrAtom(atom); |
149 | 5.47k | if (_frag_atoms.BitIsSet(nbr->GetIdx()) && nbr->GetAtomicNum() != OBElements::Hydrogen) |
150 | 5.47k | count++; |
151 | 5.47k | } |
152 | | |
153 | 832k | return(count); |
154 | 832k | } |
155 | | |
156 | | /** |
157 | | * Sums the bond order over the bonds from this atom to other atoms |
158 | | * in the fragment. Single = 1, double = 2, triple = 3, aromatic = 1.6, |
159 | | * but sum is rounded to nearest integer. |
160 | | * |
161 | | * This is used for fragment symmetry perception instead of the "implicit |
162 | | * valence" used by the standard OpenBabel symmetry perception. It |
163 | | * has the same effect, but we don't have to worry about hydrogen counts, |
164 | | * EXCEPT for aromatic N, where the difference between n and [nH] is |
165 | | * critical. |
166 | | */ |
167 | | unsigned int OBGraphSymPrivate::GetHvyBondSum(OBAtom *atom) |
168 | 832k | { |
169 | 832k | float count = 0.0f; |
170 | 832k | OBBond *bond; |
171 | 832k | OBAtom *nbr; |
172 | | |
173 | 832k | vector<OBBond*>::iterator bi; |
174 | 837k | for (bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) { |
175 | 5.47k | nbr = bond->GetNbrAtom(atom); |
176 | 5.47k | if (_frag_atoms.BitIsSet(nbr->GetIdx()) && nbr->GetAtomicNum() != OBElements::Hydrogen) { |
177 | 5.47k | if (bond->IsAromatic()) |
178 | 0 | count += 1.6f; |
179 | 5.47k | else |
180 | 5.47k | count += (float)bond->GetBondOrder(); |
181 | 5.47k | } |
182 | 5.47k | } |
183 | 832k | if (atom->GetAtomicNum() == 7 && atom->IsAromatic() && atom->GetTotalDegree() == 3) { |
184 | 0 | count += 1.0f; // [nH] - add another bond |
185 | 0 | } |
186 | 832k | return(int(count + 0.5)); // round to nearest int |
187 | 832k | } |
188 | | |
189 | | /** |
190 | | * Calculates the graph theoretical distance of each atom. |
191 | | * Vector is indexed from zero. |
192 | | * |
193 | | * NOTE: "Indexed from zero" means it's one off from the atom->GetIdx() |
194 | | * that's used to index atoms inside the molecule! |
195 | | * |
196 | | * NOTE: This function is hard to decipher, and seems to be misnamed. |
197 | | * A "distance" should be be between two atoms, but there's more here |
198 | | * than that. It seems to be doing a breadth-first search to find the |
199 | | * most-distant atom from each atom, and reporting the number of steps |
200 | | * (which happens to be the graph-theoretical distance) to that atom. |
201 | | * The name "Graph Theoretical Distance" is thus misleading. |
202 | | */ |
203 | | bool OBGraphSymPrivate::GetGTDVector(vector<int> >d) |
204 | 14.0k | { |
205 | 14.0k | gtd.clear(); |
206 | 14.0k | gtd.resize(_pmol->NumAtoms()); |
207 | | |
208 | 14.0k | int gtdcount, natom; |
209 | 14.0k | OBBitVec used, curr, next; |
210 | 14.0k | OBAtom *atom, *atom1; |
211 | 14.0k | OBBond *bond; |
212 | 14.0k | vector<OBNodeBase*>::iterator ai; |
213 | 14.0k | vector<OBBond*>::iterator j; |
214 | | |
215 | 14.0k | next.Clear(); |
216 | | |
217 | 3.67M | for (atom = _pmol->BeginAtom(ai); atom; atom = _pmol->NextAtom(ai)) { |
218 | | |
219 | 3.65M | int idx = atom->GetIdx(); |
220 | 3.65M | if (!_frag_atoms.BitIsSet(idx)) { // Not in this fragment? |
221 | 2.82M | gtd[idx-1] = OBGraphSym::NoSymmetryClass; |
222 | 2.82M | continue; |
223 | 2.82M | } |
224 | | |
225 | 832k | gtdcount = 0; |
226 | 832k | used.Clear();curr.Clear(); |
227 | 832k | used.SetBitOn(idx); |
228 | 832k | curr.SetBitOn(idx); |
229 | | |
230 | 1.67M | while (!curr.IsEmpty()) { |
231 | 840k | next.Clear(); |
232 | 1.75M | for (natom = curr.NextBit(-1);natom != curr.EndBit();natom = curr.NextBit(natom)) { |
233 | 913k | atom1 = _pmol->GetAtom(natom); |
234 | 913k | if (!_frag_atoms.BitIsSet(atom1->GetIdx())) |
235 | 0 | continue; |
236 | 1.11M | for (bond = atom1->BeginBond(j);bond;bond = atom1->NextBond(j)) { |
237 | 204k | int nbr_idx = bond->GetNbrAtomIdx(atom1); |
238 | 204k | if ( _frag_atoms.BitIsSet(nbr_idx) |
239 | 204k | && !used.BitIsSet(nbr_idx) |
240 | 99.5k | && !curr.BitIsSet(nbr_idx) |
241 | 99.5k | && bond->GetNbrAtom(atom1)->GetAtomicNum() != OBElements::Hydrogen) |
242 | 99.5k | next.SetBitOn(nbr_idx); |
243 | 204k | } |
244 | 913k | } |
245 | 840k | used |= next; |
246 | 840k | curr = next; |
247 | 840k | gtdcount++; |
248 | 840k | } |
249 | 832k | gtd[idx-1] = gtdcount; |
250 | 832k | } |
251 | | |
252 | 14.0k | return(true); |
253 | 14.0k | } |
254 | | |
255 | | /** |
256 | | * Finds all atoms that are part of a ring in the current fragment. |
257 | | * We start with the whole molecule's rings, and eliminate any that |
258 | | * have atoms not in the subset. For the rings that are left, mark |
259 | | * each atom of the ring as a ring atom. |
260 | | * |
261 | | * @return A bit vector where TRUE means it's a ring atom. |
262 | | */ |
263 | | void OBGraphSymPrivate::FindRingAtoms(OBBitVec &ring_atoms) |
264 | 14.0k | { |
265 | 14.0k | vector<OBRing*> sssRings; |
266 | 14.0k | vector<OBRing*>::iterator ri; |
267 | | |
268 | 14.0k | ring_atoms.Resize(_pmol->NumAtoms()); |
269 | 14.0k | ring_atoms.Clear(); |
270 | | |
271 | 14.0k | sssRings = _pmol->GetSSSR(); |
272 | 15.0k | for (ri = sssRings.begin(); ri != sssRings.end(); ++ri) { |
273 | 971 | OBRing *ring = *ri; |
274 | 971 | OBBitVec bvtmp = _frag_atoms & ring->_pathset; // intersection: fragment and ring |
275 | 971 | if (bvtmp == ring->_pathset) // all ring atoms in fragment? |
276 | 489 | ring_atoms |= ring->_pathset; // yes - add this ring's atoms |
277 | 971 | } |
278 | 14.0k | } |
279 | | |
280 | | /** |
281 | | * Calculates a set of graph invariant indexes using the graph theoretical |
282 | | * distance, number of connected heavy atoms, aromatic boolean, ring |
283 | | * boolean, atomic number, and summation of bond orders connected to the |
284 | | * atom. |
285 | | * |
286 | | * We have to recalculate which atoms are in rings by taking the fragment's |
287 | | * atoms into account when we generate the graph invarients. |
288 | | * |
289 | | * Vector is indexed from zero (not one, like atom->GetIdx()). |
290 | | * |
291 | | * NOTE: This may need to be extended to include the bond-invariant properties, |
292 | | * particularly the size of all rings the bond is in (from a SSSR). |
293 | | */ |
294 | | void OBGraphSymPrivate::GetGIVector(vector<unsigned int> &vid) |
295 | 14.0k | { |
296 | | // Prepare the vector... |
297 | 14.0k | vid.clear(); |
298 | 14.0k | vid.resize(_pmol->NumAtoms()); |
299 | | |
300 | | // The "graph theoretical distance" for each atom (see comments in the function) |
301 | 14.0k | vector<int> v; |
302 | 14.0k | GetGTDVector(v); |
303 | | |
304 | | // Compute the ring atoms for this particular fragment (set of atoms) |
305 | 14.0k | OBBitVec ring_atoms; |
306 | 14.0k | FindRingAtoms(ring_atoms); |
307 | | |
308 | 14.0k | int i; |
309 | 14.0k | OBAtom *atom; |
310 | 14.0k | vector<OBNodeBase*>::iterator ai; |
311 | 3.67M | for (i=0, atom = _pmol->BeginAtom(ai); atom; atom = _pmol->NextAtom(ai)) { |
312 | | // vid[i] = 0; |
313 | 3.65M | vid[i] = OBGraphSym::NoSymmetryClass; |
314 | 3.65M | if (_frag_atoms.BitIsSet(atom->GetIdx())) { |
315 | 832k | vid[i] = |
316 | 832k | v[i] // 10 bits: graph-theoretical distance |
317 | 832k | | (GetHvyDegree(atom) <<10) // 4 bits: heavy valence |
318 | 832k | | (((atom->IsAromatic()) ? 1 : 0) <<14) // 1 bit: aromaticity |
319 | 832k | | (((ring_atoms.BitIsSet(atom->GetIdx())) ? 1 : 0)<<15) // 1 bit: ring atom |
320 | 832k | | (atom->GetAtomicNum() <<16) // 7 bits: atomic number |
321 | 832k | | (GetHvyBondSum(atom) <<23) // 4 bits: heavy bond sum |
322 | 832k | | ((7 + atom->GetFormalCharge()) <<27); // 4 bits: formal charge |
323 | 832k | } |
324 | 3.65M | i++; |
325 | 3.65M | } |
326 | 14.0k | } |
327 | | |
328 | | /** |
329 | | * Creates a new vector of symmetry classes based on an existing |
330 | | * vector. (Helper routine to GetGIDVector.) On return, vp2 will |
331 | | * have newly-extended connectivity sums, but the numbers (the class |
332 | | * IDs) are very large. |
333 | | * |
334 | | * (Comments by CJ) This appears to compute the "extended connectivity |
335 | | * sums" similar to those described by Weininger, Morgan, etc. It uses |
336 | | * vp1 as its starting point (the current connectivity sums), and puts |
337 | | * the new sums in vp2. Note that vp1 is modified along the way. |
338 | | * |
339 | | * Note that, per Weininger's warning, this assumes the initial class |
340 | | * ID's are less than 100, which is a BAD assumption, e.g. OCC...CCN |
341 | | * would have more than 100 symmetry classes if the chain is more than |
342 | | * 98 carbons long. Should change this to use Weininger's product of |
343 | | * corresponding primes. |
344 | | */ |
345 | | void OBGraphSymPrivate::CreateNewClassVector(std::vector<std::pair<OBAtom*,unsigned int> > &vp1, |
346 | | std::vector<std::pair<OBAtom*,unsigned int> > &vp2) |
347 | 14.6k | { |
348 | 14.6k | unsigned int m,id; |
349 | 14.6k | OBAtom *atom, *nbr; |
350 | 14.6k | vector<OBBond*>::iterator nbr_iter; |
351 | 14.6k | vector<unsigned int>::iterator k; |
352 | 14.6k | vector<pair<OBAtom*,unsigned int> >::iterator vp_iter; |
353 | | |
354 | | #if DEBUG2 |
355 | | cout << "CreateNewClassVector: START\n"; |
356 | | //print_vector_pairs(" ", vp1); |
357 | | #endif |
358 | | |
359 | | // There may be fewer atoms than in the whole molecule, so we can't |
360 | | // index the vp1 array by atom->GetIdx(). Instead, create a quick |
361 | | // mapping vector of idx-to-index for vp1. |
362 | 14.6k | vector<int> idx2index(_pmol->NumAtoms() + 1, -1); // natoms + 1 |
363 | 14.6k | int index = 0; |
364 | 1.66M | for (vp_iter = vp1.begin(); vp_iter != vp1.end(); ++vp_iter) { |
365 | 1.65M | int idx = vp_iter->first->GetIdx(); |
366 | 1.65M | idx2index[idx] = index++; |
367 | 1.65M | } |
368 | | |
369 | | // vp2 will hold the newly-extended symmetry classes |
370 | 14.6k | vp2.resize(vp1.size()); |
371 | 14.6k | vp2.clear(); |
372 | | |
373 | | // Loop over original atoms. |
374 | | // Create a new extended varient for each atom. Get its neighbors' class ID's, |
375 | | // sort them into ascending order, and create a sum of (c0 + c1*10^2 + c2*10^4 + ...) |
376 | | // which becomes the new class ID (where c0 is the current classID). |
377 | | |
378 | 1.66M | for (vp_iter = vp1.begin(); vp_iter != vp1.end(); ++vp_iter) { |
379 | 1.65M | atom = vp_iter->first; |
380 | 1.65M | id = vp_iter->second; |
381 | 1.65M | vector<unsigned int> vtmp; |
382 | 1.66M | for (nbr = atom->BeginNbrAtom(nbr_iter); nbr; nbr = atom->NextNbrAtom(nbr_iter)) { |
383 | 14.4k | int idx = nbr->GetIdx(); |
384 | 14.4k | if (_frag_atoms.BitIsSet(idx)) |
385 | 14.4k | vtmp.push_back(vp1[idx2index[idx]].second); |
386 | 14.4k | } |
387 | | |
388 | 1.65M | sort(vtmp.begin(),vtmp.end(),CompareUnsigned); |
389 | 1.66M | for (m = 100, k = vtmp.begin(); k != vtmp.end(); ++k, m*=100) |
390 | 14.4k | id += *k * m; |
391 | 1.65M | vp2.push_back(pair<OBAtom*,unsigned int> (atom, id)); |
392 | 1.65M | } |
393 | | #if DEBUG2 |
394 | | cout << "CreateNewClassVector: FINISH\n"; |
395 | | //print_vector_pairs(" ", vp2); |
396 | | #endif |
397 | 14.6k | } |
398 | | |
399 | | void OBGraphSymPrivate::CreateNewClassVector(OBMol *mol, std::vector<std::pair<OBAtom*,unsigned int> > &vp1, |
400 | | std::vector<std::pair<OBAtom*,unsigned int> > &vp2) |
401 | 0 | { |
402 | 0 | int m,id; |
403 | 0 | OBAtom *atom, *nbr; |
404 | 0 | vector<OBBond*>::iterator nbr_iter; |
405 | 0 | vector<unsigned int>::iterator k; |
406 | 0 | vector<pair<OBAtom*,unsigned int> >::iterator vp_iter; |
407 | |
|
408 | | #if DEBUG2 |
409 | | cout << "CreateNewClassVector: START\n"; |
410 | | //print_vector_pairs(" ", vp1); |
411 | | #endif |
412 | | |
413 | | // There may be fewer atoms than in the whole molecule, so we can't |
414 | | // index the vp1 array by atom->GetIdx(). Instead, create a quick |
415 | | // mapping vector of idx-to-index for vp1. |
416 | 0 | vector<int> idx2index(mol->NumAtoms() + 1, -1); // natoms + 1 |
417 | 0 | int index = 0; |
418 | 0 | for (vp_iter = vp1.begin(); vp_iter != vp1.end(); ++vp_iter) { |
419 | 0 | int idx = vp_iter->first->GetIdx(); |
420 | 0 | idx2index[idx] = index++; |
421 | 0 | } |
422 | | |
423 | | // vp2 will hold the newly-extended symmetry classes |
424 | 0 | vp2.resize(vp1.size()); |
425 | 0 | vp2.clear(); |
426 | | |
427 | | // Loop over original atoms. |
428 | | // Create a new extended varient for each atom. Get its neighbors' class ID's, |
429 | | // sort them into ascending order, and create a sum of (c0 + c1*10^2 + c2*10^4 + ...) |
430 | | // which becomes the new class ID (where c0 is the current classID). |
431 | |
|
432 | 0 | for (vp_iter = vp1.begin(); vp_iter != vp1.end(); ++vp_iter) { |
433 | 0 | atom = vp_iter->first; |
434 | 0 | id = vp_iter->second; |
435 | 0 | vector<unsigned int> vtmp; |
436 | 0 | for (nbr = atom->BeginNbrAtom(nbr_iter); nbr; nbr = atom->NextNbrAtom(nbr_iter)) { |
437 | 0 | int idx = nbr->GetIdx(); |
438 | 0 | vtmp.push_back(vp1[idx2index[idx]].second); |
439 | 0 | } |
440 | |
|
441 | 0 | sort(vtmp.begin(),vtmp.end(),CompareUnsigned); |
442 | 0 | for (m = 100, k = vtmp.begin(); k != vtmp.end(); ++k, m*=100) |
443 | 0 | id += *k * m; |
444 | 0 | vp2.push_back(pair<OBAtom*,unsigned int> (atom, id)); |
445 | 0 | } |
446 | | #if DEBUG2 |
447 | | cout << "CreateNewClassVector: FINISH\n"; |
448 | | //print_vector_pairs(" ", vp2); |
449 | | #endif |
450 | |
|
451 | 0 | } |
452 | | |
453 | | /** |
454 | | * Counts the number of unique symmetry classes in a list. |
455 | | * |
456 | | * (NOTE: CJ -- It also appears to MODIFY the list. It sorts it in order |
457 | | * of class ID, then renumbers the ID's zero through N-1. See the comments |
458 | | * in CreateNewClassVector() about how it returns very large numbers for the |
459 | | * class IDs it creates. These are replaced by lower, sequential numbers here.) |
460 | | */ |
461 | | void OBGraphSymPrivate::CountAndRenumberClasses(std::vector<std::pair<OBAtom*,unsigned int> > &vp, |
462 | | unsigned int &count) |
463 | 28.6k | { |
464 | 28.6k | count = 1; |
465 | 28.6k | vector<pair<OBAtom*,unsigned int> >::iterator k; |
466 | | |
467 | 28.6k | sort(vp.begin(), vp.end(), ComparePairSecond); |
468 | 28.6k | k = vp.begin(); |
469 | 28.6k | if (k != vp.end()) { |
470 | 28.4k | unsigned int id = k->second; |
471 | 28.4k | if (id) { |
472 | 28.4k | k->second = 1; |
473 | 28.4k | ++k; |
474 | 2.48M | for (;k != vp.end(); ++k) { |
475 | 2.45M | if (k->second != id) { |
476 | 9.67k | id = k->second; |
477 | 9.67k | k->second = ++count; |
478 | 2.44M | } else { |
479 | 2.44M | k->second = count; |
480 | 2.44M | } |
481 | 2.45M | } |
482 | 28.4k | } |
483 | 28.4k | } |
484 | 28.6k | } |
485 | | |
486 | | /** |
487 | | * This is the core of symmetry analysis. Starting with a set of |
488 | | * classes on each atom, it "spreads" them using a sum-of-invariants |
489 | | * of each atom's class and its neighbors' classes. This iterates |
490 | | * until a stable solution is found (further spreading doesn't |
491 | | * change the answer). |
492 | | * |
493 | | * @return The number of distinct symmetry classes found. |
494 | | */ |
495 | | int OBGraphSymPrivate::ExtendInvariants(std::vector<std::pair<OBAtom*, unsigned int> > &symmetry_classes) |
496 | 14.0k | { |
497 | 14.0k | unsigned int nclasses1, nclasses2; |
498 | 14.0k | vector<pair<OBAtom*,unsigned int> > tmp_classes; |
499 | | |
500 | | // How many classes are we starting with? (The "renumber" part isn't relevant.) |
501 | 14.0k | CountAndRenumberClasses(symmetry_classes, nclasses1); |
502 | | |
503 | 14.0k | unsigned int nfragatoms = _frag_atoms.CountBits(); |
504 | | |
505 | | // LOOP: Do extended sum-of-invarients until no further changes are |
506 | | // noted. (Note: This is inefficient, as it re-computes extended sums |
507 | | // and re-sorts the entire list each time. You can save a lot of time by |
508 | | // only recomputing and resorting within regions where there is a tie |
509 | | // initially. But it's a lot more code.) |
510 | | |
511 | 14.0k | if (nclasses1 < nfragatoms) { |
512 | 543 | for (int i = 0; i < 100;i++) { //sanity check - shouldn't ever hit this number |
513 | 543 | CreateNewClassVector(symmetry_classes, tmp_classes); |
514 | 543 | CountAndRenumberClasses(tmp_classes, nclasses2); |
515 | 543 | symmetry_classes = tmp_classes; |
516 | 543 | if (nclasses1 == nclasses2) break; |
517 | 54 | nclasses1 = nclasses2; |
518 | 54 | } |
519 | 489 | } |
520 | | |
521 | 14.0k | CreateNewClassVector(symmetry_classes, tmp_classes); |
522 | 14.0k | CountAndRenumberClasses(tmp_classes, nclasses2); |
523 | | |
524 | | |
525 | 14.0k | if (nclasses1 != nclasses2) { |
526 | 0 | symmetry_classes = tmp_classes; |
527 | 0 | return ExtendInvariants(symmetry_classes); |
528 | 0 | } |
529 | | |
530 | | |
531 | 14.0k | return nclasses1; |
532 | 14.0k | } |
533 | | |
534 | | /** |
535 | | * Calculates a set of canonical symmetry identifiers for a molecule. |
536 | | * Atoms with the same symmetry ID are symmetrically equivalent. By |
537 | | * "canonical", we mean it generates a repeatable labelling of the |
538 | | * atoms, i.e. the same fragment will get the same symmetry labels in |
539 | | * any molecule in which it occurs. |
540 | | * |
541 | | * Vector is indexed from zero, corresponding to (atom->GetIdx() - 1). |
542 | | * |
543 | | * The bit vector "_frag_atoms" specifies a fragment of the molecule, |
544 | | * where each bit represents the presence or absence of the atom in |
545 | | * the fragment. Symmetry is computed as though the fragment is the |
546 | | * only part that exists. |
547 | | */ |
548 | | int OBGraphSymPrivate::CalculateSymmetry(std::vector<unsigned int> &atom_sym_classes) |
549 | 14.0k | { |
550 | 14.0k | vector<unsigned int> vgi; |
551 | 14.0k | vector<OBNodeBase*>::iterator j; |
552 | 14.0k | OBAtom *atom; |
553 | | |
554 | | // Get vector of graph invariants. These are the starting "symmetry classes". |
555 | 14.0k | GetGIVector(vgi); |
556 | | |
557 | | |
558 | | // Create a vector-of-pairs, associating each atom with its Class ID. |
559 | 14.0k | std::vector<std::pair<OBAtom*, unsigned int> > symmetry_classes; |
560 | 3.67M | for (atom = _pmol->BeginAtom(j); atom; atom = _pmol->NextAtom(j)) { |
561 | 3.65M | int idx = atom->GetIdx(); |
562 | 3.65M | if (_frag_atoms.BitIsSet(idx)) |
563 | 832k | symmetry_classes.push_back(pair<OBAtom*, unsigned int> (atom, vgi[idx-1])); |
564 | | //else |
565 | | // symmetry_classes.push_back(pair<OBAtom*, unsigned int> (atom, OBGraphSym::NoSymmetryClass)); |
566 | 3.65M | } |
567 | | |
568 | | |
569 | | // The heart of the matter: Do extended sum-of-invariants until no further |
570 | | // changes are noted. |
571 | 14.0k | int nclasses = ExtendInvariants(symmetry_classes); |
572 | | |
573 | | // Convert to a vector indexed by Index |
574 | | // Atoms not in the fragment will have a value of OBGraphSym::NoSymmetryClass |
575 | 14.0k | atom_sym_classes.clear(); |
576 | 14.0k | atom_sym_classes.resize(_pmol->NumAtoms(), OBGraphSym::NoSymmetryClass); |
577 | 846k | for (unsigned int i = 0; i < symmetry_classes.size(); ++i) { |
578 | 832k | atom_sym_classes[symmetry_classes.at(i).first->GetIndex()] = symmetry_classes.at(i).second; |
579 | 832k | } |
580 | | |
581 | | // Store the symmetry classes in an OBPairData |
582 | 14.0k | stringstream temp; |
583 | 14.0k | vector<unsigned int>::iterator sym_iter = atom_sym_classes.begin(); |
584 | 14.0k | if (sym_iter != atom_sym_classes.end()) |
585 | 13.9k | temp << (*sym_iter++); |
586 | 3.65M | for (; sym_iter != atom_sym_classes.end(); ++sym_iter) |
587 | 3.64M | temp << " " << (*sym_iter); |
588 | | |
589 | 14.0k | OBPairData *symData = new OBPairData; |
590 | 14.0k | symData->SetAttribute("OpenBabel Symmetry Classes"); |
591 | 14.0k | symData->SetOrigin(local); //will not show as sdf or cml property |
592 | 14.0k | symData->SetValue(temp.str()); |
593 | 14.0k | _pmol->SetData(symData); |
594 | | |
595 | 14.0k | return nclasses; |
596 | 14.0k | } |
597 | | |
598 | | int OBGraphSym::GetSymmetry(std::vector<unsigned int> &symmetry_classes) |
599 | 14.0k | { |
600 | 14.0k | ClearSymmetry(); // For the moment just recalculate the symmetry classes |
601 | | |
602 | | // Check to see whether we have already calculated the symmetry classes |
603 | 14.0k | OBPairData *pd = dynamic_cast<OBPairData*>(d->_pmol->GetData("OpenBabel Symmetry Classes")); |
604 | | |
605 | 14.0k | int nclasses = 0; |
606 | 14.0k | if (!pd) { |
607 | 14.0k | nclasses = d->CalculateSymmetry(symmetry_classes); |
608 | 14.0k | } else { |
609 | 1 | istringstream iss(pd->GetValue()); |
610 | 1 | symmetry_classes.clear(); |
611 | 1 | copy(istream_iterator<unsigned int>(iss), |
612 | 1 | istream_iterator<unsigned int>(), |
613 | 1 | back_inserter<vector<unsigned int> >(symmetry_classes)); |
614 | | // Now find the number of unique elements |
615 | 1 | vector<unsigned int> copy_sym = symmetry_classes; |
616 | 1 | sort(copy_sym.begin(), copy_sym.end()); |
617 | 1 | vector<unsigned int>::iterator end_pos = unique(copy_sym.begin(), copy_sym.end()); // Requires sorted elements |
618 | 1 | nclasses = end_pos - copy_sym.begin(); |
619 | 1 | } |
620 | | |
621 | 14.0k | return nclasses; |
622 | 14.0k | } |
623 | | |
624 | | int OBGraphSymPrivate::Iterate(vector<unsigned int> &symClasses) |
625 | 0 | { |
626 | | // Create a vector-of-pairs, associating each atom with its Class ID. |
627 | 0 | vector<OBAtom*>::iterator j; |
628 | 0 | std::vector<std::pair<OBAtom*, unsigned int> > symmetry_classes; |
629 | 0 | for (OBAtom *atom = _pmol->BeginAtom(j); atom; atom = _pmol->NextAtom(j)) { |
630 | 0 | int idx = atom->GetIdx(); |
631 | 0 | if (_frag_atoms.BitIsSet(idx)) |
632 | 0 | symmetry_classes.push_back(pair<OBAtom*, unsigned int> (atom, symClasses[idx-1])); |
633 | 0 | } |
634 | | |
635 | | // The heart of the matter: Do extended sum-of-invariants until no further |
636 | | // changes are noted. |
637 | 0 | int nclasses = ExtendInvariants(symmetry_classes); |
638 | | |
639 | | // Convert to a vector indexed by Index |
640 | | // Atoms not in the fragment will have a value of OBGraphSym::NoSymmetryClass |
641 | 0 | symClasses.clear(); |
642 | 0 | symClasses.resize(_pmol->NumAtoms(), OBGraphSym::NoSymmetryClass); |
643 | 0 | for (unsigned int i = 0; i < symmetry_classes.size(); ++i) { |
644 | 0 | symClasses[symmetry_classes.at(i).first->GetIndex()] = symmetry_classes.at(i).second; |
645 | 0 | } |
646 | |
|
647 | 0 | return nclasses; |
648 | 0 | } |
649 | | |
650 | | // Clears perceived symmetry |
651 | | void OBGraphSym::ClearSymmetry() |
652 | 14.0k | { |
653 | 14.0k | d->_pmol->DeleteData("OpenBabel Symmetry Classes"); |
654 | 14.0k | } |
655 | | |
656 | | |
657 | | |
658 | | |
659 | | |
660 | | |
661 | | |
662 | | |
663 | | |
664 | | |
665 | | |
666 | | |
667 | | |
668 | | |
669 | | |
670 | | |
671 | | } // namespace OpenBabel |
672 | | |
673 | | //! \file graphsym.cpp |
674 | | //! \brief Handle and perceive graph symmtery for canonical numbering. |