/src/rdkit/Code/GraphMol/Aromaticity.cpp
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | #include <GraphMol/RDKitBase.h> |
11 | | #include <GraphMol/QueryBond.h> |
12 | | #include <GraphMol/QueryOps.h> |
13 | | #include <GraphMol/Rings.h> |
14 | | #include <RDGeneral/types.h> |
15 | | #include <boost/dynamic_bitset.hpp> |
16 | | #include <set> |
17 | | |
18 | | // introduced for the sake of efficiency |
19 | | // this is the maximum ring size that will be considered |
20 | | // as a candidate for fused-ring aromaticity. This is picked to |
21 | | // be a bit bigger than the outer ring in a porphyrin |
22 | | // This came up while fixing sf.net issue249 |
23 | | const unsigned int maxFusedAromaticRingSize = 24; |
24 | | |
25 | | /**************************************************************** |
26 | | Here are some molecules that have troubled us aromaticity wise |
27 | | |
28 | | 1. We get 2 aromatic rings for this molecule, but daylight says none. Also |
29 | | if we replace [O+] with the N daylight says we have two aromatic rings |
30 | | [O-][N+]1=CC2=CC=C[O+]2[Cu]13[O+]4C=CC=C4C=[N+]3[O-] |
31 | | |
32 | | 2. We the fused ring system with all the rings in it is considered aroamtic by |
33 | | our code. |
34 | | This is because we count electrons from buried atoms as well when |
35 | | we are dealing with fused rings |
36 | | |
37 | | 3. Here's a weird fused ring case, Pattern NAN, A overall |
38 | | O=C3C2=CC1=CC=COC1=CC2=CC=C3 |
39 | | ************************************************************/ |
40 | | namespace RingUtils { |
41 | | using namespace RDKit; |
42 | | |
43 | | void pickFusedRings(int curr, const INT_INT_VECT_MAP &neighMap, INT_VECT &res, |
44 | 20.2M | boost::dynamic_bitset<> &done, int depth) { |
45 | 20.2M | auto pos = neighMap.find(curr); |
46 | 20.2M | PRECONDITION(pos != neighMap.end(), "bad argument"); |
47 | 20.2M | done[curr] = 1; |
48 | 20.2M | res.push_back(curr); |
49 | | |
50 | 20.2M | const auto &neighs = pos->second; |
51 | 120M | for (int neigh : neighs) { |
52 | 120M | if (!done[neigh]) { |
53 | 12.2M | pickFusedRings(neigh, neighMap, res, done, depth + 1); |
54 | 12.2M | } |
55 | 120M | } |
56 | 20.2M | } |
57 | | |
58 | 7.93M | bool checkFused(const INT_VECT &rids, INT_INT_VECT_MAP &ringNeighs) { |
59 | 7.93M | auto nrings = rdcast<int>(ringNeighs.size()); |
60 | 7.93M | boost::dynamic_bitset<> done(nrings); |
61 | 7.93M | int rid; |
62 | 7.93M | INT_VECT fused; |
63 | | |
64 | | // mark all rings in the system other than those in rids as done |
65 | 434M | for (const auto &nci : ringNeighs) { |
66 | 434M | rid = nci.first; |
67 | 434M | if (std::find(rids.begin(), rids.end(), rid) == rids.end()) { |
68 | 390M | done[rid] = 1; |
69 | 390M | } |
70 | 434M | } |
71 | | |
72 | | // then pick a fused system from the remaining (i.e. rids) |
73 | | // If the rings in rids are fused we should get back all of them |
74 | | // in fused |
75 | | // if we get a smaller number in fused then rids are not fused |
76 | 7.93M | pickFusedRings(rids.front(), ringNeighs, fused, done); |
77 | | |
78 | 7.93M | CHECK_INVARIANT(fused.size() <= rids.size(), ""); |
79 | 7.93M | return (fused.size() == rids.size()); |
80 | 7.93M | } |
81 | | |
82 | | void makeRingNeighborMap(const VECT_INT_VECT &brings, |
83 | | INT_INT_VECT_MAP &neighMap, unsigned int maxSize, |
84 | 15.9k | unsigned int maxOverlapSize) { |
85 | 15.9k | auto nrings = rdcast<int>(brings.size()); |
86 | 15.9k | int i, j; |
87 | 15.9k | INT_VECT ring1; |
88 | | |
89 | 235k | for (i = 0; i < nrings; ++i) { |
90 | | // create an empty INT_VECT at neighMap[i] if it does not yet exist |
91 | 219k | neighMap[i]; |
92 | 219k | if (maxSize && brings[i].size() > maxSize) { |
93 | 1.14k | continue; |
94 | 1.14k | } |
95 | 218k | ring1 = brings[i]; |
96 | 44.3M | for (j = i + 1; j < nrings; ++j) { |
97 | 44.1M | if (maxSize && brings[j].size() > maxSize) { |
98 | 41.4k | continue; |
99 | 41.4k | } |
100 | 44.0M | INT_VECT inter; |
101 | 44.0M | Intersect(ring1, brings[j], inter); |
102 | 44.0M | if (inter.size() > 0 && |
103 | 527k | (!maxOverlapSize || inter.size() <= maxOverlapSize)) { |
104 | 504k | neighMap[i].push_back(j); |
105 | 504k | neighMap[j].push_back(i); |
106 | 504k | } |
107 | 44.0M | } |
108 | 218k | } |
109 | 15.9k | } |
110 | | |
111 | | } // end of namespace RingUtils |
112 | | |
113 | | // local utility namespace |
114 | | namespace { |
115 | | using namespace RDKit; |
116 | | |
117 | | typedef enum { |
118 | | VacantElectronDonorType, |
119 | | OneElectronDonorType, |
120 | | TwoElectronDonorType, |
121 | | OneOrTwoElectronDonorType, |
122 | | AnyElectronDonorType, |
123 | | NoElectronDonorType |
124 | | } ElectronDonorType; // used in setting aromaticity |
125 | | typedef std::vector<ElectronDonorType> VECT_EDON_TYPE; |
126 | | typedef VECT_EDON_TYPE::iterator VECT_EDON_TYPE_I; |
127 | | typedef VECT_EDON_TYPE::const_iterator VECT_EDON_TYPE_CI; |
128 | | |
129 | | /****************************************************************************** |
130 | | * SUMMARY: |
131 | | * Apply Huckel rule to the specified ring and mark the bonds and atoms in the |
132 | | * ring accordingly. |
133 | | * |
134 | | * ARGUMENTS: |
135 | | * mol - molecule of interest |
136 | | * ring - list of atoms that form the simple ring |
137 | | * edon - list of electron donor type of the atoms in the molecule |
138 | | * |
139 | | * RETURN : |
140 | | * none |
141 | | * |
142 | | * ASSUMPTIONS: |
143 | | * the ring has to a simple ring and the electron donor type of each atom |
144 | | * in the ring have been computed |
145 | | ******************************************************************************/ |
146 | | static bool applyHuckel(ROMol &mol, const INT_VECT &ring, |
147 | | const VECT_EDON_TYPE &edon, unsigned int minRingSize); |
148 | | |
149 | | static void applyHuckelToFused( |
150 | | ROMol &mol, // molecule of interest |
151 | | const VECT_INT_VECT &srings, // list of all ring as atom IDS |
152 | | const VECT_INT_VECT &brings, // list of all rings as bond ids |
153 | | const INT_VECT &fused, // list of ring ids in the current fused system |
154 | | const VECT_EDON_TYPE &edon, // electron donor state for each atom |
155 | | INT_INT_VECT_MAP &ringNeighs, |
156 | | int &narom, // number of aromatic ring so far |
157 | | unsigned int maxNumFusedRings, const std::vector<Bond *> &bondsByIdx, |
158 | | unsigned int minRingSize = 0); |
159 | | |
160 | | void markAtomsBondsArom(const VECT_INT_VECT &brings, const INT_VECT &ringIds, |
161 | | std::set<unsigned int> &doneBonds, |
162 | 62.1k | const std::vector<Bond *> &bondsByIdx) { |
163 | | // mark the bonds |
164 | | // here we want to be careful. We don't want to mark the fusing bonds |
165 | | // as aromatic - only the outside bonds in a fused system are marked aromatic. |
166 | | // - loop through the rings and count the number of times each bond appears in |
167 | | // all the fused rings. |
168 | | // - bonds that appears only once are marked aromatic |
169 | 62.1k | INT_MAP_INT bndCntr; |
170 | | |
171 | 269k | for (auto ri : ringIds) { |
172 | 269k | const auto &bring = brings[ri]; |
173 | 1.35M | for (auto bi : bring) { |
174 | 1.35M | ++bndCntr[bi]; |
175 | 1.35M | } |
176 | 269k | } |
177 | | // now mark single or double bonds that have a count of 1 and the atoms they |
178 | | // connect as aromatic |
179 | 813k | for (const auto &bci : bndCntr) { |
180 | 813k | if (bci.second == 1) { |
181 | 411k | auto bond = bondsByIdx[bci.first]; |
182 | 411k | bond->setIsAromatic(true); |
183 | 411k | switch (bond->getBondType()) { |
184 | 45.9k | case Bond::SINGLE: |
185 | 70.9k | case Bond::DOUBLE: |
186 | 70.9k | bond->setBondType(Bond::AROMATIC); |
187 | 70.9k | bond->getBeginAtom()->setIsAromatic(true); |
188 | 70.9k | bond->getEndAtom()->setIsAromatic(true); |
189 | 70.9k | break; |
190 | 340k | default: |
191 | 340k | break; |
192 | 411k | } |
193 | 411k | doneBonds.insert(bond->getIdx()); |
194 | 411k | } |
195 | 813k | } |
196 | 62.1k | } |
197 | | |
198 | 4.85M | void getMinMaxAtomElecs(ElectronDonorType dtype, int &atlw, int &atup) { |
199 | 4.85M | switch (dtype) { |
200 | 1.33M | case AnyElectronDonorType: |
201 | 1.33M | atlw = 1; |
202 | 1.33M | atup = 2; |
203 | 1.33M | break; |
204 | 0 | case OneOrTwoElectronDonorType: |
205 | 0 | atlw = 1; |
206 | 0 | atup = 2; |
207 | 0 | break; |
208 | 1.87M | case OneElectronDonorType: |
209 | 1.87M | atlw = atup = 1; |
210 | 1.87M | break; |
211 | 1.64M | case TwoElectronDonorType: |
212 | 1.64M | atlw = atup = 2; |
213 | 1.64M | break; |
214 | 0 | case NoElectronDonorType: |
215 | 1.41k | case VacantElectronDonorType: |
216 | 1.41k | default: |
217 | 1.41k | atlw = atup = 0; |
218 | 1.41k | break; |
219 | 4.85M | } |
220 | 4.85M | } |
221 | | |
222 | 471k | bool incidentNonCyclicMultipleBond(const Atom *at, int &who) { |
223 | 471k | PRECONDITION(at, "bad atom"); |
224 | | // check if "at" has an non-cyclic multiple bond on it |
225 | | // if yes check which atom this bond goes to |
226 | | // and record the atomID in who |
227 | 471k | const auto &mol = at->getOwningMol(); |
228 | 993k | for (const auto bond : mol.atomBonds(at)) { |
229 | 993k | if (!mol.getRingInfo()->numBondRings(bond->getIdx())) { |
230 | 23.8k | if (bond->getValenceContrib(at) >= 2.0) { |
231 | 3.39k | who = bond->getOtherAtomIdx(at->getIdx()); |
232 | 3.39k | return true; |
233 | 3.39k | } |
234 | 23.8k | } |
235 | 993k | } |
236 | 467k | return false; |
237 | 471k | } |
238 | | |
239 | 55.5k | bool incidentCyclicMultipleBond(const Atom *at) { |
240 | 55.5k | PRECONDITION(at, "bad atom"); |
241 | 55.5k | const auto &mol = at->getOwningMol(); |
242 | 137k | for (const auto bond : mol.atomBonds(at)) { |
243 | 137k | if (mol.getRingInfo()->numBondRings(bond->getIdx())) { |
244 | 126k | if (bond->getValenceContrib(at) >= 2.0) { |
245 | 16.1k | return true; |
246 | 16.1k | } |
247 | 126k | } |
248 | 137k | } |
249 | 39.4k | return false; |
250 | 55.5k | } |
251 | | |
252 | 379k | bool incidentMultipleBond(const Atom *at) { |
253 | 379k | PRECONDITION(at, "bad atom"); |
254 | 379k | const auto &mol = at->getOwningMol(); |
255 | 379k | auto deg = at->getDegree() + at->getNumExplicitHs(); |
256 | 788k | for (const auto bond : mol.atomBonds(at)) { |
257 | 788k | if (!std::lround(bond->getValenceContrib(at))) { |
258 | 2.12k | --deg; |
259 | 2.12k | } |
260 | 788k | } |
261 | 379k | return at->getValence(Atom::ValenceType::EXPLICIT) != deg; |
262 | 379k | } |
263 | | |
264 | | bool applyHuckel(ROMol &, const INT_VECT &ring, const VECT_EDON_TYPE &edon, |
265 | 1.35M | unsigned int minRingSize) { |
266 | 1.35M | if (ring.size() < minRingSize) { |
267 | 0 | return false; |
268 | 0 | } |
269 | 1.35M | int atlw, atup, rlw, rup, rie; |
270 | 1.35M | bool aromatic = false; |
271 | 1.35M | rlw = 0; |
272 | 1.35M | rup = 0; |
273 | 1.35M | unsigned int nAnyElectronDonorType = 0; |
274 | 6.06M | for (auto idx : ring) { |
275 | 6.06M | ElectronDonorType edonType = edon[idx]; |
276 | 6.06M | if (edonType == AnyElectronDonorType) { |
277 | 2.54M | ++nAnyElectronDonorType; |
278 | 2.54M | if (nAnyElectronDonorType > 1) { |
279 | 1.21M | return false; |
280 | 1.21M | } |
281 | 2.54M | } |
282 | 4.85M | getMinMaxAtomElecs(edonType, atlw, atup); |
283 | 4.85M | rlw += atlw; |
284 | 4.85M | rup += atup; |
285 | 4.85M | } |
286 | | |
287 | 148k | if (rup >= 6) { |
288 | 235k | for (rie = rlw; rie <= rup; ++rie) { |
289 | 184k | if ((rie - 2) % 4 == 0) { |
290 | 56.7k | aromatic = true; |
291 | 56.7k | break; |
292 | 56.7k | } |
293 | 184k | } |
294 | 107k | } else if (rup == 2) { |
295 | 5.38k | aromatic = true; |
296 | 5.38k | } |
297 | 148k | return aromatic; |
298 | 1.35M | } |
299 | | |
300 | | void applyHuckelToFused( |
301 | | ROMol &mol, // molecule of interest |
302 | | const VECT_INT_VECT &srings, // list of all ring as atom IDS |
303 | | const VECT_INT_VECT &brings, // list of all rings as bond ids |
304 | | const INT_VECT &fused, // list of ring ids in the current fused system |
305 | | const VECT_EDON_TYPE &edon, // electron donor state for each atom |
306 | | INT_INT_VECT_MAP &ringNeighs, // list of neighbors for each candidate ring |
307 | | int &narom, // number of aromatic ring so far |
308 | | unsigned int maxNumFusedRings, const std::vector<Bond *> &bondsByIdx, |
309 | 15.2k | unsigned int minRingSize) { |
310 | | // this function check huckel rule on a fused system it starts |
311 | | // with the individual rings in the system and then proceeds to |
312 | | // check for larger system i.e. if we have a 3 ring fused system, |
313 | | // huckel rule checked first on all the 1 ring subsystems then 2 |
314 | | // rung subsystems etc. |
315 | | |
316 | 15.2k | std::unordered_set<int> aromRings; |
317 | 15.2k | auto nrings = rdcast<unsigned int>(fused.size()); |
318 | 15.2k | INT_VECT curRs; |
319 | 15.2k | curRs.push_back(fused.front()); |
320 | 15.2k | int pos = -1; |
321 | 15.2k | unsigned int i; |
322 | 15.2k | unsigned int curSize = 0; |
323 | 15.2k | INT_VECT comb; |
324 | | |
325 | 15.2k | size_t nRingBonds; |
326 | 15.2k | { |
327 | 15.2k | boost::dynamic_bitset<> fusedBonds(mol.getNumBonds()); |
328 | 26.3k | for (auto ridx : fused) { |
329 | 314k | for (auto bidx : brings[ridx]) { |
330 | 314k | fusedBonds[bidx] = true; |
331 | 314k | } |
332 | 26.3k | } |
333 | 15.2k | nRingBonds = rdcast<unsigned int>(fusedBonds.count()); |
334 | 15.2k | } |
335 | 15.2k | std::set<unsigned int> doneBonds; |
336 | 7.97M | while (1) { |
337 | 7.97M | if (pos == -1) { |
338 | | // If a ring system has more than 300 rings and a ring combination search |
339 | | // larger than 2 is reached, the calculation becomes exponentially longer, |
340 | | // in some case it never completes. |
341 | 37.7k | if ((curSize == 2) && (nrings > 300)) { |
342 | 0 | BOOST_LOG(rdWarningLog) |
343 | 0 | << "Aromaticity detection halted on some rings due to ring system size." |
344 | 0 | << std::endl; |
345 | 0 | break; |
346 | 0 | } |
347 | | |
348 | 37.7k | ++curSize; |
349 | | // check if we are done with all the atoms in the fused |
350 | | // system, if so quit. This is a fix for Issue252 REVIEW: is |
351 | | // this check sufficient or should we add an additional |
352 | | // constraint on the number of combinations of rings in a |
353 | | // fused system that we will try. The number of combinations |
354 | | // can obviously be quite large when the number of rings in |
355 | | // the fused system is large |
356 | 37.7k | if (curSize > std::min(nrings, maxNumFusedRings) || |
357 | 22.8k | doneBonds.size() >= nRingBonds) { |
358 | 15.2k | break; |
359 | 15.2k | } |
360 | 22.5k | comb.resize(curSize); |
361 | 22.5k | std::iota(comb.begin(), comb.end(), 0); |
362 | 22.5k | pos = 0; |
363 | 7.93M | } else { |
364 | 7.93M | pos = nextCombination(comb, nrings); |
365 | 7.93M | } |
366 | | |
367 | 7.96M | if (pos == -1) { |
368 | 22.5k | continue; |
369 | 22.5k | } |
370 | | |
371 | 7.93M | curRs.clear(); |
372 | 7.93M | std::transform(comb.begin(), comb.end(), std::back_inserter(curRs), |
373 | 44.1M | [&fused](const int i) { return fused[i]; }); |
374 | | |
375 | | // check if the picked subsystem is fused |
376 | 7.93M | if (ringNeighs.size() && !RingUtils::checkFused(curRs, ringNeighs)) { |
377 | 6.58M | continue; |
378 | 6.58M | } |
379 | | |
380 | | // check aromaticity on the current fused system |
381 | 1.35M | INT_VECT atsInRingSystem(mol.getNumAtoms(), 0); |
382 | 7.14M | for (auto ridx : curRs) { |
383 | 33.5M | for (auto rid : srings[ridx]) { |
384 | 33.5M | ++atsInRingSystem[rid]; |
385 | 33.5M | } |
386 | 7.14M | } |
387 | 1.35M | INT_VECT unon; |
388 | 127M | for (i = 0; i < atsInRingSystem.size(); ++i) { |
389 | | // condition for inclusion of an atom in the aromaticity of a fused ring |
390 | | // system is that it's present in one or two of the rings. this was #2895: |
391 | | // the central atom in acepentalene was being included in the count of |
392 | | // aromatic atoms |
393 | 125M | if (atsInRingSystem[i] == 1 || atsInRingSystem[i] == 2) { |
394 | 12.6M | unon.push_back(i); |
395 | 12.6M | } |
396 | 125M | } |
397 | 1.35M | if (applyHuckel(mol, unon, edon, minRingSize)) { |
398 | | // mark the atoms and bonds in these rings to be aromatic |
399 | 62.1k | markAtomsBondsArom(brings, curRs, doneBonds, bondsByIdx); |
400 | | |
401 | | // add the ring IDs to the aromatic rings found so far |
402 | | // avoid duplicates |
403 | 62.1k | std::copy(curRs.begin(), curRs.end(), |
404 | 62.1k | std::inserter(aromRings, aromRings.begin())); |
405 | 62.1k | } // end check huckel rule |
406 | 1.35M | } // end while(1) |
407 | 15.2k | narom += rdcast<int>(aromRings.size()); |
408 | 15.2k | } |
409 | | |
410 | | bool isAtomCandForArom(const Atom *at, const ElectronDonorType edon, |
411 | | bool allowThirdRow = true, bool allowTripleBonds = true, |
412 | | bool allowHigherExceptions = true, bool onlyCorN = false, |
413 | 1.15M | bool allowExocyclicMultipleBonds = true) { |
414 | 1.15M | PRECONDITION(at, "bad atom"); |
415 | 1.15M | if (onlyCorN && at->getAtomicNum() != 6 && at->getAtomicNum() != 7) { |
416 | 0 | return false; |
417 | 0 | } |
418 | 1.15M | if (!allowThirdRow && at->getAtomicNum() > 10) { |
419 | 0 | return false; |
420 | 0 | } |
421 | | |
422 | | // limit aromaticity to: |
423 | | // - the first two rows of the periodic table |
424 | | // - Se and Te |
425 | 1.15M | if (at->getAtomicNum() > 18 && |
426 | 540k | (!allowHigherExceptions || |
427 | 540k | (at->getAtomicNum() != 34 && at->getAtomicNum() != 52))) { |
428 | 539k | return false; |
429 | 539k | } |
430 | 614k | switch (edon) { |
431 | 2.40k | case VacantElectronDonorType: |
432 | 428k | case OneElectronDonorType: |
433 | 475k | case TwoElectronDonorType: |
434 | 475k | case OneOrTwoElectronDonorType: |
435 | 502k | case AnyElectronDonorType: |
436 | 502k | break; |
437 | 111k | default: |
438 | 111k | return (false); |
439 | 614k | } |
440 | | |
441 | | // atoms that aren't in their default valence state also get shut out |
442 | 502k | auto defVal = |
443 | 502k | PeriodicTable::getTable()->getDefaultValence(at->getAtomicNum()); |
444 | 502k | if (defVal > 0 && rdcast<int>(at->getTotalValence()) > |
445 | 458k | (PeriodicTable::getTable()->getDefaultValence( |
446 | 458k | at->getAtomicNum() - at->getFormalCharge()))) { |
447 | 3.00k | return false; |
448 | 3.00k | } |
449 | | |
450 | | // heteroatoms or charged carbons with radicals also disqualify us from being |
451 | | // considered. This was github issue 432 (heteroatoms) and 1936 (charged |
452 | | // carbons) |
453 | 499k | if (at->getNumRadicalElectrons() && |
454 | 620 | (at->getAtomicNum() != 6 || at->getFormalCharge())) { |
455 | 387 | return false; |
456 | 387 | } |
457 | | |
458 | | // We are going to explicitly disallow atoms that have more |
459 | | // than one double or triple bond. This is to handle |
460 | | // the situation: |
461 | | // C1=C=NC=N1 (sf.net bug 1934360) |
462 | 499k | int nUnsaturations = |
463 | 499k | at->getValence(Atom::ValenceType::EXPLICIT) - at->getDegree(); |
464 | 499k | if (nUnsaturations > 1) { |
465 | 4.90k | unsigned int nMult = 0; |
466 | 4.90k | const auto &mol = at->getOwningMol(); |
467 | 10.7k | for (const auto bond : mol.atomBonds(at)) { |
468 | 10.7k | switch (bond->getBondType()) { |
469 | 2.56k | case Bond::SINGLE: |
470 | 2.67k | case Bond::AROMATIC: |
471 | 2.67k | break; |
472 | 6.00k | case Bond::DOUBLE: |
473 | 6.00k | ++nMult; |
474 | 6.00k | break; |
475 | 1.28k | case Bond::TRIPLE: |
476 | 1.28k | if (!allowTripleBonds) { |
477 | 0 | return false; |
478 | 0 | } |
479 | 1.28k | ++nMult; |
480 | 1.28k | break; |
481 | 821 | default: |
482 | | // hopefully we had enough sense that we don't even |
483 | | // get here with these bonds... If we do land here, |
484 | | // just bail... I have no good answer for them. |
485 | 821 | break; |
486 | 10.7k | } |
487 | 10.7k | if (nMult > 1) { |
488 | 2.86k | break; |
489 | 2.86k | } |
490 | 10.7k | } |
491 | 4.90k | if (nMult > 1) { |
492 | 2.86k | return (false); |
493 | 2.86k | } |
494 | 4.90k | } |
495 | | |
496 | 496k | if (!allowExocyclicMultipleBonds) { |
497 | 0 | const auto &mol = at->getOwningMol(); |
498 | 0 | for (const auto bond : mol.atomBonds(at)) { |
499 | 0 | if ((bond->getBondType() == Bond::DOUBLE || |
500 | 0 | bond->getBondType() == Bond::TRIPLE) && |
501 | 0 | !queryIsBondInRing(bond)) { |
502 | 0 | return false; |
503 | 0 | } |
504 | 0 | } |
505 | 0 | } |
506 | | |
507 | 496k | return (true); |
508 | 496k | } |
509 | | |
510 | | ElectronDonorType getAtomDonorTypeArom( |
511 | 1.15M | const Atom *at, bool exocyclicBondsStealElectrons = true) { |
512 | 1.15M | PRECONDITION(at, "bad atom"); |
513 | 1.15M | const auto &mol = at->getOwningMol(); |
514 | 1.15M | if (at->getAtomicNum() == 0) { |
515 | 43.9k | return incidentCyclicMultipleBond(at) ? OneElectronDonorType |
516 | 43.9k | : AnyElectronDonorType; |
517 | 43.9k | } |
518 | | |
519 | 1.10M | ElectronDonorType res = NoElectronDonorType; |
520 | 1.10M | auto nelec = MolOps::countAtomElec(at); |
521 | 1.10M | int who = -1; |
522 | 1.10M | if (nelec < 0) { |
523 | 638k | res = NoElectronDonorType; |
524 | 638k | } else if (nelec == 0) { |
525 | 11.6k | if (incidentNonCyclicMultipleBond(at, who)) { |
526 | | // This is borderline: no electron to spare but may have an empty |
527 | | // p-orbital |
528 | | // Not sure if this should return vacantElectronDonorType |
529 | | // FIX: explicitly doing this as a note for potential problems |
530 | | // |
531 | 30 | res = VacantElectronDonorType; |
532 | 11.6k | } else if (incidentCyclicMultipleBond(at)) { |
533 | | // no electron but has one in a in cycle multiple bond |
534 | 50 | res = OneElectronDonorType; |
535 | 11.6k | } else { |
536 | | // no multiple bonds no electrons |
537 | 11.6k | res = NoElectronDonorType; |
538 | 11.6k | } |
539 | 459k | } else if (nelec == 1) { |
540 | 382k | if (incidentNonCyclicMultipleBond(at, who)) { |
541 | | // the only available electron is going to be from the |
542 | | // external multiple bond this electron will not be available |
543 | | // for aromaticity if this atom is bonded to a more electro |
544 | | // negative atom |
545 | 3.18k | const auto at2 = mol.getAtomWithIdx(who); |
546 | 3.18k | if (exocyclicBondsStealElectrons && |
547 | 3.18k | PeriodicTable::getTable()->moreElectroNegative(at2->getAtomicNum(), |
548 | 3.18k | at->getAtomicNum())) { |
549 | 2.26k | res = VacantElectronDonorType; |
550 | 2.26k | } else { |
551 | 914 | res = OneElectronDonorType; |
552 | 914 | } |
553 | 379k | } else { |
554 | | // require that the atom have at least one multiple bond |
555 | 379k | if (incidentMultipleBond(at)) { |
556 | 379k | res = OneElectronDonorType; |
557 | 379k | } |
558 | | // account for the tropylium and cyclopropenyl cation cases |
559 | 366 | else if (at->getFormalCharge() == 1) { |
560 | 105 | res = VacantElectronDonorType; |
561 | 105 | } |
562 | 379k | } |
563 | 382k | } else { |
564 | 76.7k | if (incidentNonCyclicMultipleBond(at, who)) { |
565 | | // for cases with more than one electron : |
566 | | // if there is an incident multiple bond with an element that |
567 | | // is more electronegative than the this atom, count one less |
568 | | // electron |
569 | 180 | const auto at2 = mol.getAtomWithIdx(who); |
570 | 180 | if (exocyclicBondsStealElectrons && |
571 | 180 | PeriodicTable::getTable()->moreElectroNegative(at2->getAtomicNum(), |
572 | 180 | at->getAtomicNum())) { |
573 | 14 | --nelec; |
574 | 14 | } |
575 | 180 | } |
576 | 76.7k | if (nelec % 2 == 1) { |
577 | 30.4k | res = OneElectronDonorType; |
578 | 46.2k | } else { |
579 | 46.2k | res = TwoElectronDonorType; |
580 | 46.2k | } |
581 | 76.7k | } |
582 | 1.10M | return res; |
583 | 1.15M | } |
584 | | } // namespace |
585 | | |
586 | | namespace RDKit { |
587 | | namespace MolOps { |
588 | 52.8k | bool isBondOrderQuery(const Bond *bond) { |
589 | 52.8k | if (bond->hasQuery()) { |
590 | 28.9k | auto q = dynamic_cast<const QueryBond *>(bond)->getQuery(); |
591 | | // complex bond type queries are also bond order queries! |
592 | 28.9k | if (q->getTypeLabel() == "BondOrder" || |
593 | 23.0k | QueryOps::hasComplexBondTypeQuery(*q)) { |
594 | 5.93k | return true; |
595 | 5.93k | } |
596 | 28.9k | } |
597 | 46.8k | return false; |
598 | 52.8k | } |
599 | 6.02M | int countAtomElec(const Atom *at) { |
600 | 6.02M | PRECONDITION(at, "bad atom"); |
601 | | |
602 | | // default valence : |
603 | 6.02M | auto dv = PeriodicTable::getTable()->getDefaultValence(at->getAtomicNum()); |
604 | 6.02M | if (dv <= 1) { |
605 | | // univalent elements can't be either aromatic or conjugated |
606 | 1.01M | return -1; |
607 | 1.01M | } |
608 | | |
609 | | // total atom degree: |
610 | 5.00M | int degree = at->getDegree() + at->getTotalNumHs(); |
611 | | |
612 | 5.00M | const auto &mol = at->getOwningMol(); |
613 | 10.0M | for (const auto bond : mol.atomBonds(at)) { |
614 | | // don't count bonds that aren't actually contributing to the valence here: |
615 | | // if the bond is "real" (not undefined or zero), it always contributes to |
616 | | // valence/degree, and in case the bond is a query bond with no order, we |
617 | | // still need to check if the query is a bond query |
618 | 10.0M | if (!static_cast<Bond *>(bond)->getValenceContrib(at) && |
619 | 52.8k | !isBondOrderQuery(bond)) { |
620 | 46.8k | --degree; |
621 | 46.8k | } |
622 | 10.0M | } |
623 | | |
624 | | // if we are more than 3 coordinated we should not be aromatic |
625 | 5.00M | if (degree > 3) { |
626 | 507k | return -1; |
627 | 507k | } |
628 | | |
629 | | // number of lone pair electrons = (outer shell elecs) - (default valence) |
630 | 4.50M | auto nlp = PeriodicTable::getTable()->getNouterElecs(at->getAtomicNum()) - dv; |
631 | | |
632 | | // subtract the charge to get the true number of lone pair electrons: |
633 | 4.50M | nlp = std::max(nlp - at->getFormalCharge(), 0); |
634 | | |
635 | 4.50M | int nRadicals = at->getNumRadicalElectrons(); |
636 | | |
637 | | // num electrons available for donation into the pi system: |
638 | 4.50M | int res = (dv - degree) + nlp - nRadicals; |
639 | | |
640 | 4.50M | if (res > 1) { |
641 | | // if we have an incident bond with order higher than 2, |
642 | | // (e.g. triple or higher), we only want to return 1 electron |
643 | | // we detect this using the total unsaturation, because we |
644 | | // know that there aren't multiple unsaturations (detected |
645 | | // above in isAtomCandForArom()) |
646 | 2.05M | int nUnsaturations = |
647 | 2.05M | at->getValence(Atom::ValenceType::EXPLICIT) - at->getDegree(); |
648 | 2.05M | if (nUnsaturations > 1) { |
649 | 1.46M | res = 1; |
650 | 1.46M | } |
651 | 2.05M | } |
652 | | |
653 | 4.50M | return res; |
654 | 5.00M | } |
655 | | |
656 | | namespace { |
657 | 0 | int mdlAromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) { |
658 | 0 | int narom = 0; |
659 | | // loop over all the atoms in the rings that can be candidates |
660 | | // for aromaticity |
661 | | // Atoms are candidates if |
662 | | // - it is part of ring |
663 | | // - has one or more electron to donate or has empty p-orbitals |
664 | 0 | int natoms = mol.getNumAtoms(); |
665 | 0 | boost::dynamic_bitset<> acands(natoms); |
666 | 0 | boost::dynamic_bitset<> aseen(natoms); |
667 | 0 | VECT_EDON_TYPE edon(natoms); |
668 | |
|
669 | 0 | VECT_INT_VECT cRings; // holder for rings that are candidates for aromaticity |
670 | 0 | for (auto &sring : srings) { |
671 | 0 | bool allAromatic = true; |
672 | 0 | bool allDummy = true; |
673 | |
|
674 | 0 | for (auto firstIdx : sring) { |
675 | 0 | const auto at = mol.getAtomWithIdx(firstIdx); |
676 | |
|
677 | 0 | if (allDummy && at->getAtomicNum() != 0) { |
678 | 0 | allDummy = false; |
679 | 0 | } |
680 | |
|
681 | 0 | if (aseen[firstIdx]) { |
682 | 0 | if (!acands[firstIdx]) { |
683 | 0 | allAromatic = false; |
684 | 0 | } |
685 | 0 | continue; |
686 | 0 | } |
687 | 0 | aseen[firstIdx] = 1; |
688 | | |
689 | | // now that the atom is part of ring check if it can donate |
690 | | // electron or has empty orbitals. Record the donor type |
691 | | // information in 'edon' - we will need it when we get to |
692 | | // the Huckel rule later |
693 | 0 | edon[firstIdx] = getAtomDonorTypeArom(at, false); |
694 | | // we only accept one electron donors? |
695 | 0 | if (edon[firstIdx] != OneElectronDonorType) { |
696 | 0 | allAromatic = false; |
697 | 0 | continue; |
698 | 0 | } |
699 | 0 | const bool allowThirdRow = false; |
700 | 0 | const bool allowTripleBonds = false; |
701 | 0 | const bool allowHigherExceptions = false; |
702 | 0 | const bool onlyCorN = true; |
703 | 0 | const bool allowExocyclicMultipleBonds = false; |
704 | 0 | acands[firstIdx] = isAtomCandForArom( |
705 | 0 | at, edon[firstIdx], allowThirdRow, allowTripleBonds, |
706 | 0 | allowHigherExceptions, onlyCorN, allowExocyclicMultipleBonds); |
707 | 0 | if (!acands[firstIdx]) { |
708 | 0 | allAromatic = false; |
709 | 0 | } |
710 | 0 | } |
711 | 0 | if (allAromatic && !allDummy) { |
712 | 0 | cRings.push_back(sring); |
713 | 0 | } |
714 | 0 | } |
715 | | |
716 | | // first convert all rings to bonds ids |
717 | 0 | VECT_INT_VECT brings; |
718 | 0 | RingUtils::convertToBonds(cRings, brings, mol); |
719 | | |
720 | | // make the neighbor map for the rings |
721 | | // i.e. a ring is a neighbor a another candidate ring if |
722 | | // shares at least one bond |
723 | | // useful to figure out fused systems |
724 | 0 | INT_INT_VECT_MAP neighMap; |
725 | 0 | RingUtils::makeRingNeighborMap(brings, neighMap, maxFusedAromaticRingSize, 1); |
726 | | |
727 | | // now loop over all the candidate rings and check the |
728 | | // huckel rule - of course paying attention to fused systems. |
729 | 0 | INT_VECT doneRs; |
730 | 0 | int curr = 0; |
731 | 0 | auto cnrs = rdcast<int>(cRings.size()); |
732 | 0 | boost::dynamic_bitset<> fusDone(cnrs); |
733 | 0 | INT_VECT fused; |
734 | |
|
735 | 0 | std::vector<Bond *> bondsByIdx; |
736 | 0 | bondsByIdx.reserve(mol.getNumBonds()); |
737 | 0 | for (const auto b : mol.bonds()) { |
738 | 0 | bondsByIdx.push_back(b); |
739 | 0 | } |
740 | |
|
741 | 0 | while (curr < cnrs) { |
742 | 0 | fused.clear(); |
743 | 0 | RingUtils::pickFusedRings(curr, neighMap, fused, fusDone); |
744 | 0 | const unsigned int maxFused = 6; |
745 | 0 | const unsigned int minRingSize = 6; |
746 | 0 | applyHuckelToFused(mol, cRings, brings, fused, edon, neighMap, narom, |
747 | 0 | maxFused, bondsByIdx, minRingSize); |
748 | |
|
749 | 0 | int rix; |
750 | 0 | for (rix = 0; rix < cnrs; ++rix) { |
751 | 0 | if (!fusDone[rix]) { |
752 | 0 | curr = rix; |
753 | 0 | break; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | if (rix == cnrs) { |
757 | 0 | break; |
758 | 0 | } |
759 | 0 | } |
760 | |
|
761 | 0 | mol.setProp(common_properties::numArom, narom, true); |
762 | |
|
763 | 0 | return narom; |
764 | 0 | } |
765 | | |
766 | 0 | int mmff94AromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) { |
767 | | // set aromaticity as done in MMFF94 init |
768 | 0 | if (!mol.hasProp(common_properties::_MMFFSanitized)) { |
769 | 0 | bool isAromaticSet = false; |
770 | 0 | for (const auto atom : mol.atoms()) { |
771 | 0 | if (atom->getIsAromatic()) { |
772 | 0 | isAromaticSet = true; |
773 | 0 | break; |
774 | 0 | } |
775 | 0 | } |
776 | 0 | if (isAromaticSet) { |
777 | 0 | MolOps::Kekulize(mol, true); |
778 | 0 | } |
779 | 0 | mol.setProp(common_properties::_MMFFSanitized, 1, true); |
780 | 0 | } |
781 | 0 | setMMFFAromaticity(mol); |
782 | | |
783 | | // count aromatic rings for return value |
784 | 0 | int narom = 1; |
785 | 0 | for (auto &sring : srings) { |
786 | 0 | bool isAromRing = true; |
787 | 0 | for (auto &aid : sring) { |
788 | 0 | Atom *atom = mol.getAtomWithIdx(aid); |
789 | 0 | if (!atom->getIsAromatic()) { |
790 | 0 | isAromRing = false; |
791 | 0 | break; |
792 | 0 | } |
793 | 0 | } |
794 | 0 | if (isAromRing) { |
795 | 0 | narom++; |
796 | 0 | } |
797 | 0 | } |
798 | |
|
799 | 0 | return narom; |
800 | 0 | } |
801 | | |
802 | | // use minRingSize=0 or maxRingSize=0 to ignore these constraints |
803 | | int aromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings, |
804 | | unsigned int minRingSize, unsigned int maxRingSize, |
805 | 8.40k | bool includeFused) { |
806 | 8.40k | int narom = 0; |
807 | | // loop over all the atoms in the rings that can be candidates |
808 | | // for aromaticity |
809 | | // Atoms are candidates if |
810 | | // - it is part of ring |
811 | | // - has one or more electron to donate or has empty p-orbitals |
812 | 8.40k | int natoms = mol.getNumAtoms(); |
813 | 8.40k | boost::dynamic_bitset<> acands(natoms); |
814 | 8.40k | boost::dynamic_bitset<> aseen(natoms); |
815 | 8.40k | VECT_EDON_TYPE edon(natoms); |
816 | | |
817 | 8.40k | VECT_INT_VECT cRings; // holder for rings that are candidates for aromaticity |
818 | 94.7k | for (auto &sring : srings) { |
819 | 94.7k | size_t ringSz = sring.size(); |
820 | | // test ring size: |
821 | 94.7k | if ((minRingSize && ringSz < minRingSize) || |
822 | 94.7k | (maxRingSize && ringSz > maxRingSize)) { |
823 | 0 | continue; |
824 | 0 | } |
825 | 94.7k | bool allAromatic = true; |
826 | 94.7k | bool allDummy = true; |
827 | 1.93M | for (auto firstIdx : sring) { |
828 | 1.93M | const auto at = mol.getAtomWithIdx(firstIdx); |
829 | | |
830 | 1.93M | if (allDummy && !isAtomDummy(at)) { |
831 | 91.5k | allDummy = false; |
832 | 91.5k | } |
833 | | |
834 | 1.93M | if (aseen[firstIdx]) { |
835 | 781k | if (!acands[firstIdx]) { |
836 | 484k | allAromatic = false; |
837 | 484k | } |
838 | 781k | continue; |
839 | 781k | } |
840 | 1.15M | aseen[firstIdx] = 1; |
841 | | |
842 | | // now that the atom is part of ring check if it can donate |
843 | | // electron or has empty orbitals. Record the donor type |
844 | | // information in 'edon' - we will need it when we get to |
845 | | // the Huckel rule later |
846 | 1.15M | edon[firstIdx] = getAtomDonorTypeArom(at); |
847 | 1.15M | acands[firstIdx] = isAtomCandForArom(at, edon[firstIdx]); |
848 | 1.15M | if (!acands[firstIdx]) { |
849 | 657k | allAromatic = false; |
850 | 657k | } |
851 | 1.15M | } |
852 | 94.7k | if (allAromatic && !allDummy) { |
853 | 26.3k | cRings.push_back(sring); |
854 | 26.3k | } |
855 | 94.7k | } |
856 | | |
857 | | // first convert all rings to bonds ids |
858 | 8.40k | VECT_INT_VECT brings; |
859 | 8.40k | RingUtils::convertToBonds(cRings, brings, mol); |
860 | | |
861 | 8.40k | std::vector<Bond *> bondsByIdx; |
862 | 8.40k | bondsByIdx.reserve(mol.getNumBonds()); |
863 | 2.66M | for (auto b : mol.bonds()) { |
864 | 2.66M | bondsByIdx.push_back(b); |
865 | 2.66M | } |
866 | | |
867 | 8.40k | if (!includeFused) { |
868 | | // now loop over all the candidate rings and check the |
869 | | // huckel rule - skipping fused systems |
870 | 0 | INT_INT_VECT_MAP neighMap; |
871 | 0 | for (size_t ri = 0; ri < cRings.size(); ++ri) { |
872 | 0 | INT_VECT fused; |
873 | 0 | fused.push_back(ri); |
874 | 0 | const unsigned int maxFused = 6; |
875 | 0 | const unsigned int minRingSize = 0; |
876 | 0 | applyHuckelToFused(mol, cRings, brings, fused, edon, neighMap, narom, |
877 | 0 | maxFused, bondsByIdx, minRingSize); |
878 | 0 | } |
879 | 8.40k | } else { |
880 | | // make the neighbor map for the rings |
881 | | // i.e. a ring is a neighbor a another candidate ring if |
882 | | // shares at least one bond |
883 | | // useful to figure out fused systems |
884 | 8.40k | INT_INT_VECT_MAP neighMap; |
885 | 8.40k | RingUtils::makeRingNeighborMap(brings, neighMap, maxFusedAromaticRingSize, |
886 | 8.40k | 1); |
887 | | |
888 | | // now loop over all the candidate rings and check the |
889 | | // huckel rule - of course paying attention to fused systems. |
890 | 8.40k | INT_VECT doneRs; |
891 | 8.40k | int curr = 0; |
892 | 8.40k | auto cnrs = rdcast<int>(cRings.size()); |
893 | 8.40k | boost::dynamic_bitset<> fusDone(cnrs); |
894 | 8.40k | INT_VECT fused; |
895 | 21.0k | while (curr < cnrs) { |
896 | 15.2k | fused.clear(); |
897 | 15.2k | RingUtils::pickFusedRings(curr, neighMap, fused, fusDone); |
898 | 15.2k | applyHuckelToFused(mol, cRings, brings, fused, edon, neighMap, narom, 6, |
899 | 15.2k | bondsByIdx); |
900 | | |
901 | 15.2k | int rix; |
902 | 537k | for (rix = 0; rix < cnrs; ++rix) { |
903 | 535k | if (!fusDone[rix]) { |
904 | 12.6k | curr = rix; |
905 | 12.6k | break; |
906 | 12.6k | } |
907 | 535k | } |
908 | 15.2k | if (rix == cnrs) { |
909 | 2.55k | break; |
910 | 2.55k | } |
911 | 15.2k | } |
912 | 8.40k | } |
913 | | |
914 | 8.40k | mol.setProp(common_properties::numArom, narom, true); |
915 | | |
916 | 8.40k | return narom; |
917 | 8.40k | } |
918 | | |
919 | | } // end of anonymous namespace |
920 | | |
921 | | // sets the aromaticity flags according to MMFF |
922 | 0 | void setMMFFAromaticity(RWMol &mol) { |
923 | 0 | bool moveToNextRing = false; |
924 | 0 | bool isNOSinRing = false; |
925 | 0 | bool aromRingsAllSet = false; |
926 | 0 | bool exoDoubleBond = false; |
927 | 0 | bool canBeAromatic = false; |
928 | 0 | unsigned int i; |
929 | 0 | unsigned int j; |
930 | 0 | unsigned int nextInRing; |
931 | 0 | unsigned int pi_e = 0; |
932 | 0 | int nAromSet = 0; |
933 | 0 | int old_nAromSet = -1; |
934 | 0 | RingInfo *ringInfo = mol.getRingInfo(); |
935 | 0 | Atom *atom; |
936 | 0 | Bond *bond; |
937 | 0 | const VECT_INT_VECT &atomRings = ringInfo->atomRings(); |
938 | 0 | ROMol::ADJ_ITER nbrIdx; |
939 | 0 | ROMol::ADJ_ITER endNbrs; |
940 | 0 | boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms()); |
941 | 0 | boost::dynamic_bitset<> aromRingBitVect(atomRings.size()); |
942 | |
|
943 | 0 | while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) { |
944 | | // loop over all rings |
945 | 0 | for (i = 0; i < atomRings.size(); ++i) { |
946 | | // add 2 pi electrons for each double bond in the ring |
947 | 0 | for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false, |
948 | 0 | exoDoubleBond = false; |
949 | 0 | (!moveToNextRing) && (j < atomRings[i].size()); ++j) { |
950 | 0 | atom = mol.getAtomWithIdx(atomRings[i][j]); |
951 | | // remember if this atom is nitrogen, oxygen or divalent sulfur |
952 | 0 | if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) || |
953 | 0 | ((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) { |
954 | 0 | isNOSinRing = true; |
955 | 0 | } |
956 | | // check whether this atom is double-bonded to next one in the ring |
957 | 0 | nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] |
958 | 0 | : atomRings[i][j + 1]; |
959 | 0 | if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing) |
960 | 0 | ->getBondType() == Bond::DOUBLE) { |
961 | 0 | pi_e += 2; |
962 | 0 | } |
963 | | // if this is not a double bond, check whether this is carbon |
964 | | // or nitrogen with total bond order = 4 |
965 | 0 | else { |
966 | 0 | atom = mol.getAtomWithIdx(atomRings[i][j]); |
967 | | // if not, move on |
968 | 0 | if ((atom->getAtomicNum() != 6) && |
969 | 0 | (!((atom->getAtomicNum() == 7) && |
970 | 0 | ((atom->getValence(Atom::ValenceType::EXPLICIT) + |
971 | 0 | atom->getNumImplicitHs()) == 4)))) { |
972 | 0 | continue; |
973 | 0 | } |
974 | | // loop over neighbors |
975 | 0 | boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom); |
976 | 0 | for (; nbrIdx != endNbrs; ++nbrIdx) { |
977 | 0 | const Atom *nbrAtom = mol[*nbrIdx]; |
978 | | // if the neighbor is one of the ring atoms, skip it |
979 | | // since we are looking for exocyclic neighbors |
980 | 0 | if (std::find(atomRings[i].begin(), atomRings[i].end(), |
981 | 0 | nbrAtom->getIdx()) != atomRings[i].end()) { |
982 | 0 | continue; |
983 | 0 | } |
984 | | // it the neighbor is single-bonded, skip it |
985 | 0 | if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) |
986 | 0 | ->getBondType() == Bond::SINGLE) { |
987 | 0 | continue; |
988 | 0 | } |
989 | | // if the neighbor is in a ring and its aromaticity |
990 | | // bit has not yet been set, then move to the next ring |
991 | | // we'll take care of this later |
992 | 0 | if (queryIsAtomInRing(nbrAtom) && |
993 | 0 | (!(aromBitVect[nbrAtom->getIdx()]))) { |
994 | 0 | moveToNextRing = true; |
995 | 0 | break; |
996 | 0 | } |
997 | | // if the neighbor is in an aromatic ring and is |
998 | | // double-bonded to the current atom, add 1 pi electron |
999 | 0 | if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) |
1000 | 0 | ->getBondType() == Bond::DOUBLE) { |
1001 | 0 | if (nbrAtom->getIsAromatic()) { |
1002 | 0 | ++pi_e; |
1003 | 0 | } else { |
1004 | 0 | exoDoubleBond = true; |
1005 | 0 | } |
1006 | 0 | } |
1007 | 0 | } |
1008 | 0 | } |
1009 | 0 | } |
1010 | | // if we quit the loop at an early stage because aromaticity |
1011 | | // had not yet been set, then move to the next ring |
1012 | 0 | if (moveToNextRing) { |
1013 | 0 | continue; |
1014 | 0 | } |
1015 | | // loop again over all ring atoms |
1016 | 0 | for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) { |
1017 | | // set aromaticity as perceived |
1018 | 0 | aromBitVect[atomRings[i][j]] = 1; |
1019 | 0 | atom = mol.getAtomWithIdx(atomRings[i][j]); |
1020 | | // if this is is a non-sp2 carbon or nitrogen |
1021 | | // then this ring can't be aromatic |
1022 | 0 | if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) && |
1023 | 0 | (atom->getHybridization() != Atom::SP2)) { |
1024 | 0 | canBeAromatic = false; |
1025 | 0 | } |
1026 | 0 | } |
1027 | | // if this ring can't be aromatic, move to the next one |
1028 | 0 | if (!canBeAromatic) { |
1029 | 0 | continue; |
1030 | 0 | } |
1031 | | // if there is N, O, S; no exocyclic double bonds; |
1032 | | // the ring has an odd number of terms: add 2 pi electrons |
1033 | 0 | if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) { |
1034 | 0 | pi_e += 2; |
1035 | 0 | } |
1036 | | // if this ring satisfies the 4n+2 rule, |
1037 | | // then mark its atoms as aromatic |
1038 | 0 | if ((pi_e > 2) && (!((pi_e - 2) % 4))) { |
1039 | 0 | aromRingBitVect[i] = 1; |
1040 | 0 | for (j = 0; j < atomRings[i].size(); ++j) { |
1041 | 0 | atom = mol.getAtomWithIdx(atomRings[i][j]); |
1042 | 0 | atom->setIsAromatic(true); |
1043 | 0 | } |
1044 | 0 | } |
1045 | 0 | } |
1046 | | // termination criterion: if we did not manage to set any more |
1047 | | // aromatic atoms compared to the previous iteration, then |
1048 | | // stop looping |
1049 | 0 | old_nAromSet = nAromSet; |
1050 | 0 | nAromSet = 0; |
1051 | 0 | aromRingsAllSet = true; |
1052 | 0 | for (i = 0; i < atomRings.size(); ++i) { |
1053 | 0 | for (j = 0; j < atomRings[i].size(); ++j) { |
1054 | 0 | if (aromBitVect[atomRings[i][j]]) { |
1055 | 0 | ++nAromSet; |
1056 | 0 | } else { |
1057 | 0 | aromRingsAllSet = false; |
1058 | 0 | } |
1059 | 0 | } |
1060 | 0 | } |
1061 | 0 | } |
1062 | 0 | for (i = 0; i < atomRings.size(); ++i) { |
1063 | | // if the ring is not aromatic, move to the next one |
1064 | 0 | if (!aromRingBitVect[i]) { |
1065 | 0 | continue; |
1066 | 0 | } |
1067 | 0 | for (j = 0; j < atomRings[i].size(); ++j) { |
1068 | | // mark all ring bonds as aromatic |
1069 | 0 | nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] |
1070 | 0 | : atomRings[i][j + 1]; |
1071 | 0 | bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing); |
1072 | 0 | bond->setBondType(Bond::AROMATIC); |
1073 | 0 | bond->setIsAromatic(true); |
1074 | 0 | } |
1075 | 0 | } |
1076 | 0 | for (i = 0; i < atomRings.size(); ++i) { |
1077 | | // if the ring is not aromatic, move to the next one |
1078 | 0 | if (!aromRingBitVect[i]) { |
1079 | 0 | continue; |
1080 | 0 | } |
1081 | 0 | for (j = 0; j < atomRings[i].size(); ++j) { |
1082 | 0 | atom = mol.getAtomWithIdx(atomRings[i][j]); |
1083 | 0 | if (atom->getAtomicNum() != 6) { |
1084 | 0 | int iv = atom->calcImplicitValence(false); |
1085 | 0 | atom->calcExplicitValence(false); |
1086 | 0 | if (iv) { |
1087 | 0 | atom->setNumExplicitHs(iv); |
1088 | 0 | atom->calcImplicitValence(false); |
1089 | 0 | } |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | } |
1094 | | |
1095 | 8.40k | int setAromaticity(RWMol &mol, AromaticityModel model, int (*func)(RWMol &)) { |
1096 | | // This function used to check if the input molecule came |
1097 | | // with aromaticity information, assumed it is correct and |
1098 | | // did not touch it. Now it ignores that information entirely. |
1099 | | |
1100 | | // first find the all the simple rings in the molecule |
1101 | 8.40k | VECT_INT_VECT srings; |
1102 | 8.40k | if (mol.getRingInfo()->isInitialized()) { |
1103 | 8.40k | srings = mol.getRingInfo()->atomRings(); |
1104 | 8.40k | } else { |
1105 | 0 | MolOps::symmetrizeSSSR(mol, srings); |
1106 | 0 | } |
1107 | | |
1108 | 8.40k | int res; |
1109 | 8.40k | switch (model) { |
1110 | 8.40k | case AROMATICITY_DEFAULT: |
1111 | 8.40k | case AROMATICITY_RDKIT: |
1112 | 8.40k | res = aromaticityHelper(mol, srings, 0, 0, true); |
1113 | 8.40k | break; |
1114 | 0 | case AROMATICITY_SIMPLE: |
1115 | 0 | res = aromaticityHelper(mol, srings, 5, 6, false); |
1116 | 0 | break; |
1117 | 0 | case AROMATICITY_MDL: |
1118 | 0 | res = mdlAromaticityHelper(mol, srings); |
1119 | 0 | break; |
1120 | 0 | case AROMATICITY_MMFF94: |
1121 | 0 | res = mmff94AromaticityHelper(mol, srings); |
1122 | 0 | break; |
1123 | 0 | case AROMATICITY_CUSTOM: |
1124 | 0 | PRECONDITION( |
1125 | 0 | func, |
1126 | 0 | "function must be set when aromaticity model is AROMATICITY_CUSTOM"); |
1127 | 0 | res = func(mol); |
1128 | 0 | break; |
1129 | 0 | default: |
1130 | 0 | throw ValueErrorException("Bad AromaticityModel"); |
1131 | 8.40k | } |
1132 | 8.39k | return res; |
1133 | 8.40k | } |
1134 | | |
1135 | | }; // end of namespace MolOps |
1136 | | }; // end of namespace RDKit |