/src/rdkit/Code/GraphMol/FindStereo.cpp
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | #include <GraphMol/RDKitBase.h> |
11 | | #include <GraphMol/new_canon.h> |
12 | | #include <RDGeneral/types.h> |
13 | | #include <algorithm> |
14 | | #include <RDGeneral/utils.h> |
15 | | #include <RDGeneral/Invariant.h> |
16 | | #include <RDGeneral/RDLog.h> |
17 | | |
18 | | #include <boost/dynamic_bitset.hpp> |
19 | | #include "Chirality.h" |
20 | | #include <GraphMol/QueryOps.h> |
21 | | |
22 | | namespace RDKit { |
23 | | namespace Chirality { |
24 | | |
25 | | namespace detail { |
26 | | |
27 | 0 | bool isAtomPotentialNontetrahedralCenter(const Atom *atom) { |
28 | 0 | PRECONDITION(atom, "atom is null"); |
29 | 0 | auto nzdegree = Chirality::detail::getAtomNonzeroDegree(atom); |
30 | 0 | auto impHDegree = atom->getTotalNumHs(); |
31 | 0 | auto tnzdegree = nzdegree + impHDegree; |
32 | 0 | auto anum = atom->getAtomicNum(); |
33 | 0 | if (tnzdegree > 6 || tnzdegree < 2 || (anum < 12 && anum != 4)) { |
34 | 0 | return false; |
35 | 0 | } |
36 | 0 | auto chiralType = atom->getChiralTag(); |
37 | 0 | if (chiralType >= Atom::ChiralType::CHI_SQUAREPLANAR && |
38 | 0 | chiralType <= Atom::ChiralType::CHI_OCTAHEDRAL) { |
39 | 0 | return true; |
40 | 0 | } |
41 | | |
42 | | // with at least four neighbors but nothing specified we can start to imagine |
43 | | // that it might be enhanced stereo |
44 | 0 | if (chiralType == Atom::ChiralType::CHI_UNSPECIFIED && tnzdegree >= 4) { |
45 | 0 | return true; |
46 | 0 | } |
47 | | |
48 | 0 | return false; |
49 | 0 | } |
50 | 0 | bool isAtomPotentialTetrahedralCenter(const Atom *atom) { |
51 | 0 | PRECONDITION(atom, "atom is null"); |
52 | 0 | auto nzDegree = getAtomNonzeroDegree(atom); |
53 | 0 | auto tnzDegree = nzDegree + atom->getTotalNumHs(); |
54 | 0 | if (tnzDegree > 4) { |
55 | 0 | return false; |
56 | 0 | } else { |
57 | 0 | const auto &mol = atom->getOwningMol(); |
58 | 0 | if (nzDegree == 4) { |
59 | | // chirality is always possible with 4 nbrs |
60 | 0 | return true; |
61 | 0 | } else if (nzDegree <= 1) { |
62 | | // chirality is never possible with 0 or 1 nbr |
63 | 0 | return false; |
64 | 0 | } else if (nzDegree < 3 && |
65 | 0 | (atom->getAtomicNum() != 15 && atom->getAtomicNum() != 33)) { |
66 | | // less than three neighbors is never stereogenic |
67 | | // unless it is a phosphine/arsine with implicit H |
68 | 0 | return false; |
69 | 0 | } else if (atom->getAtomicNum() == 15 || atom->getAtomicNum() == 33) { |
70 | | // from logical flow: degree is 2 or 3 (implicit H) |
71 | | // Since InChI Software v. 1.02-standard (2009), phosphines and arsines |
72 | | // are always treated as stereogenic even with H atom neighbors. |
73 | | // Accept automatically. |
74 | 0 | return true; |
75 | 0 | } else if (nzDegree == 3) { |
76 | | // three-coordinate with a single H we'll accept automatically: |
77 | 0 | if (atom->getTotalNumHs() == 1) { |
78 | 0 | if (detail::has_protium_neighbor(mol, atom)) { |
79 | | // more than one H is never stereogenic |
80 | 0 | return false; |
81 | 0 | } |
82 | 0 | return true; |
83 | 0 | } else { |
84 | | // otherwise we default to not being a legal center |
85 | 0 | bool legalCenter = false; |
86 | | // but there are a few special cases we'll accept |
87 | | // sulfur or selenium with either a positive charge or a double |
88 | | // bond: |
89 | 0 | if ((atom->getAtomicNum() == 16 || atom->getAtomicNum() == 34) && |
90 | 0 | (atom->getValence(Atom::ValenceType::EXPLICIT) == 4 || |
91 | 0 | (atom->getValence(Atom::ValenceType::EXPLICIT) == 3 && |
92 | 0 | atom->getFormalCharge() == 1))) { |
93 | 0 | legalCenter = true; |
94 | 0 | } else if (atom->getAtomicNum() == 7) { |
95 | | // three-coordinate N additional requirements: |
96 | | // in a ring of size 3 (from InChI) |
97 | | // OR |
98 | | /// is a bridgehead atom (RDKit extension) |
99 | | // Also: cannot be SP2 hybridized or have a conjugated bond |
100 | | // (this was Github #7434) |
101 | 0 | if (atom->getHybridization() == Atom::HybridizationType::SP3 && |
102 | 0 | !MolOps::atomHasConjugatedBond(atom) && |
103 | 0 | (mol.getRingInfo()->isAtomInRingOfSize(atom->getIdx(), 3) || |
104 | 0 | queryIsAtomBridgehead(atom))) { |
105 | 0 | legalCenter = true; |
106 | 0 | } |
107 | 0 | } |
108 | 0 | return legalCenter; |
109 | 0 | } |
110 | 0 | } else { |
111 | 0 | return false; |
112 | 0 | } |
113 | 0 | } |
114 | 0 | } |
115 | | |
116 | | bool isAtomPotentialStereoAtom(const Atom *atom, |
117 | 0 | bool allowNontetrahehdralStereo) { |
118 | 0 | return isAtomPotentialTetrahedralCenter(atom) || |
119 | 0 | (allowNontetrahehdralStereo && |
120 | 0 | isAtomPotentialNontetrahedralCenter(atom)); |
121 | 0 | } |
122 | | |
123 | 0 | bool isAtomPotentialStereoAtom(const Atom *atom) { |
124 | 0 | return isAtomPotentialStereoAtom(atom, getAllowNontetrahedralChirality()); |
125 | 0 | } |
126 | | |
127 | 0 | StereoInfo getStereoInfo(const Bond *bond) { |
128 | 0 | PRECONDITION(bond, "bond is null"); |
129 | 0 | StereoInfo sinfo; |
130 | 0 | const auto beginAtom = bond->getBeginAtom(); |
131 | 0 | const auto endAtom = bond->getEndAtom(); |
132 | 0 | if (bond->getBondType() == Bond::BondType::DOUBLE) { |
133 | 0 | if (beginAtom->getDegree() < 1 || endAtom->getDegree() < 1 || |
134 | 0 | beginAtom->getDegree() > 3 || endAtom->getDegree() > 3) { |
135 | 0 | throw ValueErrorException("invalid atom degree in getStereoInfo(bond)"); |
136 | 0 | } |
137 | | |
138 | 0 | sinfo.type = StereoType::Bond_Double; |
139 | 0 | sinfo.centeredOn = bond->getIdx(); |
140 | 0 | sinfo.controllingAtoms.reserve(4); |
141 | |
|
142 | 0 | bool seenSquiggleBond = false; |
143 | 0 | const auto &mol = bond->getOwningMol(); |
144 | |
|
145 | 0 | auto explore_bond_end = [&mol, &bond, &sinfo, |
146 | 0 | &seenSquiggleBond](const Atom *atom) { |
147 | 0 | for (const auto nbr : mol.atomBonds(atom)) { |
148 | 0 | if (nbr->getIdx() != bond->getIdx()) { |
149 | 0 | if (nbr->getBondDir() == Bond::BondDir::UNKNOWN) { |
150 | 0 | seenSquiggleBond = true; |
151 | 0 | } |
152 | 0 | sinfo.controllingAtoms.push_back( |
153 | 0 | nbr->getOtherAtomIdx(atom->getIdx())); |
154 | 0 | } |
155 | 0 | } |
156 | |
|
157 | 0 | for (unsigned i = atom->getDegree(); i < 3; ++i) { |
158 | 0 | sinfo.controllingAtoms.push_back(Atom::NOATOM); |
159 | 0 | } |
160 | 0 | }; |
161 | |
|
162 | 0 | explore_bond_end(beginAtom); |
163 | 0 | explore_bond_end(endAtom); |
164 | |
|
165 | 0 | if (!seenSquiggleBond) { |
166 | | // check to see if either the begin or end atoms has the _UnknownStereo |
167 | | // property set. This happens if there was a squiggle bond to an H |
168 | 0 | int explicitUnknownStereo = 0; |
169 | 0 | if ((bond->getBeginAtom()->getPropIfPresent<int>( |
170 | 0 | common_properties::_UnknownStereo, explicitUnknownStereo) && |
171 | 0 | explicitUnknownStereo) || |
172 | 0 | (bond->getEndAtom()->getPropIfPresent<int>( |
173 | 0 | common_properties::_UnknownStereo, explicitUnknownStereo) && |
174 | 0 | explicitUnknownStereo)) { |
175 | 0 | seenSquiggleBond = true; |
176 | 0 | } |
177 | 0 | } |
178 | |
|
179 | 0 | Bond::BondStereo stereo = bond->getStereo(); |
180 | 0 | if (stereo == Bond::BondStereo::STEREOANY || |
181 | 0 | bond->getBondDir() == Bond::BondDir::EITHERDOUBLE || seenSquiggleBond) { |
182 | 0 | sinfo.specified = Chirality::StereoSpecified::Unknown; |
183 | 0 | } else if (stereo != Bond::BondStereo::STEREONONE) { |
184 | 0 | if (stereo == Bond::BondStereo::STEREOE || |
185 | 0 | stereo == Bond::BondStereo::STEREOZ) { |
186 | 0 | stereo = Chirality::translateEZLabelToCisTrans(stereo); |
187 | 0 | } |
188 | 0 | sinfo.specified = Chirality::StereoSpecified::Specified; |
189 | 0 | const auto satoms = bond->getStereoAtoms(); |
190 | 0 | if (satoms.size() != 2) { |
191 | 0 | throw ValueErrorException("only can support 2 stereo neighbors"); |
192 | 0 | } |
193 | 0 | bool firstAtBegin; |
194 | 0 | if (satoms[0] == static_cast<int>(sinfo.controllingAtoms[0])) { |
195 | 0 | firstAtBegin = true; |
196 | 0 | } else if (satoms[0] == static_cast<int>(sinfo.controllingAtoms[1])) { |
197 | 0 | firstAtBegin = false; |
198 | 0 | } else { |
199 | 0 | throw ValueErrorException("controlling atom mismatch at begin"); |
200 | 0 | } |
201 | 0 | bool firstAtEnd; |
202 | 0 | if (satoms[1] == static_cast<int>(sinfo.controllingAtoms[2])) { |
203 | 0 | firstAtEnd = true; |
204 | 0 | } else if (satoms[1] == static_cast<int>(sinfo.controllingAtoms[3])) { |
205 | 0 | firstAtEnd = false; |
206 | 0 | } else { |
207 | 0 | throw ValueErrorException("controlling atom mismatch at end"); |
208 | 0 | } |
209 | 0 | auto mismatch = firstAtBegin ^ firstAtEnd; |
210 | 0 | if (mismatch) { |
211 | 0 | stereo = (stereo == Bond::BondStereo::STEREOCIS |
212 | 0 | ? Bond::BondStereo::STEREOTRANS |
213 | 0 | : Bond::BondStereo::STEREOCIS); |
214 | 0 | } |
215 | 0 | switch (stereo) { |
216 | 0 | case Bond::BondStereo::STEREOCIS: |
217 | 0 | sinfo.descriptor = Chirality::StereoDescriptor::Bond_Cis; |
218 | 0 | break; |
219 | 0 | case Bond::BondStereo::STEREOTRANS: |
220 | 0 | sinfo.descriptor = Chirality::StereoDescriptor::Bond_Trans; |
221 | 0 | break; |
222 | 0 | default: |
223 | 0 | UNDER_CONSTRUCTION("unrecognized bond stereo type"); |
224 | 0 | } |
225 | 0 | } else { |
226 | 0 | sinfo.specified = Chirality::StereoSpecified::Unspecified; |
227 | 0 | } |
228 | 0 | } else if (bond->getBondType() == Bond::BondType::SINGLE && |
229 | 0 | (bond->getStereo() == Bond::BondStereo::STEREOATROPCCW || |
230 | 0 | bond->getStereo() == Bond::BondStereo::STEREOATROPCW)) { |
231 | 0 | if (beginAtom->getDegree() < 2 || endAtom->getDegree() < 2 || |
232 | 0 | beginAtom->getDegree() > 3 || endAtom->getDegree() > 3) { |
233 | 0 | throw ValueErrorException("invalid atom degree in getStereoInfo(bond)"); |
234 | 0 | } |
235 | | |
236 | 0 | sinfo.type = StereoType::Bond_Atropisomer; |
237 | 0 | sinfo.centeredOn = bond->getIdx(); |
238 | 0 | sinfo.controllingAtoms.reserve(4); |
239 | |
|
240 | 0 | const auto &mol = bond->getOwningMol(); |
241 | 0 | for (const auto nbr : mol.atomBonds(beginAtom)) { |
242 | 0 | if (nbr->getIdx() != bond->getIdx()) { |
243 | 0 | sinfo.controllingAtoms.push_back( |
244 | 0 | nbr->getOtherAtomIdx(beginAtom->getIdx())); |
245 | 0 | } |
246 | 0 | } |
247 | 0 | if (beginAtom->getDegree() == 2) { |
248 | 0 | sinfo.controllingAtoms.push_back(Atom::NOATOM); |
249 | 0 | } |
250 | 0 | for (const auto nbr : mol.atomBonds(endAtom)) { |
251 | 0 | if (nbr->getIdx() != bond->getIdx()) { |
252 | 0 | sinfo.controllingAtoms.push_back( |
253 | 0 | nbr->getOtherAtomIdx(endAtom->getIdx())); |
254 | 0 | } |
255 | 0 | } |
256 | 0 | if (endAtom->getDegree() == 2) { |
257 | 0 | sinfo.controllingAtoms.push_back(Atom::NOATOM); |
258 | 0 | } |
259 | |
|
260 | 0 | Bond::BondStereo stereo = bond->getStereo(); |
261 | 0 | sinfo.specified = Chirality::StereoSpecified::Specified; |
262 | 0 | switch (stereo) { |
263 | 0 | case Bond::BondStereo::STEREOATROPCW: |
264 | 0 | sinfo.descriptor = Chirality::StereoDescriptor::Bond_AtropCW; |
265 | 0 | break; |
266 | 0 | case Bond::BondStereo::STEREOATROPCCW: |
267 | 0 | sinfo.descriptor = Chirality::StereoDescriptor::Bond_AtropCCW; |
268 | 0 | break; |
269 | 0 | default: |
270 | 0 | UNDER_CONSTRUCTION("unrecognized bond stereo type"); |
271 | 0 | } |
272 | 0 | } else { |
273 | 0 | sinfo.type = StereoType::Unspecified; |
274 | 0 | } |
275 | | |
276 | 0 | return sinfo; |
277 | 0 | } |
278 | | |
279 | 0 | StereoInfo getStereoInfo(const Atom *atom) { |
280 | 0 | PRECONDITION(atom, "atom is null"); |
281 | 0 | StereoInfo sinfo; |
282 | |
|
283 | 0 | sinfo.type = StereoType::Atom_Tetrahedral; |
284 | 0 | sinfo.centeredOn = atom->getIdx(); |
285 | 0 | sinfo.controllingAtoms.reserve(atom->getDegree()); |
286 | |
|
287 | 0 | const auto &mol = atom->getOwningMol(); |
288 | 0 | int explicitUnknownStereo = 0; |
289 | 0 | for (const auto &nbri : boost::make_iterator_range(mol.getAtomBonds(atom))) { |
290 | 0 | const auto &bnd = mol[nbri]; |
291 | 0 | if (bnd->getBondDir() == Bond::UNKNOWN) { |
292 | 0 | explicitUnknownStereo = 1; |
293 | 0 | } else if (!explicitUnknownStereo) { |
294 | 0 | bnd->getPropIfPresent<int>(common_properties::_UnknownStereo, |
295 | 0 | explicitUnknownStereo); |
296 | 0 | } |
297 | 0 | sinfo.controllingAtoms.push_back(bnd->getOtherAtomIdx(atom->getIdx())); |
298 | 0 | } |
299 | 0 | std::vector<unsigned> origNbrOrder = sinfo.controllingAtoms; |
300 | 0 | std::sort(sinfo.controllingAtoms.begin(), sinfo.controllingAtoms.end()); |
301 | |
|
302 | 0 | if (explicitUnknownStereo) { |
303 | 0 | sinfo.specified = StereoSpecified::Unknown; |
304 | 0 | } else { |
305 | 0 | Atom::ChiralType stereo = atom->getChiralTag(); |
306 | 0 | if (stereo == Atom::ChiralType::CHI_TETRAHEDRAL_CCW || |
307 | 0 | stereo == Atom::ChiralType::CHI_TETRAHEDRAL_CW) { |
308 | 0 | sinfo.specified = StereoSpecified::Specified; |
309 | 0 | unsigned nSwaps = |
310 | 0 | countSwapsToInterconvert(origNbrOrder, sinfo.controllingAtoms); |
311 | 0 | if (nSwaps % 2) { |
312 | 0 | stereo = (stereo == Atom::ChiralType::CHI_TETRAHEDRAL_CCW |
313 | 0 | ? Atom::ChiralType::CHI_TETRAHEDRAL_CW |
314 | 0 | : Atom::ChiralType::CHI_TETRAHEDRAL_CCW); |
315 | 0 | } |
316 | 0 | switch (stereo) { |
317 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL_CCW: |
318 | 0 | sinfo.descriptor = StereoDescriptor::Tet_CCW; |
319 | 0 | break; |
320 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL_CW: |
321 | 0 | sinfo.descriptor = StereoDescriptor::Tet_CW; |
322 | 0 | break; |
323 | 0 | default: |
324 | 0 | UNDER_CONSTRUCTION("unrecognized chiral flag"); |
325 | 0 | } |
326 | 0 | } else if (getAllowNontetrahedralChirality() && |
327 | 0 | isAtomPotentialNontetrahedralCenter(atom)) { |
328 | 0 | if (stereo == Atom::CHI_UNSPECIFIED) { |
329 | 0 | switch (atom->getTotalDegree()) { |
330 | 0 | case 4: |
331 | | // don't assume non-tetrahedral chirality |
332 | 0 | stereo = Atom::ChiralType::CHI_TETRAHEDRAL; |
333 | 0 | break; |
334 | 0 | case 5: |
335 | 0 | stereo = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; |
336 | 0 | break; |
337 | 0 | case 6: |
338 | 0 | stereo = Atom::ChiralType::CHI_OCTAHEDRAL; |
339 | 0 | break; |
340 | 0 | default: |
341 | 0 | break; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | sinfo.descriptor = StereoDescriptor::None; |
345 | 0 | switch (stereo) { |
346 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL: |
347 | 0 | sinfo.type = StereoType::Atom_Tetrahedral; |
348 | 0 | break; |
349 | 0 | case Atom::ChiralType::CHI_SQUAREPLANAR: |
350 | 0 | sinfo.type = StereoType::Atom_SquarePlanar; |
351 | 0 | break; |
352 | 0 | case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL: |
353 | 0 | sinfo.type = StereoType::Atom_TrigonalBipyramidal; |
354 | 0 | break; |
355 | 0 | case Atom::ChiralType::CHI_OCTAHEDRAL: |
356 | 0 | sinfo.type = StereoType::Atom_Octahedral; |
357 | 0 | break; |
358 | 0 | default: |
359 | 0 | break; |
360 | 0 | } |
361 | 0 | unsigned int permutation; |
362 | 0 | if (atom->getPropIfPresent(common_properties::_chiralPermutation, |
363 | 0 | permutation)) { |
364 | 0 | sinfo.permutation = permutation; |
365 | 0 | if (!permutation) { |
366 | | // a permutation of zero is an explicit statement that the chirality |
367 | | // is unknown |
368 | 0 | sinfo.specified = Chirality::StereoSpecified::Unknown; |
369 | 0 | } else { |
370 | 0 | sinfo.specified = Chirality::StereoSpecified::Specified; |
371 | 0 | } |
372 | 0 | } |
373 | 0 | } |
374 | 0 | } |
375 | 0 | return sinfo; |
376 | 0 | } |
377 | | |
378 | 0 | bool isBondPotentialStereoBond(const Bond *bond) { |
379 | 0 | PRECONDITION(bond, "bond is null"); |
380 | 0 | if (bond->getBondType() != Bond::BondType::DOUBLE) { |
381 | 0 | return false; |
382 | 0 | } |
383 | | |
384 | | // at the moment the condition for being a potential stereo bond is that |
385 | | // each of the beginning and end neighbors must have at least 2 explicit |
386 | | // neighbors but no more than 3 total neighbors. |
387 | | // if it's a ring bond, the smallest ring it's in must have at least 8 |
388 | | // members |
389 | | // (this is common with InChI) |
390 | 0 | const auto beginAtom = bond->getBeginAtom(); |
391 | 0 | auto begDegree = beginAtom->getTotalDegree(); |
392 | 0 | const auto endAtom = bond->getEndAtom(); |
393 | 0 | auto endDegree = endAtom->getTotalDegree(); |
394 | 0 | if (begDegree > 1 && begDegree < 4 && endDegree > 1 && endDegree < 4 && |
395 | 0 | beginAtom->getTotalNumHs(true) < 2 && endAtom->getTotalNumHs(true) < 2) { |
396 | | // check rings |
397 | 0 | const auto ri = bond->getOwningMol().getRingInfo(); |
398 | 0 | for (const auto &bring : ri->bondRings()) { |
399 | 0 | if (bring.size() < minRingSizeForDoubleBondStereo && |
400 | 0 | std::find(bring.begin(), bring.end(), bond->getIdx()) != |
401 | 0 | bring.end()) { |
402 | 0 | return false; |
403 | 0 | } |
404 | 0 | } |
405 | 0 | return true; |
406 | 0 | } else { |
407 | 0 | return false; |
408 | 0 | } |
409 | 0 | } |
410 | | } // namespace detail |
411 | | |
412 | 0 | std::string getBondSymbol(const Bond *bond) { |
413 | | // FIX: this is not complete |
414 | 0 | PRECONDITION(bond, "bad bond"); |
415 | 0 | std::string res; |
416 | 0 | if (bond->getIsAromatic()) { |
417 | 0 | res = ":"; |
418 | 0 | } else { |
419 | 0 | switch (bond->getBondType()) { |
420 | 0 | case Bond::BondType::SINGLE: |
421 | 0 | res = "-"; |
422 | 0 | break; |
423 | 0 | case Bond::BondType::DOUBLE: |
424 | 0 | res = "="; |
425 | 0 | break; |
426 | 0 | case Bond::BondType::TRIPLE: |
427 | 0 | res = "#"; |
428 | 0 | break; |
429 | 0 | case Bond::BondType::AROMATIC: |
430 | 0 | res = ":"; |
431 | 0 | break; |
432 | 0 | default: |
433 | 0 | res = "?"; |
434 | 0 | break; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | return res; |
438 | 0 | } |
439 | | |
440 | | namespace { |
441 | 0 | inline std::string getAtomCompareSymbol(const Atom &atom) { |
442 | | // we originally tried this with boost::format, but it was WAY slower |
443 | 0 | return std::to_string(atom.getIsotope()) + atom.getSymbol() + |
444 | 0 | std::to_string(atom.getFormalCharge()); |
445 | 0 | } |
446 | | |
447 | | bool areStereobondControllingAtomsDupes( |
448 | | const ROMol &mol, const Bond &bond, unsigned controllingAtom1, |
449 | | unsigned controllingAtom2, const std::vector<unsigned> &atomRanks, |
450 | | const boost::dynamic_bitset<> &possibleAtoms, |
451 | | const boost::dynamic_bitset<> &knownAtoms, |
452 | | const boost::dynamic_bitset<> &possibleBonds, |
453 | 0 | const boost::dynamic_bitset<> &knownBonds) { |
454 | 0 | PRECONDITION( |
455 | 0 | controllingAtom1 != Atom::NOATOM && controllingAtom2 != Atom::NOATOM, |
456 | 0 | "Missing a controlling atom"); |
457 | |
|
458 | 0 | if (atomRanks[controllingAtom1] != atomRanks[controllingAtom2]) { |
459 | 0 | return false; |
460 | 0 | } |
461 | | |
462 | | // Now that we know we have 2 neighbors with the same rank, check whether |
463 | | // there is a common even sized ring between the controlling atoms in which |
464 | | // the atom opposite of the bond may be chiral, which would break the tie |
465 | 0 | auto ringInfo = mol.getRingInfo(); |
466 | 0 | auto atom1Members = ringInfo->atomMembers(controllingAtom1); |
467 | 0 | auto atom2Members = ringInfo->atomMembers(controllingAtom2); |
468 | |
|
469 | 0 | auto it1 = atom1Members.begin(); |
470 | 0 | auto it2 = atom2Members.begin(); |
471 | 0 | while (it1 != atom1Members.end() && it2 != atom2Members.end()) { |
472 | 0 | if (*it1 < *it2) { |
473 | 0 | ++it1; |
474 | 0 | continue; |
475 | 0 | } else if (*it1 > *it2) { |
476 | 0 | ++it2; |
477 | 0 | continue; |
478 | 0 | } |
479 | | |
480 | 0 | auto ring = ringInfo->atomRings().at(*it1); |
481 | 0 | ++it1; |
482 | 0 | ++it2; |
483 | |
|
484 | 0 | if (ring.size() % 2) { |
485 | | // The common ring is odd-sized, so we can't have a tie-breaking atom |
486 | | // directly across the ring, so skip this ring. |
487 | 0 | continue; |
488 | 0 | } |
489 | | |
490 | 0 | for (auto bondEnd : {bond.getBeginAtomIdx(), bond.getEndAtomIdx()}) { |
491 | 0 | auto bondEndPosItr = std::find(ring.begin(), ring.end(), bondEnd); |
492 | 0 | if (bondEndPosItr != ring.end()) { |
493 | 0 | auto bondEndPos = bondEndPosItr - ring.begin(); |
494 | 0 | auto oppositePos = (bondEndPos + ring.size() / 2) % ring.size(); |
495 | |
|
496 | 0 | auto oppositeIdx = ring[oppositePos]; |
497 | 0 | if (possibleAtoms[oppositeIdx] || knownAtoms[oppositeIdx]) { |
498 | 0 | return false; |
499 | 0 | } |
500 | | |
501 | | // atropisomer test: find the bond from the opposite atom that in not in |
502 | | // the ring (if there are three bonds total) |
503 | | |
504 | 0 | if (mol.getAtomWithIdx(oppositeIdx)->getDegree() == 3) { |
505 | 0 | for (const auto &nbr : |
506 | 0 | mol.atomBonds(mol.getAtomWithIdx(oppositeIdx))) { |
507 | 0 | auto outOtherAtom = nbr->getOtherAtomIdx(oppositeIdx); |
508 | 0 | auto bondOutPosItr = |
509 | 0 | std::find(ring.begin(), ring.end(), outOtherAtom); |
510 | 0 | if (bondOutPosItr == ring.end()) { |
511 | 0 | if (possibleBonds[nbr->getIdx()] || knownBonds[nbr->getIdx()]) { |
512 | 0 | return false; |
513 | 0 | } |
514 | 0 | } |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } |
518 | 0 | } |
519 | 0 | } |
520 | 0 | return true; |
521 | 0 | } |
522 | | |
523 | | } // namespace |
524 | | |
525 | | namespace { |
526 | | void initAtomInfo(ROMol &mol, bool flagPossible, bool cleanIt, |
527 | | boost::dynamic_bitset<> &knownAtoms, |
528 | | std::vector<std::string> &atomSymbols, |
529 | 0 | boost::dynamic_bitset<> &possibleAtoms) { |
530 | 0 | bool allowNontetrahedralStereo = getAllowNontetrahedralChirality(); |
531 | 0 | for (const auto atom : mol.atoms()) { |
532 | 0 | if (atom->needsUpdatePropertyCache()) { |
533 | 0 | atom->updatePropertyCache(false); |
534 | 0 | } |
535 | |
|
536 | 0 | auto aidx = atom->getIdx(); |
537 | 0 | atomSymbols[aidx] = getAtomCompareSymbol(*atom); |
538 | 0 | if (detail::isAtomPotentialStereoAtom(atom, allowNontetrahedralStereo)) { |
539 | 0 | auto sinfo = detail::getStereoInfo(atom); |
540 | 0 | switch (sinfo.specified) { |
541 | 0 | case Chirality::StereoSpecified::Unknown: |
542 | 0 | knownAtoms.set(aidx); |
543 | 0 | atomSymbols[aidx] += std::to_string(aidx); |
544 | 0 | break; |
545 | 0 | case Chirality::StereoSpecified::Specified: |
546 | 0 | knownAtoms.set(aidx); |
547 | 0 | if (sinfo.descriptor == StereoDescriptor::Tet_CCW) { |
548 | 0 | atomSymbols[aidx] += "_CCW"; |
549 | 0 | } else if (sinfo.descriptor == StereoDescriptor::Tet_CW) { |
550 | 0 | atomSymbols[aidx] += "_CW"; |
551 | 0 | } else { |
552 | 0 | atomSymbols[aidx] += "_STEREO"; |
553 | 0 | } |
554 | 0 | break; |
555 | 0 | case Chirality::StereoSpecified::Unspecified: |
556 | 0 | if (flagPossible) { |
557 | 0 | possibleAtoms.set(aidx); |
558 | 0 | if (!cleanIt) { |
559 | 0 | atomSymbols[aidx] += "_" + std::to_string(aidx); |
560 | 0 | } |
561 | 0 | } |
562 | 0 | break; |
563 | 0 | default: |
564 | 0 | throw ValueErrorException("bad StereoInfo.specified type"); |
565 | 0 | } |
566 | 0 | } else if (cleanIt) { |
567 | 0 | atom->setChiralTag(Atom::ChiralType::CHI_UNSPECIFIED); |
568 | 0 | } |
569 | 0 | } |
570 | 0 | } |
571 | | |
572 | | void initBondInfo(ROMol &mol, bool flagPossible, bool cleanIt, |
573 | | boost::dynamic_bitset<> &knownBonds, |
574 | | std::vector<std::string> &bondSymbols, |
575 | 0 | boost::dynamic_bitset<> &possibleBonds) { |
576 | 0 | for (const auto bond : mol.bonds()) { |
577 | 0 | auto bidx = bond->getIdx(); |
578 | 0 | bondSymbols[bidx] = getBondSymbol(bond); |
579 | 0 | if (detail::isBondPotentialStereoBond(bond)) { |
580 | 0 | auto sinfo = detail::getStereoInfo(bond); |
581 | 0 | switch (sinfo.specified) { |
582 | 0 | case Chirality::StereoSpecified::Unknown: |
583 | 0 | knownBonds.set(bidx); |
584 | 0 | bondSymbols[bidx] += "_" + std::to_string(bidx); |
585 | 0 | break; |
586 | 0 | case Chirality::StereoSpecified::Specified: |
587 | 0 | knownBonds.set(bidx); |
588 | 0 | if (sinfo.descriptor == StereoDescriptor::Bond_Cis) { |
589 | 0 | bondSymbols[bidx] += "_cis"; |
590 | 0 | } else if (sinfo.descriptor == StereoDescriptor::Bond_Trans) { |
591 | 0 | bondSymbols[bidx] += "_trans"; |
592 | 0 | } else { |
593 | 0 | bondSymbols[bidx] += "_STEREO"; |
594 | 0 | } |
595 | 0 | break; |
596 | 0 | case Chirality::StereoSpecified::Unspecified: |
597 | 0 | if (flagPossible) { |
598 | 0 | possibleBonds.set(bidx); |
599 | 0 | if (!cleanIt) { |
600 | 0 | bondSymbols[bidx] += "_" + std::to_string(bidx); |
601 | 0 | } |
602 | 0 | } |
603 | 0 | break; |
604 | 0 | default: |
605 | 0 | throw ValueErrorException("bad StereoInfo.specified type"); |
606 | 0 | } |
607 | 0 | } else { |
608 | 0 | auto currentStereo = bond->getStereo(); |
609 | 0 | if (currentStereo != Bond::BondStereo::STEREOATROPCW && |
610 | 0 | currentStereo != Bond::BondStereo::STEREOATROPCCW) { |
611 | 0 | if (cleanIt) { |
612 | 0 | bond->setStereo(Bond::BondStereo::STEREONONE); |
613 | 0 | } |
614 | 0 | } else { |
615 | 0 | knownBonds.set(bidx); |
616 | 0 | if (currentStereo == Bond::BondStereo::STEREOATROPCW) { |
617 | 0 | bondSymbols[bidx] += "_atropcw"; |
618 | 0 | } else if (currentStereo == Bond::BondStereo::STEREOATROPCCW) { |
619 | 0 | bondSymbols[bidx] += "_atropccw"; |
620 | 0 | } |
621 | 0 | } |
622 | 0 | } |
623 | 0 | } |
624 | 0 | } |
625 | | void flagRingStereo(ROMol &mol, |
626 | | std::vector<unsigned int> &possibleRingStereoAtoms, |
627 | | std::vector<unsigned int> &possibleRingStereoBonds, |
628 | | const boost::dynamic_bitset<> &knownAtoms, |
629 | | const boost::dynamic_bitset<> *possibleAtoms, |
630 | | const boost::dynamic_bitset<> &knownBonds, |
631 | 0 | const boost::dynamic_bitset<> *possibleBonds) { |
632 | | // flag possible ring stereo cases. The relevant cases here are: |
633 | | // 1) even-sized rings with possible (or specified) atoms opposite each |
634 | | // other, like CC1CC(C)C1 or CC1CCC(C)CC1 |
635 | | // 2) atoms sharing a bond which fuses two or more rings, like the |
636 | | // central |
637 | | // bond in C1CCC2CCCCC2C1 |
638 | |
|
639 | 0 | auto ringInfo = mol.getRingInfo(); |
640 | 0 | boost::dynamic_bitset<> possibleAtomsInRing(mol.getNumAtoms()); |
641 | 0 | for (unsigned int ridx = 0; ridx < ringInfo->atomRings().size(); ++ridx) { |
642 | 0 | const auto å = ringInfo->atomRings()[ridx]; |
643 | 0 | const auto &bring = ringInfo->bondRings()[ridx]; |
644 | 0 | unsigned int nHere = 0; |
645 | 0 | auto sz = aring.size(); |
646 | 0 | bool ringIsOddSized = sz % 2; |
647 | 0 | auto halfSize = sz / 2 + ringIsOddSized; |
648 | |
|
649 | 0 | possibleAtomsInRing.reset(); |
650 | 0 | for (unsigned int ai = 0; ai < sz; ++ai) { |
651 | 0 | auto aidx = aring[ai]; |
652 | 0 | if (!knownAtoms[aidx] && (!possibleAtoms || !possibleAtoms->test(aidx))) { |
653 | 0 | continue; |
654 | 0 | } |
655 | | |
656 | 0 | for (unsigned int ringDivisor : {2, 3}) { |
657 | 0 | bool ringIsMultipleOfDivisor = ((sz % ringDivisor) == 0); |
658 | 0 | auto incrementSize = sz / ringDivisor; |
659 | |
|
660 | 0 | if (ringIsMultipleOfDivisor) { |
661 | | // find the two indices of the atoms 1/3 the way around the ring |
662 | |
|
663 | 0 | unsigned int otherFoundByBondCount = 0; |
664 | 0 | unsigned int otherFoundByAtomCount = 0; |
665 | 0 | for (unsigned int indexIncrement = incrementSize; indexIncrement < sz; |
666 | 0 | indexIncrement += incrementSize) { |
667 | 0 | auto otherIdx = aring[(ai + indexIncrement) % sz]; |
668 | 0 | auto otherAtom = mol.getAtomWithIdx(otherIdx); |
669 | |
|
670 | 0 | for (auto bond : mol.atomBonds(otherAtom)) { |
671 | 0 | auto bidx = bond->getIdx(); |
672 | 0 | if ((knownBonds[bidx] || |
673 | 0 | (possibleBonds && possibleBonds->test(bidx))) && |
674 | 0 | std::find(bring.begin(), bring.end(), bidx) == bring.end()) { |
675 | 0 | otherFoundByBondCount++; |
676 | 0 | break; |
677 | 0 | } |
678 | 0 | } |
679 | 0 | if (otherFoundByBondCount == 0) { |
680 | 0 | if (knownAtoms[otherIdx] || |
681 | 0 | (possibleAtoms && possibleAtoms->test(otherIdx))) { |
682 | 0 | otherFoundByAtomCount++; |
683 | 0 | } |
684 | 0 | } |
685 | 0 | } |
686 | |
|
687 | 0 | if (otherFoundByBondCount == ringDivisor - 1 || |
688 | 0 | otherFoundByAtomCount == ringDivisor - 1) { |
689 | 0 | nHere += 1 + otherFoundByBondCount; |
690 | 0 | for (unsigned int indexIncrement = 0; indexIncrement < sz; |
691 | 0 | indexIncrement += incrementSize) { |
692 | 0 | possibleAtomsInRing.set(aring[(ai + indexIncrement) % sz]); |
693 | 0 | } |
694 | 0 | if (ringDivisor == 2) { |
695 | 0 | mol.getAtomWithIdx(aidx)->setProp( |
696 | 0 | common_properties::_ringStereoOtherAtom, |
697 | 0 | aring[(ai + incrementSize) % sz]); |
698 | 0 | } |
699 | |
|
700 | 0 | continue; |
701 | 0 | } |
702 | 0 | } |
703 | 0 | } |
704 | | |
705 | | // if the atom is in more than one ring, explore the common edge to |
706 | | // see if we can find another potentially chiral atom |
707 | 0 | if (ringInfo->numAtomRings(aidx) > 1) { |
708 | 0 | auto previousOtherIdx = aidx; |
709 | 0 | for (size_t step = 1; step <= halfSize; ++step) { |
710 | 0 | auto otherIdx = aring[(ai + step) % sz]; |
711 | 0 | auto bnd = mol.getBondBetweenAtoms(previousOtherIdx, otherIdx); |
712 | 0 | if (ringInfo->numBondRings(bnd->getIdx()) < 2) { |
713 | | // We reached the end of the common edge. |
714 | 0 | break; |
715 | 0 | } |
716 | 0 | if (knownAtoms[otherIdx] || |
717 | 0 | (possibleAtoms && possibleAtoms->test(otherIdx))) { |
718 | | // We found another chiral atom, no need to keep |
719 | | // searching. |
720 | 0 | nHere += 2; |
721 | 0 | possibleAtomsInRing.set(aidx); |
722 | 0 | possibleAtomsInRing.set(otherIdx); |
723 | 0 | mol.getAtomWithIdx(aidx)->setProp( |
724 | 0 | common_properties::_ringStereoOtherAtom, otherIdx); |
725 | 0 | mol.getAtomWithIdx(otherIdx)->setProp( |
726 | 0 | common_properties::_ringStereoOtherAtom, aidx); |
727 | 0 | break; |
728 | 0 | } |
729 | 0 | previousOtherIdx = otherIdx; |
730 | 0 | } |
731 | 0 | } |
732 | 0 | } |
733 | | // if the ring contains at least two atoms with possible stereo, |
734 | | // then each of those possibleAtoms should be included for ring stereo |
735 | 0 | if (nHere > 1) { |
736 | 0 | for (auto aidx : aring) { |
737 | 0 | if (possibleAtomsInRing[aidx]) { |
738 | 0 | ++possibleRingStereoAtoms[aidx]; |
739 | 0 | } |
740 | 0 | } |
741 | 0 | for (auto bidx : bring) { |
742 | 0 | ++possibleRingStereoBonds[bidx]; |
743 | 0 | } |
744 | 0 | } |
745 | 0 | } |
746 | 0 | } |
747 | | |
748 | | bool updateAtoms(ROMol &mol, const std::vector<unsigned int> &aranks, |
749 | | std::vector<std::string> &atomSymbols, |
750 | | boost::dynamic_bitset<> &possibleAtoms, |
751 | | boost::dynamic_bitset<> &knownAtoms, |
752 | | boost::dynamic_bitset<> &fixedAtoms, |
753 | | std::vector<unsigned int> &possibleRingStereoAtoms, |
754 | | std::vector<unsigned int> &possibleRingStereoBonds, |
755 | 0 | std::vector<StereoInfo> &sinfos) { |
756 | 0 | bool needAnotherRound = false; |
757 | 0 | for (const auto atom : mol.atoms()) { |
758 | 0 | auto aidx = atom->getIdx(); |
759 | 0 | if (knownAtoms[aidx] || possibleAtoms[aidx]) { |
760 | 0 | auto sinfo = detail::getStereoInfo(atom); |
761 | 0 | if (fixedAtoms[aidx]) { |
762 | 0 | sinfos.push_back(std::move(sinfo)); |
763 | 0 | } else { |
764 | 0 | std::vector<unsigned int> nbrs; |
765 | 0 | nbrs.reserve(sinfo.controllingAtoms.size()); |
766 | 0 | bool haveADupe = false; |
767 | 0 | if (sinfo.type == StereoType::Atom_Tetrahedral) { |
768 | 0 | for (auto nbrIdx : sinfo.controllingAtoms) { |
769 | 0 | auto rnk = aranks[nbrIdx]; |
770 | 0 | if (std::find(nbrs.begin(), nbrs.end(), rnk) != nbrs.end()) { |
771 | | // ok, we just hit a duplicate rank. If the atom we're |
772 | | // concerned about is a candidate for ring stereo and the bond |
773 | | // to the atom with the duplicate rank is a ring bond |
774 | 0 | if (possibleRingStereoAtoms[aidx]) { |
775 | 0 | auto bnd = mol.getBondBetweenAtoms(aidx, nbrIdx); |
776 | 0 | if (!bnd || !possibleRingStereoBonds[bnd->getIdx()]) { |
777 | 0 | haveADupe = true; |
778 | 0 | break; |
779 | 0 | } |
780 | 0 | } else { |
781 | 0 | haveADupe = true; |
782 | 0 | break; |
783 | 0 | } |
784 | 0 | } else { |
785 | 0 | nbrs.push_back(rnk); |
786 | 0 | } |
787 | 0 | } |
788 | 0 | } |
789 | 0 | if (!haveADupe) { |
790 | 0 | auto acs = atomSymbols[aidx]; |
791 | 0 | if (!possibleAtoms[aidx]) { |
792 | 0 | auto sortednbrs = nbrs; |
793 | 0 | std::sort(sortednbrs.begin(), sortednbrs.end()); |
794 | | // FIX: only works for tetrahedral at the moment |
795 | 0 | if (sinfo.type == Chirality::StereoType::Atom_Tetrahedral) { |
796 | 0 | auto nSwaps = countSwapsToInterconvert(nbrs, sortednbrs); |
797 | 0 | if (nSwaps % 2 && |
798 | 0 | (sinfo.descriptor == Chirality::StereoDescriptor::Tet_CCW || |
799 | 0 | sinfo.descriptor == Chirality::StereoDescriptor::Tet_CW)) { |
800 | 0 | sinfo.descriptor = |
801 | 0 | sinfo.descriptor == Chirality::StereoDescriptor::Tet_CCW |
802 | 0 | ? Chirality::StereoDescriptor::Tet_CW |
803 | 0 | : Chirality::StereoDescriptor::Tet_CCW; |
804 | 0 | } |
805 | 0 | if (sinfo.descriptor == Chirality::StereoDescriptor::Tet_CW) { |
806 | 0 | acs = getAtomCompareSymbol(*atom) + "_CW"; |
807 | 0 | } else if (sinfo.descriptor == |
808 | 0 | Chirality::StereoDescriptor::Tet_CCW) { |
809 | 0 | acs = getAtomCompareSymbol(*atom) + "_CCW"; |
810 | 0 | } |
811 | 0 | } |
812 | 0 | fixedAtoms.set(aidx); |
813 | 0 | } |
814 | 0 | if (atomSymbols[aidx] != acs) { |
815 | 0 | atomSymbols[aidx] = acs; |
816 | 0 | needAnotherRound = true; |
817 | 0 | } |
818 | 0 | sinfos.push_back(std::move(sinfo)); |
819 | 0 | } else { |
820 | | // Only do another round if we change anything here |
821 | 0 | needAnotherRound |= possibleAtoms[aidx]; |
822 | 0 | possibleAtoms[aidx] = 0; |
823 | 0 | atomSymbols[aidx] = getAtomCompareSymbol(*atom); |
824 | | |
825 | | // if this was creating possible ring stereo, update that info now |
826 | 0 | if (possibleRingStereoAtoms[aidx]) { |
827 | 0 | possibleRingStereoAtoms[aidx] = 0; |
828 | 0 | needAnotherRound = true; |
829 | | // we're no longer in any ring with possible ring stereo. Go |
830 | | // update all the other atoms/bonds in rings that we're in: |
831 | 0 | for (unsigned int ridx = 0; |
832 | 0 | ridx < mol.getRingInfo()->atomRings().size(); ++ridx) { |
833 | 0 | const auto å = mol.getRingInfo()->atomRings()[ridx]; |
834 | 0 | unsigned int nHere = 0; |
835 | 0 | for (auto raidx : aring) { |
836 | | // Ring stereo changed, so un-fix atoms in this ring so we can |
837 | | // recheck them in the next iteration, for the case that they |
838 | | // are no longer after the current atom was declared |
839 | | // non-chiral |
840 | 0 | fixedAtoms[raidx] = false; |
841 | 0 | nHere += (possibleRingStereoAtoms[raidx] > 0); |
842 | 0 | } |
843 | 0 | if (nHere <= 1) { |
844 | | // if the ring can't transmit stereo anymore, update the |
845 | | // counts |
846 | 0 | if (nHere == 1) { |
847 | | // update the last potential ring stereo atom in the ring, |
848 | | // since it can't have ring stereo alone. |
849 | 0 | for (auto raidx : aring) { |
850 | 0 | if (possibleRingStereoAtoms[raidx]) { |
851 | 0 | --possibleRingStereoAtoms[raidx]; |
852 | 0 | break; |
853 | 0 | } |
854 | 0 | } |
855 | 0 | } |
856 | 0 | for (auto rbidx : mol.getRingInfo()->bondRings()[ridx]) { |
857 | 0 | if (possibleRingStereoBonds[rbidx]) { |
858 | 0 | --possibleRingStereoBonds[rbidx]; |
859 | 0 | } |
860 | 0 | } |
861 | 0 | } |
862 | 0 | } |
863 | 0 | } |
864 | 0 | } |
865 | 0 | } |
866 | 0 | } |
867 | 0 | } |
868 | 0 | return needAnotherRound; |
869 | 0 | } |
870 | | |
871 | | bool updateBonds(ROMol &mol, const std::vector<unsigned int> &aranks, |
872 | | std::vector<std::string> &bondSymbols, |
873 | | const boost::dynamic_bitset<> &possibleAtoms, |
874 | | boost::dynamic_bitset<> &possibleBonds, |
875 | | const boost::dynamic_bitset<> &knownAtoms, |
876 | | const boost::dynamic_bitset<> &knownBonds, |
877 | | boost::dynamic_bitset<> &fixedBonds, |
878 | 0 | std::vector<StereoInfo> &sinfos) { |
879 | 0 | bool needAnotherRound = false; |
880 | 0 | for (const auto bond : mol.bonds()) { |
881 | 0 | auto bidx = bond->getIdx(); |
882 | 0 | if (knownBonds[bidx] || possibleBonds[bidx]) { |
883 | 0 | auto sinfo = detail::getStereoInfo(bond); |
884 | 0 | if (sinfo.type == Chirality::StereoType::Unspecified) { |
885 | 0 | continue; // not a double bond nor an atropisomer bond |
886 | 0 | } |
887 | | |
888 | 0 | ASSERT_INVARIANT(sinfo.controllingAtoms.size() == 4, |
889 | 0 | "bad controlling atoms size"); |
890 | |
|
891 | 0 | if ((sinfo.controllingAtoms[0] == Atom::NOATOM && |
892 | 0 | sinfo.controllingAtoms[1] == Atom::NOATOM) || |
893 | 0 | (sinfo.controllingAtoms[2] == Atom::NOATOM && |
894 | 0 | sinfo.controllingAtoms[3] == Atom::NOATOM)) { |
895 | | // we have a bond with no neighbors on one side, which means it must |
896 | | // have a single implicit H on that side. Since the H is implicit, |
897 | | // there is no way to know whether it is cis or trans. |
898 | 0 | ASSERT_INVARIANT( |
899 | 0 | sinfo.specified != StereoSpecified::Specified, |
900 | 0 | "stereo bond without neighbors can only be unspecified"); |
901 | 0 | fixedBonds.set(bidx); |
902 | 0 | } |
903 | |
|
904 | 0 | if (fixedBonds[bidx]) { |
905 | 0 | sinfos.push_back(std::move(sinfo)); |
906 | 0 | } else { |
907 | 0 | bool haveADupe = false; |
908 | 0 | bool needsSwap = false; |
909 | 0 | if (sinfo.controllingAtoms[0] != Atom::NOATOM && |
910 | 0 | sinfo.controllingAtoms[1] != Atom::NOATOM) { |
911 | 0 | if (areStereobondControllingAtomsDupes( |
912 | 0 | mol, *bond, sinfo.controllingAtoms[0], |
913 | 0 | sinfo.controllingAtoms[1], aranks, possibleAtoms, knownAtoms, |
914 | 0 | possibleBonds, knownBonds)) { |
915 | 0 | haveADupe = true; |
916 | 0 | } else if (aranks[sinfo.controllingAtoms[0]] < |
917 | 0 | aranks[sinfo.controllingAtoms[1]]) { |
918 | 0 | std::swap(sinfo.controllingAtoms[0], sinfo.controllingAtoms[1]); |
919 | 0 | needsSwap = !needsSwap; |
920 | 0 | } |
921 | 0 | } |
922 | 0 | if (sinfo.controllingAtoms[2] != Atom::NOATOM && |
923 | 0 | sinfo.controllingAtoms[3] != Atom::NOATOM) { |
924 | 0 | if (areStereobondControllingAtomsDupes( |
925 | 0 | mol, *bond, sinfo.controllingAtoms[2], |
926 | 0 | sinfo.controllingAtoms[3], aranks, possibleAtoms, knownAtoms, |
927 | 0 | possibleBonds, knownBonds)) { |
928 | 0 | haveADupe = true; |
929 | 0 | } else if (aranks[sinfo.controllingAtoms[2]] < |
930 | 0 | aranks[sinfo.controllingAtoms[3]]) { |
931 | 0 | std::swap(sinfo.controllingAtoms[2], sinfo.controllingAtoms[3]); |
932 | 0 | needsSwap = !needsSwap; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | if (!haveADupe) { |
936 | 0 | if (needsSwap && (sinfo.descriptor == StereoDescriptor::Bond_Cis || |
937 | 0 | sinfo.descriptor == StereoDescriptor::Bond_Trans)) { |
938 | 0 | sinfo.descriptor = sinfo.descriptor == StereoDescriptor::Bond_Cis |
939 | 0 | ? StereoDescriptor::Bond_Trans |
940 | 0 | : StereoDescriptor::Bond_Cis; |
941 | 0 | } |
942 | 0 | auto gbs = bondSymbols[bidx]; |
943 | 0 | if (sinfo.specified == StereoSpecified::Specified) { |
944 | 0 | switch (sinfo.descriptor) { |
945 | 0 | case StereoDescriptor::Bond_Cis: |
946 | 0 | gbs += "_cis"; |
947 | 0 | break; |
948 | 0 | case StereoDescriptor::Bond_Trans: |
949 | 0 | gbs += "_trans"; |
950 | 0 | break; |
951 | 0 | default: |
952 | 0 | break; |
953 | 0 | } |
954 | 0 | } else if (sinfo.specified == StereoSpecified::Unknown) { |
955 | 0 | gbs += "_unk"; |
956 | 0 | } |
957 | 0 | if (bondSymbols[bidx] != gbs) { |
958 | 0 | bondSymbols[bidx] = gbs; |
959 | 0 | needAnotherRound = true; |
960 | 0 | } |
961 | 0 | if (!possibleBonds[bidx]) { |
962 | 0 | fixedBonds.set(bidx); |
963 | 0 | } |
964 | 0 | sinfos.push_back(std::move(sinfo)); |
965 | 0 | } else if (possibleBonds[bidx]) { |
966 | 0 | possibleBonds[bidx] = 0; |
967 | 0 | bondSymbols[bidx] = getBondSymbol(bond); |
968 | 0 | needAnotherRound = true; |
969 | 0 | } |
970 | 0 | } |
971 | 0 | } |
972 | 0 | } |
973 | 0 | return needAnotherRound; |
974 | 0 | } |
975 | | |
976 | | void cleanMolStereo(ROMol &mol, const boost::dynamic_bitset<> &fixedAtoms, |
977 | | const boost::dynamic_bitset<> &knownAtoms, |
978 | | const boost::dynamic_bitset<> &fixedBonds, |
979 | 0 | const boost::dynamic_bitset<> &knownBonds) { |
980 | 0 | for (auto atom : mol.atoms()) { |
981 | 0 | const auto i = atom->getIdx(); |
982 | 0 | if (!fixedAtoms[i] && knownAtoms[i]) { |
983 | 0 | switch (atom->getChiralTag()) { |
984 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL_CCW: |
985 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL_CW: |
986 | 0 | atom->setChiralTag(Atom::ChiralType::CHI_UNSPECIFIED); |
987 | 0 | for (auto nbrBond : mol.atomBonds(atom)) { |
988 | 0 | auto bondDir = nbrBond->getBondDir(); |
989 | 0 | if (bondDir == Bond::BondDir::BEGINDASH || |
990 | 0 | bondDir == Bond::BondDir::BEGINWEDGE) { |
991 | 0 | nbrBond->setBondDir(Bond::BondDir::NONE); |
992 | 0 | } |
993 | 0 | } |
994 | 0 | break; |
995 | 0 | case Atom::ChiralType::CHI_TETRAHEDRAL: |
996 | 0 | case Atom::ChiralType::CHI_SQUAREPLANAR: |
997 | 0 | case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL: |
998 | 0 | case Atom::ChiralType::CHI_OCTAHEDRAL: |
999 | 0 | atom->setProp(common_properties::_chiralPermutation, 0); |
1000 | 0 | break; |
1001 | 0 | default: |
1002 | 0 | break; |
1003 | 0 | } |
1004 | 0 | } |
1005 | 0 | } |
1006 | | |
1007 | 0 | bool removedStereo = false; |
1008 | 0 | for (auto bond : mol.bonds()) { |
1009 | 0 | const auto i = bond->getIdx(); |
1010 | 0 | if (!fixedBonds[i] && knownBonds[i]) { |
1011 | 0 | bond->setStereo(Bond::BondStereo::STEREONONE); |
1012 | 0 | bond->setBondDir(Bond::BondDir::NONE); |
1013 | 0 | bond->getStereoAtoms().clear(); |
1014 | 0 | removedStereo = true; |
1015 | 0 | } |
1016 | 0 | } |
1017 | | |
1018 | | // remove any slash bond dirs that do not have a stereo neighbor |
1019 | |
|
1020 | 0 | if (removedStereo) { |
1021 | 0 | for (auto bond : mol.bonds()) { |
1022 | 0 | auto bondDir = bond->getBondDir(); |
1023 | 0 | if (bondDir == Bond::BondDir::ENDDOWNRIGHT || |
1024 | 0 | bondDir == Bond::BondDir::ENDUPRIGHT) { |
1025 | 0 | bool dirOk = false; |
1026 | 0 | for (auto bondEnd : {bond->getBeginAtom(), bond->getEndAtom()}) { |
1027 | 0 | for (auto nbrBond : mol.atomBonds(bondEnd)) { |
1028 | 0 | if (nbrBond != bond && |
1029 | 0 | nbrBond->getStereo() != Bond::BondStereo::STEREONONE) { |
1030 | 0 | dirOk = true; |
1031 | 0 | break; |
1032 | 0 | } |
1033 | 0 | } |
1034 | 0 | if (!dirOk) { |
1035 | 0 | bond->setBondDir(Bond::BondDir::NONE); |
1036 | 0 | } |
1037 | 0 | } |
1038 | 0 | } |
1039 | 0 | } |
1040 | 0 | } |
1041 | 0 | } |
1042 | | } // namespace |
1043 | | |
1044 | | void findChiralAtomSpecialCases(ROMol &mol, |
1045 | | boost::dynamic_bitset<> &possibleSpecialCases, |
1046 | | const std::vector<unsigned int> &atomRanks); |
1047 | | |
1048 | | std::vector<StereoInfo> runCleanup(ROMol &mol, bool flagPossible, |
1049 | 0 | bool cleanIt) { |
1050 | | // This potentially does two passes of "canonicalization" to identify |
1051 | | // stereo atoms/bonds: |
1052 | | // - if cleanIt is true we start with a pass which ignores possible |
1053 | | // stereo atoms/bonds and which removes the stereo spec from any |
1054 | | // atom/bond which doesn't have unique neighbors |
1055 | | // - if flagPossible is true we do a pass where each unspecified |
1056 | | // possible stereocenter is treated as if it were different from all |
1057 | | // others. This allows us to identify every possible stereo atom/bond |
1058 | |
|
1059 | 0 | boost::dynamic_bitset<> knownAtoms(mol.getNumAtoms()); |
1060 | 0 | std::vector<std::string> atomSymbols(mol.getNumAtoms()); |
1061 | 0 | boost::dynamic_bitset<> possibleAtoms(mol.getNumAtoms()); |
1062 | 0 | initAtomInfo(mol, flagPossible, cleanIt, knownAtoms, atomSymbols, |
1063 | 0 | possibleAtoms); |
1064 | |
|
1065 | 0 | std::vector<std::string> bondSymbols(mol.getNumBonds()); |
1066 | 0 | boost::dynamic_bitset<> knownBonds(mol.getNumBonds()); |
1067 | 0 | boost::dynamic_bitset<> possibleBonds(mol.getNumBonds()); |
1068 | 0 | initBondInfo(mol, flagPossible, cleanIt, knownBonds, bondSymbols, |
1069 | 0 | possibleBonds); |
1070 | | |
1071 | | // copy the original sets of possible atoms/bonds. We need them in the |
1072 | | // second pass |
1073 | 0 | auto origPossibleAtoms = possibleAtoms; |
1074 | 0 | auto origPossibleBonds = possibleBonds; |
1075 | | |
1076 | | // tracks the number of rings with possible ring stereo that the atom is |
1077 | | // in (only set for potential stereoatoms) |
1078 | 0 | std::vector<unsigned int> possibleRingStereoAtoms(mol.getNumAtoms()); |
1079 | | // tracks the number of rings with possible ring stereo that the bond is |
1080 | | // in (set for all bonds) |
1081 | 0 | std::vector<unsigned int> possibleRingStereoBonds(mol.getNumBonds()); |
1082 | | |
1083 | | // identify atoms which can be involved in ring stereo |
1084 | 0 | flagRingStereo(mol, possibleRingStereoAtoms, possibleRingStereoBonds, |
1085 | 0 | knownAtoms, cleanIt ? nullptr : &possibleAtoms, knownBonds, |
1086 | 0 | cleanIt ? nullptr : &possibleBonds); |
1087 | | |
1088 | | // our return value |
1089 | 0 | std::vector<StereoInfo> res; |
1090 | | |
1091 | | // these are used to track which atoms/bonds have been altered |
1092 | 0 | boost::dynamic_bitset<> fixedAtoms(mol.getNumAtoms()); |
1093 | 0 | boost::dynamic_bitset<> fixedBonds(mol.getNumBonds()); |
1094 | | |
1095 | | // used to tell rankFragmentAtoms to use all atoms: |
1096 | 0 | boost::dynamic_bitset<> atomsInPlay(mol.getNumAtoms()); |
1097 | 0 | atomsInPlay.set(); |
1098 | 0 | boost::dynamic_bitset<> bondsInPlay(mol.getNumBonds()); |
1099 | 0 | bondsInPlay.set(); |
1100 | |
|
1101 | 0 | #define LOCAL_CANON 0 |
1102 | | #if LOCAL_CANON |
1103 | | std::vector<Canon::canon_atom> canonAtoms(mol.getNumAtoms()); |
1104 | | Canon::AtomCompareFunctor ftor(&canonAtoms.front(), mol, &atomsInPlay, |
1105 | | &bondsInPlay); |
1106 | | ftor.df_useIsotopes = false; |
1107 | | ftor.df_useChirality = false; |
1108 | | auto atomOrder = new int[mol.getNumAtoms()]; |
1109 | | #endif |
1110 | 0 | std::vector<unsigned int> aranks(mol.getNumAtoms()); |
1111 | 0 | bool needAnotherRound = true; |
1112 | 0 | while (needAnotherRound) { |
1113 | 0 | res.clear(); |
1114 | | #if LOCAL_CANON |
1115 | | // find symmetry classes with the canonicalization code |
1116 | | Canon::detail::initFragmentCanonAtoms(mol, canonAtoms, false, &atomSymbols, |
1117 | | &bondSymbols, atomsInPlay, |
1118 | | bondsInPlay, needsInit); |
1119 | | needsInit = false; |
1120 | | |
1121 | | const bool includeChirality = false; |
1122 | | const bool includeIsotopes = false; |
1123 | | const bool breakTies = false; |
1124 | | memset(atomOrder, 0, mol.getNumAtoms() * sizeof(int)); |
1125 | | Canon::detail::rankWithFunctor(ftor, breakTies, atomOrder, true, |
1126 | | includeChirality, &atomsInPlay, |
1127 | | &bondsInPlay); |
1128 | | for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { |
1129 | | aranks[atomOrder[i]] = canonAtoms[atomOrder[i]].index; |
1130 | | } |
1131 | | #else |
1132 | 0 | const bool includeChirality = false; |
1133 | 0 | const bool includeIsotopes = false; |
1134 | 0 | const bool breakTies = false; |
1135 | 0 | const bool includeAtomMaps = false; |
1136 | 0 | const bool includeChiralPresence = false; |
1137 | 0 | const bool useRingStereo = false; |
1138 | | // Now apply the canonical atom ranking code with basic connectivity |
1139 | | // invariants The necessary condition for chirality is that an atom's |
1140 | | // neighbors must have unique ranks |
1141 | 0 | Canon::rankFragmentAtoms(mol, aranks, atomsInPlay, bondsInPlay, |
1142 | 0 | &atomSymbols, &bondSymbols, breakTies, |
1143 | 0 | includeChirality, includeIsotopes, includeAtomMaps, |
1144 | 0 | includeChiralPresence, useRingStereo); |
1145 | 0 | #endif |
1146 | | // check if any new atoms definitely now have stereo; do another loop if |
1147 | | // so |
1148 | 0 | needAnotherRound = updateAtoms( |
1149 | 0 | mol, aranks, atomSymbols, possibleAtoms, knownAtoms, fixedAtoms, |
1150 | 0 | possibleRingStereoAtoms, possibleRingStereoBonds, res); |
1151 | | // check if any new bonds definitely now have stereo; do another loop if |
1152 | | // so |
1153 | 0 | needAnotherRound |= |
1154 | 0 | updateBonds(mol, aranks, bondSymbols, possibleAtoms, possibleBonds, |
1155 | 0 | knownAtoms, knownBonds, fixedBonds, res); |
1156 | 0 | } |
1157 | |
|
1158 | 0 | if (cleanIt) { |
1159 | | // remove stereo specs from atoms/bonds which should not have them |
1160 | 0 | cleanMolStereo(mol, fixedAtoms, knownAtoms, fixedBonds, knownBonds); |
1161 | 0 | } |
1162 | 0 | if (flagPossible && (possibleAtoms != origPossibleAtoms || |
1163 | 0 | possibleBonds != origPossibleBonds)) { |
1164 | | // if we're doing "flagPossible" mode and have done some cleanup, then |
1165 | | // we need to do another iteration |
1166 | |
|
1167 | 0 | possibleAtoms = origPossibleAtoms; |
1168 | | // flag every center/bond where we removed stereo as possible: |
1169 | 0 | for (auto i = 0u; i < mol.getNumAtoms(); ++i) { |
1170 | 0 | if (!fixedAtoms[i] && knownAtoms[i]) { |
1171 | 0 | possibleAtoms[i] = 1; |
1172 | 0 | knownAtoms[i] = 0; |
1173 | 0 | } |
1174 | 0 | if (possibleAtoms[i]) { |
1175 | 0 | atomSymbols[i] += "_" + std::to_string(i); |
1176 | 0 | } |
1177 | 0 | } |
1178 | 0 | possibleBonds = origPossibleBonds; |
1179 | 0 | for (auto i = 0u; i < mol.getNumBonds(); ++i) { |
1180 | 0 | if (!fixedBonds[i] && knownBonds[i]) { |
1181 | 0 | possibleBonds[i] = 1; |
1182 | 0 | knownBonds[i] = 0; |
1183 | 0 | } |
1184 | 0 | if (possibleBonds[i]) { |
1185 | 0 | bondSymbols[i] += "_" + std::to_string(i); |
1186 | 0 | } |
1187 | 0 | } |
1188 | |
|
1189 | 0 | flagRingStereo(mol, possibleRingStereoAtoms, possibleRingStereoBonds, |
1190 | 0 | knownAtoms, &possibleAtoms, knownBonds, &possibleBonds); |
1191 | |
|
1192 | 0 | needAnotherRound = true; |
1193 | 0 | while (needAnotherRound) { |
1194 | 0 | res.clear(); |
1195 | | |
1196 | | // std::copy(atomSymbols.begin(), atomSymbols.end(), |
1197 | | // std::ostream_iterator<std::string>(std::cerr, " ")); |
1198 | | // std::cerr << std::endl; |
1199 | | // std::copy(bondSymbols.begin(), bondSymbols.end(), |
1200 | | // std::ostream_iterator<std::string>(std::cerr, " ")); |
1201 | | // std::cerr << std::endl; |
1202 | |
|
1203 | | #if LOCAL_CANON |
1204 | | Canon::detail::initFragmentCanonAtoms(mol, canonAtoms, false, |
1205 | | &atomSymbols, &bondSymbols, |
1206 | | atomsInPlay, bondsInPlay, true); |
1207 | | needsInit = false; |
1208 | | |
1209 | | const bool includeChirality = false; |
1210 | | const bool breakTies = false; |
1211 | | memset(atomOrder, 0, mol.getNumAtoms() * sizeof(int)); |
1212 | | Canon::detail::rankWithFunctor(ftor, breakTies, atomOrder, true, |
1213 | | includeChirality, &atomsInPlay, |
1214 | | &bondsInPlay); |
1215 | | for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { |
1216 | | aranks[atomOrder[i]] = canonAtoms[atomOrder[i]].index; |
1217 | | } |
1218 | | |
1219 | | #else |
1220 | | // we will use the canonicalization code |
1221 | 0 | const bool breakTies = false; |
1222 | 0 | const bool includeChirality = false; |
1223 | 0 | const bool includeIsotopes = false; |
1224 | 0 | const bool includeAtomMaps = false; |
1225 | 0 | const bool includeChiralPresence = false; |
1226 | 0 | const bool useRingStereo = false; |
1227 | 0 | Canon::rankFragmentAtoms( |
1228 | 0 | mol, aranks, atomsInPlay, bondsInPlay, &atomSymbols, &bondSymbols, |
1229 | 0 | breakTies, includeChirality, includeIsotopes, includeAtomMaps, |
1230 | 0 | includeChiralPresence, useRingStereo); |
1231 | 0 | #endif |
1232 | 0 | needAnotherRound = updateAtoms( |
1233 | 0 | mol, aranks, atomSymbols, possibleAtoms, knownAtoms, fixedAtoms, |
1234 | 0 | possibleRingStereoAtoms, possibleRingStereoBonds, res); |
1235 | 0 | needAnotherRound |= |
1236 | 0 | updateBonds(mol, aranks, bondSymbols, possibleAtoms, possibleBonds, |
1237 | 0 | knownAtoms, knownBonds, fixedBonds, res); |
1238 | 0 | } |
1239 | 0 | } |
1240 | |
|
1241 | 0 | boost::dynamic_bitset<> possibleSpecialCases(mol.getNumAtoms()); |
1242 | 0 | findChiralAtomSpecialCases(mol, possibleSpecialCases, aranks); |
1243 | |
|
1244 | 0 | for (const auto atom : mol.atoms()) { |
1245 | 0 | atom->setProp<unsigned int>(common_properties::_ChiralAtomRank, |
1246 | 0 | aranks[atom->getIdx()], true); |
1247 | 0 | } |
1248 | |
|
1249 | | #if LOCAL_CANON |
1250 | | delete[] atomOrder; |
1251 | | Canon::detail::freeCanonAtoms(canonAtoms); |
1252 | | #endif |
1253 | 0 | return res; |
1254 | 0 | } |
1255 | | |
1256 | | //} // namespace |
1257 | | |
1258 | | std::vector<StereoInfo> findPotentialStereo(ROMol &mol, bool cleanIt, |
1259 | 0 | bool findPossible) { |
1260 | 0 | if (!mol.getRingInfo()->isSymmSssr()) { |
1261 | 0 | MolOps::symmetrizeSSSR(mol); |
1262 | 0 | } |
1263 | |
|
1264 | 0 | if (mol.needsUpdatePropertyCache()) { |
1265 | 0 | mol.updatePropertyCache(false); |
1266 | 0 | } |
1267 | 0 | std::vector<StereoInfo> res = runCleanup(mol, findPossible, cleanIt); |
1268 | 0 | mol.setProp("_potentialStereo", res, true); |
1269 | 0 | return res; |
1270 | 0 | } |
1271 | | |
1272 | | // const_casts are always ugly, but we know that findPotentialStereo() |
1273 | | // doesn't modify the molecule if cleanIt is false: |
1274 | 0 | std::vector<StereoInfo> findPotentialStereo(const ROMol &mol) { |
1275 | 0 | bool cleanIt = false; |
1276 | 0 | return findPotentialStereo(const_cast<ROMol &>(mol), cleanIt); |
1277 | 0 | } |
1278 | | |
1279 | | } // namespace Chirality |
1280 | | } // namespace RDKit |