/src/rdkit/Code/GraphMol/AddHs.cpp
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2003-2025 Greg Landrum and other RDKit contributors |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | #include "RDKitBase.h" |
11 | | #include <list> |
12 | | #include "QueryAtom.h" |
13 | | #include "QueryOps.h" |
14 | | #include "MonomerInfo.h" |
15 | | #include "Chirality.h" |
16 | | #include <Geometry/Transform3D.h> |
17 | | #include <Geometry/point.h> |
18 | | #include <boost/algorithm/string/classification.hpp> |
19 | | #include <boost/dynamic_bitset.hpp> |
20 | | #include <boost/range/iterator_range.hpp> |
21 | | |
22 | | constexpr double sq_dist_zero_tol = 1.e-4; |
23 | | |
24 | | namespace RDKit { |
25 | | |
26 | | // Local utility functionality: |
27 | | namespace { |
28 | 0 | Atom *getAtomNeighborNot(ROMol *mol, const Atom *atom, const Atom *other) { |
29 | 0 | PRECONDITION(mol, "bad molecule"); |
30 | 0 | PRECONDITION(atom, "bad atom"); |
31 | 0 | PRECONDITION(atom->getDegree() > 1, "bad degree"); |
32 | 0 | PRECONDITION(other, "bad atom"); |
33 | 0 | Atom *res = nullptr; |
34 | |
|
35 | 0 | ROMol::ADJ_ITER nbrIdx, endNbrs; |
36 | 0 | boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(atom); |
37 | 0 | while (nbrIdx != endNbrs) { |
38 | 0 | if (*nbrIdx != other->getIdx()) { |
39 | 0 | res = mol->getAtomWithIdx(*nbrIdx); |
40 | 0 | break; |
41 | 0 | } |
42 | 0 | ++nbrIdx; |
43 | 0 | } |
44 | |
|
45 | 0 | POSTCONDITION(res, "no neighbor found"); |
46 | 0 | return res; |
47 | 0 | } |
48 | | |
49 | 0 | void AssignHsResidueInfo(RWMol &mol) { |
50 | 0 | int max_serial = 0; |
51 | 0 | unsigned int stopIdx = mol.getNumAtoms(); |
52 | 0 | for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) { |
53 | 0 | auto *info = |
54 | 0 | (AtomPDBResidueInfo *)(mol.getAtomWithIdx(aidx)->getMonomerInfo()); |
55 | 0 | if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE && |
56 | 0 | info->getSerialNumber() > max_serial) { |
57 | 0 | max_serial = info->getSerialNumber(); |
58 | 0 | } |
59 | 0 | } |
60 | |
|
61 | 0 | AtomPDBResidueInfo *current_info = nullptr; |
62 | 0 | int current_h_id = 0; |
63 | 0 | for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) { |
64 | 0 | Atom *newAt = mol.getAtomWithIdx(aidx); |
65 | 0 | auto *info = (AtomPDBResidueInfo *)(newAt->getMonomerInfo()); |
66 | 0 | if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) { |
67 | 0 | ROMol::ADJ_ITER begin, end; |
68 | 0 | boost::tie(begin, end) = mol.getAtomNeighbors(newAt); |
69 | 0 | while (begin != end) { |
70 | 0 | if (mol.getAtomWithIdx(*begin)->getAtomicNum() == 1) { |
71 | | // Make all Hs unique - increment id even for existing |
72 | 0 | ++current_h_id; |
73 | | // skip if hydrogen already has PDB info |
74 | 0 | auto *h_info = (AtomPDBResidueInfo *)mol.getAtomWithIdx(*begin) |
75 | 0 | ->getMonomerInfo(); |
76 | 0 | if (h_info && |
77 | 0 | h_info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) { |
78 | 0 | continue; |
79 | 0 | } |
80 | | // the hydrogens have unique names on residue basis (H1, H2, ...) |
81 | 0 | if (!current_info || |
82 | 0 | current_info->getResidueNumber() != info->getResidueNumber() || |
83 | 0 | current_info->getChainId() != info->getChainId()) { |
84 | 0 | current_h_id = 1; |
85 | 0 | current_info = info; |
86 | 0 | } |
87 | 0 | std::string h_label = std::to_string(current_h_id); |
88 | 0 | if (h_label.length() > 3) { |
89 | 0 | h_label = h_label.substr(h_label.length() - 3, 3); |
90 | 0 | } |
91 | 0 | while (h_label.length() < 3) { |
92 | 0 | h_label = h_label + " "; |
93 | 0 | } |
94 | 0 | h_label = "H" + h_label; |
95 | | // wrap around id to '3H12' |
96 | 0 | h_label = h_label.substr(3, 1) + h_label.substr(0, 3); |
97 | 0 | AtomPDBResidueInfo *newInfo = new AtomPDBResidueInfo( |
98 | 0 | h_label, max_serial, "", info->getResidueName(), |
99 | 0 | info->getResidueNumber(), info->getChainId(), "", 1.0, 0.0, |
100 | 0 | info->getIsHeteroAtom()); |
101 | 0 | mol.getAtomWithIdx(*begin)->setMonomerInfo(newInfo); |
102 | |
|
103 | 0 | ++max_serial; |
104 | 0 | } |
105 | 0 | ++begin; |
106 | 0 | } |
107 | 0 | } |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | 0 | std::map<unsigned int, std::vector<unsigned int>> getIsoMap(const ROMol &mol) { |
112 | 0 | std::map<unsigned int, std::vector<unsigned int>> isoMap; |
113 | 0 | for (auto atom : mol.atoms()) { |
114 | 0 | if (atom->hasProp(common_properties::_isotopicHs)) { |
115 | 0 | atom->clearProp(common_properties::_isotopicHs); |
116 | 0 | } |
117 | 0 | } |
118 | 0 | for (auto bond : mol.bonds()) { |
119 | 0 | auto ba = bond->getBeginAtom(); |
120 | 0 | auto ea = bond->getEndAtom(); |
121 | 0 | int ha = -1; |
122 | 0 | unsigned int iso; |
123 | 0 | if (ba->getAtomicNum() == 1 && ba->getIsotope() && |
124 | 0 | ea->getAtomicNum() != 1) { |
125 | 0 | ha = ea->getIdx(); |
126 | 0 | iso = ba->getIsotope(); |
127 | 0 | } else if (ea->getAtomicNum() == 1 && ea->getIsotope() && |
128 | 0 | ba->getAtomicNum() != 1) { |
129 | 0 | ha = ba->getIdx(); |
130 | 0 | iso = ea->getIsotope(); |
131 | 0 | } |
132 | 0 | if (ha == -1) { |
133 | 0 | continue; |
134 | 0 | } |
135 | 0 | auto &v = isoMap[ha]; |
136 | 0 | v.push_back(iso); |
137 | 0 | } |
138 | 0 | return isoMap; |
139 | 0 | } |
140 | | |
141 | 373 | bool may_need_extra_H(const ROMol &mol, const Atom *atom) { |
142 | 373 | unsigned single_bonds = 0; |
143 | 373 | unsigned aromatic_bonds = 0; |
144 | 647 | for (auto bond : mol.atomBonds(atom)) { |
145 | 647 | if (bond->getBondType() == Bond::SINGLE) { |
146 | 266 | ++single_bonds; |
147 | 381 | } else if (bond->getBondType() == Bond::AROMATIC) { |
148 | 80 | ++aromatic_bonds; |
149 | 301 | } else { |
150 | 301 | return false; |
151 | 301 | } |
152 | 647 | } |
153 | 72 | return single_bonds == 1 && aromatic_bonds == 2 && |
154 | 4 | atom->getTotalValence() == 3; |
155 | 373 | } |
156 | | |
157 | | } // end of unnamed namespace |
158 | | |
159 | | namespace MolOps { |
160 | | |
161 | | namespace { |
162 | | RDGeom::Point3D pickBisector(const RDGeom::Point3D &nbr1Vect, |
163 | | const RDGeom::Point3D &nbr2Vect, |
164 | 0 | const RDGeom::Point3D &nbr3Vect) { |
165 | 0 | auto dirVect = nbr2Vect + nbr3Vect; |
166 | 0 | if (dirVect.lengthSq() < sq_dist_zero_tol) { |
167 | | // nbr2Vect and nbr3Vect are anti-parallel (was #3854) |
168 | 0 | dirVect = nbr2Vect; |
169 | 0 | std::swap(dirVect.x, dirVect.y); |
170 | 0 | dirVect.x *= -1; |
171 | 0 | } |
172 | 0 | if (dirVect.dotProduct(nbr1Vect) < 0) { |
173 | 0 | dirVect *= -1; |
174 | 0 | } |
175 | |
|
176 | 0 | return dirVect; |
177 | 0 | } |
178 | | } // namespace |
179 | | |
180 | | void setTerminalAtomCoords(ROMol &mol, unsigned int idx, |
181 | 0 | unsigned int otherIdx) { |
182 | | // we will loop over all the coordinates |
183 | 0 | PRECONDITION(otherIdx != idx, "degenerate atoms"); |
184 | 0 | Atom *atom = mol.getAtomWithIdx(idx); |
185 | 0 | PRECONDITION(mol.getAtomDegree(atom) == 1, "bad atom degree"); |
186 | 0 | const Bond *bond = mol.getBondBetweenAtoms(otherIdx, idx); |
187 | 0 | PRECONDITION(bond, "no bond between atoms"); |
188 | |
|
189 | 0 | const Atom *otherAtom = mol.getAtomWithIdx(otherIdx); |
190 | 0 | double bondLength = |
191 | 0 | PeriodicTable::getTable()->getRb0(1) + |
192 | 0 | PeriodicTable::getTable()->getRb0(otherAtom->getAtomicNum()); |
193 | |
|
194 | 0 | RDGeom::Point3D dirVect(0, 0, 0); |
195 | |
|
196 | 0 | RDGeom::Point3D perpVect, rotnAxis, nbrPerp; |
197 | 0 | RDGeom::Point3D nbr1Vect, nbr2Vect, nbr3Vect; |
198 | 0 | RDGeom::Transform3D tform; |
199 | 0 | RDGeom::Point3D otherPos, atomPos; |
200 | |
|
201 | 0 | const Atom *nbr1 = nullptr, *nbr2 = nullptr, *nbr3 = nullptr; |
202 | 0 | const Bond *nbrBond; |
203 | 0 | ROMol::ADJ_ITER nbrIdx, endNbrs; |
204 | |
|
205 | 0 | switch (otherAtom->getDegree()) { |
206 | 0 | case 1: |
207 | | // -------------------------------------------------------------------------- |
208 | | // No other atoms present: |
209 | | // -------------------------------------------------------------------------- |
210 | | // loop over the conformations and set the coordinates |
211 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); |
212 | 0 | cfi++) { |
213 | 0 | if ((*cfi)->is3D()) { |
214 | 0 | dirVect.z = 1; |
215 | 0 | } else { |
216 | 0 | dirVect.x = 1; |
217 | 0 | } |
218 | 0 | otherPos = (*cfi)->getAtomPos(otherIdx); |
219 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
220 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
221 | 0 | } |
222 | 0 | break; |
223 | | |
224 | 0 | case 2: |
225 | | // -------------------------------------------------------------------------- |
226 | | // One other neighbor: |
227 | | // -------------------------------------------------------------------------- |
228 | 0 | nbr1 = getAtomNeighborNot(&mol, otherAtom, atom); |
229 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); |
230 | 0 | ++cfi) { |
231 | 0 | otherPos = (*cfi)->getAtomPos(otherIdx); |
232 | 0 | RDGeom::Point3D nbr1Pos = (*cfi)->getAtomPos(nbr1->getIdx()); |
233 | | // get a normalized vector pointing away from the neighbor: |
234 | 0 | nbr1Vect = nbr1Pos - otherPos; |
235 | 0 | if (nbr1Vect.lengthSq() < sq_dist_zero_tol) { |
236 | | // no difference, which likely indicates that we have redundant atoms. |
237 | | // just put it on top of the heavy atom. This was #678 |
238 | 0 | (*cfi)->setAtomPos(idx, otherPos); |
239 | 0 | continue; |
240 | 0 | } |
241 | 0 | nbr1Vect.normalize(); |
242 | 0 | nbr1Vect *= -1; |
243 | | |
244 | | // ok, nbr1Vect points away from the other atom, figure out where |
245 | | // this H goes: |
246 | 0 | switch (otherAtom->getHybridization()) { |
247 | 0 | case Atom::SP3: |
248 | | // get a perpendicular to nbr1Vect: |
249 | 0 | if ((*cfi)->is3D()) { |
250 | 0 | perpVect = nbr1Vect.getPerpendicular(); |
251 | 0 | } else { |
252 | 0 | perpVect.z = 1.0; |
253 | 0 | } |
254 | | // and move off it: |
255 | 0 | tform.SetRotation((180 - 109.471) * M_PI / 180., perpVect); |
256 | 0 | dirVect = tform * nbr1Vect; |
257 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
258 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
259 | 0 | break; |
260 | 0 | case Atom::SP2: |
261 | | // default 3D position is to just take an arbitrary perpendicular |
262 | | // for 2D we take the normal to the xy plane |
263 | 0 | if ((*cfi)->is3D()) { |
264 | 0 | perpVect = nbr1Vect.getPerpendicular(); |
265 | 0 | } else { |
266 | 0 | perpVect.z = 1.0; |
267 | 0 | } |
268 | 0 | if (nbr1->getDegree() > 1) { |
269 | | // can we use the neighboring atom to establish a perpendicular? |
270 | 0 | nbrBond = mol.getBondBetweenAtoms(otherIdx, nbr1->getIdx()); |
271 | 0 | if (nbrBond->getIsAromatic() || |
272 | 0 | nbrBond->getBondType() == Bond::DOUBLE || |
273 | 0 | nbrBond->getIsConjugated()) { |
274 | 0 | nbr2 = getAtomNeighborNot(&mol, nbr1, otherAtom); |
275 | 0 | nbr2Vect = |
276 | 0 | nbr1Pos.directionVector((*cfi)->getAtomPos(nbr2->getIdx())); |
277 | 0 | auto crossProd = nbr2Vect.crossProduct(nbr1Vect); |
278 | | |
279 | | // if nbr1 and nbr2 are aligned, the perpendicular will be null, |
280 | | // and we'll just keep the default calculated above. Otherwise |
281 | | // we use the cross product |
282 | 0 | if (crossProd.lengthSq() >= sq_dist_zero_tol) { |
283 | 0 | perpVect = crossProd; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | 0 | perpVect.normalize(); |
288 | | // rotate the nbr1Vect 60 degrees about perpVect and we're done: |
289 | 0 | tform.SetRotation(60. * M_PI / 180., perpVect); |
290 | 0 | dirVect = tform * nbr1Vect; |
291 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
292 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
293 | 0 | break; |
294 | 0 | case Atom::SP: |
295 | | // just lay the H along the vector: |
296 | 0 | dirVect = nbr1Vect; |
297 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
298 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
299 | 0 | break; |
300 | 0 | default: |
301 | | // FIX: handle other hybridizations |
302 | | // for now, just lay the H along the vector: |
303 | 0 | dirVect = nbr1Vect; |
304 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
305 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
306 | 0 | } |
307 | 0 | } |
308 | 0 | break; |
309 | 0 | case 3: |
310 | | // -------------------------------------------------------------------------- |
311 | | // Two other neighbors: |
312 | | // -------------------------------------------------------------------------- |
313 | 0 | boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom); |
314 | 0 | while (nbrIdx != endNbrs) { |
315 | 0 | if (*nbrIdx != idx) { |
316 | 0 | if (!nbr1) { |
317 | 0 | nbr1 = mol.getAtomWithIdx(*nbrIdx); |
318 | 0 | } else { |
319 | 0 | nbr2 = mol.getAtomWithIdx(*nbrIdx); |
320 | 0 | } |
321 | 0 | } |
322 | 0 | ++nbrIdx; |
323 | 0 | } |
324 | 0 | TEST_ASSERT(nbr1); |
325 | 0 | TEST_ASSERT(nbr2); |
326 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); |
327 | 0 | ++cfi) { |
328 | | // start along the average of the two vectors: |
329 | 0 | otherPos = (*cfi)->getAtomPos(otherIdx); |
330 | 0 | nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx()); |
331 | 0 | nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx()); |
332 | 0 | if (nbr1Vect.lengthSq() < sq_dist_zero_tol || |
333 | 0 | nbr2Vect.lengthSq() < sq_dist_zero_tol) { |
334 | | // no difference, which likely indicates that we have redundant atoms. |
335 | | // just put it on top of the heavy atom. This was #678 |
336 | 0 | (*cfi)->setAtomPos(idx, otherPos); |
337 | 0 | continue; |
338 | 0 | } |
339 | 0 | nbr1Vect.normalize(); |
340 | 0 | nbr2Vect.normalize(); |
341 | 0 | dirVect = nbr1Vect + nbr2Vect; |
342 | |
|
343 | 0 | if (dirVect.lengthSq() < sq_dist_zero_tol) { |
344 | | // nbr1Vect and nbr2Vect are non-null, but they may |
345 | | // still cancel each other out |
346 | 0 | continue; |
347 | 0 | } |
348 | 0 | dirVect.normalize(); |
349 | 0 | if ((*cfi)->is3D()) { |
350 | 0 | switch (otherAtom->getHybridization()) { |
351 | 0 | case Atom::SP3: |
352 | | // get the perpendicular to the neighbors: |
353 | 0 | nbrPerp = nbr1Vect.crossProduct(nbr2Vect); |
354 | | // and the perpendicular to that: |
355 | 0 | rotnAxis = nbrPerp.crossProduct(dirVect); |
356 | | // and then rotate about that: |
357 | 0 | rotnAxis.normalize(); |
358 | 0 | tform.SetRotation((109.471 / 2) * M_PI / 180., rotnAxis); |
359 | 0 | dirVect = tform * dirVect; |
360 | 0 | atomPos = |
361 | 0 | otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
362 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
363 | 0 | break; |
364 | 0 | case Atom::SP2: |
365 | | // don't need to do anything here, the H atom goes right on the |
366 | | // direction vector |
367 | 0 | atomPos = |
368 | 0 | otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
369 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
370 | 0 | break; |
371 | 0 | default: |
372 | | // FIX: handle other hybridizations |
373 | | // for now, just lay the H along the neighbor vector; |
374 | 0 | atomPos = |
375 | 0 | otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
376 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
377 | 0 | break; |
378 | 0 | } |
379 | 0 | } else { |
380 | | // don't need to do anything here, the H atom goes right on the |
381 | | // direction vector |
382 | 0 | atomPos = otherPos + dirVect; |
383 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
384 | 0 | } |
385 | 0 | } |
386 | 0 | break; |
387 | 0 | case 4: |
388 | | // -------------------------------------------------------------------------- |
389 | | // Three other neighbors: |
390 | | // -------------------------------------------------------------------------- |
391 | 0 | boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom); |
392 | | |
393 | | // We're using chiral tag for checking chirality, so we just take the |
394 | | // initial order |
395 | 0 | while (nbrIdx != endNbrs) { |
396 | 0 | if (*nbrIdx != idx) { |
397 | 0 | if (!nbr1) { |
398 | 0 | nbr1 = mol.getAtomWithIdx(*nbrIdx); |
399 | 0 | } else if (!nbr2) { |
400 | 0 | nbr2 = mol.getAtomWithIdx(*nbrIdx); |
401 | 0 | } else { |
402 | 0 | nbr3 = mol.getAtomWithIdx(*nbrIdx); |
403 | 0 | } |
404 | 0 | } |
405 | 0 | ++nbrIdx; |
406 | 0 | } |
407 | |
|
408 | 0 | TEST_ASSERT(nbr1); |
409 | 0 | TEST_ASSERT(nbr2); |
410 | 0 | TEST_ASSERT(nbr3); |
411 | |
|
412 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); |
413 | 0 | ++cfi) { |
414 | 0 | otherPos = (*cfi)->getAtomPos(otherIdx); |
415 | 0 | nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx()); |
416 | 0 | nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx()); |
417 | 0 | nbr3Vect = otherPos - (*cfi)->getAtomPos(nbr3->getIdx()); |
418 | 0 | if (nbr1Vect.lengthSq() < sq_dist_zero_tol || |
419 | 0 | nbr2Vect.lengthSq() < sq_dist_zero_tol || |
420 | 0 | nbr3Vect.lengthSq() < sq_dist_zero_tol) { |
421 | | // no difference, which likely indicates that we have redundant atoms. |
422 | | // just put it on top of the heavy atom. This was #678 |
423 | 0 | (*cfi)->setAtomPos(idx, otherPos); |
424 | 0 | continue; |
425 | 0 | } |
426 | 0 | nbr1Vect.normalize(); |
427 | 0 | nbr2Vect.normalize(); |
428 | 0 | nbr3Vect.normalize(); |
429 | | |
430 | | // if three neighboring atoms are more or less planar, this |
431 | | // is going to be in a quasi-random (but almost definitely bad) |
432 | | // direction... |
433 | | // correct for this (issue 2951221): |
434 | 0 | if ((*cfi)->is3D()) { |
435 | 0 | if (fabs(nbr3Vect.dotProduct(nbr1Vect.crossProduct(nbr2Vect))) < |
436 | 0 | 0.1) { |
437 | | // compute the normal: |
438 | 0 | dirVect = nbr1Vect.crossProduct(nbr2Vect); |
439 | | |
440 | | // Each of the nbr vectors is non-null, but there might be pairs |
441 | | // that cancel each other out. Try to find a direction from atoms |
442 | | // that do not overlap. |
443 | 0 | if (dirVect.lengthSq() < sq_dist_zero_tol) { |
444 | | // This definition of dirVect reverses the parity around otherIdx |
445 | | // the change of sign restores it |
446 | 0 | dirVect = nbr1Vect.crossProduct(nbr3Vect) * -1; |
447 | 0 | } |
448 | 0 | if (dirVect.lengthSq() < sq_dist_zero_tol) { |
449 | 0 | dirVect = nbr2Vect.crossProduct(nbr3Vect); |
450 | 0 | } |
451 | | // We couldn't find a good direction |
452 | 0 | if (dirVect.lengthSq() < sq_dist_zero_tol) { |
453 | 0 | continue; |
454 | 0 | } |
455 | | |
456 | 0 | std::string cipCode; |
457 | 0 | if (otherAtom->getPropIfPresent(common_properties::_CIPCode, |
458 | 0 | cipCode)) { |
459 | | // the heavy atom is a chiral center, make sure |
460 | | // that we went go the right direction to preserve |
461 | | // its chirality. We use the chiral volume for this: |
462 | 0 | RDGeom::Point3D v1 = dirVect - nbr3Vect; |
463 | 0 | RDGeom::Point3D v2 = nbr1Vect - nbr3Vect; |
464 | 0 | RDGeom::Point3D v3 = nbr2Vect - nbr3Vect; |
465 | 0 | double vol = v1.dotProduct(v2.crossProduct(v3)); |
466 | |
|
467 | 0 | if ((otherAtom->getChiralTag() == |
468 | 0 | Atom::ChiralType::CHI_TETRAHEDRAL_CCW && |
469 | 0 | vol < 0) || |
470 | 0 | (otherAtom->getChiralTag() == |
471 | 0 | Atom::ChiralType::CHI_TETRAHEDRAL_CW && |
472 | 0 | vol > 0)) { |
473 | 0 | dirVect *= -1; |
474 | 0 | } |
475 | 0 | } |
476 | 0 | } else { |
477 | 0 | dirVect = nbr1Vect + nbr2Vect + nbr3Vect; |
478 | 0 | } |
479 | 0 | } else { |
480 | | // we're in flatland |
481 | | |
482 | | // github #3879 and #908: find the two neighbors with the largest |
483 | | // outer angle between them and then place the H to bisect that angle |
484 | | // This is recommendation ST-1.1.4 from the 2006 IUPAC "Graphical |
485 | | // representation of stereochemical configuration" guideline |
486 | 0 | auto angle12 = nbr1Vect.angleTo(nbr2Vect); |
487 | 0 | auto angle13 = nbr1Vect.angleTo(nbr3Vect); |
488 | 0 | auto angle23 = nbr2Vect.angleTo(nbr3Vect); |
489 | 0 | auto accum1 = angle12 + angle13; |
490 | 0 | auto accum2 = angle12 + angle23; |
491 | 0 | auto accum3 = angle13 + angle23; |
492 | 0 | if (accum1 <= accum2 && accum1 <= accum3) { |
493 | 0 | dirVect = pickBisector(nbr1Vect, nbr2Vect, nbr3Vect); |
494 | 0 | } else if (accum2 <= accum1 && accum2 <= accum3) { |
495 | 0 | dirVect = pickBisector(nbr2Vect, nbr1Vect, nbr3Vect); |
496 | 0 | } else { |
497 | 0 | dirVect = pickBisector(nbr3Vect, nbr1Vect, nbr2Vect); |
498 | 0 | } |
499 | 0 | } |
500 | | |
501 | 0 | dirVect.normalize(); |
502 | 0 | atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0); |
503 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
504 | 0 | } |
505 | 0 | break; |
506 | 0 | default: |
507 | | // -------------------------------------------------------------------------- |
508 | | // FIX: figure out what to do here |
509 | | // -------------------------------------------------------------------------- |
510 | 0 | atomPos = otherPos + dirVect * bondLength; |
511 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); |
512 | 0 | ++cfi) { |
513 | 0 | (*cfi)->setAtomPos(idx, atomPos); |
514 | 0 | } |
515 | 0 | break; |
516 | 0 | } |
517 | 0 | } |
518 | | |
519 | | namespace { |
520 | 0 | bool isQueryAtom(const RWMol &mol, const Atom &atom) { |
521 | 0 | if (atom.hasQuery()) { |
522 | 0 | return true; |
523 | 0 | } |
524 | 0 | for (const auto bnd : mol.atomBonds(&atom)) { |
525 | 0 | if (bnd->hasQuery()) { |
526 | 0 | return true; |
527 | 0 | } |
528 | 0 | } |
529 | 0 | return false; |
530 | 0 | } |
531 | | } // namespace |
532 | | void addHs(RWMol &mol, const AddHsParameters ¶ms, |
533 | 0 | const UINT_VECT *onlyOnAtoms) { |
534 | | // when we hit each atom, clear its computed properties |
535 | | // NOTE: it is essential that we not clear the ring info in the |
536 | | // molecule's computed properties. We don't want to have to |
537 | | // regenerate that. This caused Issue210 and Issue212: |
538 | 0 | mol.clearComputedProps(false); |
539 | | |
540 | | // precompute the number of hydrogens we are going to add so that we can |
541 | | // pre-allocate the necessary space on the conformations of the molecule |
542 | | // for their coordinates |
543 | 0 | unsigned int numAddHyds = 0; |
544 | 0 | boost::dynamic_bitset<> onAtoms(mol.getNumAtoms()); |
545 | 0 | if (onlyOnAtoms) { |
546 | 0 | for (auto atIdx : *onlyOnAtoms) { |
547 | 0 | onAtoms.set(atIdx); |
548 | 0 | } |
549 | 0 | } else { |
550 | 0 | onAtoms.set(); |
551 | 0 | } |
552 | 0 | std::vector<unsigned int> numExplicitHs(mol.getNumAtoms(), 0); |
553 | 0 | std::vector<unsigned int> numImplicitHs(mol.getNumAtoms(), 0); |
554 | 0 | for (auto at : mol.atoms()) { |
555 | 0 | numExplicitHs[at->getIdx()] = at->getNumExplicitHs(); |
556 | 0 | numImplicitHs[at->getIdx()] = at->getNumImplicitHs(); |
557 | 0 | if (onAtoms[at->getIdx()]) { |
558 | 0 | if (params.skipQueries && isQueryAtom(mol, *at)) { |
559 | 0 | onAtoms.set(at->getIdx(), 0); |
560 | 0 | continue; |
561 | 0 | } |
562 | 0 | numAddHyds += at->getNumExplicitHs(); |
563 | 0 | if (!params.explicitOnly) { |
564 | 0 | numAddHyds += at->getNumImplicitHs(); |
565 | 0 | } |
566 | 0 | } |
567 | 0 | } |
568 | 0 | unsigned int nSize = mol.getNumAtoms() + numAddHyds; |
569 | | |
570 | | // loop over the conformations of the molecule and allocate new space |
571 | | // for the H locations (need to do this even if we aren't adding coords so |
572 | | // that the conformers have the correct number of atoms). |
573 | 0 | for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); ++cfi) { |
574 | 0 | (*cfi)->reserve(nSize); |
575 | 0 | } |
576 | |
|
577 | 0 | unsigned int stopIdx = mol.getNumAtoms(); |
578 | 0 | for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) { |
579 | 0 | if (!onAtoms[aidx]) { |
580 | 0 | continue; |
581 | 0 | } |
582 | | |
583 | 0 | Atom *newAt = mol.getAtomWithIdx(aidx); |
584 | |
|
585 | 0 | std::vector<unsigned int> isoHs; |
586 | 0 | if (newAt->getPropIfPresent(common_properties::_isotopicHs, isoHs)) { |
587 | 0 | newAt->clearProp(common_properties::_isotopicHs); |
588 | 0 | } |
589 | 0 | std::vector<unsigned int>::const_iterator isoH = isoHs.begin(); |
590 | 0 | unsigned int newIdx; |
591 | 0 | newAt->clearComputedProps(); |
592 | | // always convert explicit Hs |
593 | 0 | unsigned int onumexpl = numExplicitHs[aidx]; |
594 | 0 | for (unsigned int i = 0; i < onumexpl; i++) { |
595 | 0 | newIdx = mol.addAtom(new Atom(1), false, true); |
596 | 0 | mol.addBond(aidx, newIdx, Bond::SINGLE); |
597 | 0 | auto hAtom = mol.getAtomWithIdx(newIdx); |
598 | 0 | hAtom->updatePropertyCache(); |
599 | 0 | if (params.addCoords) { |
600 | 0 | setTerminalAtomCoords(mol, newIdx, aidx); |
601 | 0 | } |
602 | 0 | if (isoH != isoHs.end()) { |
603 | 0 | hAtom->setIsotope(*isoH); |
604 | 0 | ++isoH; |
605 | 0 | } |
606 | 0 | } |
607 | | // clear the local property |
608 | 0 | newAt->setNumExplicitHs(0); |
609 | |
|
610 | 0 | if (!params.explicitOnly) { |
611 | | // take care of implicits |
612 | 0 | for (unsigned int i = 0; i < numImplicitHs[aidx]; i++) { |
613 | 0 | newIdx = mol.addAtom(new Atom(1), false, true); |
614 | 0 | mol.addBond(aidx, newIdx, Bond::SINGLE); |
615 | | // set the isImplicit label so that we can strip these back |
616 | | // off later if need be. |
617 | 0 | auto hAtom = mol.getAtomWithIdx(newIdx); |
618 | 0 | hAtom->setProp(common_properties::isImplicit, 1); |
619 | 0 | hAtom->updatePropertyCache(); |
620 | 0 | if (params.addCoords) { |
621 | 0 | setTerminalAtomCoords(mol, newIdx, aidx); |
622 | 0 | } |
623 | 0 | if (isoH != isoHs.end()) { |
624 | 0 | hAtom->setIsotope(*isoH); |
625 | 0 | ++isoH; |
626 | 0 | } |
627 | 0 | } |
628 | 0 | } |
629 | | // update the atom's derived properties (valence count, etc.) |
630 | | // no sense in being strict here (was github #2782) |
631 | 0 | newAt->updatePropertyCache(false); |
632 | 0 | if (isoH != isoHs.end()) { |
633 | 0 | BOOST_LOG(rdWarningLog) << "extra H isotope information found on atom " |
634 | 0 | << newAt->getIdx() << std::endl; |
635 | 0 | } |
636 | 0 | } |
637 | | // take care of AtomPDBResidueInfo for Hs if root atom has it |
638 | 0 | if (params.addResidueInfo) { |
639 | 0 | AssignHsResidueInfo(mol); |
640 | 0 | } |
641 | 0 | } |
642 | | |
643 | | namespace { |
644 | | // returns whether or not an adjustment was made, in case we want that info |
645 | | bool adjustStereoAtomsIfRequired(RWMol &mol, const Atom *atom, |
646 | 13.7k | const Atom *heavyAtom) { |
647 | 13.7k | PRECONDITION(atom != nullptr, "bad atom"); |
648 | 13.7k | PRECONDITION(heavyAtom != nullptr, "bad heavy atom"); |
649 | | // nothing we can do if the degree is only 2 (and we should have covered |
650 | | // that earlier anyway) |
651 | 13.7k | if (heavyAtom->getDegree() == 2) { |
652 | 8.63k | return false; |
653 | 8.63k | } |
654 | 5.08k | const auto &cbnd = |
655 | 5.08k | mol.getBondBetweenAtoms(atom->getIdx(), heavyAtom->getIdx()); |
656 | 5.08k | if (!cbnd) { |
657 | 0 | return false; |
658 | 0 | } |
659 | 5.08k | for (const auto &nbri : |
660 | 608k | boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) { |
661 | 608k | Bond *bnd = mol[nbri]; |
662 | 608k | if (bnd->getBondType() == Bond::DOUBLE && |
663 | 2.41k | bnd->getStereo() > Bond::STEREOANY) { |
664 | 18 | auto sAtomIt = std::find(bnd->getStereoAtoms().begin(), |
665 | 18 | bnd->getStereoAtoms().end(), atom->getIdx()); |
666 | 18 | if (sAtomIt != bnd->getStereoAtoms().end()) { |
667 | | // sAtomIt points to the position of this atom's index in the list. |
668 | | // find the index of another atom attached to the heavy atom and |
669 | | // use it to update sAtomIt |
670 | 9 | unsigned int dblNbrIdx = bnd->getOtherAtomIdx(heavyAtom->getIdx()); |
671 | 9 | for (const auto &nbri : |
672 | 23 | boost::make_iterator_range(mol.getAtomNeighbors(heavyAtom))) { |
673 | 23 | const auto &nbr = mol[nbri]; |
674 | 23 | if (nbr->getIdx() == dblNbrIdx || nbr->getIdx() == atom->getIdx()) { |
675 | 14 | continue; |
676 | 14 | } |
677 | 9 | *sAtomIt = nbr->getIdx(); |
678 | 9 | bool madeAdjustment = true; |
679 | 9 | switch (bnd->getStereo()) { |
680 | 7 | case Bond::STEREOCIS: |
681 | 7 | bnd->setStereo(Bond::STEREOTRANS); |
682 | 7 | break; |
683 | 2 | case Bond::STEREOTRANS: |
684 | 2 | bnd->setStereo(Bond::STEREOCIS); |
685 | 2 | break; |
686 | 0 | default: |
687 | | // I think we shouldn't need to do anything with E and Z... |
688 | 0 | madeAdjustment = false; |
689 | 0 | break; |
690 | 9 | } |
691 | 9 | return madeAdjustment; |
692 | 9 | } |
693 | 9 | } |
694 | 18 | } |
695 | 608k | } |
696 | 5.07k | return false; |
697 | 5.08k | } |
698 | | |
699 | 13.7k | void molRemoveH(RWMol &mol, unsigned int idx, bool updateExplicitCount) { |
700 | 13.7k | auto atom = mol.getAtomWithIdx(idx); |
701 | 13.7k | PRECONDITION(atom->getAtomicNum() == 1, "idx corresponds to a non-Hydrogen"); |
702 | 13.7k | for (const auto bond : mol.atomBonds(atom)) { |
703 | 13.7k | Atom *heavyAtom = bond->getOtherAtom(atom); |
704 | 13.7k | int heavyAtomNum = heavyAtom->getAtomicNum(); |
705 | | |
706 | | // we'll update the neighbor's explicit H count if we were told to |
707 | | // *or* if the neighbor is chiral, in which case the H is needed |
708 | | // in order to complete the coordination |
709 | | // *or* if the neighbor has the noImplicit flag set: |
710 | 13.7k | if (updateExplicitCount || heavyAtom->getNoImplicit() || |
711 | 13.1k | heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) { |
712 | 13.1k | heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1); |
713 | 13.1k | } else { |
714 | | // this is a special case related to Issue 228 and the |
715 | | // "disappearing Hydrogen" problem discussed in MolOps::adjustHs |
716 | | // |
717 | | // If we remove a hydrogen from an aromatic N or P, or if |
718 | | // the heavy atom it is connected to is not in its default |
719 | | // valence state, we need to be *sure* to increment the |
720 | | // explicit count, even if the H itself isn't marked as explicit |
721 | 615 | const INT_VECT &defaultVs = |
722 | 615 | PeriodicTable::getTable()->getValenceList(heavyAtomNum); |
723 | 615 | if (((heavyAtomNum == 7 || heavyAtomNum == 15 || |
724 | 373 | may_need_extra_H(mol, heavyAtom)) && |
725 | 243 | isAromaticAtom(*heavyAtom)) || |
726 | 587 | (std::find(defaultVs.begin() + 1, defaultVs.end(), |
727 | 587 | heavyAtom->getTotalValence()) != defaultVs.end())) { |
728 | 65 | heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1); |
729 | 65 | } |
730 | 615 | } |
731 | | |
732 | | // One other consequence of removing the H from the graph is |
733 | | // that we may change the ordering of the bonds about a |
734 | | // chiral center. This may change the chiral label at that |
735 | | // atom. We deal with that by explicitly checking here: |
736 | 13.7k | if (heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) { |
737 | 2.10k | INT_LIST neighborIndices; |
738 | 587k | for (const auto &nbnd : mol.atomBonds(heavyAtom)) { |
739 | 587k | if (nbnd->getIdx() != bond->getIdx()) { |
740 | 585k | neighborIndices.push_back(nbnd->getIdx()); |
741 | 585k | } |
742 | 587k | } |
743 | 2.10k | neighborIndices.push_back(bond->getIdx()); |
744 | | |
745 | 2.10k | int nSwaps = heavyAtom->getPerturbationOrder(neighborIndices); |
746 | | // std::cerr << "H: "<<atom->getIdx()<<" hvy: |
747 | | // "<<heavyAtom->getIdx()<<" swaps: " << nSwaps<<std::endl; |
748 | 2.10k | if (nSwaps % 2) { |
749 | 1.04k | heavyAtom->invertChirality(); |
750 | 1.04k | } |
751 | 2.10k | } |
752 | | |
753 | | // If we are removing a H atom that defines bond stereo (e.g. imines), |
754 | | // Then also remove the bond stereo information, as it is no longer valid. |
755 | 13.7k | if (heavyAtom->getDegree() == 2) { |
756 | 12.0k | for (auto &nbnd : mol.atomBonds(heavyAtom)) { |
757 | 12.0k | if (nbnd != bond) { |
758 | 8.64k | if (nbnd->getStereo() > Bond::STEREOANY) { |
759 | 44 | nbnd->setStereo(Bond::STEREONONE); |
760 | 44 | nbnd->getStereoAtoms().clear(); |
761 | 44 | } |
762 | 8.64k | break; |
763 | 8.64k | } |
764 | 12.0k | } |
765 | 8.64k | } |
766 | | |
767 | | // if it's a wavy bond, then we need to |
768 | | // mark the beginning atom with the _UnknownStereo tag. |
769 | | // so that we know later that something was affecting its |
770 | | // stereochem |
771 | 13.7k | if (bond->getBondDir() == Bond::UNKNOWN && |
772 | 19 | bond->getBeginAtomIdx() == heavyAtom->getIdx()) { |
773 | 17 | heavyAtom->setProp(common_properties::_UnknownStereo, 1); |
774 | 13.7k | } else if (bond->getBondDir() == Bond::ENDDOWNRIGHT || |
775 | 13.5k | bond->getBondDir() == Bond::ENDUPRIGHT) { |
776 | | // if the direction is set on this bond and the atom it's connected to |
777 | | // has no other single bonds with directions set, then we need to set |
778 | | // direction on one of the other neighbors in order to avoid double |
779 | | // bond stereochemistry possibly being lost. This was github #754 |
780 | 2.76k | bool foundADir = false; |
781 | 2.76k | Bond *oBond = nullptr; |
782 | 2.76k | for (const auto &nbri : |
783 | 152k | boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) { |
784 | 152k | Bond *nbnd = mol[nbri]; |
785 | 152k | if (nbnd->getIdx() != bond->getIdx() && |
786 | 149k | nbnd->getBondType() == Bond::SINGLE) { |
787 | 144k | if (nbnd->getBondDir() == Bond::NONE) { |
788 | 141k | oBond = nbnd; |
789 | 141k | } else { |
790 | 3.32k | foundADir = true; |
791 | 3.32k | } |
792 | 144k | } |
793 | 152k | } |
794 | 2.76k | if (!foundADir && oBond != nullptr) { |
795 | 1.64k | bool flipIt = (oBond->getBeginAtom() == heavyAtom) && |
796 | 749 | (bond->getBeginAtom() == heavyAtom); |
797 | 1.64k | if (flipIt) { |
798 | 633 | oBond->setBondDir(bond->getBondDir() == Bond::ENDDOWNRIGHT |
799 | 633 | ? Bond::ENDUPRIGHT |
800 | 633 | : Bond::ENDDOWNRIGHT); |
801 | 1.01k | } else { |
802 | 1.01k | oBond->setBondDir(bond->getBondDir()); |
803 | 1.01k | } |
804 | 1.64k | } |
805 | | // if this atom is one of the stereoatoms for a double bond we need |
806 | | // to switch the stereo atom on this end to be the other neighbor |
807 | | // This was part of github #1810 |
808 | 2.76k | adjustStereoAtomsIfRequired(mol, atom, heavyAtom); |
809 | 10.9k | } else { |
810 | | // if this atom is one of the stereoatoms for a double bond we need |
811 | | // to switch the stereo atom on this end to be the other neighbor |
812 | | // This was part of github #1810 |
813 | 10.9k | adjustStereoAtomsIfRequired(mol, atom, heavyAtom); |
814 | 10.9k | } |
815 | | |
816 | | // remove the bond from any SGroups that might include it. |
817 | 46.5k | for (auto &sg : getSubstanceGroups(mol)) { |
818 | 46.5k | sg.removeBondWithIdx(bond->getIdx()); |
819 | 46.5k | } |
820 | 13.7k | } |
821 | | |
822 | | // Finally, remove the atom from any SGroups that might include it, so that |
823 | | // the SGroups don't get removed in removeAtom(). Since we allow removing |
824 | | // SGroup SAP lvidx H atoms, we need to check for those and update them. |
825 | 46.5k | for (auto &sg : getSubstanceGroups(mol)) { |
826 | 46.5k | sg.removeAtomWithIdx(idx); |
827 | 46.5k | sg.removeParentAtomWithIdx(idx); |
828 | | |
829 | 46.5k | for (auto &sap : sg.getAttachPoints()) { |
830 | 5.73k | if (sap.lvIdx == static_cast<int>(idx)) { |
831 | 1.17k | sap.lvIdx = -1; |
832 | 1.17k | } |
833 | 5.73k | } |
834 | 46.5k | } |
835 | | // computed properties will be cleared after all hydrogens are removed |
836 | 13.7k | bool clearProps = false; |
837 | 13.7k | mol.removeAtom(atom, clearProps); |
838 | 13.7k | } |
839 | | |
840 | | bool shouldRemoveH(const RWMol &mol, const Atom *atom, |
841 | 10.7M | const RemoveHsParameters &ps) { |
842 | 10.7M | if (atom->getAtomicNum() != 1) { |
843 | 10.7M | return false; |
844 | 10.7M | } |
845 | 39.6k | if (!ps.removeWithQuery && atom->hasQuery()) { |
846 | 112 | return false; |
847 | 112 | } |
848 | 39.5k | if (!ps.removeDegreeZero && !atom->getDegree()) { |
849 | 156 | if (ps.showWarnings) { |
850 | 156 | BOOST_LOG(rdWarningLog) |
851 | 0 | << "WARNING: not removing hydrogen atom without neighbors" |
852 | 0 | << std::endl; |
853 | 156 | } |
854 | 156 | return false; |
855 | 156 | } |
856 | 39.3k | if (!ps.removeHigherDegrees && atom->getDegree() > 1) { |
857 | 22.2k | return false; |
858 | 22.2k | } |
859 | 17.1k | if (!ps.removeIsotopes && !ps.removeAndTrackIsotopes && atom->getIsotope()) { |
860 | 1.94k | return false; |
861 | 1.94k | } |
862 | 15.2k | if (!ps.removeNonimplicit && !atom->hasProp(common_properties::isImplicit)) { |
863 | 0 | return false; |
864 | 0 | } |
865 | 15.2k | if (!ps.removeMapped && atom->getAtomMapNum()) { |
866 | 0 | return false; |
867 | 0 | } |
868 | | |
869 | 15.2k | if (ps.removeInSGroups) { |
870 | | // If removing H in SGroups, do not remove H atoms in special |
871 | | // roles in the SGroup |
872 | 49.0k | for (const auto &sg : getSubstanceGroups(mol)) { |
873 | | // The H atom is one of the "caps" of the SGroup. Technically, |
874 | | // it's not part of the group, but it defines its boundaries. |
875 | 49.0k | for (const auto &bond_idx : sg.getBonds()) { |
876 | 47.1k | if (sg.getBondType(bond_idx) == SubstanceGroup::BondType::XBOND) { |
877 | 41.8k | auto bond = mol.getBondWithIdx(bond_idx); |
878 | 41.8k | if (bond->getBeginAtom() == atom || bond->getEndAtom() == atom) { |
879 | 278 | return false; |
880 | 278 | } |
881 | 41.8k | } |
882 | 47.1k | } |
883 | | |
884 | 48.8k | for (const auto &sap : sg.getAttachPoints()) { |
885 | | // The H atoms is an attach point. This would be weird, but is possible. |
886 | | // (if it is a 'leaving atom' we don't care, though) |
887 | 5.90k | if (sap.aIdx == atom->getIdx()) { |
888 | 22 | return false; |
889 | 22 | } |
890 | 5.90k | } |
891 | | |
892 | 48.7k | for (const auto &cs : sg.getCStates()) { |
893 | | // The bond to the H atom defines a CState |
894 | 1.20k | auto bond = mol.getBondWithIdx(cs.bondIdx); |
895 | 1.20k | if (bond->getBeginAtom() == atom || bond->getEndAtom() == atom) { |
896 | 2 | return false; |
897 | 2 | } |
898 | 1.20k | } |
899 | 48.7k | } |
900 | 15.2k | } else { |
901 | 0 | for (const auto &sg : getSubstanceGroups(mol)) { |
902 | 0 | if (sg.includesAtom(atom->getIdx())) { |
903 | 0 | return false; |
904 | 0 | } |
905 | 0 | } |
906 | 0 | } |
907 | 14.9k | if (!ps.removeHydrides && atom->getFormalCharge() == -1) { |
908 | 168 | return false; |
909 | 168 | } |
910 | 14.7k | bool removeIt = true; |
911 | 14.7k | if (atom->getDegree() && |
912 | 14.7k | (!ps.removeDummyNeighbors || !ps.removeDefiningBondStereo || |
913 | 0 | !ps.removeOnlyHNeighbors || !ps.removeNontetrahedralNeighbors || |
914 | 14.7k | !ps.removeWithWedgedBond)) { |
915 | 14.7k | bool onlyHNeighbors = true; |
916 | 14.7k | for (const auto nbr : mol.atomNeighbors(atom)) { |
917 | | // is it a dummy? |
918 | 14.7k | if (!ps.removeDummyNeighbors && nbr->getAtomicNum() < 1) { |
919 | 129 | if (ps.showWarnings) { |
920 | 129 | BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom " |
921 | 0 | "with dummy atom neighbors" |
922 | 0 | << std::endl; |
923 | 129 | } |
924 | 129 | return false; |
925 | 129 | } |
926 | | // does it have non-tetrahedral stereo: |
927 | 14.6k | if (!ps.removeNontetrahedralNeighbors && |
928 | 14.6k | Chirality::hasNonTetrahedralStereo(nbr)) { |
929 | 64 | if (ps.showWarnings) { |
930 | 64 | BOOST_LOG(rdWarningLog) |
931 | 0 | << "WARNING: not removing hydrogen atom " |
932 | 0 | "with neighbor that has non-tetrahedral stereochemistry" |
933 | 0 | << std::endl; |
934 | 64 | } |
935 | 64 | return false; |
936 | 64 | } |
937 | 14.5k | if (!ps.removeOnlyHNeighbors && nbr->getAtomicNum() != 1) { |
938 | 14.1k | onlyHNeighbors = false; |
939 | 14.1k | } |
940 | 14.5k | if (!ps.removeWithWedgedBond) { |
941 | 0 | const auto bnd = mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx()); |
942 | 0 | if (bnd->getBondDir() == Bond::BEGINDASH || |
943 | 0 | bnd->getBondDir() == Bond::BEGINWEDGE) { |
944 | 0 | if (ps.showWarnings) { |
945 | 0 | BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom " |
946 | 0 | "with wedged bond" |
947 | 0 | << std::endl; |
948 | 0 | } |
949 | 0 | return false; |
950 | 0 | } |
951 | 0 | } |
952 | | // Check to see if the neighbor has a double bond and we're the only |
953 | | // neighbor at this end. This was part of github #1810 |
954 | 14.5k | if (!ps.removeDefiningBondStereo && nbr->getDegree() == 2) { |
955 | 18.2k | for (const auto bnd : mol.atomBonds(nbr)) { |
956 | 18.2k | if (bnd->getBondType() == Bond::DOUBLE && |
957 | 494 | (bnd->getStereo() > Bond::STEREOANY || |
958 | 493 | mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx()) |
959 | 493 | ->getBondDir() > Bond::NONE)) { |
960 | 355 | return false; |
961 | 355 | } |
962 | 18.2k | } |
963 | 9.32k | } |
964 | 14.5k | } |
965 | 14.1k | if (removeIt && (!ps.removeOnlyHNeighbors && onlyHNeighbors)) { |
966 | 398 | return false; |
967 | 398 | } |
968 | 14.1k | } |
969 | 13.8k | return removeIt; |
970 | 14.7k | } |
971 | | |
972 | | // Do not remove H atoms that are part of SGroups that only contain H atoms. |
973 | | void filter_sgroup_emptying_hydrogens(const ROMol &mol, |
974 | 13.1k | boost::dynamic_bitset<> &atomsToRemove) { |
975 | 13.5k | for (const auto &sg : getSubstanceGroups(mol)) { |
976 | 13.5k | const auto &atoms = sg.getAtoms(); |
977 | 13.5k | const auto &patoms = sg.getParentAtoms(); |
978 | | |
979 | | // If the SGroup already didn't have atoms, we don't care about it |
980 | 13.5k | if (atoms.empty() && patoms.empty()) { |
981 | 1.08k | continue; |
982 | 1.08k | } |
983 | | |
984 | 13.1k | auto would_remove_atom = [&atomsToRemove](const auto idx) { |
985 | 13.1k | return atomsToRemove[idx]; |
986 | 13.1k | }; |
987 | | |
988 | 12.4k | auto no_atoms = atoms.empty() || |
989 | 12.4k | std::all_of(atoms.begin(), atoms.end(), would_remove_atom); |
990 | 12.4k | if (no_atoms) { |
991 | 30 | auto no_patoms = |
992 | 30 | patoms.empty() || |
993 | 12 | std::all_of(patoms.begin(), patoms.end(), would_remove_atom); |
994 | 30 | if (no_patoms) { |
995 | 269 | for (auto atom : atoms) { |
996 | 269 | atomsToRemove.set(atom, false); |
997 | 269 | } |
998 | 242 | for (auto patom : patoms) { |
999 | 242 | atomsToRemove.set(patom, false); |
1000 | 242 | } |
1001 | 30 | } |
1002 | 30 | } |
1003 | 12.4k | } |
1004 | 13.1k | } |
1005 | | |
1006 | | } // end of anonymous namespace |
1007 | | |
1008 | 13.1k | void removeHs(RWMol &mol, const RemoveHsParameters &ps, bool sanitize) { |
1009 | 13.1k | if (ps.removeAndTrackIsotopes) { |
1010 | | // if there are any non-isotopic Hs remove them first |
1011 | | // to make sure chirality is preserved |
1012 | 0 | bool needRemoveHs = false; |
1013 | 0 | for (auto atom : mol.atoms()) { |
1014 | 0 | if (atom->getAtomicNum() == 1 && atom->getIsotope() == 0) { |
1015 | 0 | needRemoveHs = true; |
1016 | 0 | break; |
1017 | 0 | } |
1018 | 0 | } |
1019 | 0 | if (needRemoveHs) { |
1020 | 0 | RemoveHsParameters psCopy(ps); |
1021 | 0 | psCopy.removeAndTrackIsotopes = false; |
1022 | 0 | psCopy.removeIsotopes = false; |
1023 | 0 | removeHs(mol, psCopy, false); |
1024 | 0 | } |
1025 | 0 | } |
1026 | 10.7M | for (auto atom : mol.atoms()) { |
1027 | 10.7M | atom->updatePropertyCache(false); |
1028 | 10.7M | } |
1029 | 13.1k | if (ps.removeAndTrackIsotopes) { |
1030 | 0 | for (const auto &pair : getIsoMap(mol)) { |
1031 | 0 | mol.getAtomWithIdx(pair.first) |
1032 | 0 | ->setProp(common_properties::_isotopicHs, pair.second); |
1033 | 0 | } |
1034 | 0 | } |
1035 | 13.1k | boost::dynamic_bitset<> atomsToRemove{mol.getNumAtoms(), 0}; |
1036 | | |
1037 | 10.7M | for (auto atom : mol.atoms()) { |
1038 | 10.7M | if (shouldRemoveH(mol, atom, ps)) { |
1039 | 13.7k | atomsToRemove.set(atom->getIdx()); |
1040 | 13.7k | } |
1041 | 10.7M | } // end of the loop over atoms |
1042 | | |
1043 | | // Once we know which H atoms would be removed, filter out those that |
1044 | | // would cause any SGroups to become empty |
1045 | 13.1k | if (ps.removeInSGroups) { |
1046 | 13.1k | filter_sgroup_emptying_hydrogens(mol, atomsToRemove); |
1047 | 13.1k | } |
1048 | | |
1049 | | // now that we know which atoms need to be removed, go ahead and remove them |
1050 | | // NOTE: there's too much complexity around stereochemistry here |
1051 | | // to be able to safely use batch editing. |
1052 | 10.7M | for (int idx = mol.getNumAtoms() - 1; idx >= 0; --idx) { |
1053 | 10.7M | if (atomsToRemove[idx]) { |
1054 | 13.7k | molRemoveH(mol, idx, ps.updateExplicitCount); |
1055 | 13.7k | } |
1056 | 10.7M | } |
1057 | 13.1k | mol.clearComputedProps(true); |
1058 | | // |
1059 | | // If we didn't only remove implicit Hs, which are guaranteed to |
1060 | | // be the highest numbered atoms, we may have altered atom indices. |
1061 | | // This can screw up derived properties (such as ring members), so |
1062 | | // do some checks: |
1063 | | // |
1064 | 13.1k | if (!atomsToRemove.empty() && ps.removeNonimplicit && sanitize) { |
1065 | 13.0k | sanitizeMol(mol); |
1066 | 13.0k | } |
1067 | | |
1068 | | // if we removed Hs and any chiral atoms now have more than 1 explict H, |
1069 | | // remove those |
1070 | 13.1k | if (!atomsToRemove.empty()) { |
1071 | 2.68M | for (auto atom : mol.atoms()) { |
1072 | 2.68M | if (!atom->getNoImplicit() && |
1073 | 2.54M | atom->getChiralTag() != Atom::CHI_UNSPECIFIED) { |
1074 | 1.35k | unsigned int numExplicitHs = atom->getNumExplicitHs(); |
1075 | 1.35k | if (numExplicitHs > 1) { |
1076 | 2 | atom->setNumExplicitHs(0); |
1077 | 2 | atom->updatePropertyCache(false); |
1078 | 2 | } |
1079 | 1.35k | } |
1080 | 2.68M | } |
1081 | 7.27k | } |
1082 | 13.1k | } |
1083 | 0 | ROMol *removeHs(const ROMol &mol, const RemoveHsParameters &ps, bool sanitize) { |
1084 | 0 | auto *res = new RWMol(mol); |
1085 | 0 | try { |
1086 | 0 | removeHs(*res, ps, sanitize); |
1087 | 0 | } catch (const MolSanitizeException &) { |
1088 | 0 | delete res; |
1089 | 0 | throw; |
1090 | 0 | } |
1091 | 0 | return static_cast<ROMol *>(res); |
1092 | 0 | } |
1093 | | void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount, |
1094 | 0 | bool sanitize) { |
1095 | 0 | RemoveHsParameters ps; |
1096 | 0 | ps.removeNonimplicit = !implicitOnly; |
1097 | 0 | ps.updateExplicitCount = updateExplicitCount; |
1098 | 0 | removeHs(mol, ps, sanitize); |
1099 | 0 | }; |
1100 | | ROMol *removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount, |
1101 | 0 | bool sanitize) { |
1102 | 0 | auto *res = new RWMol(mol); |
1103 | 0 | RemoveHsParameters ps; |
1104 | 0 | ps.removeNonimplicit = !implicitOnly; |
1105 | 0 | ps.updateExplicitCount = updateExplicitCount; |
1106 | 0 | try { |
1107 | 0 | removeHs(*res, ps, sanitize); |
1108 | 0 | } catch (const MolSanitizeException &) { |
1109 | 0 | delete res; |
1110 | 0 | throw; |
1111 | 0 | } |
1112 | 0 | return static_cast<ROMol *>(res); |
1113 | 0 | } |
1114 | | |
1115 | 0 | void removeAllHs(RWMol &mol, bool sanitize) { |
1116 | 0 | RemoveHsParameters ps; |
1117 | 0 | ps.removeDegreeZero = true; |
1118 | 0 | ps.removeHigherDegrees = true; |
1119 | 0 | ps.removeOnlyHNeighbors = true; |
1120 | 0 | ps.removeIsotopes = true; |
1121 | 0 | ps.removeDummyNeighbors = true; |
1122 | 0 | ps.removeDefiningBondStereo = true; |
1123 | 0 | ps.removeWithWedgedBond = true; |
1124 | 0 | ps.removeWithQuery = true; |
1125 | 0 | ps.removeNonimplicit = true; |
1126 | 0 | ps.removeInSGroups = true; |
1127 | 0 | ps.showWarnings = false; |
1128 | 0 | ps.removeHydrides = true; |
1129 | 0 | ps.removeNontetrahedralNeighbors = true; |
1130 | 0 | removeHs(mol, ps, sanitize); |
1131 | 0 | }; |
1132 | 0 | ROMol *removeAllHs(const ROMol &mol, bool sanitize) { |
1133 | 0 | auto *res = new RWMol(mol); |
1134 | 0 | try { |
1135 | 0 | removeAllHs(*res, sanitize); |
1136 | 0 | } catch (const MolSanitizeException &) { |
1137 | 0 | delete res; |
1138 | 0 | throw; |
1139 | 0 | } |
1140 | 0 | return static_cast<ROMol *>(res); |
1141 | 0 | } |
1142 | | |
1143 | | namespace { |
1144 | | enum class HydrogenType { |
1145 | | NotAHydrogen, |
1146 | | UnMergableQueryHydrogen, |
1147 | | QueryHydrogen |
1148 | | }; |
1149 | | |
1150 | | template <class Q> |
1151 | 0 | std::pair<bool, bool> queryHasHs(Q queryAtom, bool inor = false) { |
1152 | 0 | for (auto childit = queryAtom->beginChildren(); |
1153 | 0 | childit != queryAtom->endChildren(); ++childit) { |
1154 | 0 | QueryAtom::QUERYATOM_QUERY::CHILD_TYPE query = *childit; |
1155 | 0 | if (query->getDescription() == "AtomOr") { |
1156 | 0 | return queryHasHs(query, true); |
1157 | 0 | } else if (query->getDescription() == "AtomAtomicNum") { |
1158 | 0 | if (static_cast<ATOM_EQUALS_QUERY *>(query.get())->getVal() == 1 && |
1159 | 0 | !query->getNegation()) { |
1160 | 0 | return std::make_pair(true, inor); |
1161 | 0 | } |
1162 | 0 | } else if (query->getDescription() == "AtomType") { |
1163 | 0 | auto val = static_cast<ATOM_EQUALS_QUERY *>(query.get())->getVal(); |
1164 | | // 1001 == aromtic hydrogen (not a thing, really) |
1165 | | // 1 == aliphatic hydrogen |
1166 | 0 | if ((val == 1001 || val == 1) && !query->getNegation()) { |
1167 | 0 | return std::make_pair(true, inor); |
1168 | 0 | } |
1169 | 0 | } |
1170 | 0 | } |
1171 | 0 | return std::make_pair(false, inor); |
1172 | 0 | ; |
1173 | 0 | } Unexecuted instantiation: AddHs.cpp:std::__1::pair<bool, bool> RDKit::MolOps::(anonymous namespace)::queryHasHs<Queries::Query<int, RDKit::Atom const*, true>*>(Queries::Query<int, RDKit::Atom const*, true>*, bool) Unexecuted instantiation: AddHs.cpp:std::__1::pair<bool, bool> RDKit::MolOps::(anonymous namespace)::queryHasHs<std::__1::shared_ptr<Queries::Query<int, RDKit::Atom const*, true> > >(std::__1::shared_ptr<Queries::Query<int, RDKit::Atom const*, true> >, bool) |
1174 | | |
1175 | 0 | HydrogenType isQueryH(const Atom *atom) { |
1176 | 0 | PRECONDITION(atom, "bogus atom"); |
1177 | 0 | if (atom->getAtomicNum() == 1) { |
1178 | | // the simple case: the atom is flagged as being an H and |
1179 | | // has no query |
1180 | 0 | if (!atom->hasQuery() || |
1181 | 0 | (!atom->getQuery()->getNegation() && |
1182 | 0 | atom->getQuery()->getDescription() == "AtomAtomicNum")) { |
1183 | 0 | return HydrogenType::QueryHydrogen; |
1184 | 0 | } |
1185 | 0 | } |
1186 | | |
1187 | 0 | if (!(atom->getDegree() <= 1)) { |
1188 | | // bonded and unbonded H atoms will continue rest will be returned |
1189 | 0 | return HydrogenType::NotAHydrogen; |
1190 | 0 | } |
1191 | | |
1192 | 0 | if (atom->hasQuery() && atom->getQuery()->getNegation()) { |
1193 | | // we will not merge negated queries |
1194 | 0 | return HydrogenType::NotAHydrogen; |
1195 | 0 | } |
1196 | | |
1197 | 0 | if (atom->hasQuery()) { |
1198 | 0 | std::pair<bool, bool> res = std::make_pair(false, false); |
1199 | 0 | if (atom->getQuery()->getDescription() == "AtomOr") { |
1200 | 0 | res = queryHasHs(atom->getQuery(), true); |
1201 | 0 | } else if (atom->getQuery()->getDescription() == "AtomAnd") { |
1202 | 0 | res = queryHasHs(atom->getQuery(), false); |
1203 | 0 | } |
1204 | 0 | if (res.first) { // hasH |
1205 | 0 | if (res.second) { // inOr |
1206 | 0 | BOOST_LOG(rdWarningLog) |
1207 | 0 | << "WARNING: merging explicit H queries involved " |
1208 | 0 | "in ORs is not supported. This query will not " |
1209 | 0 | "be merged" |
1210 | 0 | << std::endl; |
1211 | 0 | return HydrogenType::UnMergableQueryHydrogen; |
1212 | 0 | } else { |
1213 | 0 | return HydrogenType::QueryHydrogen; |
1214 | 0 | } |
1215 | 0 | } |
1216 | 0 | } |
1217 | 0 | return HydrogenType::NotAHydrogen; |
1218 | 0 | } |
1219 | | } // namespace |
1220 | | |
1221 | | // |
1222 | | // This routine removes explicit hydrogens (and bonds to them) from |
1223 | | // the molecular graph and adds them as queries to the heavy atoms |
1224 | | // to which they are bound. If the heavy atoms (or atom queries) |
1225 | | // already have hydrogen-count queries, they will be updated. |
1226 | | // |
1227 | | // NOTE: |
1228 | | // - Hydrogens which aren't connected to a heavy atom will not be |
1229 | | // removed. This prevents molecules like "[H][H]" from having |
1230 | | // all atoms removed. |
1231 | | // |
1232 | | // - By default all hydrogens are removed, however if |
1233 | | // merge_unmapped_only is true, any hydrogen participating |
1234 | | // in an atom map will be retained |
1235 | 0 | void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly, bool mergeIsotopes) { |
1236 | 0 | std::vector<unsigned int> atomsToRemove; |
1237 | |
|
1238 | 0 | boost::dynamic_bitset<> hatoms(mol.getNumAtoms()); |
1239 | 0 | for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { |
1240 | 0 | hatoms[i] = isQueryH(mol.getAtomWithIdx(i)) == HydrogenType::QueryHydrogen; |
1241 | 0 | } |
1242 | 0 | unsigned int currIdx = 0, stopIdx = mol.getNumAtoms(); |
1243 | 0 | while (currIdx < stopIdx) { |
1244 | 0 | Atom *atom = mol.getAtomWithIdx(currIdx); |
1245 | 0 | if (!hatoms[currIdx]) { |
1246 | 0 | unsigned int numHsToRemove = 0; |
1247 | 0 | ROMol::ADJ_ITER begin, end; |
1248 | 0 | boost::tie(begin, end) = mol.getAtomNeighbors(atom); |
1249 | |
|
1250 | 0 | while (begin != end) { |
1251 | 0 | if (hatoms[*begin]) { |
1252 | 0 | Atom &bgn = *mol.getAtomWithIdx(*begin); |
1253 | 0 | bool checkUnmapped = |
1254 | 0 | !mergeUnmappedOnly || |
1255 | 0 | !bgn.hasProp(common_properties::molAtomMapNumber); |
1256 | 0 | bool checkIsotope = mergeIsotopes || bgn.getIsotope() == 0; |
1257 | 0 | if (checkUnmapped && checkIsotope) { |
1258 | 0 | atomsToRemove.push_back(rdcast<unsigned int>(*begin)); |
1259 | 0 | ++numHsToRemove; |
1260 | 0 | } |
1261 | 0 | } |
1262 | 0 | ++begin; |
1263 | 0 | } |
1264 | 0 | if (numHsToRemove) { |
1265 | | // |
1266 | | // We have H neighbors: |
1267 | | // Add the appropriate queries to compensate for their removal. |
1268 | | // |
1269 | | // Examples: |
1270 | | // C[H] -> [C;!H0] |
1271 | | // C([H])[H] -> [C;!H0;!H1] |
1272 | | // |
1273 | | // It would be more efficient to do this using range queries like: |
1274 | | // C([H])[H] -> [C;H{2-}] |
1275 | | // but that would produce non-standard SMARTS without the user |
1276 | | // having started with a non-standard SMARTS. |
1277 | | // |
1278 | 0 | if (!atom->hasQuery()) { |
1279 | | // it wasn't a query atom, we need to replace it so that we can add |
1280 | | // a query: |
1281 | 0 | ATOM_EQUALS_QUERY *tmp = makeAtomNumQuery(atom->getAtomicNum()); |
1282 | 0 | auto *newAt = new QueryAtom; |
1283 | 0 | newAt->setQuery(tmp); |
1284 | 0 | newAt->updateProps(*atom); |
1285 | 0 | mol.replaceAtom(atom->getIdx(), newAt); |
1286 | 0 | delete newAt; |
1287 | 0 | atom = mol.getAtomWithIdx(currIdx); |
1288 | 0 | } |
1289 | 0 | for (unsigned int i = 0; i < numHsToRemove; ++i) { |
1290 | 0 | ATOM_EQUALS_QUERY *tmp = makeAtomHCountQuery(i); |
1291 | 0 | tmp->setNegation(true); |
1292 | 0 | atom->expandQuery(tmp); |
1293 | 0 | } |
1294 | 0 | } // end of numHsToRemove test |
1295 | | |
1296 | | // recurse if needed (was github isusue 544) |
1297 | 0 | if (atom->hasQuery()) { |
1298 | 0 | if (atom->getQuery()->getDescription() == "RecursiveStructure") { |
1299 | 0 | auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery()); |
1300 | 0 | CHECK_INVARIANT(rsq, "could not convert recursive structure query"); |
1301 | 0 | RWMol *rqm = new RWMol(*rsq->getQueryMol()); |
1302 | 0 | mergeQueryHs(*rqm, mergeUnmappedOnly, mergeIsotopes); |
1303 | 0 | rsq->setQueryMol(rqm); |
1304 | 0 | } |
1305 | | |
1306 | | // FIX: shouldn't be repeating this code here |
1307 | 0 | std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack( |
1308 | 0 | atom->getQuery()->beginChildren(), atom->getQuery()->endChildren()); |
1309 | 0 | while (childStack.size()) { |
1310 | 0 | QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front(); |
1311 | 0 | childStack.pop_front(); |
1312 | 0 | if (qry->getDescription() == "RecursiveStructure") { |
1313 | 0 | auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get()); |
1314 | 0 | CHECK_INVARIANT(rsq, "could not convert recursive structure query"); |
1315 | 0 | RWMol *rqm = new RWMol(*rsq->getQueryMol()); |
1316 | 0 | mergeQueryHs(*rqm, mergeUnmappedOnly, mergeIsotopes); |
1317 | 0 | rsq->setQueryMol(rqm); |
1318 | 0 | } else if (qry->beginChildren() != qry->endChildren()) { |
1319 | 0 | childStack.insert(childStack.end(), qry->beginChildren(), |
1320 | 0 | qry->endChildren()); |
1321 | 0 | } |
1322 | 0 | } |
1323 | 0 | } // end of recursion loop |
1324 | 0 | } |
1325 | 0 | ++currIdx; |
1326 | 0 | } |
1327 | 0 | mol.beginBatchEdit(); |
1328 | 0 | for (auto aidx : atomsToRemove) { |
1329 | 0 | mol.removeAtom(aidx); |
1330 | 0 | } |
1331 | 0 | mol.commitBatchEdit(); |
1332 | 0 | }; |
1333 | | ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly, |
1334 | 0 | bool mergeIsotopes) { |
1335 | 0 | auto *res = new RWMol(mol); |
1336 | 0 | mergeQueryHs(*res, mergeUnmappedOnly, mergeIsotopes); |
1337 | 0 | return static_cast<ROMol *>(res); |
1338 | 0 | }; |
1339 | | |
1340 | 0 | bool needsHs(const ROMol &mol) { |
1341 | 0 | for (const auto atom : mol.atoms()) { |
1342 | 0 | bool includeNeighbors = false; |
1343 | 0 | if (atom->getTotalNumHs(includeNeighbors)) { |
1344 | 0 | return true; |
1345 | 0 | } |
1346 | 0 | } |
1347 | 0 | return false; |
1348 | 0 | } |
1349 | | |
1350 | 0 | std::pair<bool, bool> hasQueryHs(const ROMol &mol) { |
1351 | 0 | bool queryHs = false; |
1352 | | // We don't care about announcing ORs or other items during isQueryH |
1353 | 0 | RDLog::LogStateSetter blocker; |
1354 | |
|
1355 | 0 | for (const auto atom : mol.atoms()) { |
1356 | 0 | switch (isQueryH(atom)) { |
1357 | 0 | case HydrogenType::UnMergableQueryHydrogen: |
1358 | 0 | return std::make_pair(true, true); |
1359 | 0 | case HydrogenType::QueryHydrogen: |
1360 | 0 | queryHs = true; |
1361 | 0 | break; |
1362 | 0 | default: // HydrogenType::NotAHydrogen: |
1363 | 0 | break; |
1364 | 0 | } |
1365 | 0 | if (atom->hasQuery()) { |
1366 | 0 | if (atom->getQuery()->getDescription() == "RecursiveStructure") { |
1367 | 0 | auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery()); |
1368 | 0 | CHECK_INVARIANT(rsq, "could not convert recursive structure query"); |
1369 | 0 | auto res = hasQueryHs(*rsq->getQueryMol()); |
1370 | 0 | if (res.second) { // unmergableH implies queryH |
1371 | 0 | return res; |
1372 | 0 | } |
1373 | 0 | queryHs |= res.first; |
1374 | 0 | } |
1375 | | |
1376 | | // FIX: shouldn't be repeating this code here -- yet again! |
1377 | 0 | std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack( |
1378 | 0 | atom->getQuery()->beginChildren(), atom->getQuery()->endChildren()); |
1379 | 0 | while (!childStack.empty()) { |
1380 | 0 | QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front(); |
1381 | 0 | childStack.pop_front(); |
1382 | 0 | if (qry->getDescription() == "RecursiveStructure") { |
1383 | 0 | auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get()); |
1384 | 0 | CHECK_INVARIANT(rsq, "could not convert recursive structure query"); |
1385 | 0 | auto res = hasQueryHs(*rsq->getQueryMol()); |
1386 | 0 | if (res.second) { |
1387 | 0 | return res; |
1388 | 0 | } |
1389 | 0 | queryHs |= res.first; |
1390 | 0 | } else { |
1391 | 0 | childStack.insert(childStack.end(), qry->beginChildren(), |
1392 | 0 | qry->endChildren()); |
1393 | 0 | } |
1394 | 0 | } |
1395 | 0 | } |
1396 | 0 | } // end of recursion loop |
1397 | | |
1398 | 0 | return std::make_pair(queryHs, false); |
1399 | 0 | } |
1400 | | |
1401 | | } // namespace MolOps |
1402 | | } // namespace RDKit |