Coverage Report

Created: 2026-06-23 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rdkit/Code/GraphMol/AddHs.cpp
Line
Count
Source
1
//
2
//  Copyright (C) 2003-2025 Greg Landrum and other RDKit contributors
3
//
4
//   @@ All Rights Reserved @@
5
//  This file is part of the RDKit.
6
//  The contents are covered by the terms of the BSD license
7
//  which is included in the file license.txt, found at the root
8
//  of the RDKit source tree.
9
//
10
#include "RDKitBase.h"
11
#include <list>
12
#include "QueryAtom.h"
13
#include "QueryOps.h"
14
#include "MonomerInfo.h"
15
#include "Chirality.h"
16
#include <Geometry/Transform3D.h>
17
#include <Geometry/point.h>
18
#include <boost/algorithm/string/classification.hpp>
19
#include <boost/dynamic_bitset.hpp>
20
#include <boost/range/iterator_range.hpp>
21
22
constexpr double sq_dist_zero_tol = 1.e-4;
23
24
namespace RDKit {
25
26
// Local utility functionality:
27
namespace {
28
0
Atom *getAtomNeighborNot(ROMol *mol, const Atom *atom, const Atom *other) {
29
0
  PRECONDITION(mol, "bad molecule");
30
0
  PRECONDITION(atom, "bad atom");
31
0
  PRECONDITION(atom->getDegree() > 1, "bad degree");
32
0
  PRECONDITION(other, "bad atom");
33
0
  Atom *res = nullptr;
34
35
0
  ROMol::ADJ_ITER nbrIdx, endNbrs;
36
0
  boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(atom);
37
0
  while (nbrIdx != endNbrs) {
38
0
    if (*nbrIdx != other->getIdx()) {
39
0
      res = mol->getAtomWithIdx(*nbrIdx);
40
0
      break;
41
0
    }
42
0
    ++nbrIdx;
43
0
  }
44
45
0
  POSTCONDITION(res, "no neighbor found");
46
0
  return res;
47
0
}
48
49
0
void AssignHsResidueInfo(RWMol &mol) {
50
0
  int max_serial = 0;
51
0
  unsigned int stopIdx = mol.getNumAtoms();
52
0
  for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
53
0
    auto *info =
54
0
        (AtomPDBResidueInfo *)(mol.getAtomWithIdx(aidx)->getMonomerInfo());
55
0
    if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE &&
56
0
        info->getSerialNumber() > max_serial) {
57
0
      max_serial = info->getSerialNumber();
58
0
    }
59
0
  }
60
61
0
  AtomPDBResidueInfo *current_info = nullptr;
62
0
  int current_h_id = 0;
63
0
  for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
64
0
    Atom *newAt = mol.getAtomWithIdx(aidx);
65
0
    auto *info = (AtomPDBResidueInfo *)(newAt->getMonomerInfo());
66
0
    if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) {
67
0
      ROMol::ADJ_ITER begin, end;
68
0
      boost::tie(begin, end) = mol.getAtomNeighbors(newAt);
69
0
      while (begin != end) {
70
0
        if (mol.getAtomWithIdx(*begin)->getAtomicNum() == 1) {
71
          // Make all Hs unique - increment id even for existing
72
0
          ++current_h_id;
73
          // skip if hydrogen already has PDB info
74
0
          auto *h_info = (AtomPDBResidueInfo *)mol.getAtomWithIdx(*begin)
75
0
                             ->getMonomerInfo();
76
0
          if (h_info &&
77
0
              h_info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) {
78
0
            continue;
79
0
          }
80
          // the hydrogens have unique names on residue basis (H1, H2, ...)
81
0
          if (!current_info ||
82
0
              current_info->getResidueNumber() != info->getResidueNumber() ||
83
0
              current_info->getChainId() != info->getChainId()) {
84
0
            current_h_id = 1;
85
0
            current_info = info;
86
0
          }
87
0
          std::string h_label = std::to_string(current_h_id);
88
0
          if (h_label.length() > 3) {
89
0
            h_label = h_label.substr(h_label.length() - 3, 3);
90
0
          }
91
0
          while (h_label.length() < 3) {
92
0
            h_label = h_label + " ";
93
0
          }
94
0
          h_label = "H" + h_label;
95
          // wrap around id to '3H12'
96
0
          h_label = h_label.substr(3, 1) + h_label.substr(0, 3);
97
0
          AtomPDBResidueInfo *newInfo = new AtomPDBResidueInfo(
98
0
              h_label, max_serial, "", info->getResidueName(),
99
0
              info->getResidueNumber(), info->getChainId(), "", 1.0, 0.0,
100
0
              info->getIsHeteroAtom());
101
0
          mol.getAtomWithIdx(*begin)->setMonomerInfo(newInfo);
102
103
0
          ++max_serial;
104
0
        }
105
0
        ++begin;
106
0
      }
107
0
    }
108
0
  }
109
0
}
110
111
0
std::map<unsigned int, std::vector<unsigned int>> getIsoMap(const ROMol &mol) {
112
0
  std::map<unsigned int, std::vector<unsigned int>> isoMap;
113
0
  for (auto atom : mol.atoms()) {
114
0
    if (atom->hasProp(common_properties::_isotopicHs)) {
115
0
      atom->clearProp(common_properties::_isotopicHs);
116
0
    }
117
0
  }
118
0
  for (auto bond : mol.bonds()) {
119
0
    auto ba = bond->getBeginAtom();
120
0
    auto ea = bond->getEndAtom();
121
0
    int ha = -1;
122
0
    unsigned int iso;
123
0
    if (ba->getAtomicNum() == 1 && ba->getIsotope() &&
124
0
        ea->getAtomicNum() != 1) {
125
0
      ha = ea->getIdx();
126
0
      iso = ba->getIsotope();
127
0
    } else if (ea->getAtomicNum() == 1 && ea->getIsotope() &&
128
0
               ba->getAtomicNum() != 1) {
129
0
      ha = ba->getIdx();
130
0
      iso = ea->getIsotope();
131
0
    }
132
0
    if (ha == -1) {
133
0
      continue;
134
0
    }
135
0
    auto &v = isoMap[ha];
136
0
    v.push_back(iso);
137
0
  }
138
0
  return isoMap;
139
0
}
140
141
373
bool may_need_extra_H(const ROMol &mol, const Atom *atom) {
142
373
  unsigned single_bonds = 0;
143
373
  unsigned aromatic_bonds = 0;
144
647
  for (auto bond : mol.atomBonds(atom)) {
145
647
    if (bond->getBondType() == Bond::SINGLE) {
146
266
      ++single_bonds;
147
381
    } else if (bond->getBondType() == Bond::AROMATIC) {
148
80
      ++aromatic_bonds;
149
301
    } else {
150
301
      return false;
151
301
    }
152
647
  }
153
72
  return single_bonds == 1 && aromatic_bonds == 2 &&
154
4
         atom->getTotalValence() == 3;
155
373
}
156
157
}  // end of unnamed namespace
158
159
namespace MolOps {
160
161
namespace {
162
RDGeom::Point3D pickBisector(const RDGeom::Point3D &nbr1Vect,
163
                             const RDGeom::Point3D &nbr2Vect,
164
0
                             const RDGeom::Point3D &nbr3Vect) {
165
0
  auto dirVect = nbr2Vect + nbr3Vect;
166
0
  if (dirVect.lengthSq() < sq_dist_zero_tol) {
167
    // nbr2Vect and nbr3Vect are anti-parallel (was #3854)
168
0
    dirVect = nbr2Vect;
169
0
    std::swap(dirVect.x, dirVect.y);
170
0
    dirVect.x *= -1;
171
0
  }
172
0
  if (dirVect.dotProduct(nbr1Vect) < 0) {
173
0
    dirVect *= -1;
174
0
  }
175
176
0
  return dirVect;
177
0
}
178
}  // namespace
179
180
void setTerminalAtomCoords(ROMol &mol, unsigned int idx,
181
0
                           unsigned int otherIdx) {
182
  // we will loop over all the coordinates
183
0
  PRECONDITION(otherIdx != idx, "degenerate atoms");
184
0
  Atom *atom = mol.getAtomWithIdx(idx);
185
0
  PRECONDITION(mol.getAtomDegree(atom) == 1, "bad atom degree");
186
0
  const Bond *bond = mol.getBondBetweenAtoms(otherIdx, idx);
187
0
  PRECONDITION(bond, "no bond between atoms");
188
189
0
  const Atom *otherAtom = mol.getAtomWithIdx(otherIdx);
190
0
  double bondLength =
191
0
      PeriodicTable::getTable()->getRb0(1) +
192
0
      PeriodicTable::getTable()->getRb0(otherAtom->getAtomicNum());
193
194
0
  RDGeom::Point3D dirVect(0, 0, 0);
195
196
0
  RDGeom::Point3D perpVect, rotnAxis, nbrPerp;
197
0
  RDGeom::Point3D nbr1Vect, nbr2Vect, nbr3Vect;
198
0
  RDGeom::Transform3D tform;
199
0
  RDGeom::Point3D otherPos, atomPos;
200
201
0
  const Atom *nbr1 = nullptr, *nbr2 = nullptr, *nbr3 = nullptr;
202
0
  const Bond *nbrBond;
203
0
  ROMol::ADJ_ITER nbrIdx, endNbrs;
204
205
0
  switch (otherAtom->getDegree()) {
206
0
    case 1:
207
      // --------------------------------------------------------------------------
208
      //   No other atoms present:
209
      // --------------------------------------------------------------------------
210
      // loop over the conformations and set the coordinates
211
0
      for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
212
0
           cfi++) {
213
0
        if ((*cfi)->is3D()) {
214
0
          dirVect.z = 1;
215
0
        } else {
216
0
          dirVect.x = 1;
217
0
        }
218
0
        otherPos = (*cfi)->getAtomPos(otherIdx);
219
0
        atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
220
0
        (*cfi)->setAtomPos(idx, atomPos);
221
0
      }
222
0
      break;
223
224
0
    case 2:
225
      // --------------------------------------------------------------------------
226
      //  One other neighbor:
227
      // --------------------------------------------------------------------------
228
0
      nbr1 = getAtomNeighborNot(&mol, otherAtom, atom);
229
0
      for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
230
0
           ++cfi) {
231
0
        otherPos = (*cfi)->getAtomPos(otherIdx);
232
0
        RDGeom::Point3D nbr1Pos = (*cfi)->getAtomPos(nbr1->getIdx());
233
        // get a normalized vector pointing away from the neighbor:
234
0
        nbr1Vect = nbr1Pos - otherPos;
235
0
        if (nbr1Vect.lengthSq() < sq_dist_zero_tol) {
236
          // no difference, which likely indicates that we have redundant atoms.
237
          // just put it on top of the heavy atom. This was #678
238
0
          (*cfi)->setAtomPos(idx, otherPos);
239
0
          continue;
240
0
        }
241
0
        nbr1Vect.normalize();
242
0
        nbr1Vect *= -1;
243
244
        // ok, nbr1Vect points away from the other atom, figure out where
245
        // this H goes:
246
0
        switch (otherAtom->getHybridization()) {
247
0
          case Atom::SP3:
248
            // get a perpendicular to nbr1Vect:
249
0
            if ((*cfi)->is3D()) {
250
0
              perpVect = nbr1Vect.getPerpendicular();
251
0
            } else {
252
0
              perpVect.z = 1.0;
253
0
            }
254
            // and move off it:
255
0
            tform.SetRotation((180 - 109.471) * M_PI / 180., perpVect);
256
0
            dirVect = tform * nbr1Vect;
257
0
            atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
258
0
            (*cfi)->setAtomPos(idx, atomPos);
259
0
            break;
260
0
          case Atom::SP2:
261
            // default 3D position is to just take an arbitrary perpendicular
262
            // for 2D we take the normal to the xy plane
263
0
            if ((*cfi)->is3D()) {
264
0
              perpVect = nbr1Vect.getPerpendicular();
265
0
            } else {
266
0
              perpVect.z = 1.0;
267
0
            }
268
0
            if (nbr1->getDegree() > 1) {
269
              // can we use the neighboring atom to establish a perpendicular?
270
0
              nbrBond = mol.getBondBetweenAtoms(otherIdx, nbr1->getIdx());
271
0
              if (nbrBond->getIsAromatic() ||
272
0
                  nbrBond->getBondType() == Bond::DOUBLE ||
273
0
                  nbrBond->getIsConjugated()) {
274
0
                nbr2 = getAtomNeighborNot(&mol, nbr1, otherAtom);
275
0
                nbr2Vect =
276
0
                    nbr1Pos.directionVector((*cfi)->getAtomPos(nbr2->getIdx()));
277
0
                auto crossProd = nbr2Vect.crossProduct(nbr1Vect);
278
279
                // if nbr1 and nbr2 are aligned, the perpendicular will be null,
280
                // and we'll just keep the default calculated above. Otherwise
281
                // we use the cross product
282
0
                if (crossProd.lengthSq() >= sq_dist_zero_tol) {
283
0
                  perpVect = crossProd;
284
0
                }
285
0
              }
286
0
            }
287
0
            perpVect.normalize();
288
            // rotate the nbr1Vect 60 degrees about perpVect and we're done:
289
0
            tform.SetRotation(60. * M_PI / 180., perpVect);
290
0
            dirVect = tform * nbr1Vect;
291
0
            atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
292
0
            (*cfi)->setAtomPos(idx, atomPos);
293
0
            break;
294
0
          case Atom::SP:
295
            // just lay the H along the vector:
296
0
            dirVect = nbr1Vect;
297
0
            atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
298
0
            (*cfi)->setAtomPos(idx, atomPos);
299
0
            break;
300
0
          default:
301
            // FIX: handle other hybridizations
302
            // for now, just lay the H along the vector:
303
0
            dirVect = nbr1Vect;
304
0
            atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
305
0
            (*cfi)->setAtomPos(idx, atomPos);
306
0
        }
307
0
      }
308
0
      break;
309
0
    case 3:
310
      // --------------------------------------------------------------------------
311
      // Two other neighbors:
312
      // --------------------------------------------------------------------------
313
0
      boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom);
314
0
      while (nbrIdx != endNbrs) {
315
0
        if (*nbrIdx != idx) {
316
0
          if (!nbr1) {
317
0
            nbr1 = mol.getAtomWithIdx(*nbrIdx);
318
0
          } else {
319
0
            nbr2 = mol.getAtomWithIdx(*nbrIdx);
320
0
          }
321
0
        }
322
0
        ++nbrIdx;
323
0
      }
324
0
      TEST_ASSERT(nbr1);
325
0
      TEST_ASSERT(nbr2);
326
0
      for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
327
0
           ++cfi) {
328
        // start along the average of the two vectors:
329
0
        otherPos = (*cfi)->getAtomPos(otherIdx);
330
0
        nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx());
331
0
        nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx());
332
0
        if (nbr1Vect.lengthSq() < sq_dist_zero_tol ||
333
0
            nbr2Vect.lengthSq() < sq_dist_zero_tol) {
334
          // no difference, which likely indicates that we have redundant atoms.
335
          // just put it on top of the heavy atom. This was #678
336
0
          (*cfi)->setAtomPos(idx, otherPos);
337
0
          continue;
338
0
        }
339
0
        nbr1Vect.normalize();
340
0
        nbr2Vect.normalize();
341
0
        dirVect = nbr1Vect + nbr2Vect;
342
343
0
        if (dirVect.lengthSq() < sq_dist_zero_tol) {
344
          // nbr1Vect and nbr2Vect are non-null, but they may
345
          // still cancel each other out
346
0
          continue;
347
0
        }
348
0
        dirVect.normalize();
349
0
        if ((*cfi)->is3D()) {
350
0
          switch (otherAtom->getHybridization()) {
351
0
            case Atom::SP3:
352
              // get the perpendicular to the neighbors:
353
0
              nbrPerp = nbr1Vect.crossProduct(nbr2Vect);
354
              // and the perpendicular to that:
355
0
              rotnAxis = nbrPerp.crossProduct(dirVect);
356
              // and then rotate about that:
357
0
              rotnAxis.normalize();
358
0
              tform.SetRotation((109.471 / 2) * M_PI / 180., rotnAxis);
359
0
              dirVect = tform * dirVect;
360
0
              atomPos =
361
0
                  otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
362
0
              (*cfi)->setAtomPos(idx, atomPos);
363
0
              break;
364
0
            case Atom::SP2:
365
              // don't need to do anything here, the H atom goes right on the
366
              // direction vector
367
0
              atomPos =
368
0
                  otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
369
0
              (*cfi)->setAtomPos(idx, atomPos);
370
0
              break;
371
0
            default:
372
              // FIX: handle other hybridizations
373
              // for now, just lay the H along the neighbor vector;
374
0
              atomPos =
375
0
                  otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
376
0
              (*cfi)->setAtomPos(idx, atomPos);
377
0
              break;
378
0
          }
379
0
        } else {
380
          // don't need to do anything here, the H atom goes right on the
381
          // direction vector
382
0
          atomPos = otherPos + dirVect;
383
0
          (*cfi)->setAtomPos(idx, atomPos);
384
0
        }
385
0
      }
386
0
      break;
387
0
    case 4:
388
      // --------------------------------------------------------------------------
389
      // Three other neighbors:
390
      // --------------------------------------------------------------------------
391
0
      boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom);
392
393
      // We're using chiral tag for checking chirality, so we just take the
394
      // initial order
395
0
      while (nbrIdx != endNbrs) {
396
0
        if (*nbrIdx != idx) {
397
0
          if (!nbr1) {
398
0
            nbr1 = mol.getAtomWithIdx(*nbrIdx);
399
0
          } else if (!nbr2) {
400
0
            nbr2 = mol.getAtomWithIdx(*nbrIdx);
401
0
          } else {
402
0
            nbr3 = mol.getAtomWithIdx(*nbrIdx);
403
0
          }
404
0
        }
405
0
        ++nbrIdx;
406
0
      }
407
408
0
      TEST_ASSERT(nbr1);
409
0
      TEST_ASSERT(nbr2);
410
0
      TEST_ASSERT(nbr3);
411
412
0
      for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
413
0
           ++cfi) {
414
0
        otherPos = (*cfi)->getAtomPos(otherIdx);
415
0
        nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx());
416
0
        nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx());
417
0
        nbr3Vect = otherPos - (*cfi)->getAtomPos(nbr3->getIdx());
418
0
        if (nbr1Vect.lengthSq() < sq_dist_zero_tol ||
419
0
            nbr2Vect.lengthSq() < sq_dist_zero_tol ||
420
0
            nbr3Vect.lengthSq() < sq_dist_zero_tol) {
421
          // no difference, which likely indicates that we have redundant atoms.
422
          // just put it on top of the heavy atom. This was #678
423
0
          (*cfi)->setAtomPos(idx, otherPos);
424
0
          continue;
425
0
        }
426
0
        nbr1Vect.normalize();
427
0
        nbr2Vect.normalize();
428
0
        nbr3Vect.normalize();
429
430
        // if three neighboring atoms are more or less planar, this
431
        // is going to be in a quasi-random (but almost definitely bad)
432
        // direction...
433
        // correct for this (issue 2951221):
434
0
        if ((*cfi)->is3D()) {
435
0
          if (fabs(nbr3Vect.dotProduct(nbr1Vect.crossProduct(nbr2Vect))) <
436
0
              0.1) {
437
            // compute the normal:
438
0
            dirVect = nbr1Vect.crossProduct(nbr2Vect);
439
440
            // Each of the nbr vectors is non-null, but there might be pairs
441
            // that cancel each other out. Try to find a direction from atoms
442
            // that do not overlap.
443
0
            if (dirVect.lengthSq() < sq_dist_zero_tol) {
444
              // This definition of dirVect reverses the parity around otherIdx
445
              // the change of sign restores it
446
0
              dirVect = nbr1Vect.crossProduct(nbr3Vect) * -1;
447
0
            }
448
0
            if (dirVect.lengthSq() < sq_dist_zero_tol) {
449
0
              dirVect = nbr2Vect.crossProduct(nbr3Vect);
450
0
            }
451
            // We couldn't find a good direction
452
0
            if (dirVect.lengthSq() < sq_dist_zero_tol) {
453
0
              continue;
454
0
            }
455
456
0
            std::string cipCode;
457
0
            if (otherAtom->getPropIfPresent(common_properties::_CIPCode,
458
0
                                            cipCode)) {
459
              // the heavy atom is a chiral center, make sure
460
              // that we went go the right direction to preserve
461
              // its chirality. We use the chiral volume for this:
462
0
              RDGeom::Point3D v1 = dirVect - nbr3Vect;
463
0
              RDGeom::Point3D v2 = nbr1Vect - nbr3Vect;
464
0
              RDGeom::Point3D v3 = nbr2Vect - nbr3Vect;
465
0
              double vol = v1.dotProduct(v2.crossProduct(v3));
466
467
0
              if ((otherAtom->getChiralTag() ==
468
0
                       Atom::ChiralType::CHI_TETRAHEDRAL_CCW &&
469
0
                   vol < 0) ||
470
0
                  (otherAtom->getChiralTag() ==
471
0
                       Atom::ChiralType::CHI_TETRAHEDRAL_CW &&
472
0
                   vol > 0)) {
473
0
                dirVect *= -1;
474
0
              }
475
0
            }
476
0
          } else {
477
0
            dirVect = nbr1Vect + nbr2Vect + nbr3Vect;
478
0
          }
479
0
        } else {
480
          // we're in flatland
481
482
          // github #3879 and #908: find the two neighbors with the largest
483
          // outer angle between them and then place the H to bisect that angle
484
          // This is recommendation ST-1.1.4 from the 2006 IUPAC "Graphical
485
          // representation of stereochemical configuration" guideline
486
0
          auto angle12 = nbr1Vect.angleTo(nbr2Vect);
487
0
          auto angle13 = nbr1Vect.angleTo(nbr3Vect);
488
0
          auto angle23 = nbr2Vect.angleTo(nbr3Vect);
489
0
          auto accum1 = angle12 + angle13;
490
0
          auto accum2 = angle12 + angle23;
491
0
          auto accum3 = angle13 + angle23;
492
0
          if (accum1 <= accum2 && accum1 <= accum3) {
493
0
            dirVect = pickBisector(nbr1Vect, nbr2Vect, nbr3Vect);
494
0
          } else if (accum2 <= accum1 && accum2 <= accum3) {
495
0
            dirVect = pickBisector(nbr2Vect, nbr1Vect, nbr3Vect);
496
0
          } else {
497
0
            dirVect = pickBisector(nbr3Vect, nbr1Vect, nbr2Vect);
498
0
          }
499
0
        }
500
501
0
        dirVect.normalize();
502
0
        atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
503
0
        (*cfi)->setAtomPos(idx, atomPos);
504
0
      }
505
0
      break;
506
0
    default:
507
      // --------------------------------------------------------------------------
508
      // FIX: figure out what to do here
509
      // --------------------------------------------------------------------------
510
0
      atomPos = otherPos + dirVect * bondLength;
511
0
      for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
512
0
           ++cfi) {
513
0
        (*cfi)->setAtomPos(idx, atomPos);
514
0
      }
515
0
      break;
516
0
  }
517
0
}
518
519
namespace {
520
0
bool isQueryAtom(const RWMol &mol, const Atom &atom) {
521
0
  if (atom.hasQuery()) {
522
0
    return true;
523
0
  }
524
0
  for (const auto bnd : mol.atomBonds(&atom)) {
525
0
    if (bnd->hasQuery()) {
526
0
      return true;
527
0
    }
528
0
  }
529
0
  return false;
530
0
}
531
}  // namespace
532
void addHs(RWMol &mol, const AddHsParameters &params,
533
0
           const UINT_VECT *onlyOnAtoms) {
534
  // when we hit each atom, clear its computed properties
535
  // NOTE: it is essential that we not clear the ring info in the
536
  // molecule's computed properties.  We don't want to have to
537
  // regenerate that.  This caused Issue210 and Issue212:
538
0
  mol.clearComputedProps(false);
539
540
  // precompute the number of hydrogens we are going to add so that we can
541
  // pre-allocate the necessary space on the conformations of the molecule
542
  // for their coordinates
543
0
  unsigned int numAddHyds = 0;
544
0
  boost::dynamic_bitset<> onAtoms(mol.getNumAtoms());
545
0
  if (onlyOnAtoms) {
546
0
    for (auto atIdx : *onlyOnAtoms) {
547
0
      onAtoms.set(atIdx);
548
0
    }
549
0
  } else {
550
0
    onAtoms.set();
551
0
  }
552
0
  std::vector<unsigned int> numExplicitHs(mol.getNumAtoms(), 0);
553
0
  std::vector<unsigned int> numImplicitHs(mol.getNumAtoms(), 0);
554
0
  for (auto at : mol.atoms()) {
555
0
    numExplicitHs[at->getIdx()] = at->getNumExplicitHs();
556
0
    numImplicitHs[at->getIdx()] = at->getNumImplicitHs();
557
0
    if (onAtoms[at->getIdx()]) {
558
0
      if (params.skipQueries && isQueryAtom(mol, *at)) {
559
0
        onAtoms.set(at->getIdx(), 0);
560
0
        continue;
561
0
      }
562
0
      numAddHyds += at->getNumExplicitHs();
563
0
      if (!params.explicitOnly) {
564
0
        numAddHyds += at->getNumImplicitHs();
565
0
      }
566
0
    }
567
0
  }
568
0
  unsigned int nSize = mol.getNumAtoms() + numAddHyds;
569
570
  // loop over the conformations of the molecule and allocate new space
571
  // for the H locations (need to do this even if we aren't adding coords so
572
  // that the conformers have the correct number of atoms).
573
0
  for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); ++cfi) {
574
0
    (*cfi)->reserve(nSize);
575
0
  }
576
577
0
  unsigned int stopIdx = mol.getNumAtoms();
578
0
  for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
579
0
    if (!onAtoms[aidx]) {
580
0
      continue;
581
0
    }
582
583
0
    Atom *newAt = mol.getAtomWithIdx(aidx);
584
585
0
    std::vector<unsigned int> isoHs;
586
0
    if (newAt->getPropIfPresent(common_properties::_isotopicHs, isoHs)) {
587
0
      newAt->clearProp(common_properties::_isotopicHs);
588
0
    }
589
0
    std::vector<unsigned int>::const_iterator isoH = isoHs.begin();
590
0
    unsigned int newIdx;
591
0
    newAt->clearComputedProps();
592
    // always convert explicit Hs
593
0
    unsigned int onumexpl = numExplicitHs[aidx];
594
0
    for (unsigned int i = 0; i < onumexpl; i++) {
595
0
      newIdx = mol.addAtom(new Atom(1), false, true);
596
0
      mol.addBond(aidx, newIdx, Bond::SINGLE);
597
0
      auto hAtom = mol.getAtomWithIdx(newIdx);
598
0
      hAtom->updatePropertyCache();
599
0
      if (params.addCoords) {
600
0
        setTerminalAtomCoords(mol, newIdx, aidx);
601
0
      }
602
0
      if (isoH != isoHs.end()) {
603
0
        hAtom->setIsotope(*isoH);
604
0
        ++isoH;
605
0
      }
606
0
    }
607
    // clear the local property
608
0
    newAt->setNumExplicitHs(0);
609
610
0
    if (!params.explicitOnly) {
611
      // take care of implicits
612
0
      for (unsigned int i = 0; i < numImplicitHs[aidx]; i++) {
613
0
        newIdx = mol.addAtom(new Atom(1), false, true);
614
0
        mol.addBond(aidx, newIdx, Bond::SINGLE);
615
        // set the isImplicit label so that we can strip these back
616
        // off later if need be.
617
0
        auto hAtom = mol.getAtomWithIdx(newIdx);
618
0
        hAtom->setProp(common_properties::isImplicit, 1);
619
0
        hAtom->updatePropertyCache();
620
0
        if (params.addCoords) {
621
0
          setTerminalAtomCoords(mol, newIdx, aidx);
622
0
        }
623
0
        if (isoH != isoHs.end()) {
624
0
          hAtom->setIsotope(*isoH);
625
0
          ++isoH;
626
0
        }
627
0
      }
628
0
    }
629
    // update the atom's derived properties (valence count, etc.)
630
    // no sense in being strict here (was github #2782)
631
0
    newAt->updatePropertyCache(false);
632
0
    if (isoH != isoHs.end()) {
633
0
      BOOST_LOG(rdWarningLog) << "extra H isotope information found on atom "
634
0
                              << newAt->getIdx() << std::endl;
635
0
    }
636
0
  }
637
  // take care of AtomPDBResidueInfo for Hs if root atom has it
638
0
  if (params.addResidueInfo) {
639
0
    AssignHsResidueInfo(mol);
640
0
  }
641
0
}
642
643
namespace {
644
// returns whether or not an adjustment was made, in case we want that info
645
bool adjustStereoAtomsIfRequired(RWMol &mol, const Atom *atom,
646
13.7k
                                 const Atom *heavyAtom) {
647
13.7k
  PRECONDITION(atom != nullptr, "bad atom");
648
13.7k
  PRECONDITION(heavyAtom != nullptr, "bad heavy atom");
649
  // nothing we can do if the degree is only 2 (and we should have covered
650
  // that earlier anyway)
651
13.7k
  if (heavyAtom->getDegree() == 2) {
652
8.63k
    return false;
653
8.63k
  }
654
5.08k
  const auto &cbnd =
655
5.08k
      mol.getBondBetweenAtoms(atom->getIdx(), heavyAtom->getIdx());
656
5.08k
  if (!cbnd) {
657
0
    return false;
658
0
  }
659
5.08k
  for (const auto &nbri :
660
608k
       boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
661
608k
    Bond *bnd = mol[nbri];
662
608k
    if (bnd->getBondType() == Bond::DOUBLE &&
663
2.41k
        bnd->getStereo() > Bond::STEREOANY) {
664
18
      auto sAtomIt = std::find(bnd->getStereoAtoms().begin(),
665
18
                               bnd->getStereoAtoms().end(), atom->getIdx());
666
18
      if (sAtomIt != bnd->getStereoAtoms().end()) {
667
        // sAtomIt points to the position of this atom's index in the list.
668
        // find the index of another atom attached to the heavy atom and
669
        // use it to update sAtomIt
670
9
        unsigned int dblNbrIdx = bnd->getOtherAtomIdx(heavyAtom->getIdx());
671
9
        for (const auto &nbri :
672
23
             boost::make_iterator_range(mol.getAtomNeighbors(heavyAtom))) {
673
23
          const auto &nbr = mol[nbri];
674
23
          if (nbr->getIdx() == dblNbrIdx || nbr->getIdx() == atom->getIdx()) {
675
14
            continue;
676
14
          }
677
9
          *sAtomIt = nbr->getIdx();
678
9
          bool madeAdjustment = true;
679
9
          switch (bnd->getStereo()) {
680
7
            case Bond::STEREOCIS:
681
7
              bnd->setStereo(Bond::STEREOTRANS);
682
7
              break;
683
2
            case Bond::STEREOTRANS:
684
2
              bnd->setStereo(Bond::STEREOCIS);
685
2
              break;
686
0
            default:
687
              // I think we shouldn't need to do anything with E and Z...
688
0
              madeAdjustment = false;
689
0
              break;
690
9
          }
691
9
          return madeAdjustment;
692
9
        }
693
9
      }
694
18
    }
695
608k
  }
696
5.07k
  return false;
697
5.08k
}
698
699
13.7k
void molRemoveH(RWMol &mol, unsigned int idx, bool updateExplicitCount) {
700
13.7k
  auto atom = mol.getAtomWithIdx(idx);
701
13.7k
  PRECONDITION(atom->getAtomicNum() == 1, "idx corresponds to a non-Hydrogen");
702
13.7k
  for (const auto bond : mol.atomBonds(atom)) {
703
13.7k
    Atom *heavyAtom = bond->getOtherAtom(atom);
704
13.7k
    int heavyAtomNum = heavyAtom->getAtomicNum();
705
706
    // we'll update the neighbor's explicit H count if we were told to
707
    // *or* if the neighbor is chiral, in which case the H is needed
708
    // in order to complete the coordination
709
    // *or* if the neighbor has the noImplicit flag set:
710
13.7k
    if (updateExplicitCount || heavyAtom->getNoImplicit() ||
711
13.1k
        heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
712
13.1k
      heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1);
713
13.1k
    } else {
714
      // this is a special case related to Issue 228 and the
715
      // "disappearing Hydrogen" problem discussed in MolOps::adjustHs
716
      //
717
      // If we remove a hydrogen from an aromatic N or P, or if
718
      // the heavy atom it is connected to is not in its default
719
      // valence state, we need to be *sure* to increment the
720
      // explicit count, even if the H itself isn't marked as explicit
721
615
      const INT_VECT &defaultVs =
722
615
          PeriodicTable::getTable()->getValenceList(heavyAtomNum);
723
615
      if (((heavyAtomNum == 7 || heavyAtomNum == 15 ||
724
373
            may_need_extra_H(mol, heavyAtom)) &&
725
243
           isAromaticAtom(*heavyAtom)) ||
726
587
          (std::find(defaultVs.begin() + 1, defaultVs.end(),
727
587
                     heavyAtom->getTotalValence()) != defaultVs.end())) {
728
65
        heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1);
729
65
      }
730
615
    }
731
732
    // One other consequence of removing the H from the graph is
733
    // that we may change the ordering of the bonds about a
734
    // chiral center.  This may change the chiral label at that
735
    // atom.  We deal with that by explicitly checking here:
736
13.7k
    if (heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
737
2.10k
      INT_LIST neighborIndices;
738
587k
      for (const auto &nbnd : mol.atomBonds(heavyAtom)) {
739
587k
        if (nbnd->getIdx() != bond->getIdx()) {
740
585k
          neighborIndices.push_back(nbnd->getIdx());
741
585k
        }
742
587k
      }
743
2.10k
      neighborIndices.push_back(bond->getIdx());
744
745
2.10k
      int nSwaps = heavyAtom->getPerturbationOrder(neighborIndices);
746
      // std::cerr << "H: "<<atom->getIdx()<<" hvy:
747
      // "<<heavyAtom->getIdx()<<" swaps: " << nSwaps<<std::endl;
748
2.10k
      if (nSwaps % 2) {
749
1.04k
        heavyAtom->invertChirality();
750
1.04k
      }
751
2.10k
    }
752
753
    // If we are removing a H atom that defines bond stereo (e.g. imines),
754
    // Then also remove the bond stereo information, as it is no longer valid.
755
13.7k
    if (heavyAtom->getDegree() == 2) {
756
12.0k
      for (auto &nbnd : mol.atomBonds(heavyAtom)) {
757
12.0k
        if (nbnd != bond) {
758
8.64k
          if (nbnd->getStereo() > Bond::STEREOANY) {
759
44
            nbnd->setStereo(Bond::STEREONONE);
760
44
            nbnd->getStereoAtoms().clear();
761
44
          }
762
8.64k
          break;
763
8.64k
        }
764
12.0k
      }
765
8.64k
    }
766
767
    // if it's a wavy bond, then we need to
768
    // mark the beginning atom with the _UnknownStereo tag.
769
    // so that we know later that something was affecting its
770
    // stereochem
771
13.7k
    if (bond->getBondDir() == Bond::UNKNOWN &&
772
19
        bond->getBeginAtomIdx() == heavyAtom->getIdx()) {
773
17
      heavyAtom->setProp(common_properties::_UnknownStereo, 1);
774
13.7k
    } else if (bond->getBondDir() == Bond::ENDDOWNRIGHT ||
775
13.5k
               bond->getBondDir() == Bond::ENDUPRIGHT) {
776
      // if the direction is set on this bond and the atom it's connected to
777
      // has no other single bonds with directions set, then we need to set
778
      // direction on one of the other neighbors in order to avoid double
779
      // bond stereochemistry possibly being lost. This was github #754
780
2.76k
      bool foundADir = false;
781
2.76k
      Bond *oBond = nullptr;
782
2.76k
      for (const auto &nbri :
783
152k
           boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
784
152k
        Bond *nbnd = mol[nbri];
785
152k
        if (nbnd->getIdx() != bond->getIdx() &&
786
149k
            nbnd->getBondType() == Bond::SINGLE) {
787
144k
          if (nbnd->getBondDir() == Bond::NONE) {
788
141k
            oBond = nbnd;
789
141k
          } else {
790
3.32k
            foundADir = true;
791
3.32k
          }
792
144k
        }
793
152k
      }
794
2.76k
      if (!foundADir && oBond != nullptr) {
795
1.64k
        bool flipIt = (oBond->getBeginAtom() == heavyAtom) &&
796
749
                      (bond->getBeginAtom() == heavyAtom);
797
1.64k
        if (flipIt) {
798
633
          oBond->setBondDir(bond->getBondDir() == Bond::ENDDOWNRIGHT
799
633
                                ? Bond::ENDUPRIGHT
800
633
                                : Bond::ENDDOWNRIGHT);
801
1.01k
        } else {
802
1.01k
          oBond->setBondDir(bond->getBondDir());
803
1.01k
        }
804
1.64k
      }
805
      // if this atom is one of the stereoatoms for a double bond we need
806
      // to switch the stereo atom on this end to be the other neighbor
807
      // This was part of github #1810
808
2.76k
      adjustStereoAtomsIfRequired(mol, atom, heavyAtom);
809
10.9k
    } else {
810
      // if this atom is one of the stereoatoms for a double bond we need
811
      // to switch the stereo atom on this end to be the other neighbor
812
      // This was part of github #1810
813
10.9k
      adjustStereoAtomsIfRequired(mol, atom, heavyAtom);
814
10.9k
    }
815
816
    // remove the bond from any SGroups that might include it.
817
46.5k
    for (auto &sg : getSubstanceGroups(mol)) {
818
46.5k
      sg.removeBondWithIdx(bond->getIdx());
819
46.5k
    }
820
13.7k
  }
821
822
  // Finally, remove the atom from any SGroups that might include it, so that
823
  // the SGroups don't get removed in removeAtom(). Since we allow removing
824
  // SGroup SAP lvidx H atoms, we need to check for those and update them.
825
46.5k
  for (auto &sg : getSubstanceGroups(mol)) {
826
46.5k
    sg.removeAtomWithIdx(idx);
827
46.5k
    sg.removeParentAtomWithIdx(idx);
828
829
46.5k
    for (auto &sap : sg.getAttachPoints()) {
830
5.73k
      if (sap.lvIdx == static_cast<int>(idx)) {
831
1.17k
        sap.lvIdx = -1;
832
1.17k
      }
833
5.73k
    }
834
46.5k
  }
835
  // computed properties will be cleared after all hydrogens are removed
836
13.7k
  bool clearProps = false;
837
13.7k
  mol.removeAtom(atom, clearProps);
838
13.7k
}
839
840
bool shouldRemoveH(const RWMol &mol, const Atom *atom,
841
10.7M
                   const RemoveHsParameters &ps) {
842
10.7M
  if (atom->getAtomicNum() != 1) {
843
10.7M
    return false;
844
10.7M
  }
845
39.6k
  if (!ps.removeWithQuery && atom->hasQuery()) {
846
112
    return false;
847
112
  }
848
39.5k
  if (!ps.removeDegreeZero && !atom->getDegree()) {
849
156
    if (ps.showWarnings) {
850
156
      BOOST_LOG(rdWarningLog)
851
0
          << "WARNING: not removing hydrogen atom without neighbors"
852
0
          << std::endl;
853
156
    }
854
156
    return false;
855
156
  }
856
39.3k
  if (!ps.removeHigherDegrees && atom->getDegree() > 1) {
857
22.2k
    return false;
858
22.2k
  }
859
17.1k
  if (!ps.removeIsotopes && !ps.removeAndTrackIsotopes && atom->getIsotope()) {
860
1.94k
    return false;
861
1.94k
  }
862
15.2k
  if (!ps.removeNonimplicit && !atom->hasProp(common_properties::isImplicit)) {
863
0
    return false;
864
0
  }
865
15.2k
  if (!ps.removeMapped && atom->getAtomMapNum()) {
866
0
    return false;
867
0
  }
868
869
15.2k
  if (ps.removeInSGroups) {
870
    // If removing H in SGroups, do not remove H atoms in special
871
    // roles in the SGroup
872
49.0k
    for (const auto &sg : getSubstanceGroups(mol)) {
873
      // The H atom is one of the "caps" of the SGroup. Technically,
874
      // it's not part of the group, but it defines its boundaries.
875
49.0k
      for (const auto &bond_idx : sg.getBonds()) {
876
47.1k
        if (sg.getBondType(bond_idx) == SubstanceGroup::BondType::XBOND) {
877
41.8k
          auto bond = mol.getBondWithIdx(bond_idx);
878
41.8k
          if (bond->getBeginAtom() == atom || bond->getEndAtom() == atom) {
879
278
            return false;
880
278
          }
881
41.8k
        }
882
47.1k
      }
883
884
48.8k
      for (const auto &sap : sg.getAttachPoints()) {
885
        // The H atoms is an attach point. This would be weird, but is possible.
886
        // (if it is a 'leaving atom' we don't care, though)
887
5.90k
        if (sap.aIdx == atom->getIdx()) {
888
22
          return false;
889
22
        }
890
5.90k
      }
891
892
48.7k
      for (const auto &cs : sg.getCStates()) {
893
        // The bond to the H atom defines a CState
894
1.20k
        auto bond = mol.getBondWithIdx(cs.bondIdx);
895
1.20k
        if (bond->getBeginAtom() == atom || bond->getEndAtom() == atom) {
896
2
          return false;
897
2
        }
898
1.20k
      }
899
48.7k
    }
900
15.2k
  } else {
901
0
    for (const auto &sg : getSubstanceGroups(mol)) {
902
0
      if (sg.includesAtom(atom->getIdx())) {
903
0
        return false;
904
0
      }
905
0
    }
906
0
  }
907
14.9k
  if (!ps.removeHydrides && atom->getFormalCharge() == -1) {
908
168
    return false;
909
168
  }
910
14.7k
  bool removeIt = true;
911
14.7k
  if (atom->getDegree() &&
912
14.7k
      (!ps.removeDummyNeighbors || !ps.removeDefiningBondStereo ||
913
0
       !ps.removeOnlyHNeighbors || !ps.removeNontetrahedralNeighbors ||
914
14.7k
       !ps.removeWithWedgedBond)) {
915
14.7k
    bool onlyHNeighbors = true;
916
14.7k
    for (const auto nbr : mol.atomNeighbors(atom)) {
917
      // is it a dummy?
918
14.7k
      if (!ps.removeDummyNeighbors && nbr->getAtomicNum() < 1) {
919
129
        if (ps.showWarnings) {
920
129
          BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom "
921
0
                                     "with dummy atom neighbors"
922
0
                                  << std::endl;
923
129
        }
924
129
        return false;
925
129
      }
926
      // does it have non-tetrahedral stereo:
927
14.6k
      if (!ps.removeNontetrahedralNeighbors &&
928
14.6k
          Chirality::hasNonTetrahedralStereo(nbr)) {
929
64
        if (ps.showWarnings) {
930
64
          BOOST_LOG(rdWarningLog)
931
0
              << "WARNING: not removing hydrogen atom "
932
0
                 "with neighbor that has non-tetrahedral stereochemistry"
933
0
              << std::endl;
934
64
        }
935
64
        return false;
936
64
      }
937
14.5k
      if (!ps.removeOnlyHNeighbors && nbr->getAtomicNum() != 1) {
938
14.1k
        onlyHNeighbors = false;
939
14.1k
      }
940
14.5k
      if (!ps.removeWithWedgedBond) {
941
0
        const auto bnd = mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx());
942
0
        if (bnd->getBondDir() == Bond::BEGINDASH ||
943
0
            bnd->getBondDir() == Bond::BEGINWEDGE) {
944
0
          if (ps.showWarnings) {
945
0
            BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom "
946
0
                                       "with wedged bond"
947
0
                                    << std::endl;
948
0
          }
949
0
          return false;
950
0
        }
951
0
      }
952
      // Check to see if the neighbor has a double bond and we're the only
953
      // neighbor at this end.  This was part of github #1810
954
14.5k
      if (!ps.removeDefiningBondStereo && nbr->getDegree() == 2) {
955
18.2k
        for (const auto bnd : mol.atomBonds(nbr)) {
956
18.2k
          if (bnd->getBondType() == Bond::DOUBLE &&
957
494
              (bnd->getStereo() > Bond::STEREOANY ||
958
493
               mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx())
959
493
                       ->getBondDir() > Bond::NONE)) {
960
355
            return false;
961
355
          }
962
18.2k
        }
963
9.32k
      }
964
14.5k
    }
965
14.1k
    if (removeIt && (!ps.removeOnlyHNeighbors && onlyHNeighbors)) {
966
398
      return false;
967
398
    }
968
14.1k
  }
969
13.8k
  return removeIt;
970
14.7k
}
971
972
// Do not remove H atoms that are part of SGroups that only contain H atoms.
973
void filter_sgroup_emptying_hydrogens(const ROMol &mol,
974
13.1k
                                      boost::dynamic_bitset<> &atomsToRemove) {
975
13.5k
  for (const auto &sg : getSubstanceGroups(mol)) {
976
13.5k
    const auto &atoms = sg.getAtoms();
977
13.5k
    const auto &patoms = sg.getParentAtoms();
978
979
    // If the SGroup already didn't have atoms, we don't care about it
980
13.5k
    if (atoms.empty() && patoms.empty()) {
981
1.08k
      continue;
982
1.08k
    }
983
984
13.1k
    auto would_remove_atom = [&atomsToRemove](const auto idx) {
985
13.1k
      return atomsToRemove[idx];
986
13.1k
    };
987
988
12.4k
    auto no_atoms = atoms.empty() ||
989
12.4k
                    std::all_of(atoms.begin(), atoms.end(), would_remove_atom);
990
12.4k
    if (no_atoms) {
991
30
      auto no_patoms =
992
30
          patoms.empty() ||
993
12
          std::all_of(patoms.begin(), patoms.end(), would_remove_atom);
994
30
      if (no_patoms) {
995
269
        for (auto atom : atoms) {
996
269
          atomsToRemove.set(atom, false);
997
269
        }
998
242
        for (auto patom : patoms) {
999
242
          atomsToRemove.set(patom, false);
1000
242
        }
1001
30
      }
1002
30
    }
1003
12.4k
  }
1004
13.1k
}
1005
1006
}  // end of anonymous namespace
1007
1008
13.1k
void removeHs(RWMol &mol, const RemoveHsParameters &ps, bool sanitize) {
1009
13.1k
  if (ps.removeAndTrackIsotopes) {
1010
    // if there are any non-isotopic Hs remove them first
1011
    // to make sure chirality is preserved
1012
0
    bool needRemoveHs = false;
1013
0
    for (auto atom : mol.atoms()) {
1014
0
      if (atom->getAtomicNum() == 1 && atom->getIsotope() == 0) {
1015
0
        needRemoveHs = true;
1016
0
        break;
1017
0
      }
1018
0
    }
1019
0
    if (needRemoveHs) {
1020
0
      RemoveHsParameters psCopy(ps);
1021
0
      psCopy.removeAndTrackIsotopes = false;
1022
0
      psCopy.removeIsotopes = false;
1023
0
      removeHs(mol, psCopy, false);
1024
0
    }
1025
0
  }
1026
10.7M
  for (auto atom : mol.atoms()) {
1027
10.7M
    atom->updatePropertyCache(false);
1028
10.7M
  }
1029
13.1k
  if (ps.removeAndTrackIsotopes) {
1030
0
    for (const auto &pair : getIsoMap(mol)) {
1031
0
      mol.getAtomWithIdx(pair.first)
1032
0
          ->setProp(common_properties::_isotopicHs, pair.second);
1033
0
    }
1034
0
  }
1035
13.1k
  boost::dynamic_bitset<> atomsToRemove{mol.getNumAtoms(), 0};
1036
1037
10.7M
  for (auto atom : mol.atoms()) {
1038
10.7M
    if (shouldRemoveH(mol, atom, ps)) {
1039
13.7k
      atomsToRemove.set(atom->getIdx());
1040
13.7k
    }
1041
10.7M
  }  // end of the loop over atoms
1042
1043
  // Once we know which H atoms would be removed, filter out those that
1044
  // would cause any SGroups to become empty
1045
13.1k
  if (ps.removeInSGroups) {
1046
13.1k
    filter_sgroup_emptying_hydrogens(mol, atomsToRemove);
1047
13.1k
  }
1048
1049
  // now that we know which atoms need to be removed, go ahead and remove them
1050
  // NOTE: there's too much complexity around stereochemistry here
1051
  // to be able to safely use batch editing.
1052
10.7M
  for (int idx = mol.getNumAtoms() - 1; idx >= 0; --idx) {
1053
10.7M
    if (atomsToRemove[idx]) {
1054
13.7k
      molRemoveH(mol, idx, ps.updateExplicitCount);
1055
13.7k
    }
1056
10.7M
  }
1057
13.1k
  mol.clearComputedProps(true);
1058
  //
1059
  //  If we didn't only remove implicit Hs, which are guaranteed to
1060
  //  be the highest numbered atoms, we may have altered atom indices.
1061
  //  This can screw up derived properties (such as ring members), so
1062
  //  do some checks:
1063
  //
1064
13.1k
  if (!atomsToRemove.empty() && ps.removeNonimplicit && sanitize) {
1065
13.0k
    sanitizeMol(mol);
1066
13.0k
  }
1067
1068
  // if we removed Hs and any chiral atoms now have more than 1 explict H,
1069
  // remove those
1070
13.1k
  if (!atomsToRemove.empty()) {
1071
2.68M
    for (auto atom : mol.atoms()) {
1072
2.68M
      if (!atom->getNoImplicit() &&
1073
2.54M
          atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
1074
1.35k
        unsigned int numExplicitHs = atom->getNumExplicitHs();
1075
1.35k
        if (numExplicitHs > 1) {
1076
2
          atom->setNumExplicitHs(0);
1077
2
          atom->updatePropertyCache(false);
1078
2
        }
1079
1.35k
      }
1080
2.68M
    }
1081
7.27k
  }
1082
13.1k
}
1083
0
ROMol *removeHs(const ROMol &mol, const RemoveHsParameters &ps, bool sanitize) {
1084
0
  auto *res = new RWMol(mol);
1085
0
  try {
1086
0
    removeHs(*res, ps, sanitize);
1087
0
  } catch (const MolSanitizeException &) {
1088
0
    delete res;
1089
0
    throw;
1090
0
  }
1091
0
  return static_cast<ROMol *>(res);
1092
0
}
1093
void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
1094
0
              bool sanitize) {
1095
0
  RemoveHsParameters ps;
1096
0
  ps.removeNonimplicit = !implicitOnly;
1097
0
  ps.updateExplicitCount = updateExplicitCount;
1098
0
  removeHs(mol, ps, sanitize);
1099
0
};
1100
ROMol *removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount,
1101
0
                bool sanitize) {
1102
0
  auto *res = new RWMol(mol);
1103
0
  RemoveHsParameters ps;
1104
0
  ps.removeNonimplicit = !implicitOnly;
1105
0
  ps.updateExplicitCount = updateExplicitCount;
1106
0
  try {
1107
0
    removeHs(*res, ps, sanitize);
1108
0
  } catch (const MolSanitizeException &) {
1109
0
    delete res;
1110
0
    throw;
1111
0
  }
1112
0
  return static_cast<ROMol *>(res);
1113
0
}
1114
1115
0
void removeAllHs(RWMol &mol, bool sanitize) {
1116
0
  RemoveHsParameters ps;
1117
0
  ps.removeDegreeZero = true;
1118
0
  ps.removeHigherDegrees = true;
1119
0
  ps.removeOnlyHNeighbors = true;
1120
0
  ps.removeIsotopes = true;
1121
0
  ps.removeDummyNeighbors = true;
1122
0
  ps.removeDefiningBondStereo = true;
1123
0
  ps.removeWithWedgedBond = true;
1124
0
  ps.removeWithQuery = true;
1125
0
  ps.removeNonimplicit = true;
1126
0
  ps.removeInSGroups = true;
1127
0
  ps.showWarnings = false;
1128
0
  ps.removeHydrides = true;
1129
0
  ps.removeNontetrahedralNeighbors = true;
1130
0
  removeHs(mol, ps, sanitize);
1131
0
};
1132
0
ROMol *removeAllHs(const ROMol &mol, bool sanitize) {
1133
0
  auto *res = new RWMol(mol);
1134
0
  try {
1135
0
    removeAllHs(*res, sanitize);
1136
0
  } catch (const MolSanitizeException &) {
1137
0
    delete res;
1138
0
    throw;
1139
0
  }
1140
0
  return static_cast<ROMol *>(res);
1141
0
}
1142
1143
namespace {
1144
enum class HydrogenType {
1145
  NotAHydrogen,
1146
  UnMergableQueryHydrogen,
1147
  QueryHydrogen
1148
};
1149
1150
template <class Q>
1151
0
std::pair<bool, bool> queryHasHs(Q queryAtom, bool inor = false) {
1152
0
  for (auto childit = queryAtom->beginChildren();
1153
0
       childit != queryAtom->endChildren(); ++childit) {
1154
0
    QueryAtom::QUERYATOM_QUERY::CHILD_TYPE query = *childit;
1155
0
    if (query->getDescription() == "AtomOr") {
1156
0
      return queryHasHs(query, true);
1157
0
    } else if (query->getDescription() == "AtomAtomicNum") {
1158
0
      if (static_cast<ATOM_EQUALS_QUERY *>(query.get())->getVal() == 1 &&
1159
0
          !query->getNegation()) {
1160
0
        return std::make_pair(true, inor);
1161
0
      }
1162
0
    } else if (query->getDescription() == "AtomType") {
1163
0
      auto val = static_cast<ATOM_EQUALS_QUERY *>(query.get())->getVal();
1164
      // 1001 == aromtic hydrogen (not a thing, really)
1165
      // 1 == aliphatic hydrogen
1166
0
      if ((val == 1001 || val == 1) && !query->getNegation()) {
1167
0
        return std::make_pair(true, inor);
1168
0
      }
1169
0
    }
1170
0
  }
1171
0
  return std::make_pair(false, inor);
1172
0
  ;
1173
0
}
Unexecuted instantiation: AddHs.cpp:std::__1::pair<bool, bool> RDKit::MolOps::(anonymous namespace)::queryHasHs<Queries::Query<int, RDKit::Atom const*, true>*>(Queries::Query<int, RDKit::Atom const*, true>*, bool)
Unexecuted instantiation: AddHs.cpp:std::__1::pair<bool, bool> RDKit::MolOps::(anonymous namespace)::queryHasHs<std::__1::shared_ptr<Queries::Query<int, RDKit::Atom const*, true> > >(std::__1::shared_ptr<Queries::Query<int, RDKit::Atom const*, true> >, bool)
1174
1175
0
HydrogenType isQueryH(const Atom *atom) {
1176
0
  PRECONDITION(atom, "bogus atom");
1177
0
  if (atom->getAtomicNum() == 1) {
1178
    // the simple case: the atom is flagged as being an H and
1179
    // has no query
1180
0
    if (!atom->hasQuery() ||
1181
0
        (!atom->getQuery()->getNegation() &&
1182
0
         atom->getQuery()->getDescription() == "AtomAtomicNum")) {
1183
0
      return HydrogenType::QueryHydrogen;
1184
0
    }
1185
0
  }
1186
1187
0
  if (!(atom->getDegree() <= 1)) {
1188
    // bonded and unbonded H atoms will continue rest will be returned
1189
0
    return HydrogenType::NotAHydrogen;
1190
0
  }
1191
1192
0
  if (atom->hasQuery() && atom->getQuery()->getNegation()) {
1193
    // we will not merge negated queries
1194
0
    return HydrogenType::NotAHydrogen;
1195
0
  }
1196
1197
0
  if (atom->hasQuery()) {
1198
0
    std::pair<bool, bool> res = std::make_pair(false, false);
1199
0
    if (atom->getQuery()->getDescription() == "AtomOr") {
1200
0
      res = queryHasHs(atom->getQuery(), true);
1201
0
    } else if (atom->getQuery()->getDescription() == "AtomAnd") {
1202
0
      res = queryHasHs(atom->getQuery(), false);
1203
0
    }
1204
0
    if (res.first) {     // hasH
1205
0
      if (res.second) {  // inOr
1206
0
        BOOST_LOG(rdWarningLog)
1207
0
            << "WARNING: merging explicit H queries involved "
1208
0
               "in ORs is not supported. This query will not "
1209
0
               "be merged"
1210
0
            << std::endl;
1211
0
        return HydrogenType::UnMergableQueryHydrogen;
1212
0
      } else {
1213
0
        return HydrogenType::QueryHydrogen;
1214
0
      }
1215
0
    }
1216
0
  }
1217
0
  return HydrogenType::NotAHydrogen;
1218
0
}
1219
}  // namespace
1220
1221
//
1222
//  This routine removes explicit hydrogens (and bonds to them) from
1223
//  the molecular graph and adds them as queries to the heavy atoms
1224
//  to which they are bound.  If the heavy atoms (or atom queries)
1225
//  already have hydrogen-count queries, they will be updated.
1226
//
1227
//  NOTE:
1228
//   - Hydrogens which aren't connected to a heavy atom will not be
1229
//     removed.  This prevents molecules like "[H][H]" from having
1230
//     all atoms removed.
1231
//
1232
//   - By default all hydrogens are removed, however if
1233
//     merge_unmapped_only is true, any hydrogen participating
1234
//     in an atom map will be retained
1235
0
void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly, bool mergeIsotopes) {
1236
0
  std::vector<unsigned int> atomsToRemove;
1237
1238
0
  boost::dynamic_bitset<> hatoms(mol.getNumAtoms());
1239
0
  for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
1240
0
    hatoms[i] = isQueryH(mol.getAtomWithIdx(i)) == HydrogenType::QueryHydrogen;
1241
0
  }
1242
0
  unsigned int currIdx = 0, stopIdx = mol.getNumAtoms();
1243
0
  while (currIdx < stopIdx) {
1244
0
    Atom *atom = mol.getAtomWithIdx(currIdx);
1245
0
    if (!hatoms[currIdx]) {
1246
0
      unsigned int numHsToRemove = 0;
1247
0
      ROMol::ADJ_ITER begin, end;
1248
0
      boost::tie(begin, end) = mol.getAtomNeighbors(atom);
1249
1250
0
      while (begin != end) {
1251
0
        if (hatoms[*begin]) {
1252
0
          Atom &bgn = *mol.getAtomWithIdx(*begin);
1253
0
          bool checkUnmapped =
1254
0
              !mergeUnmappedOnly ||
1255
0
              !bgn.hasProp(common_properties::molAtomMapNumber);
1256
0
          bool checkIsotope = mergeIsotopes || bgn.getIsotope() == 0;
1257
0
          if (checkUnmapped && checkIsotope) {
1258
0
            atomsToRemove.push_back(rdcast<unsigned int>(*begin));
1259
0
            ++numHsToRemove;
1260
0
          }
1261
0
        }
1262
0
        ++begin;
1263
0
      }
1264
0
      if (numHsToRemove) {
1265
        //
1266
        //  We have H neighbors:
1267
        //   Add the appropriate queries to compensate for their removal.
1268
        //
1269
        //  Examples:
1270
        //    C[H] -> [C;!H0]
1271
        //    C([H])[H] -> [C;!H0;!H1]
1272
        //
1273
        //  It would be more efficient to do this using range queries like:
1274
        //    C([H])[H] -> [C;H{2-}]
1275
        //  but that would produce non-standard SMARTS without the user
1276
        //  having started with a non-standard SMARTS.
1277
        //
1278
0
        if (!atom->hasQuery()) {
1279
          // it wasn't a query atom, we need to replace it so that we can add
1280
          // a query:
1281
0
          ATOM_EQUALS_QUERY *tmp = makeAtomNumQuery(atom->getAtomicNum());
1282
0
          auto *newAt = new QueryAtom;
1283
0
          newAt->setQuery(tmp);
1284
0
          newAt->updateProps(*atom);
1285
0
          mol.replaceAtom(atom->getIdx(), newAt);
1286
0
          delete newAt;
1287
0
          atom = mol.getAtomWithIdx(currIdx);
1288
0
        }
1289
0
        for (unsigned int i = 0; i < numHsToRemove; ++i) {
1290
0
          ATOM_EQUALS_QUERY *tmp = makeAtomHCountQuery(i);
1291
0
          tmp->setNegation(true);
1292
0
          atom->expandQuery(tmp);
1293
0
        }
1294
0
      }  // end of numHsToRemove test
1295
1296
      // recurse if needed (was github isusue 544)
1297
0
      if (atom->hasQuery()) {
1298
0
        if (atom->getQuery()->getDescription() == "RecursiveStructure") {
1299
0
          auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery());
1300
0
          CHECK_INVARIANT(rsq, "could not convert recursive structure query");
1301
0
          RWMol *rqm = new RWMol(*rsq->getQueryMol());
1302
0
          mergeQueryHs(*rqm, mergeUnmappedOnly, mergeIsotopes);
1303
0
          rsq->setQueryMol(rqm);
1304
0
        }
1305
1306
        // FIX: shouldn't be repeating this code here
1307
0
        std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
1308
0
            atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
1309
0
        while (childStack.size()) {
1310
0
          QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front();
1311
0
          childStack.pop_front();
1312
0
          if (qry->getDescription() == "RecursiveStructure") {
1313
0
            auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get());
1314
0
            CHECK_INVARIANT(rsq, "could not convert recursive structure query");
1315
0
            RWMol *rqm = new RWMol(*rsq->getQueryMol());
1316
0
            mergeQueryHs(*rqm, mergeUnmappedOnly, mergeIsotopes);
1317
0
            rsq->setQueryMol(rqm);
1318
0
          } else if (qry->beginChildren() != qry->endChildren()) {
1319
0
            childStack.insert(childStack.end(), qry->beginChildren(),
1320
0
                              qry->endChildren());
1321
0
          }
1322
0
        }
1323
0
      }  // end of recursion loop
1324
0
    }
1325
0
    ++currIdx;
1326
0
  }
1327
0
  mol.beginBatchEdit();
1328
0
  for (auto aidx : atomsToRemove) {
1329
0
    mol.removeAtom(aidx);
1330
0
  }
1331
0
  mol.commitBatchEdit();
1332
0
};
1333
ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly,
1334
0
                    bool mergeIsotopes) {
1335
0
  auto *res = new RWMol(mol);
1336
0
  mergeQueryHs(*res, mergeUnmappedOnly, mergeIsotopes);
1337
0
  return static_cast<ROMol *>(res);
1338
0
};
1339
1340
0
bool needsHs(const ROMol &mol) {
1341
0
  for (const auto atom : mol.atoms()) {
1342
0
    bool includeNeighbors = false;
1343
0
    if (atom->getTotalNumHs(includeNeighbors)) {
1344
0
      return true;
1345
0
    }
1346
0
  }
1347
0
  return false;
1348
0
}
1349
1350
0
std::pair<bool, bool> hasQueryHs(const ROMol &mol) {
1351
0
  bool queryHs = false;
1352
  // We don't care about announcing ORs or other items during isQueryH
1353
0
  RDLog::LogStateSetter blocker;
1354
1355
0
  for (const auto atom : mol.atoms()) {
1356
0
    switch (isQueryH(atom)) {
1357
0
      case HydrogenType::UnMergableQueryHydrogen:
1358
0
        return std::make_pair(true, true);
1359
0
      case HydrogenType::QueryHydrogen:
1360
0
        queryHs = true;
1361
0
        break;
1362
0
      default:  // HydrogenType::NotAHydrogen:
1363
0
        break;
1364
0
    }
1365
0
    if (atom->hasQuery()) {
1366
0
      if (atom->getQuery()->getDescription() == "RecursiveStructure") {
1367
0
        auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery());
1368
0
        CHECK_INVARIANT(rsq, "could not convert recursive structure query");
1369
0
        auto res = hasQueryHs(*rsq->getQueryMol());
1370
0
        if (res.second) {  // unmergableH implies queryH
1371
0
          return res;
1372
0
        }
1373
0
        queryHs |= res.first;
1374
0
      }
1375
1376
      // FIX: shouldn't be repeating this code here -- yet again!
1377
0
      std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
1378
0
          atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
1379
0
      while (!childStack.empty()) {
1380
0
        QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front();
1381
0
        childStack.pop_front();
1382
0
        if (qry->getDescription() == "RecursiveStructure") {
1383
0
          auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get());
1384
0
          CHECK_INVARIANT(rsq, "could not convert recursive structure query");
1385
0
          auto res = hasQueryHs(*rsq->getQueryMol());
1386
0
          if (res.second) {
1387
0
            return res;
1388
0
          }
1389
0
          queryHs |= res.first;
1390
0
        } else {
1391
0
          childStack.insert(childStack.end(), qry->beginChildren(),
1392
0
                            qry->endChildren());
1393
0
        }
1394
0
      }
1395
0
    }
1396
0
  }  // end of recursion loop
1397
1398
0
  return std::make_pair(queryHs, false);
1399
0
}
1400
1401
}  // namespace MolOps
1402
}  // namespace RDKit