Coverage Report

Created: 2026-03-31 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rdkit/Code/GraphMol/MolEnumerator/LinkNode.h
Line
Count
Source
1
//
2
//  Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3
//
4
//   @@ All Rights Reserved @@
5
//  This file is part of the RDKit.
6
//  The contents are covered by the terms of the BSD license
7
//  which is included in the file license.txt, found at the root
8
//  of the RDKit source tree.
9
//
10
#include <RDGeneral/Invariant.h>
11
12
#include <map>
13
#include <boost/lexical_cast.hpp>
14
#include <boost/tokenizer.hpp>
15
#include <boost/format.hpp>
16
#include <algorithm>
17
18
// Tokenizer over std::string input whose splitting is driven by a
// boost::char_separator<char>; used below to break up LINKNODE text.
using tokenizer = boost::tokenizer<boost::char_separator<char>>;
19
20
namespace RDKit {
21
namespace MolEnumerator {
22
23
// Plain data holder for one parsed LINKNODE entry.
//
// getMolLinkNodes() fills the members from the whitespace-separated values
// of a single entry, in this order: minRep, maxRep, nBonds, followed by the
// atom pairs that define the bonds.
struct LinkNode {
  // first value of the entry; presumably the minimum repeat count
  unsigned int minRep{0};
  // second value of the entry; presumably the maximum repeat count
  unsigned int maxRep{0};
  // third value of the entry: the number of bonds described by the entry
  unsigned int nBonds{0};
  // one (atom, atom) pair per bond, stored as atom indices after the values
  // read from the entry have been converted by getMolLinkNodes()
  std::vector<std::pair<unsigned int, unsigned int>> bondAtoms;
};
29
30
namespace utils {
31
inline std::vector<LinkNode> getMolLinkNodes(
32
    const ROMol &mol, bool strict = true,
33
0
    const std::map<unsigned, Atom *> *atomIdxMap = nullptr) {
34
0
  std::vector<LinkNode> res;
35
0
  std::string pval;
36
0
  if (!mol.getPropIfPresent(common_properties::molFileLinkNodes, pval)) {
37
0
    return res;
38
0
  }
39
0
  std::vector<int> mapping;
40
41
0
  boost::char_separator<char> pipesep("|");
42
0
  boost::char_separator<char> spacesep(" ");
43
0
  for (auto linknodetext : tokenizer(pval, pipesep)) {
44
0
    LinkNode node;
45
0
    tokenizer tokens(linknodetext, spacesep);
46
0
    std::vector<unsigned int> data;
47
0
    try {
48
0
      std::transform(tokens.begin(), tokens.end(), std::back_inserter(data),
49
0
                     [](const std::string &token) -> unsigned int {
50
0
                       return boost::lexical_cast<unsigned int>(token);
51
0
                     });
52
0
    } catch (boost::bad_lexical_cast &) {
53
0
      std::ostringstream errout;
54
0
      errout << "Cannot convert values in LINKNODE '" << linknodetext
55
0
             << "' to unsigned ints";
56
0
      if (strict) {
57
0
        throw ValueErrorException(errout.str());
58
0
      } else {
59
0
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
60
0
        continue;
61
0
      }
62
0
    }
63
    // the second test here is for the atom-pairs defining the bonds
64
    // data[2] contains the number of bonds
65
0
    if (data.size() < 5 || data.size() < 3 + 2 * data[2]) {
66
0
      std::ostringstream errout;
67
0
      errout << "not enough values in LINKNODE '" << linknodetext << "'";
68
0
      if (strict) {
69
0
        throw ValueErrorException(errout.str());
70
0
      } else {
71
0
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
72
0
        continue;
73
0
      }
74
0
    }
75
76
0
    node.minRep = data[0];
77
0
    node.maxRep = data[1];
78
0
    if (node.minRep == 0 || node.maxRep < node.minRep) {
79
0
      std::ostringstream errout;
80
0
      errout << "bad counts in LINKNODE '" << linknodetext << "'";
81
0
      if (strict) {
82
0
        throw ValueErrorException(errout.str());
83
0
      } else {
84
0
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
85
0
        continue;
86
0
      }
87
0
    }
88
0
    node.nBonds = data[2];
89
0
    if (node.nBonds != 2) {
90
0
      if (strict) {
91
0
        UNDER_CONSTRUCTION(
92
0
            "only link nodes with 2 bonds are currently supported");
93
0
      } else {
94
0
        BOOST_LOG(rdWarningLog)
95
0
            << "only link nodes with 2 bonds are currently supported"
96
0
            << std::endl;
97
0
        continue;
98
0
      }
99
0
    }
100
    // both bonds must start from the same atom:
101
0
    if (data[3] != data[5]) {
102
0
      std::ostringstream errout;
103
0
      errout << "bonds don't start at the same atom for LINKNODE '"
104
0
             << linknodetext << "'";
105
0
      if (strict) {
106
0
        throw ValueErrorException(errout.str());
107
0
      } else {
108
0
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
109
0
        continue;
110
0
      }
111
0
    }
112
113
0
    if (atomIdxMap) {
114
      // map the indices back to the original atom numbers
115
0
      for (unsigned int i = 3; i <= 6; ++i) {
116
0
        const auto aidx = atomIdxMap->find(data[i] - 1);
117
0
        if (aidx == atomIdxMap->end()) {
118
0
          std::ostringstream errout;
119
0
          errout << "atom index " << data[i]
120
0
                 << " cannot be found in molecule for LINKNODE '"
121
0
                 << linknodetext << "'";
122
0
          if (strict) {
123
0
            throw ValueErrorException(errout.str());
124
0
          } else {
125
0
            BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
126
0
            continue;
127
0
          }
128
0
        } else {
129
0
          data[i] = aidx->second->getIdx();
130
0
        }
131
0
      }
132
0
    } else {
133
0
      for (unsigned int i = 3; i <= 6; ++i) {
134
0
        --data[i];
135
0
      }
136
0
    }
137
0
    node.bondAtoms.push_back(std::make_pair(data[3], data[4]));
138
0
    node.bondAtoms.push_back(std::make_pair(data[5], data[6]));
139
0
    if (!mol.getBondBetweenAtoms(data[4], data[3]) ||
140
0
        !mol.getBondBetweenAtoms(data[6], data[5])) {
141
0
      std::ostringstream errout;
142
0
      errout << "bond not found between atoms in LINKNODE '" << linknodetext
143
0
             << "'";
144
0
      if (strict) {
145
0
        throw ValueErrorException(errout.str());
146
0
      } else {
147
0
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
148
0
        continue;
149
0
      }
150
0
    }
151
0
    res.push_back(std::move(node));
152
0
  }
153
0
  return res;
154
0
}
155
156
}  // namespace utils
157
}  // namespace MolEnumerator
158
159
}  // namespace RDKit