/src/rdkit/Code/GraphMol/MolEnumerator/LinkNode.h
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | #include <RDGeneral/Invariant.h> |
11 | | |
12 | | #include <map> |
13 | | #include <boost/lexical_cast.hpp> |
14 | | #include <boost/tokenizer.hpp> |
15 | | #include <boost/format.hpp> |
16 | | #include <algorithm> |
17 | | |
18 | | typedef boost::tokenizer<boost::char_separator<char>> tokenizer; |
19 | | |
20 | | namespace RDKit { |
21 | | namespace MolEnumerator { |
22 | | |
/// One parsed LINKNODE entry, as produced by utils::getMolLinkNodes().
struct LinkNode {
  /// Minimum count: the first value of the LINKNODE entry.
  unsigned int minRep{0};
  /// Maximum count: the second value of the LINKNODE entry.
  unsigned int maxRep{0};
  /// Number of bonds declared by the entry (its third value).
  unsigned int nBonds{0};
  /// Atom-index pairs defining the bonds, one pair per bond. The parser
  /// stores indices here after converting them from the 1-based values in the
  /// text (or after remapping them through the caller-supplied atom map).
  std::vector<std::pair<unsigned int, unsigned int>> bondAtoms;
};
29 | | |
30 | | namespace utils { |
31 | | inline std::vector<LinkNode> getMolLinkNodes( |
32 | | const ROMol &mol, bool strict = true, |
33 | 0 | const std::map<unsigned, Atom *> *atomIdxMap = nullptr) { |
34 | 0 | std::vector<LinkNode> res; |
35 | 0 | std::string pval; |
36 | 0 | if (!mol.getPropIfPresent(common_properties::molFileLinkNodes, pval)) { |
37 | 0 | return res; |
38 | 0 | } |
39 | 0 | std::vector<int> mapping; |
40 | |
|
41 | 0 | boost::char_separator<char> pipesep("|"); |
42 | 0 | boost::char_separator<char> spacesep(" "); |
43 | 0 | for (auto linknodetext : tokenizer(pval, pipesep)) { |
44 | 0 | LinkNode node; |
45 | 0 | tokenizer tokens(linknodetext, spacesep); |
46 | 0 | std::vector<unsigned int> data; |
47 | 0 | try { |
48 | 0 | std::transform(tokens.begin(), tokens.end(), std::back_inserter(data), |
49 | 0 | [](const std::string &token) -> unsigned int { |
50 | 0 | return boost::lexical_cast<unsigned int>(token); |
51 | 0 | }); |
52 | 0 | } catch (boost::bad_lexical_cast &) { |
53 | 0 | std::ostringstream errout; |
54 | 0 | errout << "Cannot convert values in LINKNODE '" << linknodetext |
55 | 0 | << "' to unsigned ints"; |
56 | 0 | if (strict) { |
57 | 0 | throw ValueErrorException(errout.str()); |
58 | 0 | } else { |
59 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
60 | 0 | continue; |
61 | 0 | } |
62 | 0 | } |
63 | | // the second test here is for the atom-pairs defining the bonds |
64 | | // data[2] contains the number of bonds |
65 | 0 | if (data.size() < 5 || data.size() < 3 + 2 * data[2]) { |
66 | 0 | std::ostringstream errout; |
67 | 0 | errout << "not enough values in LINKNODE '" << linknodetext << "'"; |
68 | 0 | if (strict) { |
69 | 0 | throw ValueErrorException(errout.str()); |
70 | 0 | } else { |
71 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
72 | 0 | continue; |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | 0 | node.minRep = data[0]; |
77 | 0 | node.maxRep = data[1]; |
78 | 0 | if (node.minRep == 0 || node.maxRep < node.minRep) { |
79 | 0 | std::ostringstream errout; |
80 | 0 | errout << "bad counts in LINKNODE '" << linknodetext << "'"; |
81 | 0 | if (strict) { |
82 | 0 | throw ValueErrorException(errout.str()); |
83 | 0 | } else { |
84 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
85 | 0 | continue; |
86 | 0 | } |
87 | 0 | } |
88 | 0 | node.nBonds = data[2]; |
89 | 0 | if (node.nBonds != 2) { |
90 | 0 | if (strict) { |
91 | 0 | UNDER_CONSTRUCTION( |
92 | 0 | "only link nodes with 2 bonds are currently supported"); |
93 | 0 | } else { |
94 | 0 | BOOST_LOG(rdWarningLog) |
95 | 0 | << "only link nodes with 2 bonds are currently supported" |
96 | 0 | << std::endl; |
97 | 0 | continue; |
98 | 0 | } |
99 | 0 | } |
100 | | // both bonds must start from the same atom: |
101 | 0 | if (data[3] != data[5]) { |
102 | 0 | std::ostringstream errout; |
103 | 0 | errout << "bonds don't start at the same atom for LINKNODE '" |
104 | 0 | << linknodetext << "'"; |
105 | 0 | if (strict) { |
106 | 0 | throw ValueErrorException(errout.str()); |
107 | 0 | } else { |
108 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
109 | 0 | continue; |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | 0 | if (atomIdxMap) { |
114 | | // map the indices back to the original atom numbers |
115 | 0 | for (unsigned int i = 3; i <= 6; ++i) { |
116 | 0 | const auto aidx = atomIdxMap->find(data[i] - 1); |
117 | 0 | if (aidx == atomIdxMap->end()) { |
118 | 0 | std::ostringstream errout; |
119 | 0 | errout << "atom index " << data[i] |
120 | 0 | << " cannot be found in molecule for LINKNODE '" |
121 | 0 | << linknodetext << "'"; |
122 | 0 | if (strict) { |
123 | 0 | throw ValueErrorException(errout.str()); |
124 | 0 | } else { |
125 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
126 | 0 | continue; |
127 | 0 | } |
128 | 0 | } else { |
129 | 0 | data[i] = aidx->second->getIdx(); |
130 | 0 | } |
131 | 0 | } |
132 | 0 | } else { |
133 | 0 | for (unsigned int i = 3; i <= 6; ++i) { |
134 | 0 | --data[i]; |
135 | 0 | } |
136 | 0 | } |
137 | 0 | node.bondAtoms.push_back(std::make_pair(data[3], data[4])); |
138 | 0 | node.bondAtoms.push_back(std::make_pair(data[5], data[6])); |
139 | 0 | if (!mol.getBondBetweenAtoms(data[4], data[3]) || |
140 | 0 | !mol.getBondBetweenAtoms(data[6], data[5])) { |
141 | 0 | std::ostringstream errout; |
142 | 0 | errout << "bond not found between atoms in LINKNODE '" << linknodetext |
143 | 0 | << "'"; |
144 | 0 | if (strict) { |
145 | 0 | throw ValueErrorException(errout.str()); |
146 | 0 | } else { |
147 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
148 | 0 | continue; |
149 | 0 | } |
150 | 0 | } |
151 | 0 | res.push_back(std::move(node)); |
152 | 0 | } |
153 | 0 | return res; |
154 | 0 | } |
155 | | |
156 | | } // namespace utils |
157 | | } // namespace MolEnumerator |
158 | | |
159 | | } // namespace RDKit |