/src/rdkit/Code/GraphMol/QueryOps.cpp
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | #include "QueryOps.h" |
11 | | #include <algorithm> |
12 | | #include <RDGeneral/types.h> |
13 | | #include <GraphMol/QueryAtom.h> |
14 | | #include <boost/range/iterator_range.hpp> |
15 | | #include <boost/dynamic_bitset.hpp> |
16 | | #include <boost/algorithm/string.hpp> |
17 | | |
18 | | namespace RDKit { |
19 | | |
20 | | // common general queries |
21 | | |
22 | 3.09k | int queryIsAtomBridgehead(Atom const *at) { |
23 | | // at least three ring bonds, all ring bonds in a ring which shares at |
24 | | // least two bonds with another ring involving this atom |
25 | | // |
26 | | // We can't just go with "at least three ring bonds shared between multiple |
27 | | // rings" because of structures like CC12CCN(CC1)C2 where there are only two |
28 | | // SSSRs |
29 | 3.09k | PRECONDITION(at, "no atom"); |
30 | 3.09k | if (at->getDegree() < 3) { |
31 | 6 | return 0; |
32 | 6 | } |
33 | 3.08k | const auto &mol = at->getOwningMol(); |
34 | 3.08k | const auto ri = mol.getRingInfo(); |
35 | 3.08k | if (!ri || !ri->isInitialized()) { |
36 | 0 | return 0; |
37 | 0 | } |
38 | | // track which ring bonds involve this atom |
39 | 3.08k | boost::dynamic_bitset<> atomRingBonds(mol.getNumBonds()); |
40 | 9.34k | for (const auto bnd : mol.atomBonds(at)) { |
41 | 9.34k | if (ri->numBondRings(bnd->getIdx())) { |
42 | 4.70k | atomRingBonds.set(bnd->getIdx()); |
43 | 4.70k | } |
44 | 9.34k | } |
45 | 3.08k | if (atomRingBonds.count() < 3) { |
46 | 2.25k | return 0; |
47 | 2.25k | } |
48 | | |
49 | 838 | boost::dynamic_bitset<> bondsInRingI(mol.getNumBonds()); |
50 | 838 | boost::dynamic_bitset<> ringsOverlap(ri->numRings()); |
51 | 98.5k | for (unsigned int i = 0; i < ri->bondRings().size(); ++i) { |
52 | 97.8k | bondsInRingI.reset(); |
53 | 97.8k | bool atomInRingI = false; |
54 | 880k | for (const auto bidx : ri->bondRings()[i]) { |
55 | 880k | bondsInRingI.set(bidx); |
56 | 880k | if (atomRingBonds[bidx]) { |
57 | 6.76k | atomInRingI = true; |
58 | 6.76k | } |
59 | 880k | } |
60 | 97.8k | if (!atomInRingI) { |
61 | 94.4k | continue; |
62 | 94.4k | } |
63 | 150k | for (unsigned int j = i + 1; j < ri->bondRings().size(); ++j) { |
64 | 147k | unsigned int overlap = 0; |
65 | 147k | bool atomInRingJ = false; |
66 | 2.84M | for (const auto bidx : ri->bondRings()[j]) { |
67 | 2.84M | if (atomRingBonds[bidx]) { |
68 | 20.4k | atomInRingJ = true; |
69 | 20.4k | } |
70 | 2.84M | if (bondsInRingI[bidx]) { |
71 | 1.55M | ++overlap; |
72 | 1.55M | } |
73 | 2.84M | if (overlap >= 2 && atomInRingJ) { |
74 | | // we have two rings containing the atom which share at least two |
75 | | // bonds: |
76 | 16.1k | ringsOverlap.set(i); |
77 | 16.1k | ringsOverlap.set(j); |
78 | 16.1k | break; |
79 | 16.1k | } |
80 | 2.84M | } |
81 | 147k | } |
82 | 3.38k | if (!ringsOverlap[i]) { |
83 | 186 | return 0; |
84 | 186 | } |
85 | 3.38k | } |
86 | 652 | return 1; |
87 | 838 | } |
88 | | |
89 | | //! returns a Query for matching atoms with a particular number of ring bonds |
90 | 18.0k | ATOM_EQUALS_QUERY *makeAtomRingBondCountQuery(int what) { |
91 | 18.0k | ATOM_EQUALS_QUERY *res = new AtomRingQuery(what); |
92 | 18.0k | res->setDescription("AtomRingBondCount"); |
93 | 18.0k | res->setDataFunc(queryAtomRingBondCount); |
94 | 18.0k | return res; |
95 | 18.0k | }; |
96 | | |
97 | 27 | ATOM_EQUALS_QUERY *makeAtomInRingOfSizeQuery(int tgt) { |
98 | 27 | auto *res = new ATOM_EQUALS_QUERY; |
99 | 27 | res->setVal(tgt); |
100 | 27 | res->setDataFunc( |
101 | 27 | [tgt](Atom const *at) { return queryAtomIsInRingOfSize(at, tgt); }); |
102 | 27 | res->setDescription("AtomRingSize"); |
103 | 27 | return res; |
104 | 27 | } |
105 | | |
106 | | ATOM_RANGE_QUERY *makeAtomInRingOfSizeQuery(int lower, int upper, |
107 | 0 | bool lowerOpen, bool upperOpen) { |
108 | 0 | auto *res = new ATOM_RANGE_QUERY; |
109 | 0 | res->setLower(lower); |
110 | 0 | res->setUpper(upper); |
111 | 0 | res->setEndsOpen(lowerOpen, upperOpen); |
112 | 0 | res->setDataFunc([lower, upper, lowerOpen, upperOpen](Atom const *at) { |
113 | 0 | return queryAtomIsInRingOfSize(at, lower, upper, lowerOpen, upperOpen); |
114 | 0 | }); |
115 | 0 | res->setDescription("range_AtomRingSize"); |
116 | 0 | return res; |
117 | 0 | } |
118 | | |
119 | 0 | BOND_EQUALS_QUERY *makeBondInRingOfSizeQuery(int tgt) { |
120 | 0 | RANGE_CHECK(3, tgt, 20); |
121 | 0 | auto *res = new BOND_EQUALS_QUERY; |
122 | 0 | res->setVal(tgt); |
123 | 0 | switch (tgt) { |
124 | 0 | case 3: |
125 | 0 | res->setDataFunc(queryBondIsInRingOfSize<3>); |
126 | 0 | break; |
127 | 0 | case 4: |
128 | 0 | res->setDataFunc(queryBondIsInRingOfSize<4>); |
129 | 0 | break; |
130 | 0 | case 5: |
131 | 0 | res->setDataFunc(queryBondIsInRingOfSize<5>); |
132 | 0 | break; |
133 | 0 | case 6: |
134 | 0 | res->setDataFunc(queryBondIsInRingOfSize<6>); |
135 | 0 | break; |
136 | 0 | case 7: |
137 | 0 | res->setDataFunc(queryBondIsInRingOfSize<7>); |
138 | 0 | break; |
139 | 0 | case 8: |
140 | 0 | res->setDataFunc(queryBondIsInRingOfSize<8>); |
141 | 0 | break; |
142 | 0 | case 9: |
143 | 0 | res->setDataFunc(queryBondIsInRingOfSize<9>); |
144 | 0 | break; |
145 | 0 | case 10: |
146 | 0 | res->setDataFunc(queryBondIsInRingOfSize<10>); |
147 | 0 | break; |
148 | 0 | case 11: |
149 | 0 | res->setDataFunc(queryBondIsInRingOfSize<11>); |
150 | 0 | break; |
151 | 0 | case 12: |
152 | 0 | res->setDataFunc(queryBondIsInRingOfSize<12>); |
153 | 0 | break; |
154 | 0 | case 13: |
155 | 0 | res->setDataFunc(queryBondIsInRingOfSize<13>); |
156 | 0 | break; |
157 | 0 | case 14: |
158 | 0 | res->setDataFunc(queryBondIsInRingOfSize<14>); |
159 | 0 | break; |
160 | 0 | case 15: |
161 | 0 | res->setDataFunc(queryBondIsInRingOfSize<15>); |
162 | 0 | break; |
163 | 0 | case 16: |
164 | 0 | res->setDataFunc(queryBondIsInRingOfSize<16>); |
165 | 0 | break; |
166 | 0 | case 17: |
167 | 0 | res->setDataFunc(queryBondIsInRingOfSize<17>); |
168 | 0 | break; |
169 | 0 | case 18: |
170 | 0 | res->setDataFunc(queryBondIsInRingOfSize<18>); |
171 | 0 | break; |
172 | 0 | case 19: |
173 | 0 | res->setDataFunc(queryBondIsInRingOfSize<19>); |
174 | 0 | break; |
175 | 0 | case 20: |
176 | 0 | res->setDataFunc(queryBondIsInRingOfSize<20>); |
177 | 0 | break; |
178 | 0 | } |
179 | 0 | res->setDescription("BondRingSize"); |
180 | 0 | return res; |
181 | 0 | } |
182 | | |
183 | 1.34k | ATOM_EQUALS_QUERY *makeAtomMinRingSizeQuery(int tgt) { |
184 | 1.34k | auto *res = new ATOM_EQUALS_QUERY; |
185 | 1.34k | res->setVal(tgt); |
186 | 1.34k | res->setDataFunc(queryAtomMinRingSize); |
187 | 1.34k | res->setDescription("AtomMinRingSize"); |
188 | 1.34k | return res; |
189 | 1.34k | } |
190 | 0 | BOND_EQUALS_QUERY *makeBondMinRingSizeQuery(int tgt) { |
191 | 0 | auto *res = new BOND_EQUALS_QUERY; |
192 | 0 | res->setVal(tgt); |
193 | 0 | res->setDataFunc(queryBondMinRingSize); |
194 | 0 | res->setDescription("BondMinRingSize"); |
195 | 0 | return res; |
196 | 0 | } |
197 | | |
198 | 0 | unsigned int queryAtomBondProduct(Atom const *at) { |
199 | 0 | ROMol::OEDGE_ITER beg, end; |
200 | 0 | boost::tie(beg, end) = at->getOwningMol().getAtomBonds(at); |
201 | 0 | unsigned int prod = 1; |
202 | 0 | while (beg != end) { |
203 | 0 | prod *= static_cast<unsigned int>( |
204 | 0 | firstThousandPrimes[at->getOwningMol()[*beg]->getBondType()]); |
205 | 0 | ++beg; |
206 | 0 | } |
207 | 0 | return prod; |
208 | 0 | } |
209 | 0 | unsigned int queryAtomAllBondProduct(Atom const *at) { |
210 | 0 | ROMol::OEDGE_ITER beg, end; |
211 | |
|
212 | 0 | boost::tie(beg, end) = at->getOwningMol().getAtomBonds(at); |
213 | 0 | unsigned int prod = 1; |
214 | 0 | while (beg != end) { |
215 | 0 | prod *= static_cast<unsigned int>( |
216 | 0 | firstThousandPrimes[at->getOwningMol()[*beg]->getBondType()]); |
217 | 0 | ++beg; |
218 | 0 | } |
219 | 0 | for (unsigned int i = 0; i < at->getTotalNumHs(); i++) { |
220 | 0 | prod *= static_cast<unsigned int>(firstThousandPrimes[Bond::SINGLE]); |
221 | 0 | } |
222 | 0 | return prod; |
223 | 0 | } |
224 | | |
225 | 0 | ATOM_EQUALS_QUERY *makeAtomImplicitValenceQuery(int what) { |
226 | 0 | auto *res = |
227 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomImplicitValence); |
228 | 0 | res->setDescription("AtomImplicitValence"); |
229 | 0 | return res; |
230 | 0 | } |
231 | 0 | ATOM_EQUALS_QUERY *makeAtomExplicitValenceQuery(int what) { |
232 | 0 | auto *res = |
233 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomExplicitValence); |
234 | 0 | res->setDescription("AtomExplicitValence"); |
235 | 0 | return res; |
236 | 0 | } |
237 | | |
238 | 942 | ATOM_EQUALS_QUERY *makeAtomTotalValenceQuery(int what) { |
239 | 942 | auto *res = |
240 | 942 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomTotalValence); |
241 | 942 | res->setDescription("AtomTotalValence"); |
242 | 942 | return res; |
243 | 942 | } |
244 | | |
245 | 489k | ATOM_EQUALS_QUERY *makeAtomNumQuery(int what) { |
246 | 489k | return makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomNum, |
247 | 489k | "AtomAtomicNum"); |
248 | 489k | } |
249 | | |
250 | 199k | ATOM_EQUALS_QUERY *makeAtomTypeQuery(int num, int aromatic) { |
251 | 199k | return makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(makeAtomType(num, aromatic), |
252 | 199k | queryAtomType, "AtomType"); |
253 | 199k | } |
254 | 19.1k | ATOM_EQUALS_QUERY *makeAtomExplicitDegreeQuery(int what) { |
255 | 19.1k | auto *res = |
256 | 19.1k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomExplicitDegree); |
257 | 19.1k | res->setDescription("AtomExplicitDegree"); |
258 | 19.1k | return res; |
259 | 19.1k | } |
260 | | |
261 | 44.8k | ATOM_EQUALS_QUERY *makeAtomTotalDegreeQuery(int what) { |
262 | 44.8k | auto *res = |
263 | 44.8k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomTotalDegree); |
264 | 44.8k | res->setDescription("AtomTotalDegree"); |
265 | 44.8k | return res; |
266 | 44.8k | } |
267 | | |
268 | 0 | ATOM_EQUALS_QUERY *makeAtomHeavyAtomDegreeQuery(int what) { |
269 | 0 | auto *res = |
270 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomHeavyAtomDegree); |
271 | 0 | res->setDescription("AtomHeavyAtomDegree"); |
272 | 0 | return res; |
273 | 0 | } |
274 | | |
275 | 225k | ATOM_EQUALS_QUERY *makeAtomHCountQuery(int what) { |
276 | 225k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomHCount); |
277 | 225k | res->setDescription("AtomHCount"); |
278 | 225k | return res; |
279 | 225k | } |
280 | 8.47k | ATOM_EQUALS_QUERY *makeAtomImplicitHCountQuery(int what) { |
281 | 8.47k | auto *res = |
282 | 8.47k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomImplicitHCount); |
283 | 8.47k | res->setDescription("AtomImplicitHCount"); |
284 | 8.47k | return res; |
285 | 8.47k | } |
286 | 3.73k | ATOM_EQUALS_QUERY *makeAtomHasImplicitHQuery() { |
287 | 3.73k | auto *res = |
288 | 3.73k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomHasImplicitH); |
289 | 3.73k | res->setDescription("AtomHasImplicitH"); |
290 | 3.73k | return res; |
291 | 3.73k | } |
292 | | |
293 | 2.34M | ATOM_EQUALS_QUERY *makeAtomAromaticQuery() { |
294 | 2.34M | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomAromatic); |
295 | 2.34M | res->setDescription("AtomIsAromatic"); |
296 | 2.34M | return res; |
297 | 2.34M | } |
298 | | |
299 | 33.4k | ATOM_EQUALS_QUERY *makeAtomAliphaticQuery() { |
300 | 33.4k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomAliphatic); |
301 | 33.4k | res->setDescription("AtomIsAliphatic"); |
302 | 33.4k | return res; |
303 | 33.4k | } |
304 | | |
305 | 8.75k | ATOM_EQUALS_QUERY *makeAtomUnsaturatedQuery() { |
306 | 8.75k | auto *res = |
307 | 8.75k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomUnsaturated); |
308 | 8.75k | res->setDescription("AtomUnsaturated"); |
309 | 8.75k | return res; |
310 | 8.75k | } |
311 | | |
312 | 0 | ATOM_EQUALS_QUERY *makeAtomMassQuery(int what) { |
313 | 0 | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>( |
314 | 0 | massIntegerConversionFactor * what, queryAtomMass); |
315 | 0 | res->setDescription("AtomMass"); |
316 | 0 | return res; |
317 | 0 | } |
318 | | |
319 | 71.6k | ATOM_EQUALS_QUERY *makeAtomIsotopeQuery(int what) { |
320 | 71.6k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomIsotope); |
321 | 71.6k | res->setDescription("AtomIsotope"); |
322 | 71.6k | return res; |
323 | 71.6k | } |
324 | | |
325 | 23.1k | ATOM_EQUALS_QUERY *makeAtomFormalChargeQuery(int what) { |
326 | 23.1k | auto *res = |
327 | 23.1k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomFormalCharge); |
328 | 23.1k | res->setDescription("AtomFormalCharge"); |
329 | 23.1k | return res; |
330 | 23.1k | } |
331 | | |
332 | 3 | ATOM_EQUALS_QUERY *makeAtomNegativeFormalChargeQuery(int what) { |
333 | 3 | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>( |
334 | 3 | what, queryAtomNegativeFormalCharge); |
335 | 3 | res->setDescription("AtomNegativeFormalCharge"); |
336 | 3 | return res; |
337 | 3 | } |
338 | | |
339 | 25.0k | ATOM_EQUALS_QUERY *makeAtomHybridizationQuery(int what) { |
340 | 25.0k | auto *res = |
341 | 25.0k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomHybridization); |
342 | 25.0k | res->setDescription("AtomHybridization"); |
343 | 25.0k | return res; |
344 | 25.0k | } |
345 | | |
346 | 1.10k | ATOM_EQUALS_QUERY *makeAtomNumRadicalElectronsQuery(int what) { |
347 | 1.10k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>( |
348 | 1.10k | what, queryAtomNumRadicalElectrons); |
349 | 1.10k | res->setDescription("AtomNumRadicalElectrons"); |
350 | 1.10k | return res; |
351 | 1.10k | } |
352 | | |
353 | 0 | ATOM_EQUALS_QUERY *makeAtomHasChiralTagQuery() { |
354 | 0 | auto *res = |
355 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomHasChiralTag); |
356 | 0 | res->setDescription("AtomHasChiralTag"); |
357 | 0 | return res; |
358 | 0 | } |
359 | | |
360 | 0 | ATOM_EQUALS_QUERY *makeAtomMissingChiralTagQuery() { |
361 | 0 | auto *res = |
362 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomMissingChiralTag); |
363 | 0 | res->setDescription("AtomMissingChiralTag"); |
364 | 0 | return res; |
365 | 0 | } |
366 | | |
367 | 23.1k | ATOM_EQUALS_QUERY *makeAtomInRingQuery() { |
368 | 23.1k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryIsAtomInRing); |
369 | 23.1k | res->setDescription("AtomInRing"); |
370 | 23.1k | return res; |
371 | 23.1k | } |
372 | | |
373 | 0 | ATOM_EQUALS_QUERY *makeAtomIsBridgeheadQuery() { |
374 | 0 | auto *res = |
375 | 0 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryIsAtomBridgehead); |
376 | 0 | res->setDescription("AtomIsBridgehead"); |
377 | 0 | return res; |
378 | 0 | } |
379 | | |
380 | 367 | ATOM_OR_QUERY *makeQAtomQuery() { |
381 | 367 | auto *res = new ATOM_OR_QUERY; |
382 | 367 | res->setDescription("AtomOr"); |
383 | 367 | res->setTypeLabel("Q"); |
384 | 367 | res->setNegation(true); |
385 | 367 | res->addChild( |
386 | 367 | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(6))); |
387 | 367 | res->addChild( |
388 | 367 | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(1))); |
389 | 367 | return res; |
390 | 367 | } |
391 | 710 | ATOM_EQUALS_QUERY *makeQHAtomQuery() { |
392 | 710 | ATOM_EQUALS_QUERY *res = makeAtomNumQuery(6); |
393 | 710 | res->setNegation(true); |
394 | 710 | res->setTypeLabel("QH"); |
395 | 710 | return res; |
396 | 710 | } |
397 | 17.6k | ATOM_EQUALS_QUERY *makeAAtomQuery() { |
398 | 17.6k | ATOM_EQUALS_QUERY *res = makeAtomNumQuery(1); |
399 | 17.6k | res->setNegation(true); |
400 | 17.6k | res->setTypeLabel("A"); |
401 | 17.6k | return res; |
402 | 17.6k | } |
403 | 50 | ATOM_NULL_QUERY *makeAHAtomQuery() { |
404 | 50 | auto *res = makeAtomNullQuery(); |
405 | 50 | res->setTypeLabel("AH"); |
406 | 50 | return res; |
407 | 50 | } |
408 | | |
409 | 2.05k | ATOM_OR_QUERY *makeXAtomQuery() { |
410 | 2.05k | auto *res = new ATOM_OR_QUERY; |
411 | 2.05k | res->setDescription("AtomOr"); |
412 | 2.05k | res->addChild( |
413 | 2.05k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(9))); |
414 | 2.05k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
415 | 2.05k | makeAtomNumQuery(17))); |
416 | 2.05k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
417 | 2.05k | makeAtomNumQuery(35))); |
418 | 2.05k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
419 | 2.05k | makeAtomNumQuery(53))); |
420 | 2.05k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
421 | 2.05k | makeAtomNumQuery(85))); |
422 | 2.05k | res->setTypeLabel("X"); |
423 | | |
424 | 2.05k | return res; |
425 | 2.05k | } |
426 | 196 | ATOM_OR_QUERY *makeXHAtomQuery() { |
427 | 196 | ATOM_OR_QUERY *res = makeXAtomQuery(); |
428 | 196 | res->addChild( |
429 | 196 | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(1))); |
430 | 196 | res->setTypeLabel("XH"); |
431 | 196 | return res; |
432 | 196 | } |
433 | | |
434 | 1.57k | ATOM_OR_QUERY *makeMAtomQuery() { |
435 | | // using the definition from Marvin Sketch, which produces the following |
436 | | // SMARTS: |
437 | | // !#1!#2!#5!#6!#7!#8!#9!#10!#14!#15!#16!#17!#18!#33!#34!#35!#36!#52!#53!#54!#85!#86 |
438 | | // We expanded this with !#0 as part of #6106 |
439 | | // it's easier to define what isn't a metal than what is. :-) |
440 | 1.57k | ATOM_OR_QUERY *res = makeMHAtomQuery(); |
441 | 1.57k | res->addChild( |
442 | 1.57k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(1))); |
443 | 1.57k | res->setTypeLabel("M"); |
444 | | |
445 | 1.57k | return res; |
446 | 1.57k | } |
447 | 1.72k | ATOM_OR_QUERY *makeMHAtomQuery() { |
448 | | // using the definition from Marvin Sketch, which produces the following |
449 | | // SMARTS: |
450 | | // !#2!#5!#6!#7!#8!#9!#10!#14!#15!#16!#17!#18!#33!#34!#35!#36!#52!#53!#54!#85!#86 |
451 | | // We expanded this with !#0 as part of #6106 |
452 | | // it's easier to define what isn't a metal than what is. :-) |
453 | 1.72k | auto *res = new ATOM_OR_QUERY; |
454 | 1.72k | res->setDescription("AtomOr"); |
455 | 1.72k | res->setNegation(true); |
456 | 1.72k | res->addChild( |
457 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(0))); |
458 | 1.72k | res->addChild( |
459 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(2))); |
460 | 1.72k | res->addChild( |
461 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(5))); |
462 | 1.72k | res->addChild( |
463 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(6))); |
464 | 1.72k | res->addChild( |
465 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(7))); |
466 | 1.72k | res->addChild( |
467 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(8))); |
468 | 1.72k | res->addChild( |
469 | 1.72k | Queries::Query<int, Atom const *, true>::CHILD_TYPE(makeAtomNumQuery(9))); |
470 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
471 | 1.72k | makeAtomNumQuery(10))); |
472 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
473 | 1.72k | makeAtomNumQuery(14))); |
474 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
475 | 1.72k | makeAtomNumQuery(15))); |
476 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
477 | 1.72k | makeAtomNumQuery(16))); |
478 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
479 | 1.72k | makeAtomNumQuery(17))); |
480 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
481 | 1.72k | makeAtomNumQuery(18))); |
482 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
483 | 1.72k | makeAtomNumQuery(33))); |
484 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
485 | 1.72k | makeAtomNumQuery(34))); |
486 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
487 | 1.72k | makeAtomNumQuery(35))); |
488 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
489 | 1.72k | makeAtomNumQuery(36))); |
490 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
491 | 1.72k | makeAtomNumQuery(52))); |
492 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
493 | 1.72k | makeAtomNumQuery(53))); |
494 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
495 | 1.72k | makeAtomNumQuery(54))); |
496 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
497 | 1.72k | makeAtomNumQuery(85))); |
498 | 1.72k | res->addChild(Queries::Query<int, Atom const *, true>::CHILD_TYPE( |
499 | 1.72k | makeAtomNumQuery(86))); |
500 | 1.72k | res->setTypeLabel("MH"); |
501 | 1.72k | return res; |
502 | 1.72k | } |
503 | | |
504 | 0 | ATOM_EQUALS_QUERY *makeAtomInNRingsQuery(int what) { |
505 | 0 | ATOM_EQUALS_QUERY *res; |
506 | 0 | res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryIsAtomInNRings); |
507 | 0 | res->setDescription("AtomInNRings"); |
508 | 0 | return res; |
509 | 0 | } |
510 | | |
511 | 6.55k | ATOM_EQUALS_QUERY *makeAtomHasRingBondQuery() { |
512 | 6.55k | auto *res = |
513 | 6.55k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomHasRingBond); |
514 | 6.55k | res->setDescription("AtomHasRingBond"); |
515 | 6.55k | return res; |
516 | 6.55k | } |
517 | | |
518 | 108 | ATOM_EQUALS_QUERY *makeAtomNumHeteroatomNbrsQuery(int what) { |
519 | 108 | auto *res = |
520 | 108 | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomNumHeteroatomNbrs); |
521 | 108 | res->setDescription("AtomNumHeteroatomNeighbors"); |
522 | 108 | return res; |
523 | 108 | } |
524 | | |
525 | 9.41k | ATOM_EQUALS_QUERY *makeAtomHasHeteroatomNbrsQuery() { |
526 | 9.41k | auto *res = |
527 | 9.41k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(true, queryAtomHasHeteroatomNbrs); |
528 | 9.41k | res->setDescription("AtomHasHeteroatomNeighbors"); |
529 | 9.41k | return res; |
530 | 9.41k | } |
531 | 5.45k | ATOM_EQUALS_QUERY *makeAtomNumAliphaticHeteroatomNbrsQuery(int what) { |
532 | 5.45k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>( |
533 | 5.45k | what, queryAtomNumAliphaticHeteroatomNbrs); |
534 | 5.45k | res->setDescription("AtomNumAliphaticHeteroatomNeighbors"); |
535 | 5.45k | return res; |
536 | 5.45k | } |
537 | | |
538 | 50.7k | ATOM_EQUALS_QUERY *makeAtomHasAliphaticHeteroatomNbrsQuery() { |
539 | 50.7k | auto *res = makeAtomSimpleQuery<ATOM_EQUALS_QUERY>( |
540 | 50.7k | true, queryAtomHasAliphaticHeteroatomNbrs); |
541 | 50.7k | res->setDescription("AtomHasAliphaticHeteroatomNeighbors"); |
542 | 50.7k | return res; |
543 | 50.7k | } |
544 | | |
545 | 36.2k | ATOM_EQUALS_QUERY *makeAtomNonHydrogenDegreeQuery(int what) { |
546 | 36.2k | auto *res = |
547 | 36.2k | makeAtomSimpleQuery<ATOM_EQUALS_QUERY>(what, queryAtomNonHydrogenDegree); |
548 | 36.2k | res->setDescription("AtomNonHydrogenDegree"); |
549 | 36.2k | return res; |
550 | 36.2k | } |
551 | | |
552 | 4.36M | BOND_EQUALS_QUERY *makeBondOrderEqualsQuery(Bond::BondType what) { |
553 | 4.36M | auto *res = new BOND_EQUALS_QUERY; |
554 | 4.36M | res->setVal(what); |
555 | 4.36M | res->setDataFunc(queryBondOrder); |
556 | 4.36M | res->setDescription("BondOrder"); |
557 | 4.36M | res->setTypeLabel("BondOrder"); |
558 | 4.36M | return res; |
559 | 4.36M | } |
560 | | |
561 | 1.64M | RDKIT_GRAPHMOL_EXPORT BOND_EQUALS_QUERY *makeSingleOrAromaticBondQuery() { |
562 | 1.64M | auto *res = new BOND_EQUALS_QUERY; |
563 | 1.64M | res->setVal(true); |
564 | 1.64M | res->setDataFunc(queryBondIsSingleOrAromatic); |
565 | 1.64M | res->setDescription("SingleOrAromaticBond"); |
566 | 1.64M | res->setTypeLabel("BondOrder"); |
567 | 1.64M | return res; |
568 | 1.64M | }; |
569 | | |
570 | 953 | RDKIT_GRAPHMOL_EXPORT BOND_EQUALS_QUERY *makeDoubleOrAromaticBondQuery() { |
571 | 953 | auto *res = new BOND_EQUALS_QUERY; |
572 | 953 | res->setVal(true); |
573 | 953 | res->setDataFunc(queryBondIsDoubleOrAromatic); |
574 | 953 | res->setDescription("DoubleOrAromaticBond"); |
575 | 953 | res->setTypeLabel("BondOrder"); |
576 | 953 | return res; |
577 | 953 | }; |
578 | | |
579 | 1.10k | RDKIT_GRAPHMOL_EXPORT BOND_EQUALS_QUERY *makeSingleOrDoubleBondQuery() { |
580 | 1.10k | auto *res = new BOND_EQUALS_QUERY; |
581 | 1.10k | res->setVal(true); |
582 | 1.10k | res->setDataFunc(queryBondIsSingleOrDouble); |
583 | 1.10k | res->setDescription("SingleOrDoubleBond"); |
584 | 1.10k | res->setTypeLabel("BondOrder"); |
585 | 1.10k | return res; |
586 | 1.10k | }; |
587 | | |
588 | | RDKIT_GRAPHMOL_EXPORT BOND_EQUALS_QUERY * |
589 | 0 | makeSingleOrDoubleOrAromaticBondQuery() { |
590 | 0 | auto *res = new BOND_EQUALS_QUERY; |
591 | 0 | res->setVal(true); |
592 | 0 | res->setDataFunc(queryBondIsSingleOrDoubleOrAromatic); |
593 | 0 | res->setDescription("SingleOrDoubleOrAromaticBond"); |
594 | 0 | res->setTypeLabel("BondOrder"); |
595 | 0 | return res; |
596 | 0 | }; |
597 | | |
598 | | namespace QueryOps { |
599 | | // we don't use these anymore but we need to keep them around for backwards |
600 | | // compatibility with pickled queries. There's no reason to update this list. |
601 | | const std::vector<std::string> bondOrderQueryFunctions{ |
602 | | std::string("BondOrder"), std::string("SingleOrAromaticBond"), |
603 | | std::string("DoubleOrAromaticBond"), std::string("SingleOrDoubleBond"), |
604 | | std::string("SingleOrDoubleOrAromaticBond")}; |
605 | | RDKIT_GRAPHMOL_EXPORT bool hasBondTypeQuery( |
606 | 14.4k | const Queries::Query<int, Bond const *, true> &qry) { |
607 | 14.4k | const auto df = qry.getDescription(); |
608 | 14.4k | const auto dt = qry.getTypeLabel(); |
609 | | // is this a bond order query? |
610 | 14.4k | if (dt == "BondOrder" || |
611 | 11.8k | (dt.empty() && |
612 | 11.8k | std::find(bondOrderQueryFunctions.begin(), bondOrderQueryFunctions.end(), |
613 | 11.8k | df) != bondOrderQueryFunctions.end())) { |
614 | 2.61k | return true; |
615 | 2.61k | } |
616 | 11.8k | for (const auto &child : |
617 | 11.8k | boost::make_iterator_range(qry.beginChildren(), qry.endChildren())) { |
618 | 489 | if (hasBondTypeQuery(*child)) { |
619 | 489 | return true; |
620 | 489 | } |
621 | 489 | } |
622 | 11.3k | return false; |
623 | 11.8k | } |
624 | | |
625 | | namespace { |
626 | | bool hasComplexBondTypeQueryHelper( |
627 | 434k | const Queries::Query<int, Bond const *, true> &qry, bool seenBondOrder) { |
628 | 434k | const auto df = qry.getDescription(); |
629 | 434k | bool isBondOrder = (df == "BondOrder"); |
630 | | // is this a bond order query? |
631 | 434k | if (std::find(bondOrderQueryFunctions.begin(), bondOrderQueryFunctions.end(), |
632 | 434k | df) != bondOrderQueryFunctions.end()) { |
633 | 84.1k | if (seenBondOrder || !isBondOrder || qry.getNegation()) { |
634 | 68.7k | return true; |
635 | 68.7k | } |
636 | 84.1k | } |
637 | 366k | for (const auto &child : |
638 | 366k | boost::make_iterator_range(qry.beginChildren(), qry.endChildren())) { |
639 | 30.9k | if (hasComplexBondTypeQueryHelper(*child, seenBondOrder | isBondOrder)) { |
640 | 294 | return true; |
641 | 294 | } |
642 | 30.6k | if (child->getDescription() == "BondOrder") { |
643 | 15.3k | seenBondOrder = true; |
644 | 15.3k | } |
645 | 30.6k | } |
646 | 365k | return false; |
647 | 366k | } |
648 | | } // namespace |
649 | | |
650 | | RDKIT_GRAPHMOL_EXPORT bool hasComplexBondTypeQuery( |
651 | 403k | const Queries::Query<int, Bond const *, true> &qry) { |
652 | 403k | return hasComplexBondTypeQueryHelper(qry, false); |
653 | 403k | } |
654 | | } // namespace QueryOps |
655 | | |
656 | 0 | BOND_EQUALS_QUERY *makeBondDirEqualsQuery(Bond::BondDir what) { |
657 | 0 | auto *res = new BOND_EQUALS_QUERY; |
658 | 0 | res->setVal(what); |
659 | 0 | res->setDataFunc(queryBondDir); |
660 | 0 | res->setDescription("BondDir"); |
661 | 0 | return res; |
662 | 0 | } |
663 | | |
664 | 0 | BOND_EQUALS_QUERY *makeBondHasStereoQuery() { |
665 | 0 | auto *res = new BOND_EQUALS_QUERY; |
666 | 0 | res->setVal(true); |
667 | 0 | res->setDataFunc(queryBondHasStereo); |
668 | 0 | res->setDescription("BondStereo"); |
669 | 0 | return res; |
670 | 0 | } |
671 | | |
672 | 4.12k | BOND_EQUALS_QUERY *makeBondIsInRingQuery() { |
673 | 4.12k | auto *res = new BOND_EQUALS_QUERY; |
674 | 4.12k | res->setVal(true); |
675 | 4.12k | res->setDataFunc(queryIsBondInRing); |
676 | 4.12k | res->setDescription("BondInRing"); |
677 | 4.12k | return res; |
678 | 4.12k | } |
679 | | |
680 | 0 | BOND_EQUALS_QUERY *makeBondInNRingsQuery(int what) { |
681 | 0 | auto *res = new BOND_EQUALS_QUERY; |
682 | 0 | res->setVal(what); |
683 | 0 | res->setDataFunc(queryIsBondInNRings); |
684 | 0 | res->setDescription("BondInNRings"); |
685 | 0 | return res; |
686 | 0 | } |
687 | | |
688 | 47.8k | BOND_NULL_QUERY *makeBondNullQuery() { |
689 | 47.8k | auto *res = new BOND_NULL_QUERY; |
690 | 47.8k | res->setDataFunc(nullDataFun<const RDKit::Bond *>); |
691 | 47.8k | res->setMatchFunc(nullQueryFun<int>); |
692 | 47.8k | res->setDescription("BondNull"); |
693 | 47.8k | return res; |
694 | 47.8k | } |
695 | | |
696 | 126k | ATOM_NULL_QUERY *makeAtomNullQuery() { |
697 | 126k | auto *res = new ATOM_NULL_QUERY; |
698 | 126k | res->setDataFunc(nullDataFun<const RDKit::Atom *>); |
699 | 126k | res->setMatchFunc(nullQueryFun<int>); |
700 | 126k | res->setDescription("AtomNull"); |
701 | 126k | return res; |
702 | 126k | } |
703 | | |
704 | 2.09k | void convertComplexNameToQuery(Atom *query, std::string_view symb) { |
705 | 2.09k | if (symb == "Q") { |
706 | 179 | query->setQuery(makeQAtomQuery()); |
707 | 1.91k | } else if (symb == "QH") { |
708 | 4 | query->setQuery(makeQHAtomQuery()); |
709 | 1.91k | } else if (symb == "A") { |
710 | 859 | query->setQuery(makeAAtomQuery()); |
711 | 1.05k | } else if (symb == "AH") { |
712 | 41 | query->setQuery(makeAHAtomQuery()); |
713 | 1.01k | } else if (symb == "X") { |
714 | 37 | query->setQuery(makeXAtomQuery()); |
715 | 978 | } else if (symb == "XH") { |
716 | 110 | query->setQuery(makeXHAtomQuery()); |
717 | 868 | } else if (symb == "M") { |
718 | 861 | query->setQuery(makeMAtomQuery()); |
719 | 861 | } else if (symb == "MH") { |
720 | 7 | query->setQuery(makeMHAtomQuery()); |
721 | 7 | } else { |
722 | | // we control what this function gets called with, so we should never land |
723 | | // here |
724 | 0 | ASSERT_INVARIANT(0, "bad complex query symbol"); |
725 | 0 | } |
726 | 2.09k | } |
727 | | |
728 | 0 | bool isComplexQuery(const Bond *b) { |
729 | 0 | PRECONDITION(b, "bad bond"); |
730 | 0 | if (!b->hasQuery()) { |
731 | 0 | return false; |
732 | 0 | } |
733 | | // negated things are always complex: |
734 | 0 | if (b->getQuery()->getNegation()) { |
735 | 0 | return true; |
736 | 0 | } |
737 | 0 | std::string descr = b->getQuery()->getDescription(); |
738 | 0 | if (descr == "BondOrder" || descr == "SingleOrAromaticBond") { |
739 | 0 | return false; |
740 | 0 | } |
741 | 0 | if (descr == "BondAnd" || descr == "BondXor") { |
742 | 0 | return true; |
743 | 0 | } |
744 | 0 | if (descr == "BondOr") { |
745 | | // detect the types of queries that appear for unspecified bonds in |
746 | | // SMARTS: |
747 | 0 | if (b->getQuery()->endChildren() - b->getQuery()->beginChildren() == 2) { |
748 | 0 | for (auto child = b->getQuery()->beginChildren(); |
749 | 0 | child != b->getQuery()->endChildren(); ++child) { |
750 | 0 | if ((*child)->getDescription() != "BondOrder" || |
751 | 0 | (*child)->getNegation()) { |
752 | 0 | return true; |
753 | 0 | } |
754 | 0 | if (static_cast<BOND_EQUALS_QUERY *>(child->get())->getVal() != |
755 | 0 | Bond::SINGLE && |
756 | 0 | static_cast<BOND_EQUALS_QUERY *>(child->get())->getVal() != |
757 | 0 | Bond::AROMATIC) { |
758 | 0 | return true; |
759 | 0 | } |
760 | 0 | } |
761 | 0 | return false; |
762 | 0 | } |
763 | 0 | } |
764 | | |
765 | 0 | return true; |
766 | 0 | } |
767 | | |
768 | | namespace { |
769 | 0 | bool _complexQueryHelper(Atom::QUERYATOM_QUERY const *query, bool &hasAtNum) { |
770 | 0 | if (!query) { |
771 | 0 | return false; |
772 | 0 | } |
773 | 0 | if (query->getNegation()) { |
774 | 0 | return true; |
775 | 0 | } |
776 | 0 | std::string descr = query->getDescription(); |
777 | | // std::cerr<<" |"<<descr; |
778 | 0 | if (descr == "AtomAtomicNum" || descr == "AtomType") { |
779 | 0 | hasAtNum = true; |
780 | 0 | return false; |
781 | 0 | } |
782 | 0 | if (descr == "AtomOr" || descr == "AtomXor") { |
783 | 0 | return true; |
784 | 0 | } |
785 | 0 | if (descr == "AtomAnd") { |
786 | 0 | auto childIt = query->beginChildren(); |
787 | 0 | while (childIt != query->endChildren()) { |
788 | 0 | if (_complexQueryHelper(childIt->get(), hasAtNum)) { |
789 | 0 | return true; |
790 | 0 | } |
791 | 0 | ++childIt; |
792 | 0 | } |
793 | 0 | } |
794 | 0 | return false; |
795 | 0 | } |
796 | | |
// Recursive worker for isAtomListQuery().
//
// `query` is any pointer-like handle to an atom query. Returns true when the
// query is an "AtomAtomicNum" or "AtomType" query, or an "AtomOr" whose
// children all satisfy this same test. A negated query is rejected unless
// `ignoreNegation` is set.
template <typename T>
bool _atomListQueryHelper(const T query, bool ignoreNegation) {
  PRECONDITION(query, "no query");
  if (!ignoreNegation && query->getNegation()) {
    return false;
  }
  const std::string descr = query->getDescription();
  if (descr == "AtomAtomicNum" || descr == "AtomType") {
    return true;
  }
  if (descr != "AtomOr") {
    return false;
  }
  return std::all_of(query->beginChildren(), query->endChildren(),
                     [ignoreNegation](const auto &child) {
                       return _atomListQueryHelper(child, ignoreNegation);
                     });
}
818 | | } // namespace |
819 | 0 | bool isAtomListQuery(const Atom *a) { |
820 | 0 | PRECONDITION(a, "bad atom"); |
821 | 0 | if (!a->hasQuery()) { |
822 | 0 | return false; |
823 | 0 | } |
824 | 0 | if (a->getQuery()->getDescription() == "AtomOr") { |
825 | 0 | for (const auto &child : boost::make_iterator_range( |
826 | 0 | a->getQuery()->beginChildren(), a->getQuery()->endChildren())) { |
827 | 0 | if (!_atomListQueryHelper(child, false)) { |
828 | 0 | return false; |
829 | 0 | } |
830 | 0 | } |
831 | 0 | return true; |
832 | 0 | } else if (a->getQuery()->getNegation() && |
833 | 0 | _atomListQueryHelper(a->getQuery(), true)) { |
834 | | // this was github #5930: negated list queries containing a single atom were |
835 | | // being lost on output |
836 | 0 | return true; |
837 | 0 | } else if (a->getQuery()->getDescription() == "AtomAtomicNum" && |
838 | 0 | static_cast<ATOM_EQUALS_QUERY *>(a->getQuery())->getVal() != |
839 | 0 | a->getAtomicNum()) { |
840 | | // when reading single-member atom lists from CTABs we end up with simple |
841 | | // AtomAtomicNum queries where the atomic number of the atom itself is zero. |
842 | | // Recognize this case. |
843 | 0 | return true; |
844 | 0 | } |
845 | 0 | return false; |
846 | 0 | } |
847 | | |
848 | | void getAtomListQueryVals(const Atom::QUERYATOM_QUERY *q, |
849 | 0 | std::vector<int> &vals) { |
850 | | // list queries are series of nested ors of AtomAtomicNum queries |
851 | 0 | PRECONDITION(q, "bad query"); |
852 | 0 | auto descr = q->getDescription(); |
853 | 0 | if (descr == "AtomOr") { |
854 | 0 | for (const auto &child : |
855 | 0 | boost::make_iterator_range(q->beginChildren(), q->endChildren())) { |
856 | 0 | auto descr = child->getDescription(); |
857 | 0 | if (child->getNegation() || |
858 | 0 | (descr != "AtomOr" && descr != "AtomAtomicNum" && |
859 | 0 | descr != "AtomType")) { |
860 | 0 | throw ValueErrorException("bad query type1"); |
861 | 0 | } |
862 | | // we don't allow negation of any children of the query: |
863 | 0 | if (descr == "AtomOr") { |
864 | 0 | getAtomListQueryVals(child.get(), vals); |
865 | 0 | } else if (descr == "AtomAtomicNum") { |
866 | 0 | vals.push_back(static_cast<ATOM_EQUALS_QUERY *>(child.get())->getVal()); |
867 | 0 | } else if (descr == "AtomType") { |
868 | 0 | auto v = static_cast<ATOM_EQUALS_QUERY *>(child.get())->getVal(); |
869 | | // aromatic AtomType queries add 1000 to the atomic number; |
870 | | // correct for that: |
871 | 0 | if (v >= 1000) { |
872 | 0 | v -= 1000; |
873 | 0 | } |
874 | 0 | vals.push_back(v); |
875 | 0 | } |
876 | 0 | } |
877 | 0 | } else if (descr == "AtomAtomicNum") { |
878 | 0 | vals.push_back(static_cast<const ATOM_EQUALS_QUERY *>(q)->getVal()); |
879 | 0 | } else if (descr == "AtomType") { |
880 | 0 | auto v = static_cast<const ATOM_EQUALS_QUERY *>(q)->getVal(); |
881 | | // aromatic AtomType queries add 1000 to the atomic number; |
882 | | // correct for that: |
883 | 0 | if (v >= 1000) { |
884 | 0 | v -= 1000; |
885 | 0 | } |
886 | 0 | vals.push_back(v); |
887 | 0 | } else { |
888 | 0 | CHECK_INVARIANT(0, "bad query type"); |
889 | 0 | } |
890 | 0 | } |
891 | | |
892 | 0 | bool isComplexQuery(const Atom *a) { |
893 | 0 | PRECONDITION(a, "bad atom"); |
894 | 0 | if (!a->hasQuery()) { |
895 | 0 | return false; |
896 | 0 | } |
897 | | // std::cerr<<"\n"<<a->getIdx(); |
898 | | // negated things are always complex: |
899 | 0 | if (a->getQuery()->getNegation()) { |
900 | 0 | return true; |
901 | 0 | } |
902 | 0 | std::string descr = a->getQuery()->getDescription(); |
903 | | // std::cerr<<" "<<descr; |
904 | 0 | if (descr == "AtomNull" || descr == "AtomAtomicNum" || descr == "AtomType") { |
905 | 0 | return false; |
906 | 0 | } |
907 | 0 | if (descr == "AtomOr" || descr == "AtomXor") { |
908 | 0 | return true; |
909 | 0 | } |
910 | 0 | if (descr == "AtomAnd") { |
911 | 0 | bool hasAtNum = false; |
912 | 0 | if (_complexQueryHelper(a->getQuery(), hasAtNum)) { |
913 | 0 | return true; |
914 | 0 | } |
915 | 0 | return !hasAtNum; |
916 | 0 | } |
917 | | |
918 | 0 | return true; |
919 | 0 | } |
920 | 0 | bool isAtomAromatic(const Atom *a) { |
921 | 0 | PRECONDITION(a, "bad atom"); |
922 | 0 | bool res = false; |
923 | 0 | if (!a->hasQuery()) { |
924 | 0 | res = isAromaticAtom(*a); |
925 | 0 | } else { |
926 | 0 | std::string descr = a->getQuery()->getDescription(); |
927 | 0 | if (descr == "AtomAtomicNum") { |
928 | 0 | res = a->getIsAromatic(); |
929 | 0 | } else if (descr == "AtomIsAromatic") { |
930 | 0 | res = true; |
931 | 0 | if (a->getQuery()->getNegation()) { |
932 | 0 | res = !res; |
933 | 0 | } |
934 | 0 | } else if (descr == "AtomIsAliphatic") { |
935 | 0 | res = false; |
936 | 0 | if (a->getQuery()->getNegation()) { |
937 | 0 | res = !res; |
938 | 0 | } |
939 | 0 | } else if (descr == "AtomType") { |
940 | 0 | res = getAtomTypeIsAromatic( |
941 | 0 | static_cast<ATOM_EQUALS_QUERY *>(a->getQuery())->getVal()); |
942 | 0 | if (a->getQuery()->getNegation()) { |
943 | 0 | res = !res; |
944 | 0 | } |
945 | 0 | } else if (descr == "AtomAnd") { |
946 | 0 | auto childIt = a->getQuery()->beginChildren(); |
947 | 0 | if ((*childIt)->getDescription() == "AtomAtomicNum") { |
948 | 0 | if (a->getQuery()->getNegation()) { |
949 | 0 | res = false; |
950 | 0 | } else if ((*(childIt + 1))->getDescription() == "AtomIsAliphatic") { |
951 | 0 | res = false; |
952 | 0 | } else if ((*(childIt + 1))->getDescription() == "AtomIsAromatic") { |
953 | 0 | res = true; |
954 | 0 | } |
955 | 0 | } |
956 | 0 | } |
957 | 0 | } |
958 | 0 | return res; |
959 | 0 | } |
960 | | |
961 | | namespace QueryOps { |
962 | | namespace { |
963 | | void completeQueryAndChildren(Atom::QUERYATOM_QUERY *query, Atom *tgt, |
964 | 14.7k | unsigned int magicVal) { |
965 | 14.7k | PRECONDITION(query, "no query"); |
966 | 14.7k | PRECONDITION(tgt, "no atom"); |
967 | 14.7k | auto eqQuery = dynamic_cast<ATOM_EQUALS_QUERY *>(query); |
968 | 14.7k | if (eqQuery) { |
969 | 8.16k | if (static_cast<unsigned int>(eqQuery->getVal()) == magicVal) { |
970 | 4.37k | int tgtVal = eqQuery->getDataFunc()(tgt); |
971 | 4.37k | eqQuery->setVal(tgtVal); |
972 | 4.37k | } |
973 | 8.16k | } |
974 | 27.6k | for (auto childIt = query->beginChildren(); childIt != query->endChildren(); |
975 | 14.7k | ++childIt) { |
976 | 12.9k | completeQueryAndChildren(childIt->get(), tgt, magicVal); |
977 | 12.9k | } |
978 | 14.7k | } |
979 | | } // namespace |
980 | 175 | void completeMolQueries(RWMol *mol, unsigned int magicVal) { |
981 | 175 | PRECONDITION(mol, "bad molecule"); |
982 | 5.05k | for (auto atom : mol->atoms()) { |
983 | 5.05k | if (atom->hasQuery()) { |
984 | 1.75k | completeQueryAndChildren(atom->getQuery(), atom, magicVal); |
985 | 1.75k | } |
986 | 5.05k | } |
987 | 175 | } |
988 | | |
989 | 2.88k | Atom *replaceAtomWithQueryAtom(RWMol *mol, Atom *atom) { |
990 | 2.88k | PRECONDITION(mol, "bad molecule"); |
991 | 2.88k | PRECONDITION(atom, "bad atom"); |
992 | 2.88k | if (atom->hasQuery()) { |
993 | 9 | return atom; |
994 | 9 | } |
995 | | |
996 | 2.87k | QueryAtom qa(*atom); |
997 | 2.87k | unsigned int idx = atom->getIdx(); |
998 | | |
999 | 2.87k | if (atom->hasProp(common_properties::_hasMassQuery)) { |
1000 | 0 | qa.expandQuery(makeAtomMassQuery(static_cast<int>(atom->getMass()))); |
1001 | 0 | } |
1002 | 2.87k | mol->replaceAtom(idx, &qa); |
1003 | 2.87k | return mol->getAtomWithIdx(idx); |
1004 | 2.88k | } |
1005 | | |
1006 | | enum class RangeQueryType : char { |
1007 | | EQUAL, |
1008 | | LESS, |
1009 | | GREATER, |
1010 | | RANGE |
1011 | | }; |
1012 | | void finalizeAtomRingSizeQuery(Queries::Query<int, Atom const *, true> *query, |
1013 | 0 | RangeQueryType qtype) { |
1014 | 0 | switch (qtype) { |
1015 | 0 | case RangeQueryType::EQUAL: { |
1016 | 0 | auto tgt = static_cast<ATOM_EQUALS_QUERY *>(query)->getVal(); |
1017 | 0 | query->setDataFunc( |
1018 | 0 | [tgt](Atom const *at) { return queryAtomIsInRingOfSize(at, tgt); }); |
1019 | 0 | } break; |
1020 | 0 | case RangeQueryType::RANGE: { |
1021 | 0 | auto rq = static_cast<ATOM_RANGE_QUERY *>(query); |
1022 | 0 | auto uv = rq->getUpper(); |
1023 | 0 | auto lv = rq->getLower(); |
1024 | 0 | auto [lo, uo] = rq->getEndsOpen(); |
1025 | 0 | query->setDataFunc([lv, uv, lo, uo](Atom const *at) { |
1026 | 0 | return queryAtomIsInRingOfSize(at, lv, uv, lo, uo); |
1027 | 0 | }); |
1028 | 0 | } break; |
1029 | 0 | case RangeQueryType::LESS: { |
1030 | 0 | auto lv = static_cast<ATOM_LESSEQUAL_QUERY *>(query)->getVal(); |
1031 | 0 | auto uv = -1; |
1032 | 0 | query->setDataFunc([lv, uv](Atom const *at) { |
1033 | 0 | return queryAtomIsInRingOfSize(at, lv, uv); |
1034 | 0 | }); |
1035 | 0 | } break; |
1036 | 0 | case RangeQueryType::GREATER: { |
1037 | 0 | auto lv = -1; |
1038 | 0 | auto uv = static_cast<ATOM_GREATEREQUAL_QUERY *>(query)->getVal(); |
1039 | 0 | query->setDataFunc([lv, uv](Atom const *at) { |
1040 | 0 | return queryAtomIsInRingOfSize(at, lv, uv); |
1041 | 0 | }); |
1042 | 0 | } break; |
1043 | 0 | default: |
1044 | 0 | throw ValueErrorException("bad range query type"); |
1045 | 0 | } |
1046 | 0 | } |
1047 | | |
1048 | | void finalizeQueryFromDescription( |
1049 | 550 | Queries::Query<int, Atom const *, true> *query, Atom const *) { |
1050 | 550 | std::string descr = query->getDescription(); |
1051 | | |
1052 | 550 | RangeQueryType qtype = RangeQueryType::EQUAL; |
1053 | 550 | if (boost::starts_with(descr, "range_")) { |
1054 | 0 | descr = descr.substr(6); |
1055 | 0 | qtype = RangeQueryType::RANGE; |
1056 | 550 | } else if (boost::starts_with(descr, "less_")) { |
1057 | 0 | descr = descr.substr(5); |
1058 | 0 | qtype = RangeQueryType::LESS; |
1059 | 550 | } else if (boost::starts_with(descr, "greater_")) { |
1060 | 0 | descr = descr.substr(8); |
1061 | 0 | qtype = RangeQueryType::GREATER; |
1062 | 0 | } |
1063 | | |
1064 | 550 | if (descr == "AtomRingBondCount") { |
1065 | 0 | query->setDataFunc(queryAtomRingBondCount); |
1066 | 550 | } else if (descr == "AtomHasRingBond") { |
1067 | 0 | query->setDataFunc(queryAtomHasRingBond); |
1068 | 550 | } else if (descr == "AtomRingSize") { |
1069 | 0 | finalizeAtomRingSizeQuery(query, qtype); |
1070 | 550 | } else if (descr == "AtomMinRingSize") { |
1071 | 0 | query->setDataFunc(queryAtomMinRingSize); |
1072 | 550 | } else if (descr == "AtomImplicitValence") { |
1073 | 0 | query->setDataFunc(queryAtomImplicitValence); |
1074 | 550 | } else if (descr == "AtomTotalValence") { |
1075 | 0 | query->setDataFunc(queryAtomTotalValence); |
1076 | 550 | } else if (descr == "AtomAtomicNum") { |
1077 | 0 | query->setDataFunc(queryAtomNum); |
1078 | 550 | } else if (descr == "AtomExplicitDegree") { |
1079 | 0 | query->setDataFunc(queryAtomExplicitDegree); |
1080 | 550 | } else if (descr == "AtomTotalDegree") { |
1081 | 0 | query->setDataFunc(queryAtomTotalDegree); |
1082 | 550 | } else if (descr == "AtomHeavyAtomDegree") { |
1083 | 0 | query->setDataFunc(queryAtomHeavyAtomDegree); |
1084 | 550 | } else if (descr == "AtomHCount") { |
1085 | 0 | query->setDataFunc(queryAtomHCount); |
1086 | 550 | } else if (descr == "AtomImplicitHCount") { |
1087 | 0 | query->setDataFunc(queryAtomImplicitHCount); |
1088 | 550 | } else if (descr == "AtomHasImplicitH") { |
1089 | 0 | query->setDataFunc(queryAtomHasImplicitH); |
1090 | 550 | } else if (descr == "AtomIsAromatic") { |
1091 | 0 | query->setDataFunc(queryAtomAromatic); |
1092 | 550 | } else if (descr == "AtomIsAliphatic") { |
1093 | 0 | query->setDataFunc(queryAtomAliphatic); |
1094 | 550 | } else if (descr == "AtomUnsaturated") { |
1095 | 0 | query->setDataFunc(queryAtomUnsaturated); |
1096 | 550 | } else if (descr == "AtomMass") { |
1097 | 0 | query->setDataFunc(queryAtomMass); |
1098 | 550 | } else if (descr == "AtomIsotope") { |
1099 | 0 | query->setDataFunc(queryAtomIsotope); |
1100 | 550 | } else if (descr == "AtomFormalCharge") { |
1101 | 550 | query->setDataFunc(queryAtomFormalCharge); |
1102 | 550 | } else if (descr == "AtomNegativeFormalCharge") { |
1103 | 0 | query->setDataFunc(queryAtomNegativeFormalCharge); |
1104 | 0 | } else if (descr == "AtomHybridization") { |
1105 | 0 | query->setDataFunc(queryAtomHybridization); |
1106 | 0 | } else if (descr == "AtomInRing") { |
1107 | 0 | query->setDataFunc(queryIsAtomInRing); |
1108 | 0 | } else if (descr == "AtomInNRings") { |
1109 | 0 | query->setDataFunc(queryIsAtomInNRings); |
1110 | 0 | } else if (descr == "AtomHasHeteroatomNeighbors") { |
1111 | 0 | query->setDataFunc(queryAtomHasHeteroatomNbrs); |
1112 | 0 | } else if (descr == "AtomNumHeteroatomNeighbors") { |
1113 | 0 | query->setDataFunc(queryAtomNumHeteroatomNbrs); |
1114 | 0 | } else if (descr == "AtomNonHydrogenDegree") { |
1115 | 0 | query->setDataFunc(queryAtomNonHydrogenDegree); |
1116 | 0 | } else if (descr == "AtomHasAliphaticHeteroatomNeighbors") { |
1117 | 0 | query->setDataFunc(queryAtomHasAliphaticHeteroatomNbrs); |
1118 | 0 | } else if (descr == "AtomNumAliphaticHeteroatomNeighbors") { |
1119 | 0 | query->setDataFunc(queryAtomNumAliphaticHeteroatomNbrs); |
1120 | 0 | } else if (descr == "AtomNull") { |
1121 | 0 | query->setDataFunc(nullDataFun<const RDKit::Atom *>); |
1122 | 0 | query->setMatchFunc(nullQueryFun<int>); |
1123 | 0 | } else if (descr == "AtomType") { |
1124 | 0 | query->setDataFunc(queryAtomType); |
1125 | 0 | } else if (descr == "AtomNumRadicalElectrons") { |
1126 | 0 | query->setDataFunc(queryAtomNumRadicalElectrons); |
1127 | 0 | } else if (descr == "AtomInNRings" || descr == "RecursiveStructure") { |
1128 | | // don't need to do anything here because the classes |
1129 | | // automatically have everything set |
1130 | 0 | } else if (descr == "AtomAnd" || descr == "AtomOr" || descr == "AtomXor" || |
1131 | 0 | descr == "HasProp" || descr == "HasPropWithValue") { |
1132 | | // don't need to do anything here because the classes |
1133 | | // automatically have everything set |
1134 | 0 | } else { |
1135 | 0 | throw ValueErrorException("Do not know how to finalize query: '" + descr + |
1136 | 0 | "'"); |
1137 | 0 | } |
1138 | 550 | } |
1139 | | |
1140 | | void finalizeQueryFromDescription( |
1141 | 0 | Queries::Query<int, Bond const *, true> *query, Bond const *) { |
1142 | 0 | std::string descr = query->getDescription(); |
1143 | 0 | Queries::Query<int, Bond const *, true> *tmpQuery; |
1144 | 0 | if (descr == "BondRingSize") { |
1145 | 0 | tmpQuery = makeBondInRingOfSizeQuery( |
1146 | 0 | static_cast<BOND_EQUALS_QUERY *>(query)->getVal()); |
1147 | 0 | query->setDataFunc(tmpQuery->getDataFunc()); |
1148 | 0 | delete tmpQuery; |
1149 | 0 | } else if (descr == "BondMinRingSize") { |
1150 | 0 | query->setDataFunc(queryBondMinRingSize); |
1151 | 0 | } else if (descr == "BondOrder") { |
1152 | 0 | query->setDataFunc(queryBondOrder); |
1153 | 0 | } else if (descr == "BondDir") { |
1154 | 0 | query->setDataFunc(queryBondDir); |
1155 | 0 | } else if (descr == "BondInRing") { |
1156 | 0 | query->setDataFunc(queryIsBondInRing); |
1157 | 0 | } else if (descr == "BondInNRings") { |
1158 | 0 | query->setDataFunc(queryIsBondInNRings); |
1159 | 0 | } else if (descr == "SingleOrAromaticBond") { |
1160 | 0 | query->setDataFunc(queryBondIsSingleOrAromatic); |
1161 | 0 | } else if (descr == "SingleOrDoubleBond") { |
1162 | 0 | query->setDataFunc(queryBondIsSingleOrDouble); |
1163 | 0 | } else if (descr == "DoubleOrAromaticBond") { |
1164 | 0 | query->setDataFunc(queryBondIsDoubleOrAromatic); |
1165 | 0 | } else if (descr == "SingleOrDoubleOrAromaticBond") { |
1166 | 0 | query->setDataFunc(queryBondIsSingleOrDoubleOrAromatic); |
1167 | 0 | } else if (descr == "BondNull") { |
1168 | 0 | query->setDataFunc(nullDataFun<const RDKit::Bond *>); |
1169 | 0 | query->setMatchFunc(nullQueryFun<int>); |
1170 | 0 | } else if (descr == "BondAnd" || descr == "BondOr" || descr == "BondXor" || |
1171 | 0 | descr == "HasProp" || descr == "HasPropWithValue") { |
1172 | | // don't need to do anything here because the classes |
1173 | | // automatically have everything set |
1174 | 0 | } else { |
1175 | 0 | throw ValueErrorException("Do not know how to finalize query: '" + descr + |
1176 | 0 | "'"); |
1177 | 0 | } |
1178 | 0 | } |
1179 | | |
1180 | 5.99M | bool isMetal(const Atom &atom) { |
1181 | 5.99M | static const std::unique_ptr<ATOM_OR_QUERY> q(makeMAtomQuery()); |
1182 | 5.99M | return q->Match(&atom); |
1183 | 5.99M | } |
1184 | | |
1185 | | } // namespace QueryOps |
1186 | | }; // namespace RDKit |