/src/openbabel/src/parsmart.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | parsmart.cpp - SMARTS parser. |
3 | | |
4 | | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 | | Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison |
6 | | |
7 | | This file is part of the Open Babel project. |
8 | | For more information, see <http://openbabel.org/> |
9 | | |
10 | | This program is free software; you can redistribute it and/or modify |
11 | | it under the terms of the GNU General Public License as published by |
12 | | the Free Software Foundation version 2 of the License. |
13 | | |
14 | | This program is distributed in the hope that it will be useful, |
15 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | GNU General Public License for more details. |
18 | | ***********************************************************************/ |
19 | | #include <openbabel/babelconfig.h> |
20 | | |
21 | | #include <cctype> |
22 | | #include <iomanip> |
23 | | #include <cstring> |
24 | | |
25 | | #include <openbabel/mol.h> |
26 | | #include <openbabel/atom.h> |
27 | | #include <openbabel/bond.h> |
28 | | #include <openbabel/parsmart.h> |
29 | | #include <openbabel/stereo/stereo.h> |
30 | | #include <openbabel/stereo/tetrahedral.h> |
31 | | |
32 | | using namespace std; |
33 | | |
34 | | namespace OpenBabel |
35 | | { |
36 | | /*! \class OBSmartsPattern parsmart.h <openbabel/parsmart.h> |
37 | | |
38 | | Substructure search is an incredibly useful tool in the context of a |
39 | | small molecule programming library. Having an efficient substructure |
40 | | search engine reduces the amount of hard code needed for molecule |
41 | | perception, as well as increases the flexibility of certain |
42 | | operations. For instance, atom typing can be easily performed based on |
43 | | hard coded rules of element type and bond orders (or |
44 | | hybridization). Alternatively, atom typing can also be done by |
45 | | matching a set of substructure rules read at run time. In the latter |
46 | | case customization based on application (such as changing the pH) |
47 | | becomes a facile operation. Fortunately for Open Babel and its users, |
48 | | Roger Sayle donated a SMARTS parser which became the basis for SMARTS |
49 | | matching in Open Babel. |
50 | | |
51 | | For more information on the SMARTS support in Open Babel, see the wiki page: |
52 | | http://openbabel.org/wiki/SMARTS |
53 | | |
54 | | The SMARTS matcher, or OBSmartsPattern, is a separate object which can |
55 | | match patterns in the OBMol class. The following code demonstrates how |
56 | | to use the OBSmartsPattern class: |
57 | | \code |
58 | | OBMol mol; |
59 | | ... |
60 | | OBSmartsPattern sp; |
61 | | sp.Init("CC"); |
62 | | sp.Match(mol); |
63 | | vector<vector<int> > maplist; |
64 | | maplist = sp.GetMapList(); |
65 | | //or maplist = sp.GetUMapList(); |
66 | | //print out the results |
67 | | vector<vector<int> >::iterator i; |
68 | | vector<int>::iterator j; |
69 | | for (i = maplist.begin();i != maplist.end();++i) |
70 | | { |
71 | | for (j = i->begin();j != i->end();++j) |
72 | | cout << j << ' `; |
73 | | cout << endl; |
74 | | } |
75 | | \endcode |
76 | | |
77 | | The preceding code reads in a molecule, initializes a SMARTS pattern |
78 | | of two single-bonded carbons, and locates all instances of the |
79 | | pattern in the molecule. Note that calling the Match() function |
80 | | does not return the results of the substructure match. The results |
81 | | from a match are stored in the OBSmartsPattern, and a call to |
82 | | GetMapList() or GetUMapList() must be made to extract the |
83 | | results. The function GetMapList() returns all matches of a |
84 | | particular pattern while GetUMapList() returns only the unique |
85 | | matches. For instance, the pattern [OD1]~C~[OD1] describes a |
86 | | carboxylate group. This pattern will match both atom number |
87 | | permutations of the carboxylate, and if GetMapList() is called, both |
88 | | matches will be returned. If GetUMapList() is called only unique |
89 | | matches of the pattern will be returned. A unique match is defined as |
90 | | one which does not cover the identical atoms that a previous match |
91 | | has covered. |
92 | | |
93 | | */ |
94 | | |
95 | 238 | #define ATOMPOOL 1 |
96 | 83 | #define BONDPOOL 1 |
97 | | |
98 | 7.87M | #define AE_ANDHI 1 |
99 | 10.0M | #define AE_ANDLO 2 |
100 | 4.98M | #define AE_OR 3 |
101 | 6.09M | #define AE_RECUR 4 |
102 | 2.28k | #define AE_NOT 5 |
103 | 1.26k | #define AE_TRUE 6 |
104 | 0 | #define AE_FALSE 7 |
105 | 145 | #define AE_AROMATIC 8 |
106 | 0 | #define AE_ALIPHATIC 9 |
107 | 0 | #define AE_CYCLIC 10 |
108 | 6 | #define AE_ACYCLIC 11 |
109 | 0 | #define AE_MASS 12 |
110 | 16.6M | #define AE_ELEM 13 |
111 | 2.22M | #define AE_AROMELEM 14 |
112 | 10.0M | #define AE_ALIPHELEM 15 |
113 | 0 | #define AE_HCOUNT 16 |
114 | 6 | #define AE_CHARGE 17 |
115 | 3 | #define AE_CONNECT 18 |
116 | 1.76M | #define AE_DEGREE 19 |
117 | 0 | #define AE_IMPLICIT 20 |
118 | 0 | #define AE_RINGS 21 |
119 | 0 | #define AE_SIZE 22 |
120 | 4.99k | #define AE_VALENCE 23 |
121 | 0 | #define AE_CHIRAL 24 |
122 | 9 | #define AE_HYB 25 |
123 | 0 | #define AE_RINGCONNECT 26 |
124 | | |
125 | 2 | #define AL_CLOCKWISE 1 |
126 | 2 | #define AL_ANTICLOCKWISE 2 |
127 | 0 | #define AL_UNSPECIFIED 0 |
128 | | |
129 | | /* Each BondExpr node is identified by an integer type field |
130 | | selected from the list of BE_ codes below. BE_ANDHI, |
131 | | BE_ANDLO and BE_OR are binary nodes of type BondExpr.bin, |
132 | | BE_NOT is unary node of type BondExpr.mon, and the remaining |
133 | | code are all leaf nodes. */ |
134 | | |
135 | 0 | #define BE_ANDHI 1 |
136 | 0 | #define BE_ANDLO 2 |
137 | 52 | #define BE_OR 3 |
138 | 0 | #define BE_NOT 4 |
139 | 14 | #define BE_ANY 5 |
140 | 106 | #define BE_DEFAULT 6 |
141 | 0 | #define BE_SINGLE 7 |
142 | 833 | #define BE_DOUBLE 8 |
143 | 443 | #define BE_TRIPLE 9 |
144 | 0 | #define BE_QUAD 10 |
145 | 20 | #define BE_AROM 11 |
146 | 0 | #define BE_RING 12 |
147 | 0 | #define BE_UP 13 |
148 | 0 | #define BE_DOWN 14 |
149 | 0 | #define BE_UPUNSPEC 15 |
150 | 0 | #define BE_DOWNUNSPEC 16 |
151 | | |
152 | | |
153 | | static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0); |
154 | | |
155 | | const int SmartsImplicitRef = -9999; // Used as a placeholder when recording atom nbrs for chiral atoms |
156 | | |
157 | | |
158 | | /*=============================*/ |
159 | | /* Standard Utility Routines */ |
160 | | /*=============================*/ |
161 | | |
162 | | static void FatalAllocationError( const char *ptr ) |
163 | 0 | { |
164 | 0 | stringstream errorMsg; |
165 | 0 | errorMsg << "Error: Unable to allocate" << ptr << endl; |
166 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); |
167 | 0 | } |
168 | | |
169 | | /*================================*/ |
170 | | /* Atom Expression Manipulation */ |
171 | | /*================================*/ |
172 | | |
173 | | static void FreePattern( Pattern* ); |
174 | | static Pattern *CopyPattern( Pattern* ); |
175 | | |
176 | | static AtomExpr *CopyAtomExpr( AtomExpr *expr ) |
177 | 0 | { |
178 | 0 | AtomExpr *result; |
179 | 0 |
|
180 | 0 | result = new AtomExpr; |
181 | 0 | result->type = expr->type; |
182 | 0 | switch( expr->type ) |
183 | 0 | { |
184 | 0 | case AE_ANDHI: |
185 | 0 | case AE_ANDLO: |
186 | 0 | case AE_OR: |
187 | 0 | result->bin.lft = CopyAtomExpr(expr->bin.lft); |
188 | 0 | result->bin.rgt = CopyAtomExpr(expr->bin.rgt); |
189 | 0 | break; |
190 | 0 |
|
191 | 0 | case AE_NOT: |
192 | 0 | result->mon.arg = CopyAtomExpr(expr->mon.arg); |
193 | 0 | break; |
194 | 0 |
|
195 | 0 | case AE_RECUR: |
196 | 0 | result->recur.recur = CopyPattern((Pattern*)expr->recur.recur); |
197 | 0 | break; |
198 | 0 |
|
199 | 0 | default: |
200 | 0 | result->leaf.value = expr->leaf.value; |
201 | 0 | break; |
202 | 0 | } |
203 | 0 | return result; |
204 | 0 | } |
205 | | |
206 | | static void FreeAtomExpr( AtomExpr *expr ) |
207 | 379 | { |
208 | 379 | if( expr ) |
209 | 379 | { |
210 | 379 | switch( expr->type ) |
211 | 379 | { |
212 | 70 | case AE_ANDHI: |
213 | 81 | case AE_ANDLO: |
214 | 99 | case AE_OR: |
215 | 99 | FreeAtomExpr(expr->bin.lft); |
216 | 99 | FreeAtomExpr(expr->bin.rgt); |
217 | 99 | break; |
218 | | |
219 | 2 | case AE_NOT: |
220 | 2 | FreeAtomExpr(expr->mon.arg); |
221 | 2 | break; |
222 | | |
223 | 33 | case AE_RECUR: |
224 | 33 | FreePattern((Pattern*)expr->recur.recur); |
225 | 33 | break; |
226 | 379 | } |
227 | | |
228 | 379 | delete expr; |
229 | 379 | } |
230 | 379 | } |
231 | | |
232 | | static AtomExpr *BuildAtomPred( int type ) |
233 | 26 | { |
234 | 26 | AtomExpr *result; |
235 | | |
236 | 26 | result = new AtomExpr; |
237 | 26 | result->leaf.type = type; |
238 | 26 | result->leaf.value = 0; |
239 | 26 | return result; |
240 | 26 | } |
241 | | |
242 | | static AtomExpr *BuildAtomLeaf( int type, int val ) |
243 | 219 | { |
244 | 219 | AtomExpr *result; |
245 | | |
246 | 219 | result = new AtomExpr; |
247 | 219 | result->leaf.type = type; |
248 | 219 | result->leaf.value = val; |
249 | 219 | return result; |
250 | 219 | } |
251 | | |
252 | | static AtomExpr *BuildAtomNot( AtomExpr *expr ) |
253 | 2 | { |
254 | 2 | AtomExpr *result; |
255 | | |
256 | 2 | result = new AtomExpr; |
257 | 2 | result->mon.type = AE_NOT; |
258 | 2 | result->mon.arg = expr; |
259 | 2 | return result; |
260 | 2 | } |
261 | | |
262 | | static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt ) |
263 | 99 | { |
264 | 99 | AtomExpr *result; |
265 | | |
266 | 99 | result = new AtomExpr; |
267 | 99 | result->bin.type = op; |
268 | 99 | result->bin.lft = lft; |
269 | 99 | result->bin.rgt = rgt; |
270 | 99 | return result; |
271 | 99 | } |
272 | | |
273 | | static AtomExpr *BuildAtomRecurs( Pattern *pat ) |
274 | 33 | { |
275 | 33 | AtomExpr *result; |
276 | | |
277 | 33 | result = new AtomExpr; |
278 | 33 | result->recur.type = AE_RECUR; |
279 | 33 | result->recur.recur = (void*)pat; |
280 | 33 | return result; |
281 | 33 | } |
282 | | |
283 | | static AtomExpr *GenerateElement( int elem ) |
284 | 74 | { |
285 | 74 | return BuildAtomLeaf(AE_ELEM,elem); |
286 | 74 | } |
287 | | |
288 | | static AtomExpr *GenerateAromElem( int elem, int flag ) |
289 | 76 | { |
290 | 76 | return flag ? BuildAtomLeaf(AE_AROMELEM,elem) |
291 | 76 | : BuildAtomLeaf(AE_ALIPHELEM,elem); |
292 | 76 | } |
293 | | |
294 | | /*================================*/ |
295 | | /* Bond Expression Manipulation */ |
296 | | /*================================*/ |
297 | | |
298 | | static BondExpr *CopyBondExpr( BondExpr *expr ) |
299 | 0 | { |
300 | 0 | BondExpr *result; |
301 | 0 |
|
302 | 0 | result = new BondExpr; |
303 | 0 | result->type = expr->type; |
304 | 0 | switch( expr->type ) |
305 | 0 | { |
306 | 0 | case BE_ANDHI: |
307 | 0 | case BE_ANDLO: |
308 | 0 | case BE_OR: |
309 | 0 | result->bin.lft = CopyBondExpr(expr->bin.lft); |
310 | 0 | result->bin.rgt = CopyBondExpr(expr->bin.rgt); |
311 | 0 | break; |
312 | 0 |
|
313 | 0 | case BE_NOT: |
314 | 0 | result->mon.arg = CopyBondExpr(expr->mon.arg); |
315 | 0 | break; |
316 | 0 | } |
317 | 0 | return result; |
318 | 0 | } |
319 | | |
320 | | /** |
321 | | * Check if two BondExpr objects are the same. This is used for ring closures |
322 | | * to identify invalid SMARTS like: |
323 | | * |
324 | | * C-1CCCCC#1 |
325 | | * C=1CCCCC:1 |
326 | | * |
327 | | * However, the SMARTS below are valid and the bond expression next to the the |
328 | | * second closure digit is used. |
329 | | * |
330 | | * C1CCCCC#1 |
331 | | * C1CCCCC=1 |
332 | | */ |
333 | | static bool EquivalentBondExpr( BondExpr *expr1, BondExpr *expr2 ) |
334 | 0 | { |
335 | 0 | if (expr1 == nullptr && expr2 == nullptr) |
336 | 0 | return true; |
337 | 0 | if (expr1 == nullptr && expr2 != nullptr) |
338 | 0 | return false; |
339 | 0 | if (expr1 != nullptr && expr2 == nullptr) |
340 | 0 | return false; |
341 | | |
342 | 0 | if (expr1->type != expr2->type) |
343 | 0 | return false; |
344 | | |
345 | 0 | switch( expr1->type ) |
346 | 0 | { |
347 | 0 | case BE_ANDHI: |
348 | 0 | case BE_ANDLO: |
349 | 0 | case BE_OR: |
350 | 0 | return EquivalentBondExpr(expr1->bin.lft, expr2->bin.lft) && |
351 | 0 | EquivalentBondExpr(expr1->bin.rgt, expr2->bin.rgt); |
352 | | |
353 | 0 | case BE_NOT: |
354 | 0 | return EquivalentBondExpr(expr1->mon.arg, expr2->mon.arg); |
355 | 0 | } |
356 | 0 | return true; |
357 | 0 | } |
358 | | |
359 | | static void FreeBondExpr( BondExpr *expr ) |
360 | 65 | { |
361 | 65 | if( expr ) |
362 | 65 | { |
363 | 65 | switch( expr->type ) |
364 | 65 | { |
365 | 0 | case BE_ANDHI: |
366 | 0 | case BE_ANDLO: |
367 | 3 | case BE_OR: |
368 | 3 | FreeBondExpr(expr->bin.lft); |
369 | 3 | FreeBondExpr(expr->bin.rgt); |
370 | 3 | break; |
371 | | |
372 | 0 | case BE_NOT: |
373 | 0 | FreeBondExpr(expr->mon.arg); |
374 | 0 | break; |
375 | 65 | } |
376 | | |
377 | 65 | delete expr; |
378 | 65 | } |
379 | 65 | } |
380 | | |
381 | | static BondExpr *BuildBondLeaf( int type ) |
382 | 62 | { |
383 | 62 | BondExpr *result; |
384 | | |
385 | 62 | result = new BondExpr; |
386 | 62 | result->type = type; |
387 | 62 | return result; |
388 | 62 | } |
389 | | |
390 | | static BondExpr *BuildBondNot( BondExpr *expr ) |
391 | 0 | { |
392 | 0 | BondExpr *result; |
393 | |
|
394 | 0 | result = new BondExpr; |
395 | 0 | result->mon.type = BE_NOT; |
396 | 0 | result->mon.arg = expr; |
397 | 0 | return result; |
398 | 0 | } |
399 | | |
400 | | static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt ) |
401 | 3 | { |
402 | 3 | BondExpr *result; |
403 | | |
404 | 3 | result = new BondExpr; |
405 | 3 | result->bin.type = op; |
406 | 3 | result->bin.lft = lft; |
407 | 3 | result->bin.rgt = rgt; |
408 | 3 | return result; |
409 | 3 | } |
410 | | |
411 | | static BondExpr *GenerateDefaultBond( void ) |
412 | 19 | { |
413 | 19 | return BuildBondLeaf(BE_DEFAULT); |
414 | 19 | } |
415 | | |
416 | | /*===============================*/ |
417 | | /* SMARTS Pattern Manipulation */ |
418 | | /*===============================*/ |
419 | | |
420 | | static Pattern *AllocPattern( void ) |
421 | 120 | { |
422 | 120 | Pattern *ptr; |
423 | | |
424 | 120 | ptr = new Pattern; |
425 | 120 | if( !ptr ) { |
426 | 0 | FatalAllocationError("pattern"); |
427 | 0 | return nullptr; |
428 | 0 | } |
429 | | |
430 | 120 | ptr->atom = nullptr; |
431 | 120 | ptr->aalloc = 0; |
432 | 120 | ptr->acount = 0; |
433 | | |
434 | 120 | ptr->bond = nullptr; |
435 | 120 | ptr->balloc = 0; |
436 | 120 | ptr->bcount = 0; |
437 | | |
438 | 120 | ptr->parts = 1; |
439 | | |
440 | 120 | ptr->hasExplicitH=false; |
441 | 120 | return ptr; |
442 | 120 | } |
443 | | |
444 | | static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb) |
445 | 179 | { |
446 | 179 | int index,size; |
447 | | |
448 | 179 | if (!pat) |
449 | 0 | return -1; // should never happen |
450 | | |
451 | 179 | if( pat->acount == pat->aalloc ) |
452 | 179 | { |
453 | 179 | pat->aalloc += ATOMPOOL; |
454 | 179 | size = (int)(pat->aalloc*sizeof(AtomSpec)); |
455 | 179 | if( pat->atom ) |
456 | 59 | { |
457 | 59 | AtomSpec *tmp = new AtomSpec[pat->aalloc]; |
458 | 59 | copy(pat->atom, pat->atom + pat->aalloc - ATOMPOOL, tmp); |
459 | 59 | delete [] pat->atom; |
460 | 59 | pat->atom = tmp; |
461 | 59 | } |
462 | 120 | else |
463 | 120 | pat->atom = new AtomSpec[pat->aalloc]; |
464 | 179 | if( !pat->atom ) |
465 | 0 | FatalAllocationError("atom pool"); |
466 | 179 | } |
467 | | |
468 | 179 | index = pat->acount++; |
469 | 179 | pat->atom[index].part = part; |
470 | 179 | pat->atom[index].expr = expr; |
471 | 179 | pat->atom[index].vb = vb; //std::vector binding |
472 | | |
473 | 179 | return index; |
474 | 179 | } |
475 | | |
476 | | static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst ) |
477 | 59 | { |
478 | 59 | int index,size; |
479 | | |
480 | 59 | if (!pat) |
481 | 0 | return -1; // should never happen |
482 | | |
483 | 59 | if( pat->bcount == pat->balloc ) |
484 | 59 | { |
485 | 59 | pat->balloc += BONDPOOL; |
486 | 59 | size = (int)(pat->balloc*sizeof(BondSpec)); |
487 | 59 | if( pat->bond ) |
488 | 24 | { |
489 | 24 | BondSpec *tmp = new BondSpec[pat->balloc]; |
490 | 24 | copy(pat->bond, pat->bond + pat->balloc - BONDPOOL, tmp); |
491 | 24 | delete [] pat->bond; |
492 | 24 | pat->bond = tmp; |
493 | 24 | } |
494 | 35 | else |
495 | 35 | pat->bond = new BondSpec[pat->balloc]; |
496 | 59 | if( !pat->bond ) |
497 | 0 | FatalAllocationError("bond pool"); |
498 | 59 | } |
499 | | |
500 | 59 | index = pat->bcount++; |
501 | 59 | pat->bond[index].expr = expr; |
502 | 59 | pat->bond[index].src = src; |
503 | 59 | pat->bond[index].dst = dst; |
504 | 59 | return(index); |
505 | 59 | } |
506 | | |
507 | | static Pattern *CopyPattern( Pattern *pat ) |
508 | 0 | { |
509 | 0 | Pattern *result; |
510 | 0 | AtomExpr *aexpr; |
511 | 0 | BondExpr *bexpr; |
512 | 0 | int i; |
513 | 0 |
|
514 | 0 | result = AllocPattern(); |
515 | 0 | result->parts = pat->parts; |
516 | 0 | for( i=0; i<pat->acount; i++ ) |
517 | 0 | { |
518 | 0 | aexpr = CopyAtomExpr(pat->atom[i].expr); |
519 | 0 | CreateAtom(result,aexpr,pat->atom[i].part); |
520 | 0 | } |
521 | 0 |
|
522 | 0 | for( i=0; i<pat->bcount; i++ ) |
523 | 0 | { |
524 | 0 | bexpr = CopyBondExpr(pat->bond[i].expr); |
525 | 0 | CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst); |
526 | 0 | } |
527 | 0 |
|
528 | 0 | return result; |
529 | 0 | } |
530 | | |
531 | | static void FreePattern( Pattern *pat ) |
532 | 120 | { |
533 | 120 | int i; |
534 | | |
535 | 120 | if( pat ) |
536 | 120 | { |
537 | 120 | if( pat->aalloc ) |
538 | 120 | { |
539 | 299 | for( i=0; i<pat->acount; i++ ) |
540 | 179 | FreeAtomExpr(pat->atom[i].expr); |
541 | 120 | if (pat->atom != nullptr) { |
542 | | //free(pat->atom); |
543 | 120 | delete [] pat->atom; |
544 | 120 | pat->atom = nullptr; |
545 | 120 | } |
546 | 120 | } |
547 | | |
548 | 120 | if( pat->balloc ) |
549 | 35 | { |
550 | 94 | for( i=0; i<pat->bcount; i++ ) |
551 | 59 | FreeBondExpr(pat->bond[i].expr); |
552 | 35 | if (pat->bond != nullptr) { |
553 | | //free(pat->bond); |
554 | 35 | delete [] pat->bond; |
555 | 35 | pat->bond = nullptr; |
556 | 35 | } |
557 | 35 | } |
558 | 120 | delete pat; |
559 | 120 | pat = nullptr; |
560 | 120 | } |
561 | 120 | } |
562 | | |
563 | | /*=========================*/ |
564 | | /* SMARTS Syntax Parsing */ |
565 | | /*=========================*/ |
566 | | |
567 | | |
568 | | Pattern *OBSmartsPattern::SMARTSError( Pattern *pat ) |
569 | 0 | { |
570 | 0 | stringstream errorMsg; |
571 | 0 | errorMsg << "SMARTS Error:\n" << MainPtr << endl; |
572 | 0 | errorMsg << setw(LexPtr-MainPtr+1) << '^' << endl; |
573 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError, onceOnly); |
574 | |
|
575 | 0 | FreePattern(pat); |
576 | 0 | return nullptr; |
577 | 0 | } |
578 | | |
579 | | AtomExpr *OBSmartsPattern::ParseSimpleAtomPrimitive( void ) |
580 | 49 | { |
581 | 49 | switch( *LexPtr++ ) |
582 | 49 | { |
583 | 17 | case '*': |
584 | 17 | return BuildAtomPred(AE_TRUE); |
585 | 0 | case 'A': |
586 | 0 | return BuildAtomPred(AE_ALIPHATIC); |
587 | 0 | case 'B': |
588 | 0 | if( *LexPtr == 'r' ) |
589 | 0 | { |
590 | 0 | LexPtr++; |
591 | 0 | return GenerateElement(35); |
592 | 0 | } |
593 | 0 | return GenerateElement(5); |
594 | 10 | case 'C': |
595 | 10 | if( *LexPtr == 'l' ) |
596 | 0 | { |
597 | 0 | LexPtr++; |
598 | 0 | return GenerateElement(17); |
599 | 0 | } |
600 | 10 | return GenerateAromElem(6,false); |
601 | 0 | case 'F': |
602 | 0 | return GenerateElement( 9); |
603 | 0 | case 'I': |
604 | 0 | return GenerateElement(53); |
605 | 7 | case 'N': |
606 | 7 | return GenerateAromElem(7,false); |
607 | 10 | case 'O': |
608 | 10 | return GenerateAromElem(8,false); |
609 | 0 | case 'P': |
610 | 0 | return GenerateAromElem(15, false); |
611 | 2 | case 'S': |
612 | 2 | return GenerateAromElem(16,false); |
613 | 3 | case 'a': |
614 | 3 | if( *LexPtr == 's' ) |
615 | 0 | { |
616 | 0 | LexPtr++; |
617 | 0 | return GenerateAromElem(33, true); |
618 | 0 | } |
619 | 3 | return BuildAtomPred(AE_AROMATIC); |
620 | 0 | case 'c': |
621 | 0 | return GenerateAromElem( 6,true); |
622 | 0 | case 'n': |
623 | 0 | return GenerateAromElem( 7,true); |
624 | 0 | case 'o': |
625 | 0 | return GenerateAromElem( 8,true); |
626 | 0 | case 'p': |
627 | 0 | return GenerateAromElem(15,true); |
628 | 0 | case 's': |
629 | 0 | if( *LexPtr == 'e' ) |
630 | 0 | { |
631 | 0 | LexPtr++; |
632 | 0 | return GenerateAromElem(34, true); |
633 | 0 | } |
634 | 0 | return GenerateAromElem(16,true); |
635 | 49 | } |
636 | 0 | LexPtr--; |
637 | 0 | return nullptr; |
638 | 49 | } |
639 | | |
640 | | AtomExpr *OBSmartsPattern::ParseComplexAtomPrimitive( void ) |
641 | 229 | { |
642 | 229 | Pattern *pat; |
643 | 229 | int index; |
644 | | |
645 | 229 | switch( *LexPtr++ ) |
646 | 229 | { |
647 | 46 | case('#'): |
648 | 46 | if( !isdigit(*LexPtr) ) |
649 | 0 | return nullptr; |
650 | | |
651 | 46 | index = 0; |
652 | 110 | while( isdigit(*LexPtr) ) |
653 | 64 | index = index*10 + ((*LexPtr++)-'0'); |
654 | 46 | if( index > 255 ) |
655 | 0 | { |
656 | 0 | LexPtr--; |
657 | 0 | return nullptr; |
658 | 0 | } |
659 | 46 | return( GenerateElement(index) ); |
660 | | |
661 | 33 | case('$'): |
662 | 33 | if( *LexPtr != '(' ) |
663 | 0 | return nullptr; |
664 | 33 | LexPtr++; |
665 | 33 | pat = ParseSMARTSPattern(); |
666 | | |
667 | 33 | if( !pat ) |
668 | 0 | return nullptr; |
669 | 33 | if( *LexPtr != ')' ) |
670 | 0 | { |
671 | 0 | FreePattern(pat); |
672 | 0 | return nullptr; |
673 | 0 | } |
674 | 33 | LexPtr++; |
675 | 33 | return( BuildAtomRecurs(pat) ); |
676 | | |
677 | 0 | case('*'): |
678 | 0 | return BuildAtomPred(AE_TRUE); |
679 | | |
680 | 4 | case('+'): |
681 | 4 | if( isdigit(*LexPtr) ) |
682 | 0 | { |
683 | 0 | index = 0; |
684 | 0 | while( isdigit(*LexPtr) ) |
685 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
686 | 0 | } |
687 | 4 | else |
688 | 4 | { |
689 | 4 | index = 1; |
690 | 4 | while( *LexPtr == '+' ) |
691 | 0 | { |
692 | 0 | LexPtr++; |
693 | 0 | index++; |
694 | 0 | } |
695 | 4 | } |
696 | 4 | return BuildAtomLeaf(AE_CHARGE,index); |
697 | | |
698 | 2 | case('-'): |
699 | 2 | if( isdigit(*LexPtr) ) |
700 | 0 | { |
701 | 0 | index = 0; |
702 | 0 | while( isdigit(*LexPtr) ) |
703 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
704 | 0 | } |
705 | 2 | else |
706 | 2 | { |
707 | 2 | index = 1; |
708 | 2 | while( *LexPtr == '-' ) |
709 | 0 | { |
710 | 0 | LexPtr++; |
711 | 0 | index++; |
712 | 0 | } |
713 | 2 | } |
714 | 2 | return BuildAtomLeaf(AE_CHARGE,-index); |
715 | | |
716 | 0 | case '@': |
717 | 0 | if (*LexPtr == '?') |
718 | 0 | { |
719 | 0 | LexPtr++; |
720 | 0 | return BuildAtomLeaf(AE_CHIRAL,AL_UNSPECIFIED); // unspecified |
721 | 0 | } |
722 | 0 | else if (*LexPtr != '@') |
723 | 0 | return BuildAtomLeaf(AE_CHIRAL,AL_ANTICLOCKWISE); |
724 | 0 | else |
725 | 0 | { |
726 | 0 | LexPtr++; |
727 | 0 | return BuildAtomLeaf(AE_CHIRAL,AL_CLOCKWISE); |
728 | 0 | } |
729 | | |
730 | 9 | case '^': |
731 | 9 | if (isdigit(*LexPtr)) |
732 | 9 | { |
733 | 9 | index = 0; |
734 | 18 | while( isdigit(*LexPtr) ) |
735 | 9 | index = index*10 + ((*LexPtr++)-'0'); |
736 | 9 | return BuildAtomLeaf(AE_HYB,index); |
737 | 9 | } |
738 | 0 | else |
739 | 0 | return BuildAtomLeaf(AE_HYB,1); |
740 | | |
741 | 0 | case('0'): case('1'): case('2'): case('3'): case('4'): |
742 | 0 | case('5'): case('6'): case('7'): case('8'): case('9'): |
743 | 0 | index = LexPtr[-1]-'0'; |
744 | 0 | while( isdigit(*LexPtr) ) |
745 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
746 | 0 | return BuildAtomLeaf(AE_MASS,index); |
747 | | |
748 | 3 | case('A'): |
749 | 3 | switch( *LexPtr++ ) |
750 | 3 | { |
751 | 0 | case('c'): return GenerateElement(89); |
752 | 0 | case('g'): return GenerateElement(47); |
753 | 2 | case('l'): return GenerateElement(13); |
754 | 0 | case('m'): return GenerateElement(95); |
755 | 0 | case('r'): return GenerateElement(18); |
756 | 1 | case('s'): return GenerateElement(33); |
757 | 0 | case('t'): return GenerateElement(85); |
758 | 0 | case('u'): return GenerateElement(79); |
759 | 3 | } |
760 | 0 | LexPtr--; |
761 | 0 | return BuildAtomPred(AE_ALIPHATIC); |
762 | | |
763 | 6 | case('B'): |
764 | 6 | switch( *LexPtr++ ) |
765 | 6 | { |
766 | 1 | case('a'): return GenerateElement(56); |
767 | 1 | case('e'): return GenerateElement( 4); |
768 | 2 | case('i'): return GenerateElement(83); |
769 | 0 | case('k'): return GenerateElement(97); |
770 | 0 | case('r'): return GenerateElement(35); |
771 | 6 | } |
772 | 2 | LexPtr--; |
773 | 2 | return GenerateElement(5); |
774 | | |
775 | 2 | case('C'): |
776 | 2 | switch( *LexPtr++ ) |
777 | 2 | { |
778 | 1 | case('a'): return GenerateElement(20); |
779 | 0 | case('d'): return GenerateElement(48); |
780 | 0 | case('e'): return GenerateElement(58); |
781 | 0 | case('f'): return GenerateElement(98); |
782 | 0 | case('l'): return GenerateElement(17); |
783 | 0 | case('m'): return GenerateElement(96); |
784 | 0 | case('o'): return GenerateElement(27); |
785 | 0 | case('r'): return GenerateElement(24); |
786 | 0 | case('s'): return GenerateElement(55); |
787 | 0 | case('u'): return GenerateElement(29); |
788 | 2 | } |
789 | 1 | LexPtr--; |
790 | 1 | return GenerateAromElem(6,false); |
791 | | |
792 | 50 | case('D'): |
793 | 50 | if( *LexPtr == 'y' ) |
794 | 0 | { |
795 | 0 | LexPtr++; |
796 | 0 | return GenerateElement(66); |
797 | 0 | } |
798 | 50 | else if( isdigit(*LexPtr) ) |
799 | 50 | { |
800 | 50 | index = 0; |
801 | 100 | while( isdigit(*LexPtr) ) |
802 | 50 | index = index*10 + ((*LexPtr++)-'0'); |
803 | 50 | return BuildAtomLeaf(AE_DEGREE,index); |
804 | 50 | } |
805 | 0 | return BuildAtomLeaf(AE_DEGREE,1); |
806 | | |
807 | 0 | case('E'): |
808 | 0 | if( *LexPtr == 'r' ) |
809 | 0 | { |
810 | 0 | LexPtr++; |
811 | 0 | return GenerateElement(68); |
812 | 0 | } |
813 | 0 | else if( *LexPtr == 's' ) |
814 | 0 | { |
815 | 0 | LexPtr++; |
816 | 0 | return GenerateElement(99); |
817 | 0 | } |
818 | 0 | else if( *LexPtr == 'u' ) |
819 | 0 | { |
820 | 0 | LexPtr++; |
821 | 0 | return GenerateElement(63); |
822 | 0 | } |
823 | 0 | break; |
824 | | |
825 | 0 | case('F'): |
826 | 0 | if( *LexPtr == 'e' ) |
827 | 0 | { |
828 | 0 | LexPtr++; |
829 | 0 | return GenerateElement(26); |
830 | 0 | } |
831 | 0 | else if( *LexPtr == 'm' ) |
832 | 0 | { |
833 | 0 | LexPtr++; |
834 | 0 | return GenerateElement(100); |
835 | 0 | } |
836 | 0 | else if( *LexPtr == 'r' ) |
837 | 0 | { |
838 | 0 | LexPtr++; |
839 | 0 | return GenerateElement(87); |
840 | 0 | } |
841 | 0 | return GenerateElement(9); |
842 | | |
843 | 2 | case('G'): |
844 | 2 | if( *LexPtr == 'a' ) |
845 | 1 | { |
846 | 1 | LexPtr++; |
847 | 1 | return( GenerateElement(31) ); |
848 | 1 | } |
849 | 1 | else if( *LexPtr == 'd' ) |
850 | 0 | { |
851 | 0 | LexPtr++; |
852 | 0 | return( GenerateElement(64) ); |
853 | 0 | } |
854 | 1 | else if( *LexPtr == 'e' ) |
855 | 1 | { |
856 | 1 | LexPtr++; |
857 | 1 | return( GenerateElement(32) ); |
858 | 1 | } |
859 | 0 | break; |
860 | | |
861 | 0 | case('H'): |
862 | 0 | if( *LexPtr == 'e' ) |
863 | 0 | { |
864 | 0 | LexPtr++; |
865 | 0 | return( GenerateElement( 2) ); |
866 | 0 | } |
867 | 0 | else if( *LexPtr == 'f' ) |
868 | 0 | { |
869 | 0 | LexPtr++; |
870 | 0 | return( GenerateElement(72) ); |
871 | 0 | } |
872 | 0 | else if( *LexPtr == 'g' ) |
873 | 0 | { |
874 | 0 | LexPtr++; |
875 | 0 | return( GenerateElement(80) ); |
876 | 0 | } |
877 | 0 | else if( *LexPtr == 'o' ) |
878 | 0 | { |
879 | 0 | LexPtr++; |
880 | 0 | return( GenerateElement(67) ); |
881 | 0 | } |
882 | 0 | else if( isdigit(*LexPtr) ) |
883 | 0 | { |
884 | 0 | index = 0; |
885 | 0 | while( isdigit(*LexPtr) ) |
886 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
887 | 0 | return BuildAtomLeaf(AE_HCOUNT,index); |
888 | 0 | } |
889 | 0 | return BuildAtomLeaf(AE_HCOUNT,1); |
890 | | |
891 | 1 | case('I'): |
892 | 1 | if( *LexPtr == 'n' ) |
893 | 1 | { |
894 | 1 | LexPtr++; |
895 | 1 | return( GenerateElement(49) ); |
896 | 1 | } |
897 | 0 | else if( *LexPtr == 'r' ) |
898 | 0 | { |
899 | 0 | LexPtr++; |
900 | 0 | return( GenerateElement(77) ); |
901 | 0 | } |
902 | 0 | return( GenerateElement(53) ); |
903 | | |
904 | 0 | case('K'): |
905 | 0 | if( *LexPtr == 'r' ) |
906 | 0 | { |
907 | 0 | LexPtr++; |
908 | 0 | return( GenerateElement(36) ); |
909 | 0 | } |
910 | 0 | return( GenerateElement(19) ); |
911 | | |
912 | 0 | case('L'): |
913 | 0 | if( *LexPtr == 'a' ) |
914 | 0 | { |
915 | 0 | LexPtr++; |
916 | 0 | return( GenerateElement( 57) ); |
917 | 0 | } |
918 | 0 | else if( *LexPtr == 'i' ) |
919 | 0 | { |
920 | 0 | LexPtr++; |
921 | 0 | return( GenerateElement( 3) ); |
922 | 0 | } |
923 | 0 | else if( *LexPtr == 'r' ) |
924 | 0 | { |
925 | 0 | LexPtr++; |
926 | 0 | return( GenerateElement(103) ); |
927 | 0 | } |
928 | 0 | else if( *LexPtr == 'u' ) |
929 | 0 | { |
930 | 0 | LexPtr++; |
931 | 0 | return( GenerateElement( 71) ); |
932 | 0 | } |
933 | 0 | break; |
934 | | |
935 | 1 | case('M'): |
936 | 1 | if( *LexPtr == 'd' ) |
937 | 0 | { |
938 | 0 | LexPtr++; |
939 | 0 | return( GenerateElement(101) ); |
940 | 0 | } |
941 | 1 | else if( *LexPtr == 'g' ) |
942 | 1 | { |
943 | 1 | LexPtr++; |
944 | 1 | return( GenerateElement( 12) ); |
945 | 1 | } |
946 | 0 | else if( *LexPtr == 'n' ) |
947 | 0 | { |
948 | 0 | LexPtr++; |
949 | 0 | return( GenerateElement( 25) ); |
950 | 0 | } |
951 | 0 | else if( *LexPtr == 'o' ) |
952 | 0 | { |
953 | 0 | LexPtr++; |
954 | 0 | return( GenerateElement( 42) ); |
955 | 0 | } |
956 | 0 | break; |
957 | | |
958 | 15 | case('N'): |
959 | 15 | switch( *LexPtr++ ) |
960 | 15 | { |
961 | 0 | case('a'): return( GenerateElement( 11) ); |
962 | 0 | case('b'): return( GenerateElement( 41) ); |
963 | 0 | case('d'): return( GenerateElement( 60) ); |
964 | 0 | case('e'): return( GenerateElement( 10) ); |
965 | 0 | case('i'): return( GenerateElement( 28) ); |
966 | 0 | case('o'): return( GenerateElement(102) ); |
967 | 0 | case('p'): return( GenerateElement( 93) ); |
968 | 15 | } |
969 | 15 | LexPtr--; |
970 | 15 | return( GenerateAromElem(7,false) ); |
971 | | |
972 | 17 | case('O'): |
973 | 17 | if( *LexPtr == 's' ) |
974 | 0 | { |
975 | 0 | LexPtr++; |
976 | 0 | return( GenerateElement(76) ); |
977 | 0 | } |
978 | 17 | return( GenerateAromElem(8,false) ); |
979 | | |
980 | 6 | case('P'): |
981 | 6 | switch( *LexPtr++ ) |
982 | 6 | { |
983 | 0 | case('a'): return( GenerateElement(91) ); |
984 | 1 | case('b'): return( GenerateElement(82) ); |
985 | 0 | case('d'): return( GenerateElement(46) ); |
986 | 0 | case('m'): return( GenerateElement(61) ); |
987 | 1 | case('o'): return( GenerateElement(84) ); |
988 | 0 | case('r'): return( GenerateElement(59) ); |
989 | 0 | case('t'): return( GenerateElement(78) ); |
990 | 0 | case('u'): return( GenerateElement(94) ); |
991 | 6 | } |
992 | 4 | LexPtr--; |
993 | 4 | return( GenerateElement(15) ); |
994 | | |
995 | 1 | case('R'): |
996 | 1 | switch( *LexPtr++ ) |
997 | 1 | { |
998 | 1 | case('a'): return( GenerateElement(88) ); |
999 | 0 | case('b'): return( GenerateElement(37) ); |
1000 | 0 | case('e'): return( GenerateElement(75) ); |
1001 | 0 | case('h'): return( GenerateElement(45) ); |
1002 | 0 | case('n'): return( GenerateElement(86) ); |
1003 | 0 | case('u'): return( GenerateElement(44) ); |
1004 | 1 | } |
1005 | 0 | LexPtr--; |
1006 | 0 | if( isdigit(*LexPtr) ) |
1007 | 0 | { |
1008 | 0 | index = 0; |
1009 | 0 | while( isdigit(*LexPtr) ) |
1010 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
1011 | 0 | if( index == 0 ) |
1012 | 0 | return BuildAtomPred(AE_ACYCLIC); |
1013 | 0 | return BuildAtomLeaf(AE_RINGS,index); |
1014 | 0 | } |
1015 | 0 | return BuildAtomPred(AE_CYCLIC); |
1016 | | |
1017 | 11 | case('S'): |
1018 | 11 | switch( *LexPtr++ ) |
1019 | 11 | { |
1020 | 1 | case('b'): return( GenerateElement(51) ); |
1021 | 0 | case('c'): return( GenerateElement(21) ); |
1022 | 1 | case('e'): return( GenerateElement(34) ); |
1023 | 1 | case('i'): return( GenerateElement(14) ); |
1024 | 0 | case('m'): return( GenerateElement(62) ); |
1025 | 1 | case('n'): return( GenerateElement(50) ); |
1026 | 1 | case('r'): return( GenerateElement(38) ); |
1027 | 11 | } |
1028 | 6 | LexPtr--; |
1029 | 6 | return( GenerateAromElem(16,false) ); |
1030 | | |
1031 | 2 | case('T'): |
1032 | 2 | switch( *LexPtr++ ) |
1033 | 2 | { |
1034 | 0 | case('a'): return( GenerateElement(73) ); |
1035 | 0 | case('b'): return( GenerateElement(65) ); |
1036 | 0 | case('c'): return( GenerateElement(43) ); |
1037 | 1 | case('e'): return( GenerateElement(52) ); |
1038 | 0 | case('h'): return( GenerateElement(90) ); |
1039 | 0 | case('i'): return( GenerateElement(22) ); |
1040 | 1 | case('l'): return( GenerateElement(81) ); |
1041 | 0 | case('m'): return( GenerateElement(69) ); |
1042 | 2 | } |
1043 | 0 | LexPtr--; |
1044 | 0 | break; |
1045 | | |
1046 | 0 | case('U'): return( GenerateElement(92) ); |
1047 | 0 | case('V'): return( GenerateElement(23) ); |
1048 | 0 | case('W'): return( GenerateElement(74) ); |
1049 | | |
1050 | 3 | case('X'): |
1051 | 3 | if( *LexPtr == 'e' ) |
1052 | 0 | { |
1053 | 0 | LexPtr++; |
1054 | 0 | return( GenerateElement(54) ); |
1055 | 0 | } |
1056 | 3 | else if( isdigit(*LexPtr) ) |
1057 | 3 | { |
1058 | 3 | index = 0; |
1059 | 6 | while( isdigit(*LexPtr) ) |
1060 | 3 | index = index*10 + ((*LexPtr++)-'0'); |
1061 | 3 | if (index == 0) // default to 1 (if no number present) |
1062 | 0 | index = 1; |
1063 | 3 | return BuildAtomLeaf(AE_CONNECT,index); |
1064 | 3 | } |
1065 | 0 | return BuildAtomLeaf(AE_CONNECT,1); |
1066 | | |
1067 | 0 | case('Y'): |
1068 | 0 | if( *LexPtr == 'b' ) |
1069 | 0 | { |
1070 | 0 | LexPtr++; |
1071 | 0 | return( GenerateElement(70) ); |
1072 | 0 | } |
1073 | 0 | return( GenerateElement(39) ); |
1074 | | |
1075 | 0 | case('Z'): |
1076 | 0 | if( *LexPtr == 'n' ) |
1077 | 0 | { |
1078 | 0 | LexPtr++; |
1079 | 0 | return GenerateElement(30); |
1080 | 0 | } |
1081 | 0 | else if( *LexPtr == 'r' ) |
1082 | 0 | { |
1083 | 0 | LexPtr++; |
1084 | 0 | return GenerateElement(40); |
1085 | 0 | } |
1086 | 0 | break; |
1087 | | |
1088 | 0 | case('a'): |
1089 | 0 | if( *LexPtr == 's' ) |
1090 | 0 | { |
1091 | 0 | LexPtr++; |
1092 | 0 | return GenerateAromElem(33,true); |
1093 | 0 | } |
1094 | 0 | return BuildAtomPred(AE_AROMATIC); |
1095 | | |
1096 | 2 | case('c'): |
1097 | 2 | return GenerateAromElem(6,true); |
1098 | | |
1099 | 0 | case('h'): |
1100 | 0 | if( isdigit(*LexPtr) ) |
1101 | 0 | { |
1102 | 0 | index = 0; |
1103 | 0 | while( isdigit(*LexPtr) ) |
1104 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
1105 | 0 | } |
1106 | 0 | else |
1107 | 0 | index = 1; |
1108 | 0 | return BuildAtomLeaf(AE_IMPLICIT,index); |
1109 | | |
1110 | 2 | case('n'): return GenerateAromElem(7,true); |
1111 | 1 | case('o'): return GenerateAromElem(8,true); |
1112 | 0 | case('p'): return GenerateAromElem(15,true); |
1113 | | |
1114 | 6 | case('r'): |
1115 | 6 | if( isdigit(*LexPtr) ) |
1116 | 6 | { |
1117 | 6 | index = 0; |
1118 | 12 | while( isdigit(*LexPtr) ) |
1119 | 6 | index = index*10 + ((*LexPtr++)-'0'); |
1120 | 6 | if( index == 0 ) |
1121 | 6 | return BuildAtomPred(AE_ACYCLIC); |
1122 | 0 | return BuildAtomLeaf(AE_SIZE,index); |
1123 | 6 | } |
1124 | 0 | return BuildAtomPred(AE_CYCLIC); |
1125 | | |
1126 | 3 | case('s'): |
1127 | 3 | if( *LexPtr == 'e' ) |
1128 | 1 | { |
1129 | 1 | LexPtr++; |
1130 | 1 | return GenerateAromElem(34,true); |
1131 | 1 | } |
1132 | 2 | return GenerateAromElem(16,true); |
1133 | | |
1134 | 1 | case('v'): |
1135 | 1 | if( isdigit(*LexPtr) ) |
1136 | 1 | { |
1137 | 1 | index = 0; |
1138 | 2 | while( isdigit(*LexPtr) ) |
1139 | 1 | index = index*10 + ((*LexPtr++)-'0'); |
1140 | 1 | return BuildAtomLeaf(AE_VALENCE,index); |
1141 | 1 | } |
1142 | 0 | return BuildAtomLeaf(AE_VALENCE,1); |
1143 | | |
1144 | 0 | case('x'): |
1145 | 0 | if( isdigit(*LexPtr) ) |
1146 | 0 | { |
1147 | 0 | index = 0; |
1148 | 0 | while( isdigit(*LexPtr) ) |
1149 | 0 | index = index*10 + ((*LexPtr++)-'0'); |
1150 | 0 | return BuildAtomLeaf(AE_RINGCONNECT,index); |
1151 | 0 | } |
1152 | 0 | return BuildAtomPred(AE_CYCLIC); |
1153 | 229 | } |
1154 | 0 | LexPtr--; |
1155 | 0 | return nullptr; |
1156 | 229 | } |
1157 | | |
1158 | | AtomExpr *OBSmartsPattern::ParseAtomExpr( int level ) |
1159 | 661 | { |
1160 | 661 | AtomExpr *expr1 = nullptr; |
1161 | 661 | AtomExpr *expr2 = nullptr; |
1162 | 661 | char *prev; |
1163 | | |
1164 | 661 | switch( level ) |
1165 | 661 | { |
1166 | 130 | case(0): /* Low Precedence Conjunction */ |
1167 | 130 | if( !(expr1=ParseAtomExpr(1)) ) |
1168 | 0 | return nullptr; |
1169 | | |
1170 | 141 | while( *LexPtr == ';' ) |
1171 | 11 | { |
1172 | 11 | LexPtr++; |
1173 | 11 | if( !(expr2=ParseAtomExpr(1)) ) |
1174 | 0 | { |
1175 | 0 | FreeAtomExpr(expr1); |
1176 | 0 | return nullptr; |
1177 | 0 | } |
1178 | 11 | expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2); |
1179 | 11 | } |
1180 | 130 | return expr1; |
1181 | | |
1182 | 141 | case(1): /* Disjunction */ |
1183 | 141 | if( !(expr1=ParseAtomExpr(2)) ) |
1184 | 0 | return nullptr; |
1185 | | |
1186 | 159 | while( *LexPtr == ',' ) |
1187 | 18 | { |
1188 | 18 | LexPtr++; |
1189 | 18 | if( !(expr2=ParseAtomExpr(2)) ) |
1190 | 0 | { |
1191 | 0 | FreeAtomExpr(expr1); |
1192 | 0 | return nullptr; |
1193 | 0 | } |
1194 | 18 | expr1 = BuildAtomBin(AE_OR,expr1,expr2); |
1195 | 18 | } |
1196 | 141 | return( expr1 ); |
1197 | | |
1198 | 159 | case(2): /* High Precedence Conjunction */ |
1199 | 159 | if( !(expr1=ParseAtomExpr(3)) ) |
1200 | 0 | return nullptr; |
1201 | | |
1202 | 229 | while( (*LexPtr!=']') && (*LexPtr!=';') && |
1203 | 88 | (*LexPtr!=',') && *LexPtr ) |
1204 | 70 | { |
1205 | 70 | if( *LexPtr=='&' ) |
1206 | 0 | LexPtr++; |
1207 | 70 | prev = LexPtr; |
1208 | 70 | if( !(expr2=ParseAtomExpr(3)) ) |
1209 | 0 | { |
1210 | 0 | if( prev != LexPtr ) |
1211 | 0 | { |
1212 | 0 | FreeAtomExpr(expr1); |
1213 | 0 | return nullptr; |
1214 | 0 | } |
1215 | 0 | else |
1216 | 0 | return( expr1 ); |
1217 | 0 | } |
1218 | 70 | expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2); |
1219 | 70 | } |
1220 | 159 | return( expr1 ); |
1221 | | |
1222 | 231 | case(3): /* Negation or Primitive */ |
1223 | 231 | if( *LexPtr == '!' ) |
1224 | 2 | { |
1225 | 2 | LexPtr++; |
1226 | 2 | if( !(expr1=ParseAtomExpr(3)) ) |
1227 | 0 | return nullptr; |
1228 | 2 | return( BuildAtomNot(expr1) ); |
1229 | 2 | } |
1230 | 229 | return( ParseComplexAtomPrimitive() ); |
1231 | 661 | } |
1232 | 0 | return nullptr; |
1233 | 661 | } |
1234 | | |
1235 | | BondExpr *OBSmartsPattern::ParseBondPrimitive( void ) |
1236 | 83 | { |
1237 | 83 | char bsym = *LexPtr++; |
1238 | | |
1239 | 83 | switch(bsym) |
1240 | 83 | { |
1241 | 0 | case '-': return BuildBondLeaf(BE_SINGLE); |
1242 | 23 | case '=': return BuildBondLeaf(BE_DOUBLE); |
1243 | 4 | case '#': return BuildBondLeaf(BE_TRIPLE); |
1244 | 0 | case '$': return BuildBondLeaf(BE_QUAD); |
1245 | 2 | case ':': return BuildBondLeaf(BE_AROM); |
1246 | 0 | case '@': return BuildBondLeaf(BE_RING); |
1247 | 14 | case '~': return BuildBondLeaf(BE_ANY); |
1248 | | |
1249 | | // return BuildBondLeaf(*LexPtr == '?' ? BE_UPUNSPEC : BE_UP); |
1250 | 0 | case '/': return BuildBondLeaf(BE_SINGLE); |
1251 | | // return BuildBondLeaf(*LexPtr == '?' ? BE_DOWNUNSPEC : BE_DOWN); |
1252 | 0 | case '\\': return BuildBondLeaf(BE_SINGLE); |
1253 | 83 | } |
1254 | 40 | LexPtr--; |
1255 | 40 | return nullptr; |
1256 | 83 | } |
1257 | | |
1258 | | BondExpr *OBSmartsPattern::ParseBondExpr( int level ) |
1259 | 206 | { |
1260 | 206 | BondExpr *expr1 = nullptr; |
1261 | 206 | BondExpr *expr2 = nullptr; |
1262 | 206 | char *prev; |
1263 | | |
1264 | 206 | switch( level ) |
1265 | 206 | { |
1266 | 40 | case(0): /* Low Precedence Conjunction */ |
1267 | 40 | if( !(expr1=ParseBondExpr(1)) ) |
1268 | 0 | return nullptr; |
1269 | | |
1270 | 40 | while( *LexPtr == ';' ) |
1271 | 0 | { |
1272 | 0 | LexPtr++; |
1273 | 0 | if( !(expr2=ParseBondExpr(1)) ) |
1274 | 0 | { |
1275 | 0 | FreeBondExpr(expr1); |
1276 | 0 | return nullptr; |
1277 | 0 | } |
1278 | 0 | expr1 = BuildBondBin(BE_ANDLO,expr1,expr2); |
1279 | 0 | } |
1280 | 40 | return expr1; |
1281 | | |
1282 | 40 | case(1): /* Disjunction */ |
1283 | 40 | if( !(expr1=ParseBondExpr(2)) ) |
1284 | 0 | return nullptr; |
1285 | | |
1286 | 43 | while( *LexPtr == ',' ) |
1287 | 3 | { |
1288 | 3 | LexPtr++; |
1289 | 3 | if( !(expr2=ParseBondExpr(2)) ) |
1290 | 0 | { |
1291 | 0 | FreeBondExpr(expr1); |
1292 | 0 | return nullptr; |
1293 | 0 | } |
1294 | 3 | expr1 = BuildBondBin(BE_OR,expr1,expr2); |
1295 | 3 | } |
1296 | 40 | return expr1; |
1297 | | |
1298 | 43 | case(2): /* High Precedence Conjunction */ |
1299 | 43 | if( !(expr1=ParseBondExpr(3)) ) |
1300 | 0 | return nullptr; |
1301 | | |
1302 | 43 | while( (*LexPtr!=']') && (*LexPtr!=';') && |
1303 | 43 | (*LexPtr!=',') && *LexPtr ) |
1304 | 40 | { |
1305 | 40 | if( *LexPtr == '&' ) |
1306 | 0 | LexPtr++; |
1307 | 40 | prev = LexPtr; |
1308 | 40 | if( !(expr2=ParseBondExpr(3)) ) |
1309 | 40 | { |
1310 | 40 | if( prev != LexPtr ) |
1311 | 0 | { |
1312 | 0 | FreeBondExpr(expr1); |
1313 | 0 | return nullptr; |
1314 | 0 | } |
1315 | 40 | else |
1316 | 40 | return expr1; |
1317 | 40 | } |
1318 | 0 | expr1 = BuildBondBin(BE_ANDHI,expr1,expr2); |
1319 | 0 | } |
1320 | 3 | return expr1; |
1321 | | |
1322 | 83 | case(3): /* Negation or Primitive */ |
1323 | 83 | if( *LexPtr == '!' ) |
1324 | 0 | { |
1325 | 0 | LexPtr++; |
1326 | 0 | if( !(expr1=ParseBondExpr(3)) ) |
1327 | 0 | return nullptr; |
1328 | 0 | return BuildBondNot(expr1); |
1329 | 0 | } |
1330 | 83 | return ParseBondPrimitive(); |
1331 | 206 | } |
1332 | 0 | return nullptr; |
1333 | 206 | } |
1334 | | |
1335 | | int OBSmartsPattern::GetVectorBinding() |
1336 | 0 | { |
1337 | 0 | int vb=0; |
1338 | |
|
1339 | 0 | LexPtr++; //skip colon |
1340 | 0 | if(isdigit(*LexPtr)) |
1341 | 0 | { |
1342 | 0 | vb = 0; |
1343 | 0 | while( isdigit(*LexPtr) ) |
1344 | 0 | vb = vb*10 + ((*LexPtr++)-'0'); |
1345 | 0 | } |
1346 | |
|
1347 | 0 | return(vb); |
1348 | 0 | } |
1349 | | |
1350 | | Pattern *OBSmartsPattern::ParseSMARTSError( Pattern *pat, BondExpr *expr ) |
1351 | 0 | { |
1352 | 0 | if( expr ) |
1353 | 0 | FreeBondExpr(expr); |
1354 | 0 | return SMARTSError(pat); |
1355 | 0 | } |
1356 | | |
1357 | | Pattern *OBSmartsPattern::SMARTSParser( Pattern *pat, ParseState *stat, |
1358 | | int prev, int part ) |
1359 | 136 | { |
1360 | 136 | int vb = 0; |
1361 | 136 | AtomExpr *aexpr; |
1362 | 136 | BondExpr *bexpr; |
1363 | 136 | int index; |
1364 | | |
1365 | 136 | aexpr = nullptr; |
1366 | 136 | bexpr = nullptr; |
1367 | | |
1368 | 371 | while( *LexPtr ) |
1369 | 284 | { |
1370 | 284 | switch( *LexPtr++ ) |
1371 | 284 | { |
1372 | 0 | case('.'): |
1373 | 0 | return ParseSMARTSError(pat,bexpr); |
1374 | | |
1375 | 26 | case('-'): case('='): case('#'): case('$'): |
1376 | 40 | case(':'): case('~'): case('@'): |
1377 | 40 | case('/'): case('\\'): case('!'): |
1378 | 40 | LexPtr--; |
1379 | 40 | if( (prev==-1) || bexpr ) |
1380 | 0 | return ParseSMARTSError(pat,bexpr); |
1381 | 40 | if( !(bexpr=ParseBondExpr(0)) ) |
1382 | 0 | return ParseSMARTSError(pat,bexpr); |
1383 | 40 | break; |
1384 | | |
1385 | 40 | case('('): |
1386 | 16 | if( bexpr ) |
1387 | 0 | { |
1388 | 0 | LexPtr--; |
1389 | 0 | return ParseSMARTSError(pat,bexpr); |
1390 | 0 | } |
1391 | 16 | if( prev == -1 ) |
1392 | 0 | { |
1393 | 0 | index = pat->acount; |
1394 | 0 | pat = SMARTSParser(pat,stat,-1,part); |
1395 | 0 | if( !pat ) |
1396 | 0 | return nullptr; |
1397 | 0 | if( index == pat->acount ) |
1398 | 0 | return ParseSMARTSError(pat,bexpr); |
1399 | 0 | prev = index; |
1400 | 0 | } |
1401 | 16 | else |
1402 | 16 | { |
1403 | 16 | pat = SMARTSParser(pat,stat,prev,part); |
1404 | 16 | if( !pat ) |
1405 | 0 | return nullptr; |
1406 | 16 | } |
1407 | | |
1408 | 16 | if( *LexPtr != ')' ) |
1409 | 0 | return ParseSMARTSError(pat,bexpr); |
1410 | 16 | LexPtr++; |
1411 | 16 | break; |
1412 | | |
1413 | 49 | case(')'): LexPtr--; |
1414 | 49 | if( (prev==-1) || bexpr ) |
1415 | 0 | return ParseSMARTSError(pat,bexpr); |
1416 | 49 | return pat; |
1417 | | |
1418 | 0 | case('%'): if( prev == -1 ) |
1419 | 0 | { |
1420 | 0 | LexPtr--; |
1421 | 0 | return ParseSMARTSError(pat,bexpr); |
1422 | 0 | } |
1423 | | |
1424 | 0 | if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) ) |
1425 | 0 | { |
1426 | 0 | index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0'); |
1427 | 0 | LexPtr += 2; |
1428 | 0 | } |
1429 | 0 | else |
1430 | 0 | return ParseSMARTSError(pat,bexpr); |
1431 | | |
1432 | 0 | if( stat->closure[index] == -1 ) |
1433 | 0 | { |
1434 | 0 | stat->closord[index] = bexpr; |
1435 | 0 | stat->closure[index] = prev; |
1436 | 0 | } |
1437 | 0 | else if( stat->closure[index] != prev ) |
1438 | 0 | { |
1439 | 0 | if( !bexpr ) { |
1440 | 0 | if (!stat->closord[index]) { |
1441 | 0 | bexpr = GenerateDefaultBond(); |
1442 | 0 | FreeBondExpr(stat->closord[index]); |
1443 | 0 | } else |
1444 | 0 | bexpr = stat->closord[index]; |
1445 | 0 | } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index])) |
1446 | 0 | return ParseSMARTSError(pat,bexpr); |
1447 | | |
1448 | 0 | CreateBond(pat,bexpr,prev,stat->closure[index]); |
1449 | 0 | stat->closure[index] = -1; |
1450 | 0 | bexpr = nullptr; |
1451 | 0 | } |
1452 | 0 | else |
1453 | 0 | return ParseSMARTSError(pat,bexpr); |
1454 | 0 | break; |
1455 | | |
1456 | 0 | case('0'): case('1'): case('2'): |
1457 | 0 | case('3'): case('4'): case('5'): |
1458 | 0 | case('6'): case('7'): case('8'): |
1459 | 0 | case('9'): LexPtr--; |
1460 | 0 | if( prev == -1 ) |
1461 | 0 | return ParseSMARTSError(pat,bexpr); |
1462 | 0 | index = (*LexPtr++)-'0'; |
1463 | |
|
1464 | 0 | if( stat->closure[index] == -1 ) |
1465 | 0 | { // Ring opening |
1466 | 0 | stat->closord[index] = bexpr; |
1467 | 0 | stat->closure[index] = prev; |
1468 | 0 | pat->atom[prev].nbrs.push_back(-index); // Store the BC idx as a -ve |
1469 | 0 | bexpr = nullptr; |
1470 | 0 | } |
1471 | 0 | else if( stat->closure[index] != prev ) |
1472 | 0 | { // Ring closure |
1473 | 0 | if( !bexpr ) { |
1474 | 0 | if (!stat->closord[index]) { |
1475 | 0 | bexpr = GenerateDefaultBond(); |
1476 | 0 | FreeBondExpr(stat->closord[index]); |
1477 | 0 | } else |
1478 | 0 | bexpr = stat->closord[index]; |
1479 | 0 | } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index])) |
1480 | 0 | return ParseSMARTSError(pat,bexpr); |
1481 | | |
1482 | 0 | CreateBond(pat,bexpr,prev,stat->closure[index]); |
1483 | 0 | pat->atom[prev].nbrs.push_back(stat->closure[index]); |
1484 | 0 | for (unsigned int nbr_idx=0; nbr_idx < pat->atom[stat->closure[index]].nbrs.size(); ++nbr_idx) { |
1485 | 0 | if (pat->atom[stat->closure[index]].nbrs[nbr_idx] == -index) |
1486 | 0 | pat->atom[stat->closure[index]].nbrs[nbr_idx] = prev; |
1487 | 0 | } |
1488 | 0 | stat->closure[index] = -1; |
1489 | 0 | bexpr = nullptr; |
1490 | 0 | } |
1491 | 0 | else |
1492 | 0 | return ParseSMARTSError(pat,bexpr); |
1493 | 0 | break; |
1494 | | |
1495 | 130 | case('['): |
1496 | | // shortcut for '[H]' primitive (PR#1463791) |
1497 | 130 | if (*LexPtr == 'H' && *(LexPtr+1) == ']') |
1498 | 0 | { |
1499 | 0 | aexpr = GenerateElement(1); |
1500 | 0 | LexPtr++; // skip the 'H' |
1501 | 0 | pat->hasExplicitH = true; |
1502 | 0 | } |
1503 | 130 | else |
1504 | 130 | aexpr = ParseAtomExpr(0); |
1505 | 130 | vb = (*LexPtr == ':') ? GetVectorBinding():0; |
1506 | 130 | if( aexpr == nullptr || (*LexPtr!=']') ){ |
1507 | 0 | if (aexpr != nullptr){ |
1508 | 0 | FreeAtomExpr(aexpr); |
1509 | 0 | aexpr = nullptr; |
1510 | 0 | } |
1511 | |
|
1512 | 0 | return ParseSMARTSError(pat,bexpr); |
1513 | 0 | } |
1514 | 130 | index = CreateAtom(pat,aexpr,part,vb); |
1515 | 130 | if( prev != -1 ) |
1516 | 28 | { |
1517 | 28 | if( !bexpr ) |
1518 | 10 | bexpr = GenerateDefaultBond(); |
1519 | 28 | CreateBond(pat,bexpr,prev,index); |
1520 | 28 | pat->atom[index].nbrs.push_back(prev); |
1521 | 28 | pat->atom[prev].nbrs.push_back(index); |
1522 | 28 | bexpr = nullptr; |
1523 | 28 | } |
1524 | 130 | if(*(LexPtr-1) == 'H' && *(LexPtr-2) == '@' ) { // i.e. [C@H] or [C@@H] |
1525 | 0 | pat->atom[index].nbrs.push_back(SmartsImplicitRef); |
1526 | 0 | } |
1527 | 130 | prev = index; |
1528 | 130 | LexPtr++; |
1529 | 130 | break; |
1530 | | |
1531 | 49 | default: |
1532 | 49 | LexPtr--; |
1533 | 49 | aexpr = ParseSimpleAtomPrimitive(); |
1534 | 49 | if( !aexpr ) |
1535 | 0 | return ParseSMARTSError(pat,bexpr); |
1536 | 49 | index = CreateAtom(pat,aexpr,part); |
1537 | 49 | if( prev != -1 ) |
1538 | 31 | { |
1539 | 31 | if( !bexpr ) |
1540 | 9 | bexpr = GenerateDefaultBond(); |
1541 | 31 | CreateBond(pat,bexpr,prev,index); |
1542 | 31 | pat->atom[index].nbrs.push_back(prev); |
1543 | 31 | pat->atom[prev].nbrs.push_back(index); |
1544 | 31 | bexpr = nullptr; |
1545 | 31 | } |
1546 | 49 | prev = index; |
1547 | 284 | } |
1548 | 284 | } |
1549 | | |
1550 | 87 | if( (prev==-1) || bexpr ) |
1551 | 0 | return ParseSMARTSError(pat,bexpr); |
1552 | | |
1553 | 87 | return pat; |
1554 | 87 | } |
1555 | | |
1556 | | static void MarkGrowBonds(Pattern *pat) |
1557 | 120 | { |
1558 | 120 | int i; |
1559 | 120 | OBBitVec bv; |
1560 | | |
1561 | 179 | for (i = 0;i < pat->bcount;++i) |
1562 | 59 | { |
1563 | 59 | pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])? |
1564 | 59 | false:true; |
1565 | | |
1566 | 59 | bv.SetBitOn(pat->bond[i].src); |
1567 | 59 | bv.SetBitOn(pat->bond[i].dst); |
1568 | 59 | } |
1569 | 120 | } |
1570 | | |
1571 | | static int GetChiralFlag(AtomExpr *expr) |
1572 | 379 | { |
1573 | 379 | int tmp1,tmp2; |
1574 | | |
1575 | 379 | switch (expr->type) |
1576 | 379 | { |
1577 | 0 | case AE_CHIRAL: |
1578 | 0 | return expr->leaf.value; |
1579 | | |
1580 | 70 | case AE_ANDHI: |
1581 | 81 | case AE_ANDLO: |
1582 | 81 | tmp1 = GetChiralFlag(expr->bin.lft); |
1583 | 81 | tmp2 = GetChiralFlag(expr->bin.rgt); |
1584 | 81 | if (tmp1 == 0) return tmp2; |
1585 | 0 | if (tmp2 == 0) return tmp1; |
1586 | 0 | if (tmp1 == tmp2) return tmp1; |
1587 | 0 | break; |
1588 | | |
1589 | 18 | case AE_OR: |
1590 | 18 | tmp1 = GetChiralFlag(expr->bin.lft); |
1591 | 18 | tmp2 = GetChiralFlag(expr->bin.rgt); |
1592 | 18 | if (tmp1 == 0 || tmp2 == 0) return 0; |
1593 | 0 | if (tmp1 == tmp2) return tmp1; |
1594 | 0 | break; |
1595 | | |
1596 | 2 | case AE_NOT: |
1597 | | // Treat [!@] as [@@], and [!@@] as [@] |
1598 | 2 | tmp1 = GetChiralFlag(expr->mon.arg); |
1599 | 2 | if (tmp1 == AL_ANTICLOCKWISE) return AL_CLOCKWISE; |
1600 | 2 | if (tmp1 == AL_CLOCKWISE) return AL_ANTICLOCKWISE; |
1601 | 2 | break; |
1602 | 379 | } |
1603 | | |
1604 | 280 | return 0; |
1605 | 379 | } |
1606 | | |
1607 | | Pattern *OBSmartsPattern::ParseSMARTSPart( Pattern *result, int part ) |
1608 | 120 | { |
1609 | 120 | ParseState stat; |
1610 | 120 | int i,flag; |
1611 | | |
1612 | 12.1k | for( i=0; i<100; i++ ) |
1613 | 12.0k | stat.closure[i] = -1; |
1614 | | |
1615 | 120 | result = SMARTSParser(result,&stat,-1,part); |
1616 | | |
1617 | 120 | flag = false; |
1618 | 12.1k | for( i=0; i<100; i++ ) |
1619 | 12.0k | if( stat.closure[i] != -1 ) |
1620 | 0 | { |
1621 | 0 | FreeBondExpr(stat.closord[i]); |
1622 | 0 | flag = true; |
1623 | 0 | } |
1624 | | |
1625 | 120 | if( result ) |
1626 | 120 | { |
1627 | 120 | if( flag ) |
1628 | 0 | return(SMARTSError(result)); |
1629 | 120 | else |
1630 | 120 | { |
1631 | 120 | MarkGrowBonds(result); |
1632 | 120 | result->ischiral = false; |
1633 | 299 | for (i = 0;i < result->acount;++i) |
1634 | 179 | { |
1635 | 179 | result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr); |
1636 | 179 | if (result->atom[i].chiral_flag) |
1637 | 0 | result->ischiral = true; |
1638 | 179 | } |
1639 | 120 | return(result); |
1640 | 120 | } |
1641 | 120 | } |
1642 | 0 | else |
1643 | 0 | return nullptr; |
1644 | 120 | } |
1645 | | |
1646 | | |
1647 | | Pattern *OBSmartsPattern::ParseSMARTSPattern( void ) |
1648 | 120 | { |
1649 | 120 | Pattern *result; |
1650 | 120 | result = AllocPattern(); |
1651 | | |
1652 | 120 | while( *LexPtr == '(' ) |
1653 | 0 | { |
1654 | 0 | if (!result) |
1655 | 0 | return nullptr; // ensure we don't get a null dereference |
1656 | | |
1657 | 0 | LexPtr++; |
1658 | 0 | result = ParseSMARTSPart(result,result->parts); |
1659 | 0 | if( !result ) |
1660 | 0 | return nullptr; |
1661 | 0 | result->parts++; |
1662 | |
|
1663 | 0 | if( *LexPtr != ')' ) |
1664 | 0 | return SMARTSError(result); |
1665 | 0 | LexPtr++; |
1666 | |
|
1667 | 0 | if( !*LexPtr || (*LexPtr==')') ) |
1668 | 0 | return result; |
1669 | | |
1670 | 0 | if( *LexPtr != '.' ) |
1671 | 0 | return SMARTSError(result); |
1672 | | |
1673 | | // Here's where we'd handle fragments |
1674 | | // cerr << " conjunction " << LexPtr[0] << endl; |
1675 | 0 | LexPtr++; |
1676 | 0 | } |
1677 | | |
1678 | 120 | return ParseSMARTSPart(result,0); |
1679 | 120 | } |
1680 | | |
1681 | | Pattern *OBSmartsPattern::ParseSMARTSString( char *ptr ) |
1682 | 87 | { |
1683 | 87 | Pattern *result; |
1684 | | |
1685 | 87 | if( !ptr || !*ptr ) |
1686 | 0 | return nullptr; |
1687 | | |
1688 | 87 | LexPtr = MainPtr = ptr; |
1689 | 87 | result = ParseSMARTSPattern(); |
1690 | 87 | if( result && *LexPtr ) |
1691 | 0 | return SMARTSError(result); |
1692 | 87 | return result; |
1693 | 87 | } |
1694 | | |
1695 | | Pattern *OBSmartsPattern::ParseSMARTSRecord( char *ptr ) |
1696 | 87 | { |
1697 | 87 | char *src; |
1698 | | |
1699 | 87 | src = ptr; |
1700 | 956 | while( *src && !isspace(*src) ) |
1701 | 869 | src++; |
1702 | | |
1703 | 87 | if( isspace(*src) ) |
1704 | 0 | { |
1705 | 0 | *src++ = '\0'; |
1706 | 0 | while( isspace(*src) ) |
1707 | 0 | src++; |
1708 | 0 | } |
1709 | | |
1710 | 87 | return ParseSMARTSString(ptr); |
1711 | 87 | } |
1712 | | |
1713 | | //********************************** |
1714 | | //********Pattern Matching********** |
1715 | | //********************************** |
1716 | | |
1717 | | bool OBSmartsPattern::Init(const char *buffer) |
1718 | 0 | { |
1719 | 0 | delete[] _buffer; |
1720 | 0 | _buffer = new char[strlen(buffer) + 1]; |
1721 | 0 | strcpy(_buffer,buffer); |
1722 | |
|
1723 | 0 | if (_pat != nullptr) |
1724 | 0 | FreePattern(_pat); |
1725 | 0 | _pat = ParseSMARTSRecord(_buffer); |
1726 | 0 | _str = _buffer; |
1727 | |
|
1728 | 0 | return _pat != nullptr; |
1729 | 0 | } |
1730 | | |
1731 | | bool OBSmartsPattern::Init(const std::string &s) |
1732 | 87 | { |
1733 | 87 | delete[] _buffer; |
1734 | 87 | _buffer = new char[s.length() + 1]; |
1735 | 87 | strcpy(_buffer, s.c_str()); |
1736 | | |
1737 | 87 | if (_pat != nullptr) |
1738 | 0 | FreePattern(_pat); |
1739 | 87 | _pat = ParseSMARTSRecord(_buffer); |
1740 | 87 | _str = s; |
1741 | | |
1742 | 87 | return _pat != nullptr; |
1743 | 87 | } |
1744 | | |
1745 | | OBSmartsPattern::~OBSmartsPattern() |
1746 | 87 | { |
1747 | 87 | if (_pat) |
1748 | 87 | FreePattern(_pat); |
1749 | 87 | delete [] _buffer; |
1750 | 87 | } |
1751 | | |
1752 | | bool OBSmartsPattern::Match(OBMol &mol,bool single) |
1753 | 0 | { |
1754 | 0 | OBSmartsMatcher matcher; |
1755 | 0 | if (_pat == nullptr) |
1756 | 0 | return false; |
1757 | 0 | if(_pat->hasExplicitH) //The SMARTS pattern contains [H] |
1758 | 0 | { |
1759 | | //Do matching on a copy of mol with explicit hydrogens |
1760 | 0 | OBMol tmol = mol; |
1761 | 0 | tmol.AddHydrogens(false,false); |
1762 | 0 | return(matcher.match(tmol,_pat,_mlist,single)); |
1763 | 0 | } |
1764 | 0 | return(matcher.match(mol,_pat,_mlist,single)); |
1765 | 0 | } |
1766 | | |
1767 | | bool OBSmartsPattern::HasMatch(OBMol &mol) const |
1768 | 0 | { |
1769 | | //a convenience function |
1770 | 0 | std::vector<std::vector<int> > dummy; |
1771 | 0 | return Match(mol, dummy, Single); |
1772 | 0 | } |
1773 | | |
1774 | | bool OBSmartsPattern::Match(OBMol &mol, std::vector<std::vector<int> > & mlist, |
1775 | | MatchType mtype /*=All*/) const |
1776 | 236k | { |
1777 | 236k | OBSmartsMatcher matcher; |
1778 | 236k | mlist.clear(); |
1779 | 236k | if (_pat == nullptr) |
1780 | 0 | return false; |
1781 | 236k | if(_pat->hasExplicitH) //The SMARTS pattern contains [H] |
1782 | 0 | { |
1783 | | //Do matching on a copy of mol with explicit hydrogens |
1784 | 0 | OBMol tmol = mol; |
1785 | 0 | tmol.AddHydrogens(false,false); |
1786 | 0 | if(!matcher.match(tmol,_pat,mlist,mtype == Single)) |
1787 | 0 | return false; |
1788 | 0 | } |
1789 | 236k | else if(!matcher.match(mol,_pat,mlist,mtype == Single)) |
1790 | 232k | return false; |
1791 | | |
1792 | 3.23k | if((mtype == AllUnique) && mlist.size() > 1) |
1793 | 0 | { |
1794 | | //uniquify |
1795 | 0 | bool ok; |
1796 | 0 | OBBitVec bv; |
1797 | 0 | std::vector<OBBitVec> vbv; |
1798 | 0 | std::vector<std::vector<int> > ulist; |
1799 | 0 | std::vector<std::vector<int> >::iterator i; |
1800 | 0 | std::vector<OBBitVec>::iterator j; |
1801 | |
|
1802 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
1803 | 0 | { |
1804 | 0 | ok = true; |
1805 | 0 | bv.Clear(); |
1806 | 0 | bv.FromVecInt(*i); |
1807 | 0 | for (j = vbv.begin();j != vbv.end() && ok;++j) |
1808 | 0 | if ((*j) == bv) |
1809 | 0 | ok = false; |
1810 | |
|
1811 | 0 | if (ok) |
1812 | 0 | { |
1813 | 0 | ulist.push_back(*i); |
1814 | 0 | vbv.push_back(bv); |
1815 | 0 | } |
1816 | 0 | } |
1817 | |
|
1818 | 0 | mlist = ulist; |
1819 | 0 | } |
1820 | 3.23k | return true; |
1821 | 236k | } |
1822 | | |
1823 | | |
1824 | | bool OBSmartsPattern::RestrictedMatch(OBMol &mol, |
1825 | | std::vector<std::pair<int,int> > &pr, |
1826 | | bool single) |
1827 | 0 | { |
1828 | 0 | bool ok; |
1829 | 0 | std::vector<std::vector<int> > mlist; |
1830 | 0 | std::vector<std::vector<int> >::iterator i; |
1831 | 0 | std::vector<std::pair<int,int> >::iterator j; |
1832 | |
|
1833 | 0 | OBSmartsMatcher matcher; |
1834 | 0 | matcher.match(mol,_pat,mlist); |
1835 | 0 | _mlist.clear(); |
1836 | 0 | if (mlist.empty()) |
1837 | 0 | return(false); |
1838 | | |
1839 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
1840 | 0 | { |
1841 | 0 | ok = true; |
1842 | 0 | for (j = pr.begin();j != pr.end() && ok;++j) |
1843 | 0 | if ((*i)[j->first] != j->second) |
1844 | 0 | ok = false; |
1845 | |
|
1846 | 0 | if (ok) |
1847 | 0 | _mlist.push_back(*i); |
1848 | 0 | if (single && !_mlist.empty()) |
1849 | 0 | return(true); |
1850 | 0 | } |
1851 | | |
1852 | 0 | return((_mlist.empty()) ? false:true); |
1853 | 0 | } |
1854 | | |
1855 | | bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres, bool single) |
1856 | 0 | { |
1857 | 0 | bool ok; |
1858 | 0 | std::vector<int>::iterator j; |
1859 | 0 | std::vector<std::vector<int> > mlist; |
1860 | 0 | std::vector<std::vector<int> >::iterator i; |
1861 | |
|
1862 | 0 | OBSmartsMatcher matcher; |
1863 | 0 | matcher.match(mol,_pat,mlist); |
1864 | |
|
1865 | 0 | _mlist.clear(); |
1866 | 0 | if (mlist.empty()) |
1867 | 0 | return(false); |
1868 | | |
1869 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
1870 | 0 | { |
1871 | 0 | ok = true; |
1872 | 0 | for (j = i->begin();j != i->end();++j) |
1873 | 0 | if (!vres[*j]) |
1874 | 0 | { |
1875 | 0 | ok = false; |
1876 | 0 | break; |
1877 | 0 | } |
1878 | 0 | if (!ok) |
1879 | 0 | continue; |
1880 | | |
1881 | 0 | _mlist.push_back(*i); |
1882 | 0 | if (single && !_mlist.empty()) |
1883 | 0 | return(true); |
1884 | 0 | } |
1885 | | |
1886 | 0 | return((_mlist.empty()) ? false:true); |
1887 | 0 | } |
1888 | | |
1889 | | void OBSmartsMatcher::SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab, |
1890 | | const Pattern *pat, OBMol &mol) |
1891 | 0 | { |
1892 | 0 | int i; |
1893 | |
|
1894 | 0 | ttab.resize(pat->acount); |
1895 | 0 | for (i = 0;i < pat->acount;++i) |
1896 | 0 | ttab[i].resize(mol.NumAtoms()+1); |
1897 | |
|
1898 | 0 | OBAtom *atom; |
1899 | 0 | std::vector<OBAtom*>::iterator j; |
1900 | 0 | for (i = 0;i < pat->acount;++i) |
1901 | 0 | for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j)) |
1902 | 0 | if (EvalAtomExpr(pat->atom[0].expr,atom)) |
1903 | 0 | ttab[i][atom->GetIdx()] = true; |
1904 | 0 | } |
1905 | | |
1906 | | void OBSmartsMatcher::FastSingleMatch(OBMol &mol, const Pattern *pat, |
1907 | | std::vector<std::vector<int> > &mlist) |
1908 | 0 | { |
1909 | 0 | OBAtom *atom,*a1,*nbr; |
1910 | 0 | std::vector<OBAtom*>::iterator i; |
1911 | |
|
1912 | 0 | OBBitVec bv(mol.NumAtoms()+1); |
1913 | 0 | std::vector<int> map; |
1914 | 0 | map.resize(pat->acount); |
1915 | 0 | std::vector<std::vector<OBBond*>::iterator> vi; |
1916 | 0 | std::vector<bool> vif; |
1917 | |
|
1918 | 0 | if (pat->bcount) |
1919 | 0 | { |
1920 | 0 | vif.resize(pat->bcount); |
1921 | 0 | vi.resize(pat->bcount); |
1922 | 0 | } |
1923 | |
|
1924 | 0 | int bcount; |
1925 | 0 | for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i)) |
1926 | 0 | if (EvalAtomExpr(pat->atom[0].expr,atom)) |
1927 | 0 | { |
1928 | 0 | map[0] = atom->GetIdx(); |
1929 | 0 | if (pat->bcount) |
1930 | 0 | vif[0] = false; |
1931 | 0 | bv.Clear(); |
1932 | 0 | bv.SetBitOn(atom->GetIdx()); |
1933 | |
|
1934 | 0 | for (bcount=0;bcount >=0;) |
1935 | 0 | { |
1936 | | //***entire pattern matched*** |
1937 | 0 | if (bcount == pat->bcount) //save full match here |
1938 | 0 | { |
1939 | 0 | mlist.push_back(map); |
1940 | 0 | bcount--; |
1941 | 0 | return; //found a single match |
1942 | 0 | } |
1943 | | |
1944 | | //***match the next bond*** |
1945 | 0 | if (!pat->bond[bcount].grow) //just check bond here |
1946 | 0 | { |
1947 | 0 | if ( !vif[bcount] ) |
1948 | 0 | { |
1949 | 0 | OBBond *bond = mol.GetBond(map[pat->bond[bcount].src], |
1950 | 0 | map[pat->bond[bcount].dst]); |
1951 | 0 | if (bond && EvalBondExpr(pat->bond[bcount].expr,bond)) |
1952 | 0 | { |
1953 | 0 | vif[bcount++] = true; |
1954 | 0 | if (bcount < pat->bcount) |
1955 | 0 | vif[bcount] = false; |
1956 | 0 | } |
1957 | 0 | else |
1958 | 0 | bcount--; |
1959 | 0 | } |
1960 | 0 | else //bond must have already been visited - backtrack |
1961 | 0 | bcount--; |
1962 | 0 | } |
1963 | 0 | else //need to map atom and check bond |
1964 | 0 | { |
1965 | 0 | a1 = mol.GetAtom(map[pat->bond[bcount].src]); |
1966 | |
|
1967 | 0 | if (!vif[bcount]) //figure out which nbr atom we are mapping |
1968 | 0 | { |
1969 | 0 | nbr = a1->BeginNbrAtom(vi[bcount]); |
1970 | 0 | } |
1971 | 0 | else |
1972 | 0 | { |
1973 | 0 | bv.SetBitOff(map[pat->bond[bcount].dst]); |
1974 | 0 | nbr = a1->NextNbrAtom(vi[bcount]); |
1975 | 0 | } |
1976 | |
|
1977 | 0 | for (;nbr;nbr=a1->NextNbrAtom(vi[bcount])) |
1978 | 0 | if (!bv[nbr->GetIdx()]) |
1979 | 0 | if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr) |
1980 | 0 | && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount]))) |
1981 | 0 | { |
1982 | 0 | bv.SetBitOn(nbr->GetIdx()); |
1983 | 0 | map[pat->bond[bcount].dst] = nbr->GetIdx(); |
1984 | 0 | vif[bcount] = true; |
1985 | 0 | bcount++; |
1986 | 0 | if (bcount < pat->bcount) |
1987 | 0 | vif[bcount] = false; |
1988 | 0 | break; |
1989 | 0 | } |
1990 | |
|
1991 | 0 | if (!nbr)//no match - time to backtrack |
1992 | 0 | bcount--; |
1993 | 0 | } |
1994 | 0 | } |
1995 | 0 | } |
1996 | 0 | } |
1997 | | |
1998 | | |
1999 | | bool OBSmartsMatcher::match(OBMol &mol, const Pattern *pat, |
2000 | | std::vector<std::vector<int> > &mlist,bool single) |
2001 | 293k | { |
2002 | 293k | mlist.clear(); |
2003 | 293k | if (!pat || pat->acount == 0) |
2004 | 0 | return(false);//shouldn't ever happen |
2005 | | |
2006 | 293k | if (single && !pat->ischiral) { |
2007 | | // perform a fast single match (only works for non-chiral SMARTS) |
2008 | 0 | FastSingleMatch(mol,pat,mlist); |
2009 | 293k | } else { |
2010 | | // perform normal match (chirality ignored and checked below) |
2011 | 293k | OBSSMatch ssm(mol,pat); |
2012 | 293k | ssm.Match(mlist); |
2013 | 293k | } |
2014 | | |
2015 | 293k | if (pat->ischiral) { |
2016 | 0 | std::vector<std::vector<int> >::iterator m; |
2017 | 0 | std::vector<std::vector<int> > tmpmlist; |
2018 | |
|
2019 | 0 | tmpmlist.clear(); |
2020 | | // iterate over the atom mappings |
2021 | 0 | for (m = mlist.begin();m != mlist.end();++m) { |
2022 | |
|
2023 | 0 | bool allStereoCentersMatch = true; |
2024 | | |
2025 | | // for each pattern atom |
2026 | 0 | for (int j = 0; j < pat->acount; ++j) { |
2027 | | // skip non-chiral pattern atoms |
2028 | 0 | if (!pat->atom[j].chiral_flag) |
2029 | 0 | continue; |
2030 | | // ignore @? in smarts, parse like any other smarts |
2031 | 0 | if (pat->atom[j].chiral_flag == AL_UNSPECIFIED) |
2032 | 0 | continue; |
2033 | | |
2034 | | // use the mapping the get the chiral atom in the molecule being queried |
2035 | 0 | OBAtom *center = mol.GetAtom((*m)[j]); |
2036 | | |
2037 | | // get the OBTetrahedralStereo::Config from the molecule |
2038 | 0 | OBStereoFacade stereo(&mol); |
2039 | 0 | OBTetrahedralStereo *ts = stereo.GetTetrahedralStereo(center->GetId()); |
2040 | 0 | if (!ts || !ts->GetConfig().specified) { |
2041 | | // no stereochemistry specified in molecule for the atom |
2042 | | // corresponding to the chiral pattern atom using the current |
2043 | | // mapping --> no match |
2044 | 0 | allStereoCentersMatch = false; |
2045 | 0 | break; |
2046 | 0 | } |
2047 | | |
2048 | 0 | std::vector<int> nbrs = pat->atom[j].nbrs; |
2049 | |
|
2050 | 0 | if (nbrs.size() != 4) { // 3 nbrs currently not supported. Other values are errors. |
2051 | | //stringstream ss; |
2052 | | //ss << "Ignoring stereochemistry. There are " << nbrs.size() << " connections to this atom instead of 4. Title: " << mol.GetTitle(); |
2053 | | //obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
2054 | 0 | continue; |
2055 | 0 | } |
2056 | | |
2057 | | // construct a OBTetrahedralStereo::Config using the smarts pattern |
2058 | 0 | OBTetrahedralStereo::Config smartsConfig; |
2059 | 0 | smartsConfig.center = center->GetId(); |
2060 | 0 | if (nbrs.at(0) == SmartsImplicitRef) |
2061 | 0 | smartsConfig.from = OBStereo::ImplicitRef; |
2062 | 0 | else |
2063 | 0 | smartsConfig.from = mol.GetAtom( (*m)[nbrs.at(0)] )->GetId(); |
2064 | 0 | OBStereo::Ref firstref; |
2065 | 0 | if (nbrs.at(1) == SmartsImplicitRef) |
2066 | 0 | firstref = OBStereo::ImplicitRef; |
2067 | 0 | else |
2068 | 0 | firstref = mol.GetAtom( (*m)[nbrs.at(1)] )->GetId(); |
2069 | 0 | OBAtom *ra2 = mol.GetAtom( (*m)[nbrs.at(2)] ); |
2070 | 0 | OBAtom *ra3 = mol.GetAtom( (*m)[nbrs.at(3)] ); |
2071 | 0 | smartsConfig.refs = OBStereo::MakeRefs(firstref, ra2->GetId(), ra3->GetId()); |
2072 | |
|
2073 | 0 | smartsConfig.view = OBStereo::ViewFrom; |
2074 | 0 | switch (pat->atom[j].chiral_flag) { |
2075 | 0 | case AL_CLOCKWISE: |
2076 | 0 | smartsConfig.winding = OBStereo::Clockwise; |
2077 | 0 | break; |
2078 | 0 | case AL_ANTICLOCKWISE: |
2079 | 0 | smartsConfig.winding = OBStereo::AntiClockwise; |
2080 | 0 | break; |
2081 | 0 | default: |
2082 | 0 | smartsConfig.specified = false; |
2083 | 0 | } |
2084 | | |
2085 | | // cout << "smarts config = " << smartsConfig << endl; |
2086 | | // cout << "molecule config = " << ts->GetConfig() << endl; |
2087 | | // cout << "match = " << (ts->GetConfig() == smartsConfig) << endl; |
2088 | | |
2089 | | // and save the match if the two configurations are the same |
2090 | 0 | if (ts->GetConfig() != smartsConfig) |
2091 | 0 | allStereoCentersMatch = false; |
2092 | | |
2093 | | // don't waste time checking more stereocenters using this mapping if one didn't match |
2094 | 0 | if (!allStereoCentersMatch) |
2095 | 0 | break; |
2096 | 0 | } |
2097 | | |
2098 | | // if all the atoms in the molecule match the stereochemistry specified |
2099 | | // in the smarts pattern, save this mapping as a match |
2100 | 0 | if (allStereoCentersMatch) |
2101 | 0 | tmpmlist.push_back(*m); |
2102 | 0 | } |
2103 | | |
2104 | 0 | mlist = tmpmlist; |
2105 | 0 | } |
2106 | | |
2107 | 293k | return(!mlist.empty()); |
2108 | 293k | } |
2109 | | |
2110 | | bool OBSmartsMatcher::EvalAtomExpr(AtomExpr *expr,OBAtom *atom) |
2111 | 46.7M | { |
2112 | 46.7M | for (;;) |
2113 | 51.8M | switch (expr->type) |
2114 | 51.8M | { |
2115 | 1.24k | case AE_TRUE: |
2116 | 1.24k | return true; |
2117 | 0 | case AE_FALSE: |
2118 | 0 | return false; |
2119 | 142 | case AE_AROMATIC: |
2120 | 142 | return atom->IsAromatic(); |
2121 | 0 | case AE_ALIPHATIC: |
2122 | 0 | return !atom->IsAromatic(); |
2123 | 0 | case AE_CYCLIC: |
2124 | 0 | return atom->IsInRing(); |
2125 | 0 | case AE_ACYCLIC: |
2126 | 0 | return !atom->IsInRing(); |
2127 | | |
2128 | 0 | case AE_MASS: |
2129 | 0 | return expr->leaf.value == atom->GetIsotope(); |
2130 | 16.6M | case AE_ELEM: |
2131 | 16.6M | return expr->leaf.value == (int)atom->GetAtomicNum(); |
2132 | 2.22M | case AE_AROMELEM: |
2133 | 2.22M | return expr->leaf.value == (int)atom->GetAtomicNum() && |
2134 | 60.3k | atom->IsAromatic(); |
2135 | 10.0M | case AE_ALIPHELEM: |
2136 | 10.0M | return expr->leaf.value == (int)atom->GetAtomicNum() && |
2137 | 263k | !atom->IsAromatic(); |
2138 | 0 | case AE_HCOUNT: |
2139 | 0 | return expr->leaf.value == ((int)atom->ExplicitHydrogenCount() + |
2140 | 0 | (int)atom->GetImplicitHCount()); |
2141 | 0 | case AE_CHARGE: |
2142 | 0 | return expr->leaf.value == atom->GetFormalCharge(); |
2143 | 0 | case AE_CONNECT: |
2144 | 0 | return expr->leaf.value == (int)atom->GetTotalDegree(); |
2145 | 1.76M | case AE_DEGREE: |
2146 | 1.76M | return expr->leaf.value == (int)atom->GetExplicitDegree(); |
2147 | 0 | case AE_IMPLICIT: |
2148 | 0 | return expr->leaf.value == (int)atom->GetImplicitHCount(); |
2149 | 0 | case AE_RINGS: |
2150 | 0 | return expr->leaf.value == (int)atom->MemberOfRingCount(); |
2151 | 0 | case AE_SIZE: |
2152 | 0 | return atom->IsInRingSize(expr->leaf.value); |
2153 | 4.99k | case AE_VALENCE: |
2154 | 4.99k | return expr->leaf.value == (int)atom->GetTotalValence(); |
2155 | 0 | case AE_CHIRAL: |
2156 | | // always return true (i.e. accept the match) and check later |
2157 | 0 | return true; |
2158 | 0 | case AE_HYB: |
2159 | 0 | return expr->leaf.value == (int)atom->GetHyb(); |
2160 | 0 | case AE_RINGCONNECT: |
2161 | 0 | return expr->leaf.value == (int)atom->CountRingBonds(); |
2162 | | |
2163 | 2.27k | case AE_NOT: |
2164 | 2.27k | return !EvalAtomExpr(expr->mon.arg,atom); |
2165 | 7.87M | case AE_ANDHI: /* Same as AE_ANDLO */ |
2166 | 10.0M | case AE_ANDLO: |
2167 | 10.0M | if (!EvalAtomExpr(expr->bin.lft,atom)) |
2168 | 9.95M | return false; |
2169 | 130k | expr = expr->bin.rgt; |
2170 | 130k | break; |
2171 | 4.98M | case AE_OR: |
2172 | 4.98M | if (EvalAtomExpr(expr->bin.lft,atom)) |
2173 | 215 | return true; |
2174 | 4.98M | expr = expr->bin.rgt; |
2175 | 4.98M | break; |
2176 | | |
2177 | 6.09M | case AE_RECUR: |
2178 | 6.09M | { |
2179 | | //see if pattern has been matched |
2180 | 6.09M | std::vector<std::pair<const Pattern*,std::vector<bool> > >::iterator i; |
2181 | 8.30M | for (i = RSCACHE.begin();i != RSCACHE.end();++i) |
2182 | 8.24M | if (i->first == (Pattern*)expr->recur.recur) |
2183 | 6.03M | return(i->second[atom->GetIdx()]); |
2184 | | |
2185 | | //perceive and match pattern |
2186 | 57.3k | std::vector<std::vector<int> >::iterator j; |
2187 | 57.3k | std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1); |
2188 | 57.3k | std::vector<std::vector<int> > mlist; |
2189 | 57.3k | if (match( *((OBMol *) atom->GetParent()), |
2190 | 57.3k | (Pattern*)expr->recur.recur,mlist)) |
2191 | 240 | for (j = mlist.begin();j != mlist.end();++j) |
2192 | 141 | vb[(*j)[0]] = true; |
2193 | | |
2194 | 57.3k | RSCACHE.push_back(std::pair<const Pattern*, |
2195 | 57.3k | std::vector<bool> > ((const Pattern*)expr->recur.recur, |
2196 | 57.3k | vb)); |
2197 | | |
2198 | 57.3k | return(vb[atom->GetIdx()]); |
2199 | 6.09M | } |
2200 | | |
2201 | 0 | default: |
2202 | 0 | return false; |
2203 | 51.8M | } |
2204 | 46.7M | } |
2205 | | |
2206 | | bool OBSmartsMatcher::EvalBondExpr(BondExpr *expr,OBBond *bond) |
2207 | 1.37k | { |
2208 | 1.37k | for (;;) |
2209 | 1.40k | switch( expr->type ) |
2210 | 1.40k | { |
2211 | 0 | case BE_ANDHI: |
2212 | 0 | case BE_ANDLO: |
2213 | 0 | if (!EvalBondExpr(expr->bin.lft,bond)) |
2214 | 0 | return false; |
2215 | 0 | expr = expr->bin.rgt; |
2216 | 0 | break; |
2217 | | |
2218 | 46 | case BE_OR: |
2219 | 46 | if (EvalBondExpr(expr->bin.lft,bond)) |
2220 | 22 | return true; |
2221 | 24 | expr = expr->bin.rgt; |
2222 | 24 | break; |
2223 | | |
2224 | 0 | case BE_NOT: |
2225 | 0 | return !EvalBondExpr(expr->mon.arg,bond); |
2226 | | |
2227 | 0 | case BE_ANY: |
2228 | 0 | return true; |
2229 | 87 | case BE_DEFAULT: |
2230 | 87 | return bond->GetBondOrder()==1 || bond->IsAromatic(); |
2231 | 0 | case BE_SINGLE: |
2232 | 0 | return bond->GetBondOrder()==1 && !bond->IsAromatic(); |
2233 | 810 | case BE_DOUBLE: |
2234 | 810 | return bond->GetBondOrder()==2 && !bond->IsAromatic(); |
2235 | 439 | case BE_TRIPLE: |
2236 | 439 | return bond->GetBondOrder() == 3; |
2237 | 0 | case BE_QUAD: |
2238 | 0 | return bond->GetBondOrder() == 4; |
2239 | 18 | case BE_AROM: |
2240 | 18 | return bond->IsAromatic(); |
2241 | 0 | case BE_RING: |
2242 | 0 | return bond->IsInRing(); |
2243 | | //case BE_UP: |
2244 | | // return bond->IsUp(); |
2245 | | //case BE_DOWN: |
2246 | | // return bond->IsDown(); |
2247 | | //case BE_UPUNSPEC: // up or unspecified (i.e., not down) |
2248 | | // return !bond->IsDown(); |
2249 | | //case BE_DOWNUNSPEC: // down or unspecified (i.e., not up) |
2250 | | // return !bond->IsUp(); |
2251 | 0 | default: |
2252 | 0 | return false; |
2253 | 1.40k | } |
2254 | 1.37k | } |
2255 | | |
2256 | | std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList() |
2257 | 0 | { |
2258 | 0 | if (_mlist.empty() || _mlist.size() == 1) |
2259 | 0 | return(_mlist); |
2260 | | |
2261 | 0 | bool ok; |
2262 | 0 | OBBitVec bv; |
2263 | 0 | std::vector<OBBitVec> vbv; |
2264 | 0 | std::vector<std::vector<int> > mlist; |
2265 | 0 | std::vector<std::vector<int> >::iterator i; |
2266 | 0 | std::vector<OBBitVec>::iterator j; |
2267 | |
|
2268 | 0 | for (i = _mlist.begin();i != _mlist.end();++i) |
2269 | 0 | { |
2270 | 0 | ok = true; |
2271 | 0 | bv.Clear(); |
2272 | 0 | bv.FromVecInt(*i); |
2273 | 0 | for (j = vbv.begin();j != vbv.end() && ok;++j) |
2274 | 0 | if ((*j) == bv) |
2275 | 0 | ok = false; |
2276 | |
|
2277 | 0 | if (ok) |
2278 | 0 | { |
2279 | 0 | mlist.push_back(*i); |
2280 | 0 | vbv.push_back(bv); |
2281 | 0 | } |
2282 | 0 | } |
2283 | |
|
2284 | 0 | _mlist = mlist; |
2285 | 0 | return(_mlist); |
2286 | 0 | } |
2287 | | |
2288 | | void OBSmartsPattern::WriteMapList(ostream &ofs) |
2289 | 0 | { |
2290 | 0 | std::vector<std::vector<int> >::iterator i; |
2291 | 0 | std::vector<int>::iterator j; |
2292 | |
|
2293 | 0 | for ( i = _mlist.begin() ; i != _mlist.end() ; ++i ) |
2294 | 0 | { |
2295 | 0 | for (j = (*i).begin();j != (*i).end();++j) |
2296 | 0 | ofs << *j << ' ' << ends; |
2297 | 0 | ofs << endl; |
2298 | 0 | } |
2299 | 0 | } |
2300 | | |
2301 | | //******************************************************************* |
2302 | | // The OBSSMatch class performs exhaustive matching using recursion |
2303 | | // Explicit stack handling is used to find just a single match in |
2304 | | // match() |
2305 | | //******************************************************************* |
2306 | | |
2307 | | OBSSMatch::OBSSMatch(OBMol &mol, const Pattern *pat) |
2308 | 293k | { |
2309 | 293k | _mol = &mol; |
2310 | 293k | _pat = pat; |
2311 | 293k | _map.resize(pat->acount); |
2312 | | |
2313 | 293k | if (!mol.Empty()) |
2314 | 293k | { |
2315 | 293k | _uatoms = new bool [mol.NumAtoms()+1]; |
2316 | 293k | memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1)); |
2317 | 293k | } |
2318 | 0 | else |
2319 | 0 | _uatoms = nullptr; |
2320 | 293k | } |
2321 | | |
2322 | | OBSSMatch::~OBSSMatch() |
2323 | 293k | { |
2324 | 293k | delete [] _uatoms; |
2325 | 293k | } |
2326 | | |
2327 | | void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx) |
2328 | 515k | { |
2329 | 515k | OBSmartsMatcher matcher; |
2330 | 515k | if (bidx == -1) |
2331 | 293k | { |
2332 | 293k | OBAtom *atom; |
2333 | 293k | std::vector<OBAtom*>::iterator i; |
2334 | 31.9M | for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i)) |
2335 | 31.6M | if (matcher.EvalAtomExpr(_pat->atom[0].expr,atom)) |
2336 | 221k | { |
2337 | 221k | _map[0] = atom->GetIdx(); |
2338 | 221k | _uatoms[atom->GetIdx()] = true; |
2339 | 221k | Match(mlist,0); |
2340 | 221k | _map[0] = 0; |
2341 | 221k | _uatoms[atom->GetIdx()] = false; |
2342 | 221k | } |
2343 | 293k | return; |
2344 | 293k | } |
2345 | | |
2346 | 221k | if (bidx == _pat->bcount) //save full match here |
2347 | 74.1k | { |
2348 | 74.1k | mlist.push_back(_map); |
2349 | 74.1k | return; |
2350 | 74.1k | } |
2351 | | |
2352 | 147k | if (_pat->bond[bidx].grow) //match the next bond |
2353 | 147k | { |
2354 | 147k | int src = _pat->bond[bidx].src; |
2355 | 147k | int dst = _pat->bond[bidx].dst; |
2356 | | |
2357 | 147k | if (_map[src] <= 0 || _map[src] > (signed)_mol->NumAtoms()) |
2358 | 0 | return; |
2359 | | |
2360 | 147k | AtomExpr *aexpr = _pat->atom[dst].expr; |
2361 | 147k | BondExpr *bexpr = _pat->bond[bidx].expr; |
2362 | 147k | OBAtom *atom,*nbr; |
2363 | 147k | std::vector<OBBond*>::iterator i; |
2364 | | |
2365 | 147k | atom = _mol->GetAtom(_map[src]); |
2366 | 149k | for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i)) |
2367 | 1.79k | if (!_uatoms[nbr->GetIdx()] && matcher.EvalAtomExpr(aexpr,nbr) && |
2368 | 1.33k | matcher.EvalBondExpr(bexpr,((OBBond*) *i))) |
2369 | 206 | { |
2370 | 206 | _map[dst] = nbr->GetIdx(); |
2371 | 206 | _uatoms[nbr->GetIdx()] = true; |
2372 | 206 | Match(mlist,bidx+1); |
2373 | 206 | _uatoms[nbr->GetIdx()] = false; |
2374 | 206 | _map[dst] = 0; |
2375 | 206 | } |
2376 | 147k | } |
2377 | 0 | else //just check bond here |
2378 | 0 | { |
2379 | 0 | OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src], |
2380 | 0 | _map[_pat->bond[bidx].dst]); |
2381 | 0 | if (bond && matcher.EvalBondExpr(_pat->bond[bidx].expr,bond)) |
2382 | 0 | Match(mlist,bidx+1); |
2383 | 0 | } |
2384 | 147k | } |
2385 | | |
2386 | | static int GetExprOrder(BondExpr *expr) |
2387 | 0 | { |
2388 | 0 | int tmp1,tmp2; |
2389 | |
|
2390 | 0 | switch( expr->type ) |
2391 | 0 | { |
2392 | 0 | case BE_SINGLE: return 1; |
2393 | 0 | case BE_DOUBLE: return 2; |
2394 | 0 | case BE_TRIPLE: return 3; |
2395 | 0 | case BE_QUAD: return 4; |
2396 | 0 | case BE_AROM: return 5; |
2397 | | |
2398 | 0 | case BE_UP: |
2399 | 0 | case BE_DOWN: |
2400 | 0 | case BE_UPUNSPEC: |
2401 | 0 | case BE_DOWNUNSPEC: |
2402 | 0 | return 1; |
2403 | | |
2404 | 0 | case BE_ANDHI: |
2405 | 0 | case BE_ANDLO: |
2406 | 0 | tmp1 = GetExprOrder(expr->bin.lft); |
2407 | 0 | tmp2 = GetExprOrder(expr->bin.rgt); |
2408 | 0 | if (tmp1 == 0) return tmp2; |
2409 | 0 | if (tmp2 == 0) return tmp1; |
2410 | 0 | if (tmp1 == tmp2) return tmp1; |
2411 | 0 | break; |
2412 | | |
2413 | 0 | case BE_OR: |
2414 | 0 | tmp1 = GetExprOrder(expr->bin.lft); |
2415 | 0 | if (tmp1 == 0) return 0; |
2416 | 0 | tmp2 = GetExprOrder(expr->bin.rgt); |
2417 | 0 | if (tmp2 == 0) return 0; |
2418 | 0 | if (tmp1 == tmp2) return tmp1; |
2419 | 0 | break; |
2420 | 0 | } |
2421 | | |
2422 | 0 | return 0; |
2423 | 0 | } |
2424 | | |
2425 | | static int GetExprCharge(AtomExpr *expr) |
2426 | 0 | { |
2427 | 0 | int tmp1,tmp2; |
2428 | |
|
2429 | 0 | switch( expr->type ) |
2430 | 0 | { |
2431 | 0 | case AE_CHARGE: |
2432 | 0 | return expr->leaf.value; |
2433 | | |
2434 | 0 | case AE_ANDHI: |
2435 | 0 | case AE_ANDLO: |
2436 | 0 | tmp1 = GetExprCharge(expr->bin.lft); |
2437 | 0 | tmp2 = GetExprCharge(expr->bin.rgt); |
2438 | 0 | if (tmp1 == 0) return tmp2; |
2439 | 0 | if (tmp2 == 0) return tmp1; |
2440 | 0 | if (tmp1 == tmp2) return tmp1; |
2441 | 0 | break; |
2442 | | |
2443 | 0 | case AE_OR: |
2444 | 0 | tmp1 = GetExprCharge(expr->bin.lft); |
2445 | 0 | if (tmp1 == 0) return 0; |
2446 | 0 | tmp2 = GetExprCharge(expr->bin.rgt); |
2447 | 0 | if (tmp2 == 0) return 0; |
2448 | 0 | if (tmp1 == tmp2) return tmp1; |
2449 | 0 | break; |
2450 | 0 | } |
2451 | | |
2452 | 0 | return 0; |
2453 | 0 | } |
2454 | | |
2455 | | int OBSmartsPattern::GetCharge(int idx) |
2456 | 0 | { |
2457 | 0 | return GetExprCharge(_pat->atom[idx].expr); |
2458 | 0 | } |
2459 | | |
2460 | | static int GetExprAtomicNum(AtomExpr *expr) |
2461 | 0 | { |
2462 | 0 | int tmp1,tmp2; |
2463 | |
|
2464 | 0 | switch( expr->type ) |
2465 | 0 | { |
2466 | 0 | case AE_ELEM: |
2467 | 0 | case AE_AROMELEM: |
2468 | 0 | case AE_ALIPHELEM: |
2469 | 0 | return expr->leaf.value; |
2470 | | |
2471 | 0 | case AE_ANDHI: |
2472 | 0 | case AE_ANDLO: |
2473 | 0 | tmp1 = GetExprAtomicNum(expr->bin.lft); |
2474 | 0 | tmp2 = GetExprAtomicNum(expr->bin.rgt); |
2475 | 0 | if (tmp1 == 0) return tmp2; |
2476 | 0 | if (tmp2 == 0) return tmp1; |
2477 | 0 | if (tmp1 == tmp2) return tmp1; |
2478 | 0 | break; |
2479 | | |
2480 | 0 | case AE_OR: |
2481 | 0 | tmp1 = GetExprAtomicNum(expr->bin.lft); |
2482 | 0 | if (tmp1 == 0) return 0; |
2483 | 0 | tmp2 = GetExprAtomicNum(expr->bin.rgt); |
2484 | 0 | if (tmp2 == 0) return 0; |
2485 | 0 | if (tmp1 == tmp2) return tmp1; |
2486 | 0 | break; |
2487 | 0 | } |
2488 | | |
2489 | 0 | return 0; |
2490 | 0 | } |
2491 | | |
2492 | | int OBSmartsPattern::GetAtomicNum(int idx) |
2493 | 0 | { |
2494 | 0 | return GetExprAtomicNum(_pat->atom[idx].expr); |
2495 | 0 | } |
2496 | | |
2497 | | void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx) |
2498 | 0 | { |
2499 | 0 | src = _pat->bond[idx].src; |
2500 | 0 | dst = _pat->bond[idx].dst; |
2501 | 0 | ord = GetExprOrder(_pat->bond[idx].expr); |
2502 | 0 | } |
2503 | | |
2504 | | void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex) |
2505 | 0 | { |
2506 | 0 | size_t j,pos; |
2507 | 0 | std::string token,repstr; |
2508 | 0 | std::vector<std::pair<std::string,std::string> >::iterator i; |
2509 | |
|
2510 | 0 | for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos)) |
2511 | | //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos)) |
2512 | 0 | { |
2513 | 0 | pos++; |
2514 | 0 | for (j = pos;j < s.size();++j) |
2515 | 0 | if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_') |
2516 | 0 | break; |
2517 | 0 | if (pos == j) |
2518 | 0 | continue; |
2519 | | |
2520 | 0 | token = s.substr(pos,j-pos); |
2521 | 0 | for (i = vlex.begin();i != vlex.end();++i) |
2522 | 0 | if (token == i->first) |
2523 | 0 | { |
2524 | 0 | repstr = "(" + i->second + ")"; |
2525 | 0 | s.replace(pos,j-pos,repstr); |
2526 | 0 | j = 0; |
2527 | 0 | } |
2528 | 0 | pos = j; |
2529 | 0 | } |
2530 | 0 | } |
2531 | | |
2532 | | } // end namespace OpenBabel |
2533 | | |
2534 | | //! \file parsmart.cpp |
2535 | | //! \brief Implementation of Daylight SMARTS parser. |