Coverage Report

Created: 2026-02-26 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/parsmart.cpp
Line
Count
Source
1
/**********************************************************************
2
parsmart.cpp - SMARTS parser.
3
4
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
6
7
This file is part of the Open Babel project.
8
For more information, see <http://openbabel.org/>
9
10
This program is free software; you can redistribute it and/or modify
11
it under the terms of the GNU General Public License as published by
12
the Free Software Foundation version 2 of the License.
13
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
***********************************************************************/
19
#include <openbabel/babelconfig.h>
20
21
#include <cctype>
22
#include <iomanip>
23
#include <cstring>
24
25
#include <openbabel/mol.h>
26
#include <openbabel/atom.h>
27
#include <openbabel/bond.h>
28
#include <openbabel/parsmart.h>
29
#include <openbabel/stereo/stereo.h>
30
#include <openbabel/stereo/tetrahedral.h>
31
32
using namespace std;
33
34
namespace OpenBabel
35
{
36
  /*! \class OBSmartsPattern parsmart.h <openbabel/parsmart.h>
37
38
    Substructure search is an incredibly useful tool in the context of a
39
    small molecule programming library. Having an efficient substructure
40
    search engine reduces the amount of hard code needed for molecule
41
    perception, as well as increases the flexibility of certain
42
    operations. For instance, atom typing can be easily performed based on
43
    hard coded rules of element type and bond orders (or
44
    hybridization). Alternatively, atom typing can also be done by
45
    matching a set of substructure rules read at run time. In the latter
46
    case customization based on application (such as changing the pH)
47
    becomes a facile operation. Fortunately for Open Babel and its users,
48
    Roger Sayle donated a SMARTS parser which became the basis for SMARTS
49
    matching in Open Babel.
50
51
    For more information on the SMARTS support in Open Babel, see the wiki page:
52
    http://openbabel.org/wiki/SMARTS
53
54
    The SMARTS matcher, or OBSmartsPattern, is a separate object which can
55
    match patterns in the OBMol class. The following code demonstrates how
56
    to use the OBSmartsPattern class:
57
    \code
58
    OBMol mol;
59
    ...
60
    OBSmartsPattern sp;
61
    sp.Init("CC");
62
    sp.Match(mol);
63
    vector<vector<int> > maplist;
64
    maplist = sp.GetMapList();
65
    //or maplist = sp.GetUMapList();
66
    //print out the results
67
    vector<vector<int> >::iterator i;
68
    vector<int>::iterator j;
69
    for (i = maplist.begin();i != maplist.end();++i)
70
    {
71
    for (j = i->begin();j != i->end();++j)
72
    cout << j << ' `;
73
    cout << endl;
74
    }
75
    \endcode
76
77
    The preceding code reads in a molecule, initializes a SMARTS pattern
78
    of two single-bonded carbons, and locates all instances of the
79
    pattern in the molecule. Note that calling the Match() function
80
    does not return the results of the substructure match. The results
81
    from a match are stored in the OBSmartsPattern, and a call to
82
    GetMapList() or GetUMapList() must be made to extract the
83
    results. The function GetMapList() returns all matches of a
84
    particular pattern while GetUMapList() returns only the unique
85
    matches. For instance, the pattern [OD1]~C~[OD1] describes a
86
    carboxylate group. This pattern will match both atom number
87
    permutations of the carboxylate, and if GetMapList() is called, both
88
    matches will be returned. If GetUMapList() is called only unique
89
    matches of the pattern will be returned. A unique match is defined as
90
    one which does not cover the identical atoms that a previous match
91
    has covered.
92
93
  */
94
95
238
#define ATOMPOOL      1
96
83
#define BONDPOOL      1
97
98
7.87M
#define AE_ANDHI        1
99
10.0M
#define AE_ANDLO        2
100
4.98M
#define AE_OR           3
101
6.09M
#define AE_RECUR        4
102
2.28k
#define AE_NOT          5
103
1.26k
#define AE_TRUE         6
104
0
#define AE_FALSE        7
105
145
#define AE_AROMATIC     8
106
0
#define AE_ALIPHATIC    9
107
0
#define AE_CYCLIC       10
108
6
#define AE_ACYCLIC      11
109
0
#define AE_MASS         12
110
16.6M
#define AE_ELEM         13
111
2.22M
#define AE_AROMELEM     14
112
10.0M
#define AE_ALIPHELEM    15
113
0
#define AE_HCOUNT       16
114
6
#define AE_CHARGE       17
115
3
#define AE_CONNECT      18
116
1.76M
#define AE_DEGREE       19
117
0
#define AE_IMPLICIT     20
118
0
#define AE_RINGS        21
119
0
#define AE_SIZE         22
120
4.99k
#define AE_VALENCE      23
121
0
#define AE_CHIRAL       24
122
9
#define AE_HYB          25
123
0
#define AE_RINGCONNECT  26
124
125
2
#define AL_CLOCKWISE      1
126
2
#define AL_ANTICLOCKWISE  2
127
0
#define AL_UNSPECIFIED    0
128
129
/* Each BondExpr node is identified by an integer type field
130
   selected from the list of BE_ codes below.  BE_ANDHI,
131
   BE_ANDLO and BE_OR are binary nodes of type BondExpr.bin,
132
   BE_NOT is unary node of type BondExpr.mon, and the remaining
133
   code are all leaf nodes.  */
134
135
0
#define BE_ANDHI        1
136
0
#define BE_ANDLO        2
137
52
#define BE_OR           3
138
0
#define BE_NOT          4
139
14
#define BE_ANY          5
140
106
#define BE_DEFAULT      6
141
0
#define BE_SINGLE       7
142
833
#define BE_DOUBLE       8
143
443
#define BE_TRIPLE       9
144
0
#define BE_QUAD         10
145
20
#define BE_AROM         11
146
0
#define BE_RING         12
147
0
#define BE_UP           13
148
0
#define BE_DOWN         14
149
0
#define BE_UPUNSPEC     15
150
0
#define BE_DOWNUNSPEC   16
151
152
153
  static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0);
154
155
  const int SmartsImplicitRef = -9999; // Used as a placeholder when recording atom nbrs for chiral atoms
156
157
158
  /*=============================*/
159
  /*  Standard Utility Routines  */
160
  /*=============================*/
161
162
  static void FatalAllocationError( const char *ptr )
163
0
  {
164
0
    stringstream errorMsg;
165
0
    errorMsg << "Error: Unable to allocate" << ptr << endl;
166
0
    obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
167
0
  }
168
169
  /*================================*/
170
  /*  Atom Expression Manipulation  */
171
  /*================================*/
172
173
  static void FreePattern( Pattern* );
174
  static Pattern *CopyPattern( Pattern* );
175
176
  static AtomExpr *CopyAtomExpr( AtomExpr *expr )
177
0
  {
178
0
    AtomExpr *result;
179
0
180
0
    result = new AtomExpr;
181
0
    result->type = expr->type;
182
0
    switch( expr->type )
183
0
      {
184
0
      case AE_ANDHI:
185
0
      case AE_ANDLO:
186
0
      case AE_OR:
187
0
        result->bin.lft = CopyAtomExpr(expr->bin.lft);
188
0
        result->bin.rgt = CopyAtomExpr(expr->bin.rgt);
189
0
        break;
190
0
191
0
      case AE_NOT:
192
0
        result->mon.arg = CopyAtomExpr(expr->mon.arg);
193
0
        break;
194
0
195
0
      case AE_RECUR:
196
0
        result->recur.recur = CopyPattern((Pattern*)expr->recur.recur);
197
0
        break;
198
0
199
0
      default:
200
0
        result->leaf.value = expr->leaf.value;
201
0
        break;
202
0
      }
203
0
    return result;
204
0
  }
205
206
  static void FreeAtomExpr( AtomExpr *expr )
207
379
  {
208
379
    if( expr )
209
379
      {
210
379
        switch( expr->type )
211
379
          {
212
70
          case AE_ANDHI:
213
81
          case AE_ANDLO:
214
99
          case AE_OR:
215
99
            FreeAtomExpr(expr->bin.lft);
216
99
            FreeAtomExpr(expr->bin.rgt);
217
99
            break;
218
219
2
          case AE_NOT:
220
2
            FreeAtomExpr(expr->mon.arg);
221
2
            break;
222
223
33
          case AE_RECUR:
224
33
            FreePattern((Pattern*)expr->recur.recur);
225
33
            break;
226
379
          }
227
228
379
        delete expr;
229
379
      }
230
379
  }
231
232
  static AtomExpr *BuildAtomPred( int type )
233
26
  {
234
26
    AtomExpr *result;
235
236
26
    result = new AtomExpr;
237
26
    result->leaf.type = type;
238
26
    result->leaf.value = 0;
239
26
    return result;
240
26
  }
241
242
  static AtomExpr *BuildAtomLeaf( int type, int val )
243
219
  {
244
219
    AtomExpr *result;
245
246
219
    result = new AtomExpr;
247
219
    result->leaf.type = type;
248
219
    result->leaf.value = val;
249
219
    return result;
250
219
  }
251
252
  static AtomExpr *BuildAtomNot( AtomExpr *expr )
253
2
  {
254
2
    AtomExpr *result;
255
256
2
    result = new AtomExpr;
257
2
    result->mon.type = AE_NOT;
258
2
    result->mon.arg = expr;
259
2
    return result;
260
2
  }
261
262
  static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt )
263
99
  {
264
99
    AtomExpr *result;
265
266
99
    result = new AtomExpr;
267
99
    result->bin.type = op;
268
99
    result->bin.lft = lft;
269
99
    result->bin.rgt = rgt;
270
99
    return result;
271
99
  }
272
273
  static AtomExpr *BuildAtomRecurs( Pattern *pat )
274
33
  {
275
33
    AtomExpr *result;
276
277
33
    result = new AtomExpr;
278
33
    result->recur.type = AE_RECUR;
279
33
    result->recur.recur = (void*)pat;
280
33
    return result;
281
33
  }
282
283
  static AtomExpr *GenerateElement( int elem )
284
74
  {
285
74
    return BuildAtomLeaf(AE_ELEM,elem);
286
74
  }
287
288
  static AtomExpr *GenerateAromElem( int elem, int flag )
289
76
  {
290
76
    return flag ? BuildAtomLeaf(AE_AROMELEM,elem)
291
76
                : BuildAtomLeaf(AE_ALIPHELEM,elem);
292
76
  }
293
294
  /*================================*/
295
  /*  Bond Expression Manipulation  */
296
  /*================================*/
297
298
  static BondExpr *CopyBondExpr( BondExpr *expr )
299
0
  {
300
0
    BondExpr *result;
301
0
302
0
    result = new BondExpr;
303
0
    result->type = expr->type;
304
0
    switch( expr->type )
305
0
      {
306
0
      case BE_ANDHI:
307
0
      case BE_ANDLO:
308
0
      case BE_OR:
309
0
        result->bin.lft = CopyBondExpr(expr->bin.lft);
310
0
        result->bin.rgt = CopyBondExpr(expr->bin.rgt);
311
0
        break;
312
0
313
0
      case BE_NOT:
314
0
        result->mon.arg = CopyBondExpr(expr->mon.arg);
315
0
        break;
316
0
      }
317
0
    return result;
318
0
  }
319
320
  /**
321
   * Check if two BondExpr objects are the same. This is used for ring closures
322
   * to identify invalid SMARTS like:
323
   *
324
   *   C-1CCCCC#1
325
   *   C=1CCCCC:1
326
   *
327
   * However, the SMARTS below are valid and the bond expression next to the the
328
   * second closure digit is used.
329
   *
330
   *   C1CCCCC#1
331
   *   C1CCCCC=1
332
   */
333
  static bool EquivalentBondExpr( BondExpr *expr1, BondExpr *expr2 )
334
0
  {
335
0
    if (expr1 == nullptr && expr2 == nullptr)
336
0
      return true;
337
0
    if (expr1 == nullptr && expr2 != nullptr)
338
0
      return false;
339
0
    if (expr1 != nullptr && expr2 == nullptr)
340
0
      return false;
341
342
0
    if (expr1->type != expr2->type)
343
0
      return false;
344
345
0
    switch( expr1->type )
346
0
      {
347
0
      case BE_ANDHI:
348
0
      case BE_ANDLO:
349
0
      case BE_OR:
350
0
        return EquivalentBondExpr(expr1->bin.lft, expr2->bin.lft) &&
351
0
               EquivalentBondExpr(expr1->bin.rgt, expr2->bin.rgt);
352
353
0
      case BE_NOT:
354
0
        return EquivalentBondExpr(expr1->mon.arg, expr2->mon.arg);
355
0
      }
356
0
    return true;
357
0
  }
358
359
  static void FreeBondExpr( BondExpr *expr )
360
65
  {
361
65
    if( expr )
362
65
      {
363
65
        switch( expr->type )
364
65
          {
365
0
          case BE_ANDHI:
366
0
          case BE_ANDLO:
367
3
          case BE_OR:
368
3
            FreeBondExpr(expr->bin.lft);
369
3
            FreeBondExpr(expr->bin.rgt);
370
3
            break;
371
372
0
          case BE_NOT:
373
0
            FreeBondExpr(expr->mon.arg);
374
0
            break;
375
65
          }
376
377
65
        delete expr;
378
65
      }
379
65
  }
380
381
  static BondExpr *BuildBondLeaf( int type )
382
62
  {
383
62
    BondExpr *result;
384
385
62
    result = new BondExpr;
386
62
    result->type = type;
387
62
    return result;
388
62
  }
389
390
  static BondExpr *BuildBondNot( BondExpr *expr )
391
0
  {
392
0
    BondExpr *result;
393
394
0
    result = new BondExpr;
395
0
    result->mon.type = BE_NOT;
396
0
    result->mon.arg = expr;
397
0
    return result;
398
0
  }
399
400
  static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt )
401
3
  {
402
3
    BondExpr *result;
403
404
3
    result = new BondExpr;
405
3
    result->bin.type = op;
406
3
    result->bin.lft = lft;
407
3
    result->bin.rgt = rgt;
408
3
    return result;
409
3
  }
410
411
  static BondExpr *GenerateDefaultBond( void )
412
19
  {
413
19
    return BuildBondLeaf(BE_DEFAULT);
414
19
  }
415
416
  /*===============================*/
417
  /*  SMARTS Pattern Manipulation  */
418
  /*===============================*/
419
420
  static Pattern *AllocPattern( void )
421
120
  {
422
120
    Pattern *ptr;
423
424
120
    ptr = new Pattern;
425
120
    if( !ptr ) {
426
0
      FatalAllocationError("pattern");
427
0
      return nullptr;
428
0
    }
429
430
120
    ptr->atom = nullptr;
431
120
    ptr->aalloc = 0;
432
120
    ptr->acount = 0;
433
434
120
    ptr->bond = nullptr;
435
120
    ptr->balloc = 0;
436
120
    ptr->bcount = 0;
437
438
120
    ptr->parts = 1;
439
440
120
    ptr->hasExplicitH=false;
441
120
    return ptr;
442
120
  }
443
444
  static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb)
445
179
  {
446
179
    int index,size;
447
448
179
    if (!pat)
449
0
      return -1; // should never happen
450
451
179
    if( pat->acount == pat->aalloc )
452
179
      {
453
179
        pat->aalloc += ATOMPOOL;
454
179
        size = (int)(pat->aalloc*sizeof(AtomSpec));
455
179
        if( pat->atom )
456
59
          {
457
59
            AtomSpec *tmp = new AtomSpec[pat->aalloc];
458
59
            copy(pat->atom, pat->atom + pat->aalloc - ATOMPOOL, tmp);
459
59
            delete [] pat->atom;
460
59
            pat->atom = tmp;
461
59
          }
462
120
        else
463
120
          pat->atom = new AtomSpec[pat->aalloc];
464
179
        if( !pat->atom )
465
0
          FatalAllocationError("atom pool");
466
179
      }
467
468
179
    index = pat->acount++;
469
179
    pat->atom[index].part = part;
470
179
    pat->atom[index].expr = expr;
471
179
    pat->atom[index].vb = vb; //std::vector binding
472
473
179
    return index;
474
179
  }
475
476
  static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst )
477
59
  {
478
59
    int index,size;
479
480
59
    if (!pat)
481
0
      return -1; // should never happen
482
483
59
    if( pat->bcount == pat->balloc )
484
59
      {
485
59
        pat->balloc += BONDPOOL;
486
59
        size = (int)(pat->balloc*sizeof(BondSpec));
487
59
        if( pat->bond )
488
24
          {
489
24
            BondSpec *tmp = new BondSpec[pat->balloc];
490
24
            copy(pat->bond, pat->bond + pat->balloc - BONDPOOL, tmp);
491
24
            delete [] pat->bond;
492
24
            pat->bond = tmp;
493
24
          }
494
35
        else
495
35
          pat->bond = new BondSpec[pat->balloc];
496
59
        if( !pat->bond )
497
0
          FatalAllocationError("bond pool");
498
59
      }
499
500
59
    index = pat->bcount++;
501
59
    pat->bond[index].expr = expr;
502
59
    pat->bond[index].src = src;
503
59
    pat->bond[index].dst = dst;
504
59
    return(index);
505
59
  }
506
507
  static Pattern *CopyPattern( Pattern *pat )
508
0
  {
509
0
    Pattern *result;
510
0
    AtomExpr *aexpr;
511
0
    BondExpr *bexpr;
512
0
    int i;
513
0
514
0
    result = AllocPattern();
515
0
    result->parts = pat->parts;
516
0
    for( i=0; i<pat->acount; i++ )
517
0
      {
518
0
        aexpr = CopyAtomExpr(pat->atom[i].expr);
519
0
        CreateAtom(result,aexpr,pat->atom[i].part);
520
0
      }
521
0
522
0
    for( i=0; i<pat->bcount; i++ )
523
0
      {
524
0
        bexpr = CopyBondExpr(pat->bond[i].expr);
525
0
        CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst);
526
0
      }
527
0
528
0
    return result;
529
0
  }
530
531
  static void FreePattern( Pattern *pat )
532
120
  {
533
120
    int i;
534
535
120
    if( pat )
536
120
      {
537
120
        if( pat->aalloc )
538
120
          {
539
299
            for( i=0; i<pat->acount; i++ )
540
179
              FreeAtomExpr(pat->atom[i].expr);
541
120
            if (pat->atom != nullptr) {
542
              //free(pat->atom);
543
120
              delete [] pat->atom;
544
120
              pat->atom = nullptr;
545
120
            }
546
120
          }
547
548
120
        if( pat->balloc )
549
35
          {
550
94
            for( i=0; i<pat->bcount; i++ )
551
59
              FreeBondExpr(pat->bond[i].expr);
552
35
            if (pat->bond != nullptr) {
553
              //free(pat->bond);
554
35
              delete [] pat->bond;
555
35
              pat->bond = nullptr;
556
35
            }
557
35
          }
558
120
        delete pat;
559
120
        pat = nullptr;
560
120
      }
561
120
  }
562
563
  /*=========================*/
564
  /*  SMARTS Syntax Parsing  */
565
  /*=========================*/
566
567
568
  Pattern *OBSmartsPattern::SMARTSError( Pattern *pat )
569
0
  {
570
0
    stringstream errorMsg;
571
0
    errorMsg << "SMARTS Error:\n" << MainPtr << endl;
572
0
    errorMsg << setw(LexPtr-MainPtr+1) << '^' << endl;
573
0
    obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError, onceOnly);
574
575
0
    FreePattern(pat);
576
0
    return nullptr;
577
0
  }
578
579
  AtomExpr *OBSmartsPattern::ParseSimpleAtomPrimitive( void )
580
49
  {
581
49
    switch( *LexPtr++ )
582
49
      {
583
17
      case '*':
584
17
        return BuildAtomPred(AE_TRUE);
585
0
      case 'A':
586
0
        return BuildAtomPred(AE_ALIPHATIC);
587
0
      case 'B':
588
0
        if( *LexPtr == 'r' )
589
0
          {
590
0
            LexPtr++;
591
0
            return GenerateElement(35);
592
0
          }
593
0
        return GenerateElement(5);
594
10
      case 'C':
595
10
        if( *LexPtr == 'l' )
596
0
          {
597
0
            LexPtr++;
598
0
            return GenerateElement(17);
599
0
          }
600
10
        return GenerateAromElem(6,false);
601
0
      case 'F':
602
0
        return GenerateElement( 9);
603
0
      case 'I':
604
0
        return GenerateElement(53);
605
7
      case 'N':
606
7
        return GenerateAromElem(7,false);
607
10
      case 'O':
608
10
        return GenerateAromElem(8,false);
609
0
      case 'P':
610
0
        return GenerateAromElem(15, false);
611
2
      case 'S':
612
2
        return GenerateAromElem(16,false);
613
3
      case 'a':
614
3
        if( *LexPtr == 's' )
615
0
          {
616
0
            LexPtr++;
617
0
            return GenerateAromElem(33, true);
618
0
          }
619
3
        return BuildAtomPred(AE_AROMATIC);
620
0
      case 'c':
621
0
        return GenerateAromElem( 6,true);
622
0
      case 'n':
623
0
        return GenerateAromElem( 7,true);
624
0
      case 'o':
625
0
        return GenerateAromElem( 8,true);
626
0
      case 'p':
627
0
        return GenerateAromElem(15,true);
628
0
      case 's':
629
0
        if( *LexPtr == 'e' )
630
0
          {
631
0
            LexPtr++;
632
0
            return GenerateAromElem(34, true);
633
0
          }
634
0
        return GenerateAromElem(16,true);
635
49
      }
636
0
    LexPtr--;
637
0
    return nullptr;
638
49
  }
639
640
  AtomExpr *OBSmartsPattern::ParseComplexAtomPrimitive( void )
641
229
  {
642
229
    Pattern *pat;
643
229
    int index;
644
645
229
    switch( *LexPtr++ )
646
229
      {
647
46
      case('#'):
648
46
        if( !isdigit(*LexPtr) )
649
0
          return nullptr;
650
651
46
        index = 0;
652
110
        while( isdigit(*LexPtr) )
653
64
          index = index*10 + ((*LexPtr++)-'0');
654
46
        if( index > 255 )
655
0
          {
656
0
            LexPtr--;
657
0
            return nullptr;
658
0
          }
659
46
        return( GenerateElement(index) );
660
661
33
      case('$'):
662
33
        if( *LexPtr != '(' )
663
0
          return nullptr;
664
33
        LexPtr++;
665
33
        pat = ParseSMARTSPattern();
666
667
33
        if( !pat )
668
0
          return nullptr;
669
33
        if( *LexPtr != ')' )
670
0
          {
671
0
            FreePattern(pat);
672
0
            return nullptr;
673
0
          }
674
33
        LexPtr++;
675
33
        return( BuildAtomRecurs(pat) );
676
677
0
      case('*'):
678
0
        return BuildAtomPred(AE_TRUE);
679
680
4
      case('+'):
681
4
        if( isdigit(*LexPtr) )
682
0
          {
683
0
            index = 0;
684
0
            while( isdigit(*LexPtr) )
685
0
              index = index*10 + ((*LexPtr++)-'0');
686
0
          }
687
4
        else
688
4
          {
689
4
            index = 1;
690
4
            while( *LexPtr == '+' )
691
0
              {
692
0
                LexPtr++;
693
0
                index++;
694
0
              }
695
4
          }
696
4
        return BuildAtomLeaf(AE_CHARGE,index);
697
698
2
      case('-'):
699
2
        if( isdigit(*LexPtr) )
700
0
          {
701
0
            index = 0;
702
0
            while( isdigit(*LexPtr) )
703
0
              index = index*10 + ((*LexPtr++)-'0');
704
0
          }
705
2
        else
706
2
          {
707
2
            index = 1;
708
2
            while( *LexPtr == '-' )
709
0
              {
710
0
                LexPtr++;
711
0
                index++;
712
0
              }
713
2
          }
714
2
        return BuildAtomLeaf(AE_CHARGE,-index);
715
716
0
      case '@':
717
0
        if (*LexPtr == '?')
718
0
          {
719
0
            LexPtr++;
720
0
            return BuildAtomLeaf(AE_CHIRAL,AL_UNSPECIFIED); // unspecified
721
0
          }
722
0
        else if (*LexPtr != '@')
723
0
          return BuildAtomLeaf(AE_CHIRAL,AL_ANTICLOCKWISE);
724
0
        else
725
0
          {
726
0
            LexPtr++;
727
0
            return BuildAtomLeaf(AE_CHIRAL,AL_CLOCKWISE);
728
0
          }
729
730
9
      case '^':
731
9
        if (isdigit(*LexPtr))
732
9
          {
733
9
            index = 0;
734
18
            while( isdigit(*LexPtr) )
735
9
              index = index*10 + ((*LexPtr++)-'0');
736
9
            return BuildAtomLeaf(AE_HYB,index);
737
9
          }
738
0
        else
739
0
          return BuildAtomLeaf(AE_HYB,1);
740
741
0
      case('0'): case('1'): case('2'): case('3'): case('4'):
742
0
      case('5'): case('6'): case('7'): case('8'): case('9'):
743
0
        index = LexPtr[-1]-'0';
744
0
        while( isdigit(*LexPtr) )
745
0
          index = index*10 + ((*LexPtr++)-'0');
746
0
        return BuildAtomLeaf(AE_MASS,index);
747
748
3
      case('A'):
749
3
        switch( *LexPtr++ )
750
3
          {
751
0
          case('c'):  return GenerateElement(89);
752
0
          case('g'):  return GenerateElement(47);
753
2
          case('l'):  return GenerateElement(13);
754
0
          case('m'):  return GenerateElement(95);
755
0
          case('r'):  return GenerateElement(18);
756
1
          case('s'):  return GenerateElement(33);
757
0
          case('t'):  return GenerateElement(85);
758
0
          case('u'):  return GenerateElement(79);
759
3
          }
760
0
        LexPtr--;
761
0
        return BuildAtomPred(AE_ALIPHATIC);
762
763
6
      case('B'):
764
6
        switch( *LexPtr++ )
765
6
          {
766
1
          case('a'):  return GenerateElement(56);
767
1
          case('e'):  return GenerateElement( 4);
768
2
          case('i'):  return GenerateElement(83);
769
0
          case('k'):  return GenerateElement(97);
770
0
          case('r'):  return GenerateElement(35);
771
6
          }
772
2
        LexPtr--;
773
2
        return GenerateElement(5);
774
775
2
      case('C'):
776
2
        switch( *LexPtr++ )
777
2
          {
778
1
          case('a'):  return GenerateElement(20);
779
0
          case('d'):  return GenerateElement(48);
780
0
          case('e'):  return GenerateElement(58);
781
0
          case('f'):  return GenerateElement(98);
782
0
          case('l'):  return GenerateElement(17);
783
0
          case('m'):  return GenerateElement(96);
784
0
          case('o'):  return GenerateElement(27);
785
0
          case('r'):  return GenerateElement(24);
786
0
          case('s'):  return GenerateElement(55);
787
0
          case('u'):  return GenerateElement(29);
788
2
          }
789
1
        LexPtr--;
790
1
        return GenerateAromElem(6,false);
791
792
50
      case('D'):
793
50
        if( *LexPtr == 'y' )
794
0
          {
795
0
            LexPtr++;
796
0
            return GenerateElement(66);
797
0
          }
798
50
        else if( isdigit(*LexPtr) )
799
50
          {
800
50
            index = 0;
801
100
            while( isdigit(*LexPtr) )
802
50
              index = index*10 + ((*LexPtr++)-'0');
803
50
            return BuildAtomLeaf(AE_DEGREE,index);
804
50
          }
805
0
        return BuildAtomLeaf(AE_DEGREE,1);
806
807
0
      case('E'):
808
0
        if( *LexPtr == 'r' )
809
0
          {
810
0
            LexPtr++;
811
0
            return GenerateElement(68);
812
0
          }
813
0
        else if( *LexPtr == 's' )
814
0
          {
815
0
            LexPtr++;
816
0
            return GenerateElement(99);
817
0
          }
818
0
        else if( *LexPtr == 'u' )
819
0
          {
820
0
            LexPtr++;
821
0
            return GenerateElement(63);
822
0
          }
823
0
        break;
824
825
0
      case('F'):
826
0
        if( *LexPtr == 'e' )
827
0
          {
828
0
            LexPtr++;
829
0
            return GenerateElement(26);
830
0
          }
831
0
        else if( *LexPtr == 'm' )
832
0
          {
833
0
            LexPtr++;
834
0
            return GenerateElement(100);
835
0
          }
836
0
        else if( *LexPtr == 'r' )
837
0
          {
838
0
            LexPtr++;
839
0
            return GenerateElement(87);
840
0
          }
841
0
        return GenerateElement(9);
842
843
2
      case('G'):
844
2
        if( *LexPtr == 'a' )
845
1
          {
846
1
            LexPtr++;
847
1
            return( GenerateElement(31) );
848
1
          }
849
1
        else if( *LexPtr == 'd' )
850
0
          {
851
0
            LexPtr++;
852
0
            return( GenerateElement(64) );
853
0
          }
854
1
        else if( *LexPtr == 'e' )
855
1
          {
856
1
            LexPtr++;
857
1
            return( GenerateElement(32) );
858
1
          }
859
0
        break;
860
861
0
      case('H'):
862
0
        if( *LexPtr == 'e' )
863
0
          {
864
0
            LexPtr++;
865
0
            return( GenerateElement( 2) );
866
0
          }
867
0
        else if( *LexPtr == 'f' )
868
0
          {
869
0
            LexPtr++;
870
0
            return( GenerateElement(72) );
871
0
          }
872
0
        else if( *LexPtr == 'g' )
873
0
          {
874
0
            LexPtr++;
875
0
            return( GenerateElement(80) );
876
0
          }
877
0
        else if( *LexPtr == 'o' )
878
0
          {
879
0
            LexPtr++;
880
0
            return( GenerateElement(67) );
881
0
          }
882
0
        else if( isdigit(*LexPtr) )
883
0
          {
884
0
            index = 0;
885
0
            while( isdigit(*LexPtr) )
886
0
              index = index*10 + ((*LexPtr++)-'0');
887
0
            return BuildAtomLeaf(AE_HCOUNT,index);
888
0
          }
889
0
        return BuildAtomLeaf(AE_HCOUNT,1);
890
891
1
      case('I'):
892
1
        if( *LexPtr == 'n' )
893
1
          {
894
1
            LexPtr++;
895
1
            return( GenerateElement(49) );
896
1
          }
897
0
        else if( *LexPtr == 'r' )
898
0
          {
899
0
            LexPtr++;
900
0
            return( GenerateElement(77) );
901
0
          }
902
0
        return( GenerateElement(53) );
903
904
0
      case('K'):
905
0
        if( *LexPtr == 'r' )
906
0
          {
907
0
            LexPtr++;
908
0
            return( GenerateElement(36) );
909
0
          }
910
0
        return( GenerateElement(19) );
911
912
0
      case('L'):
913
0
        if( *LexPtr == 'a' )
914
0
          {
915
0
            LexPtr++;
916
0
            return( GenerateElement( 57) );
917
0
          }
918
0
        else if( *LexPtr == 'i' )
919
0
          {
920
0
            LexPtr++;
921
0
            return( GenerateElement(  3) );
922
0
          }
923
0
        else if( *LexPtr == 'r' )
924
0
          {
925
0
            LexPtr++;
926
0
            return( GenerateElement(103) );
927
0
          }
928
0
        else if( *LexPtr == 'u' )
929
0
          {
930
0
            LexPtr++;
931
0
            return( GenerateElement( 71) );
932
0
          }
933
0
        break;
934
935
1
      case('M'):
936
1
        if( *LexPtr == 'd' )
937
0
          {
938
0
            LexPtr++;
939
0
            return( GenerateElement(101) );
940
0
          }
941
1
        else if( *LexPtr == 'g' )
942
1
          {
943
1
            LexPtr++;
944
1
            return( GenerateElement( 12) );
945
1
          }
946
0
        else if( *LexPtr == 'n' )
947
0
          {
948
0
            LexPtr++;
949
0
            return( GenerateElement( 25) );
950
0
          }
951
0
        else if( *LexPtr == 'o' )
952
0
          {
953
0
            LexPtr++;
954
0
            return( GenerateElement( 42) );
955
0
          }
956
0
        break;
957
958
15
      case('N'):
959
15
        switch( *LexPtr++ )
960
15
          {
961
0
          case('a'):  return( GenerateElement( 11) );
962
0
          case('b'):  return( GenerateElement( 41) );
963
0
          case('d'):  return( GenerateElement( 60) );
964
0
          case('e'):  return( GenerateElement( 10) );
965
0
          case('i'):  return( GenerateElement( 28) );
966
0
          case('o'):  return( GenerateElement(102) );
967
0
          case('p'):  return( GenerateElement( 93) );
968
15
          }
969
15
        LexPtr--;
970
15
        return( GenerateAromElem(7,false) );
971
972
17
      case('O'):
973
17
        if( *LexPtr == 's' )
974
0
          {
975
0
            LexPtr++;
976
0
            return( GenerateElement(76) );
977
0
          }
978
17
        return( GenerateAromElem(8,false) );
979
980
6
      case('P'):
981
6
        switch( *LexPtr++ )
982
6
          {
983
0
          case('a'):  return( GenerateElement(91) );
984
1
          case('b'):  return( GenerateElement(82) );
985
0
          case('d'):  return( GenerateElement(46) );
986
0
          case('m'):  return( GenerateElement(61) );
987
1
          case('o'):  return( GenerateElement(84) );
988
0
          case('r'):  return( GenerateElement(59) );
989
0
          case('t'):  return( GenerateElement(78) );
990
0
          case('u'):  return( GenerateElement(94) );
991
6
          }
992
4
        LexPtr--;
993
4
        return( GenerateElement(15) );
994
995
1
      case('R'):
996
1
        switch( *LexPtr++ )
997
1
          {
998
1
          case('a'):  return( GenerateElement(88) );
999
0
          case('b'):  return( GenerateElement(37) );
1000
0
          case('e'):  return( GenerateElement(75) );
1001
0
          case('h'):  return( GenerateElement(45) );
1002
0
          case('n'):  return( GenerateElement(86) );
1003
0
          case('u'):  return( GenerateElement(44) );
1004
1
          }
1005
0
        LexPtr--;
1006
0
        if( isdigit(*LexPtr) )
1007
0
          {
1008
0
            index = 0;
1009
0
            while( isdigit(*LexPtr) )
1010
0
              index = index*10 + ((*LexPtr++)-'0');
1011
0
            if( index == 0 )
1012
0
              return BuildAtomPred(AE_ACYCLIC);
1013
0
            return BuildAtomLeaf(AE_RINGS,index);
1014
0
          }
1015
0
        return BuildAtomPred(AE_CYCLIC);
1016
1017
11
      case('S'):
1018
11
        switch( *LexPtr++ )
1019
11
          {
1020
1
          case('b'):  return( GenerateElement(51) );
1021
0
          case('c'):  return( GenerateElement(21) );
1022
1
          case('e'):  return( GenerateElement(34) );
1023
1
          case('i'):  return( GenerateElement(14) );
1024
0
          case('m'):  return( GenerateElement(62) );
1025
1
          case('n'):  return( GenerateElement(50) );
1026
1
          case('r'):  return( GenerateElement(38) );
1027
11
          }
1028
6
        LexPtr--;
1029
6
        return( GenerateAromElem(16,false) );
1030
1031
2
      case('T'):
1032
2
        switch( *LexPtr++ )
1033
2
          {
1034
0
          case('a'):  return( GenerateElement(73) );
1035
0
          case('b'):  return( GenerateElement(65) );
1036
0
          case('c'):  return( GenerateElement(43) );
1037
1
          case('e'):  return( GenerateElement(52) );
1038
0
          case('h'):  return( GenerateElement(90) );
1039
0
          case('i'):  return( GenerateElement(22) );
1040
1
          case('l'):  return( GenerateElement(81) );
1041
0
          case('m'):  return( GenerateElement(69) );
1042
2
          }
1043
0
        LexPtr--;
1044
0
        break;
1045
1046
0
      case('U'):  return( GenerateElement(92) );
1047
0
      case('V'):  return( GenerateElement(23) );
1048
0
      case('W'):  return( GenerateElement(74) );
1049
1050
3
      case('X'):
1051
3
        if( *LexPtr == 'e' )
1052
0
          {
1053
0
            LexPtr++;
1054
0
            return( GenerateElement(54) );
1055
0
          }
1056
3
        else if( isdigit(*LexPtr) )
1057
3
          {
1058
3
            index = 0;
1059
6
            while( isdigit(*LexPtr) )
1060
3
              index = index*10 + ((*LexPtr++)-'0');
1061
3
            if (index == 0) // default to 1 (if no number present)
1062
0
              index = 1;
1063
3
            return BuildAtomLeaf(AE_CONNECT,index);
1064
3
          }
1065
0
        return BuildAtomLeaf(AE_CONNECT,1);
1066
1067
0
      case('Y'):
1068
0
        if( *LexPtr == 'b' )
1069
0
          {
1070
0
            LexPtr++;
1071
0
            return( GenerateElement(70) );
1072
0
          }
1073
0
        return( GenerateElement(39) );
1074
1075
0
      case('Z'):
1076
0
        if( *LexPtr == 'n' )
1077
0
          {
1078
0
            LexPtr++;
1079
0
            return GenerateElement(30);
1080
0
          }
1081
0
        else if( *LexPtr == 'r' )
1082
0
          {
1083
0
            LexPtr++;
1084
0
            return GenerateElement(40);
1085
0
          }
1086
0
        break;
1087
1088
0
      case('a'):
1089
0
        if( *LexPtr == 's' )
1090
0
          {
1091
0
            LexPtr++;
1092
0
            return GenerateAromElem(33,true);
1093
0
          }
1094
0
        return BuildAtomPred(AE_AROMATIC);
1095
1096
2
      case('c'):
1097
2
        return GenerateAromElem(6,true);
1098
1099
0
      case('h'):
1100
0
        if( isdigit(*LexPtr) )
1101
0
          {
1102
0
            index = 0;
1103
0
            while( isdigit(*LexPtr) )
1104
0
              index = index*10 + ((*LexPtr++)-'0');
1105
0
          }
1106
0
        else
1107
0
          index = 1;
1108
0
        return BuildAtomLeaf(AE_IMPLICIT,index);
1109
1110
2
      case('n'):  return GenerateAromElem(7,true);
1111
1
      case('o'):  return GenerateAromElem(8,true);
1112
0
      case('p'):  return GenerateAromElem(15,true);
1113
1114
6
      case('r'):
1115
6
        if( isdigit(*LexPtr) )
1116
6
          {
1117
6
            index = 0;
1118
12
            while( isdigit(*LexPtr) )
1119
6
              index = index*10 + ((*LexPtr++)-'0');
1120
6
            if( index == 0 )
1121
6
              return BuildAtomPred(AE_ACYCLIC);
1122
0
            return BuildAtomLeaf(AE_SIZE,index);
1123
6
          }
1124
0
        return BuildAtomPred(AE_CYCLIC);
1125
1126
3
      case('s'):
1127
3
        if( *LexPtr == 'e' )
1128
1
          {
1129
1
            LexPtr++;
1130
1
            return GenerateAromElem(34,true);
1131
1
          }
1132
2
        return GenerateAromElem(16,true);
1133
1134
1
      case('v'):
1135
1
        if( isdigit(*LexPtr) )
1136
1
          {
1137
1
            index = 0;
1138
2
            while( isdigit(*LexPtr) )
1139
1
              index = index*10 + ((*LexPtr++)-'0');
1140
1
            return BuildAtomLeaf(AE_VALENCE,index);
1141
1
          }
1142
0
        return BuildAtomLeaf(AE_VALENCE,1);
1143
1144
0
      case('x'):
1145
0
        if( isdigit(*LexPtr) )
1146
0
          {
1147
0
            index = 0;
1148
0
            while( isdigit(*LexPtr) )
1149
0
              index = index*10 + ((*LexPtr++)-'0');
1150
0
            return BuildAtomLeaf(AE_RINGCONNECT,index);
1151
0
          }
1152
0
        return BuildAtomPred(AE_CYCLIC);
1153
229
      }
1154
0
    LexPtr--;
1155
0
    return nullptr;
1156
229
  }
1157
1158
  AtomExpr *OBSmartsPattern::ParseAtomExpr( int level )
1159
661
  {
1160
661
    AtomExpr *expr1 = nullptr;
1161
661
    AtomExpr *expr2 = nullptr;
1162
661
    char *prev;
1163
1164
661
    switch( level )
1165
661
      {
1166
130
      case(0): /* Low Precedence Conjunction */
1167
130
        if( !(expr1=ParseAtomExpr(1)) )
1168
0
          return nullptr;
1169
1170
141
        while( *LexPtr == ';' )
1171
11
          {
1172
11
            LexPtr++;
1173
11
            if( !(expr2=ParseAtomExpr(1)) )
1174
0
              {
1175
0
                FreeAtomExpr(expr1);
1176
0
                return nullptr;
1177
0
              }
1178
11
            expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2);
1179
11
          }
1180
130
        return expr1;
1181
1182
141
      case(1): /* Disjunction */
1183
141
        if( !(expr1=ParseAtomExpr(2)) )
1184
0
          return nullptr;
1185
1186
159
        while( *LexPtr == ',' )
1187
18
          {
1188
18
            LexPtr++;
1189
18
            if( !(expr2=ParseAtomExpr(2)) )
1190
0
              {
1191
0
                FreeAtomExpr(expr1);
1192
0
                return nullptr;
1193
0
              }
1194
18
            expr1 = BuildAtomBin(AE_OR,expr1,expr2);
1195
18
          }
1196
141
        return( expr1 );
1197
1198
159
      case(2): /* High Precedence Conjunction */
1199
159
        if( !(expr1=ParseAtomExpr(3)) )
1200
0
          return nullptr;
1201
1202
229
        while( (*LexPtr!=']') && (*LexPtr!=';') &&
1203
88
               (*LexPtr!=',') && *LexPtr )
1204
70
          {
1205
70
            if( *LexPtr=='&' )
1206
0
              LexPtr++;
1207
70
            prev = LexPtr;
1208
70
            if( !(expr2=ParseAtomExpr(3)) )
1209
0
              {
1210
0
                if( prev != LexPtr )
1211
0
                  {
1212
0
                    FreeAtomExpr(expr1);
1213
0
                    return nullptr;
1214
0
                  }
1215
0
                else
1216
0
                  return( expr1 );
1217
0
              }
1218
70
            expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2);
1219
70
          }
1220
159
        return( expr1 );
1221
1222
231
      case(3): /* Negation or Primitive */
1223
231
        if( *LexPtr == '!' )
1224
2
          {
1225
2
            LexPtr++;
1226
2
            if( !(expr1=ParseAtomExpr(3)) )
1227
0
              return nullptr;
1228
2
            return( BuildAtomNot(expr1) );
1229
2
          }
1230
229
        return( ParseComplexAtomPrimitive() );
1231
661
      }
1232
0
    return nullptr;
1233
661
  }
1234
1235
  BondExpr *OBSmartsPattern::ParseBondPrimitive( void )
1236
83
  {
1237
83
    char bsym = *LexPtr++;
1238
1239
83
    switch(bsym)
1240
83
      {
1241
0
      case '-':  return BuildBondLeaf(BE_SINGLE);
1242
23
      case '=':  return BuildBondLeaf(BE_DOUBLE);
1243
4
      case '#':  return BuildBondLeaf(BE_TRIPLE);
1244
0
      case '$':  return BuildBondLeaf(BE_QUAD);
1245
2
      case ':':  return BuildBondLeaf(BE_AROM);
1246
0
      case '@':  return BuildBondLeaf(BE_RING);
1247
14
      case '~':  return BuildBondLeaf(BE_ANY);
1248
1249
      // return BuildBondLeaf(*LexPtr == '?' ? BE_UPUNSPEC : BE_UP);
1250
0
      case '/':  return BuildBondLeaf(BE_SINGLE);
1251
      // return BuildBondLeaf(*LexPtr == '?' ? BE_DOWNUNSPEC : BE_DOWN);
1252
0
      case '\\': return BuildBondLeaf(BE_SINGLE);
1253
83
      }
1254
40
    LexPtr--;
1255
40
    return nullptr;
1256
83
  }
1257
1258
  BondExpr *OBSmartsPattern::ParseBondExpr( int level )
1259
206
  {
1260
206
    BondExpr *expr1 = nullptr;
1261
206
    BondExpr *expr2 = nullptr;
1262
206
    char *prev;
1263
1264
206
    switch( level )
1265
206
      {
1266
40
      case(0): /* Low Precedence Conjunction */
1267
40
        if( !(expr1=ParseBondExpr(1)) )
1268
0
          return nullptr;
1269
1270
40
        while( *LexPtr == ';' )
1271
0
          {
1272
0
            LexPtr++;
1273
0
            if( !(expr2=ParseBondExpr(1)) )
1274
0
              {
1275
0
                FreeBondExpr(expr1);
1276
0
                return nullptr;
1277
0
              }
1278
0
            expr1 = BuildBondBin(BE_ANDLO,expr1,expr2);
1279
0
          }
1280
40
        return expr1;
1281
1282
40
      case(1): /* Disjunction */
1283
40
        if( !(expr1=ParseBondExpr(2)) )
1284
0
          return nullptr;
1285
1286
43
        while( *LexPtr == ',' )
1287
3
          {
1288
3
            LexPtr++;
1289
3
            if( !(expr2=ParseBondExpr(2)) )
1290
0
              {
1291
0
                FreeBondExpr(expr1);
1292
0
                return nullptr;
1293
0
              }
1294
3
            expr1 = BuildBondBin(BE_OR,expr1,expr2);
1295
3
          }
1296
40
        return expr1;
1297
1298
43
      case(2): /* High Precedence Conjunction */
1299
43
        if( !(expr1=ParseBondExpr(3)) )
1300
0
          return nullptr;
1301
1302
43
        while( (*LexPtr!=']') && (*LexPtr!=';') &&
1303
43
               (*LexPtr!=',') && *LexPtr )
1304
40
          {
1305
40
            if( *LexPtr == '&' )
1306
0
              LexPtr++;
1307
40
            prev = LexPtr;
1308
40
            if( !(expr2=ParseBondExpr(3)) )
1309
40
              {
1310
40
                if( prev != LexPtr )
1311
0
                  {
1312
0
                    FreeBondExpr(expr1);
1313
0
                    return nullptr;
1314
0
                  }
1315
40
                else
1316
40
                  return expr1;
1317
40
              }
1318
0
            expr1 = BuildBondBin(BE_ANDHI,expr1,expr2);
1319
0
          }
1320
3
        return expr1;
1321
1322
83
      case(3): /* Negation or Primitive */
1323
83
        if( *LexPtr == '!' )
1324
0
          {
1325
0
            LexPtr++;
1326
0
            if( !(expr1=ParseBondExpr(3)) )
1327
0
              return nullptr;
1328
0
            return BuildBondNot(expr1);
1329
0
          }
1330
83
        return ParseBondPrimitive();
1331
206
      }
1332
0
    return nullptr;
1333
206
  }
1334
1335
  int OBSmartsPattern::GetVectorBinding()
1336
0
  {
1337
0
    int vb=0;
1338
1339
0
    LexPtr++; //skip colon
1340
0
    if(isdigit(*LexPtr))
1341
0
      {
1342
0
        vb = 0;
1343
0
        while( isdigit(*LexPtr) )
1344
0
          vb = vb*10 + ((*LexPtr++)-'0');
1345
0
      }
1346
1347
0
    return(vb);
1348
0
  }
1349
1350
  Pattern *OBSmartsPattern::ParseSMARTSError( Pattern *pat, BondExpr *expr )
1351
0
  {
1352
0
    if( expr )
1353
0
      FreeBondExpr(expr);
1354
0
    return SMARTSError(pat);
1355
0
  }
1356
1357
  Pattern *OBSmartsPattern::SMARTSParser( Pattern *pat, ParseState *stat,
1358
                                int prev, int part )
1359
136
  {
1360
136
    int vb = 0;
1361
136
    AtomExpr *aexpr;
1362
136
    BondExpr *bexpr;
1363
136
    int index;
1364
1365
136
    aexpr = nullptr;
1366
136
    bexpr = nullptr;
1367
1368
371
    while( *LexPtr )
1369
284
      {
1370
284
        switch( *LexPtr++ )
1371
284
          {
1372
0
          case('.'):
1373
0
            return ParseSMARTSError(pat,bexpr);
1374
1375
26
          case('-'):  case('='):  case('#'): case('$'):
1376
40
          case(':'):  case('~'):  case('@'):
1377
40
          case('/'):  case('\\'): case('!'):
1378
40
            LexPtr--;
1379
40
            if( (prev==-1) || bexpr )
1380
0
              return ParseSMARTSError(pat,bexpr);
1381
40
            if( !(bexpr=ParseBondExpr(0)) )
1382
0
              return ParseSMARTSError(pat,bexpr);
1383
40
            break;
1384
1385
40
          case('('):
1386
16
            if( bexpr )
1387
0
              {
1388
0
                LexPtr--;
1389
0
                return ParseSMARTSError(pat,bexpr);
1390
0
              }
1391
16
            if( prev == -1 )
1392
0
              {
1393
0
                index = pat->acount;
1394
0
                pat = SMARTSParser(pat,stat,-1,part);
1395
0
                if( !pat )
1396
0
                  return nullptr;
1397
0
                if( index == pat->acount )
1398
0
                  return ParseSMARTSError(pat,bexpr);
1399
0
                prev = index;
1400
0
              }
1401
16
            else
1402
16
              {
1403
16
                pat = SMARTSParser(pat,stat,prev,part);
1404
16
                if( !pat )
1405
0
                  return nullptr;
1406
16
              }
1407
1408
16
            if( *LexPtr != ')' )
1409
0
              return ParseSMARTSError(pat,bexpr);
1410
16
            LexPtr++;
1411
16
            break;
1412
1413
49
          case(')'):  LexPtr--;
1414
49
            if( (prev==-1) || bexpr )
1415
0
              return ParseSMARTSError(pat,bexpr);
1416
49
            return pat;
1417
1418
0
          case('%'):  if( prev == -1 )
1419
0
              {
1420
0
                LexPtr--;
1421
0
                return ParseSMARTSError(pat,bexpr);
1422
0
              }
1423
1424
0
            if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) )
1425
0
              {
1426
0
                index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0');
1427
0
                LexPtr += 2;
1428
0
              }
1429
0
            else
1430
0
              return ParseSMARTSError(pat,bexpr);
1431
1432
0
            if( stat->closure[index] == -1 )
1433
0
              {
1434
0
                stat->closord[index] = bexpr;
1435
0
                stat->closure[index] = prev;
1436
0
              }
1437
0
            else if( stat->closure[index] != prev )
1438
0
              {
1439
0
                if( !bexpr ) {
1440
0
                  if (!stat->closord[index]) {
1441
0
                    bexpr = GenerateDefaultBond();
1442
0
                    FreeBondExpr(stat->closord[index]);
1443
0
                  } else
1444
0
                    bexpr = stat->closord[index];
1445
0
                } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index]))
1446
0
                  return ParseSMARTSError(pat,bexpr);
1447
1448
0
                CreateBond(pat,bexpr,prev,stat->closure[index]);
1449
0
                stat->closure[index] = -1;
1450
0
                bexpr = nullptr;
1451
0
              }
1452
0
            else
1453
0
              return ParseSMARTSError(pat,bexpr);
1454
0
            break;
1455
1456
0
          case('0'):  case('1'):  case('2'):
1457
0
          case('3'):  case('4'):  case('5'):
1458
0
          case('6'):  case('7'):  case('8'):
1459
0
          case('9'):  LexPtr--;
1460
0
            if( prev == -1 )
1461
0
              return ParseSMARTSError(pat,bexpr);
1462
0
            index = (*LexPtr++)-'0';
1463
1464
0
            if( stat->closure[index] == -1 )
1465
0
              { // Ring opening
1466
0
                stat->closord[index] = bexpr;
1467
0
                stat->closure[index] = prev;
1468
0
                pat->atom[prev].nbrs.push_back(-index); // Store the BC idx as a -ve
1469
0
                bexpr = nullptr;
1470
0
              }
1471
0
            else if( stat->closure[index] != prev )
1472
0
              { // Ring closure
1473
0
                if( !bexpr ) {
1474
0
                  if (!stat->closord[index]) {
1475
0
                    bexpr = GenerateDefaultBond();
1476
0
                    FreeBondExpr(stat->closord[index]);
1477
0
                  } else
1478
0
                    bexpr = stat->closord[index];
1479
0
                } else if (stat->closord[index] && !EquivalentBondExpr(bexpr, stat->closord[index]))
1480
0
                  return ParseSMARTSError(pat,bexpr);
1481
1482
0
                CreateBond(pat,bexpr,prev,stat->closure[index]);
1483
0
                pat->atom[prev].nbrs.push_back(stat->closure[index]);
1484
0
                for (unsigned int nbr_idx=0; nbr_idx < pat->atom[stat->closure[index]].nbrs.size(); ++nbr_idx) {
1485
0
                  if (pat->atom[stat->closure[index]].nbrs[nbr_idx] == -index)
1486
0
                    pat->atom[stat->closure[index]].nbrs[nbr_idx] = prev;
1487
0
                }
1488
0
                stat->closure[index] = -1;
1489
0
                bexpr = nullptr;
1490
0
              }
1491
0
            else
1492
0
              return ParseSMARTSError(pat,bexpr);
1493
0
            break;
1494
1495
130
          case('['):
1496
            // shortcut for '[H]' primitive (PR#1463791)
1497
130
            if (*LexPtr == 'H' && *(LexPtr+1) == ']')
1498
0
              {
1499
0
                aexpr = GenerateElement(1);
1500
0
                LexPtr++; // skip the 'H'
1501
0
                pat->hasExplicitH = true;
1502
0
              }
1503
130
            else
1504
130
              aexpr = ParseAtomExpr(0);
1505
130
            vb = (*LexPtr == ':') ? GetVectorBinding():0;
1506
130
            if( aexpr == nullptr || (*LexPtr!=']') ){
1507
0
              if (aexpr != nullptr){
1508
0
                FreeAtomExpr(aexpr);
1509
0
                aexpr = nullptr;
1510
0
              }
1511
1512
0
              return ParseSMARTSError(pat,bexpr);
1513
0
            }
1514
130
            index = CreateAtom(pat,aexpr,part,vb);
1515
130
            if( prev != -1 )
1516
28
              {
1517
28
                if( !bexpr )
1518
10
                  bexpr = GenerateDefaultBond();
1519
28
                CreateBond(pat,bexpr,prev,index);
1520
28
                pat->atom[index].nbrs.push_back(prev);
1521
28
                pat->atom[prev].nbrs.push_back(index);
1522
28
                bexpr = nullptr;
1523
28
              }
1524
130
            if(*(LexPtr-1) == 'H' && *(LexPtr-2) == '@' ) { // i.e. [C@H] or [C@@H]
1525
0
              pat->atom[index].nbrs.push_back(SmartsImplicitRef);
1526
0
            }
1527
130
            prev = index;
1528
130
            LexPtr++;
1529
130
            break;
1530
1531
49
          default:
1532
49
            LexPtr--;
1533
49
            aexpr = ParseSimpleAtomPrimitive();
1534
49
            if( !aexpr )
1535
0
              return ParseSMARTSError(pat,bexpr);
1536
49
            index = CreateAtom(pat,aexpr,part);
1537
49
            if( prev != -1 )
1538
31
              {
1539
31
                if( !bexpr )
1540
9
                  bexpr = GenerateDefaultBond();
1541
31
                CreateBond(pat,bexpr,prev,index);
1542
31
                pat->atom[index].nbrs.push_back(prev);
1543
31
                pat->atom[prev].nbrs.push_back(index);
1544
31
                bexpr = nullptr;
1545
31
              }
1546
49
            prev = index;
1547
284
          }
1548
284
      }
1549
1550
87
    if( (prev==-1) || bexpr )
1551
0
      return ParseSMARTSError(pat,bexpr);
1552
1553
87
    return pat;
1554
87
  }
1555
1556
  static void MarkGrowBonds(Pattern *pat)
1557
120
  {
1558
120
    int i;
1559
120
    OBBitVec bv;
1560
1561
179
    for (i = 0;i < pat->bcount;++i)
1562
59
      {
1563
59
        pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])?
1564
59
          false:true;
1565
1566
59
        bv.SetBitOn(pat->bond[i].src);
1567
59
        bv.SetBitOn(pat->bond[i].dst);
1568
59
      }
1569
120
  }
1570
1571
  static int GetChiralFlag(AtomExpr *expr)
1572
379
  {
1573
379
    int tmp1,tmp2;
1574
1575
379
    switch (expr->type)
1576
379
      {
1577
0
        case AE_CHIRAL:
1578
0
          return expr->leaf.value;
1579
1580
70
        case AE_ANDHI:
1581
81
        case AE_ANDLO:
1582
81
          tmp1 = GetChiralFlag(expr->bin.lft);
1583
81
          tmp2 = GetChiralFlag(expr->bin.rgt);
1584
81
          if (tmp1 == 0) return tmp2;
1585
0
          if (tmp2 == 0) return tmp1;
1586
0
          if (tmp1 == tmp2) return tmp1;
1587
0
          break;
1588
1589
18
        case AE_OR:
1590
18
          tmp1 = GetChiralFlag(expr->bin.lft);
1591
18
          tmp2 = GetChiralFlag(expr->bin.rgt);
1592
18
          if (tmp1 == 0 || tmp2 == 0) return 0;
1593
0
          if (tmp1 == tmp2) return tmp1;
1594
0
          break;
1595
1596
2
        case AE_NOT:
1597
          // Treat [!@] as [@@], and [!@@] as [@]
1598
2
          tmp1 = GetChiralFlag(expr->mon.arg);
1599
2
          if (tmp1 == AL_ANTICLOCKWISE) return AL_CLOCKWISE;
1600
2
          if (tmp1 == AL_CLOCKWISE) return AL_ANTICLOCKWISE;
1601
2
          break;
1602
379
      }
1603
1604
280
    return 0;
1605
379
  }
1606
1607
  Pattern *OBSmartsPattern::ParseSMARTSPart( Pattern *result, int part )
1608
120
  {
1609
120
    ParseState stat;
1610
120
    int i,flag;
1611
1612
12.1k
    for( i=0; i<100; i++ )
1613
12.0k
      stat.closure[i] = -1;
1614
1615
120
    result = SMARTSParser(result,&stat,-1,part);
1616
1617
120
    flag = false;
1618
12.1k
    for( i=0; i<100; i++ )
1619
12.0k
      if( stat.closure[i] != -1 )
1620
0
        {
1621
0
          FreeBondExpr(stat.closord[i]);
1622
0
          flag = true;
1623
0
        }
1624
1625
120
    if( result )
1626
120
      {
1627
120
        if( flag )
1628
0
          return(SMARTSError(result));
1629
120
        else
1630
120
          {
1631
120
            MarkGrowBonds(result);
1632
120
            result->ischiral = false;
1633
299
            for (i = 0;i < result->acount;++i)
1634
179
              {
1635
179
                result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr);
1636
179
                if (result->atom[i].chiral_flag)
1637
0
                  result->ischiral = true;
1638
179
              }
1639
120
            return(result);
1640
120
          }
1641
120
      }
1642
0
    else
1643
0
      return nullptr;
1644
120
  }
1645
1646
1647
  Pattern *OBSmartsPattern::ParseSMARTSPattern( void )
1648
120
  {
1649
120
    Pattern *result;
1650
120
    result = AllocPattern();
1651
1652
120
    while( *LexPtr == '(' )
1653
0
      {
1654
0
        if (!result)
1655
0
          return nullptr; // ensure we don't get a null dereference
1656
1657
0
        LexPtr++;
1658
0
        result = ParseSMARTSPart(result,result->parts);
1659
0
        if( !result )
1660
0
          return nullptr;
1661
0
        result->parts++;
1662
1663
0
        if( *LexPtr != ')' )
1664
0
          return SMARTSError(result);
1665
0
        LexPtr++;
1666
1667
0
        if( !*LexPtr || (*LexPtr==')') )
1668
0
          return result;
1669
1670
0
        if( *LexPtr != '.' )
1671
0
          return SMARTSError(result);
1672
1673
        // Here's where we'd handle fragments
1674
        //        cerr << " conjunction " << LexPtr[0] << endl;
1675
0
        LexPtr++;
1676
0
      }
1677
1678
120
    return ParseSMARTSPart(result,0);
1679
120
  }
1680
1681
  Pattern *OBSmartsPattern::ParseSMARTSString( char *ptr )
1682
87
  {
1683
87
    Pattern *result;
1684
1685
87
    if( !ptr || !*ptr )
1686
0
      return nullptr;
1687
1688
87
    LexPtr = MainPtr = ptr;
1689
87
    result = ParseSMARTSPattern();
1690
87
    if( result && *LexPtr )
1691
0
      return SMARTSError(result);
1692
87
    return result;
1693
87
  }
1694
1695
  Pattern *OBSmartsPattern::ParseSMARTSRecord( char *ptr )
1696
87
  {
1697
87
    char *src;
1698
1699
87
    src = ptr;
1700
956
    while( *src && !isspace(*src) )
1701
869
      src++;
1702
1703
87
    if( isspace(*src) )
1704
0
      {
1705
0
        *src++ = '\0';
1706
0
        while( isspace(*src) )
1707
0
          src++;
1708
0
      }
1709
1710
87
    return ParseSMARTSString(ptr);
1711
87
  }
1712
1713
  //**********************************
1714
  //********Pattern Matching**********
1715
  //**********************************
1716
1717
  bool OBSmartsPattern::Init(const char *buffer)
1718
0
  {
1719
0
      delete[] _buffer;
1720
0
    _buffer = new char[strlen(buffer) + 1];
1721
0
    strcpy(_buffer,buffer);
1722
1723
0
    if (_pat != nullptr)
1724
0
      FreePattern(_pat);
1725
0
    _pat = ParseSMARTSRecord(_buffer);
1726
0
    _str = _buffer;
1727
1728
0
    return _pat != nullptr;
1729
0
  }
1730
1731
  bool OBSmartsPattern::Init(const std::string &s)
1732
87
  {
1733
87
    delete[] _buffer;
1734
87
    _buffer = new char[s.length() + 1];
1735
87
    strcpy(_buffer, s.c_str());
1736
1737
87
    if (_pat != nullptr)
1738
0
      FreePattern(_pat);
1739
87
    _pat = ParseSMARTSRecord(_buffer);
1740
87
    _str = s;
1741
1742
87
    return _pat != nullptr;
1743
87
  }
1744
1745
  OBSmartsPattern::~OBSmartsPattern()
1746
87
  {
1747
87
    if (_pat)
1748
87
      FreePattern(_pat);
1749
87
    delete [] _buffer;
1750
87
  }
1751
1752
  bool OBSmartsPattern::Match(OBMol &mol,bool single)
1753
0
  {
1754
0
  OBSmartsMatcher matcher;
1755
0
  if (_pat == nullptr)
1756
0
      return false;
1757
0
    if(_pat->hasExplicitH) //The SMARTS pattern contains [H]
1758
0
      {
1759
        //Do matching on a copy of mol with explicit hydrogens
1760
0
        OBMol tmol = mol;
1761
0
        tmol.AddHydrogens(false,false);
1762
0
        return(matcher.match(tmol,_pat,_mlist,single));
1763
0
      }
1764
0
    return(matcher.match(mol,_pat,_mlist,single));
1765
0
  }
1766
1767
  bool OBSmartsPattern::HasMatch(OBMol &mol) const
1768
0
  {
1769
    //a convenience function
1770
0
    std::vector<std::vector<int> > dummy;
1771
0
    return Match(mol, dummy, Single);
1772
0
  }
1773
1774
  bool OBSmartsPattern::Match(OBMol &mol, std::vector<std::vector<int> > & mlist,
1775
      MatchType mtype /*=All*/) const
1776
236k
  {
1777
236k
  OBSmartsMatcher matcher;
1778
236k
  mlist.clear();
1779
236k
  if (_pat == nullptr)
1780
0
      return false;
1781
236k
    if(_pat->hasExplicitH) //The SMARTS pattern contains [H]
1782
0
      {
1783
        //Do matching on a copy of mol with explicit hydrogens
1784
0
        OBMol tmol = mol;
1785
0
        tmol.AddHydrogens(false,false);
1786
0
        if(!matcher.match(tmol,_pat,mlist,mtype == Single))
1787
0
          return false;
1788
0
      }
1789
236k
    else if(!matcher.match(mol,_pat,mlist,mtype == Single))
1790
232k
      return false;
1791
1792
3.23k
    if((mtype == AllUnique) && mlist.size() > 1)
1793
0
    {
1794
      //uniquify
1795
0
         bool ok;
1796
0
        OBBitVec bv;
1797
0
        std::vector<OBBitVec> vbv;
1798
0
        std::vector<std::vector<int> > ulist;
1799
0
        std::vector<std::vector<int> >::iterator i;
1800
0
        std::vector<OBBitVec>::iterator j;
1801
1802
0
        for (i = mlist.begin();i != mlist.end();++i)
1803
0
          {
1804
0
            ok = true;
1805
0
            bv.Clear();
1806
0
            bv.FromVecInt(*i);
1807
0
            for (j = vbv.begin();j != vbv.end() && ok;++j)
1808
0
              if ((*j) == bv)
1809
0
                ok = false;
1810
1811
0
            if (ok)
1812
0
              {
1813
0
                ulist.push_back(*i);
1814
0
                vbv.push_back(bv);
1815
0
              }
1816
0
          }
1817
1818
0
        mlist = ulist;
1819
0
    }
1820
3.23k
    return true;
1821
236k
  }
1822
1823
1824
  bool OBSmartsPattern::RestrictedMatch(OBMol &mol,
1825
                                        std::vector<std::pair<int,int> > &pr,
1826
                                        bool single)
1827
0
  {
1828
0
    bool ok;
1829
0
    std::vector<std::vector<int> > mlist;
1830
0
    std::vector<std::vector<int> >::iterator i;
1831
0
    std::vector<std::pair<int,int> >::iterator j;
1832
1833
0
    OBSmartsMatcher matcher;
1834
0
    matcher.match(mol,_pat,mlist);
1835
0
    _mlist.clear();
1836
0
    if (mlist.empty())
1837
0
      return(false);
1838
1839
0
    for (i = mlist.begin();i != mlist.end();++i)
1840
0
      {
1841
0
        ok = true;
1842
0
        for (j = pr.begin();j != pr.end() && ok;++j)
1843
0
          if ((*i)[j->first] != j->second)
1844
0
            ok = false;
1845
1846
0
        if (ok)
1847
0
          _mlist.push_back(*i);
1848
0
        if (single && !_mlist.empty())
1849
0
          return(true);
1850
0
      }
1851
1852
0
    return((_mlist.empty()) ? false:true);
1853
0
  }
1854
1855
  bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres, bool single)
1856
0
  {
1857
0
    bool ok;
1858
0
    std::vector<int>::iterator j;
1859
0
    std::vector<std::vector<int> > mlist;
1860
0
    std::vector<std::vector<int> >::iterator i;
1861
1862
0
    OBSmartsMatcher matcher;
1863
0
    matcher.match(mol,_pat,mlist);
1864
1865
0
    _mlist.clear();
1866
0
    if (mlist.empty())
1867
0
      return(false);
1868
1869
0
    for (i = mlist.begin();i != mlist.end();++i)
1870
0
      {
1871
0
        ok = true;
1872
0
        for (j = i->begin();j != i->end();++j)
1873
0
          if (!vres[*j])
1874
0
            {
1875
0
              ok = false;
1876
0
              break;
1877
0
            }
1878
0
        if (!ok)
1879
0
          continue;
1880
1881
0
        _mlist.push_back(*i);
1882
0
        if (single && !_mlist.empty())
1883
0
          return(true);
1884
0
      }
1885
1886
0
    return((_mlist.empty()) ? false:true);
1887
0
  }
1888
1889
  void OBSmartsMatcher::SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab,
1890
                           const Pattern *pat, OBMol &mol)
1891
0
  {
1892
0
    int i;
1893
1894
0
    ttab.resize(pat->acount);
1895
0
    for (i = 0;i < pat->acount;++i)
1896
0
      ttab[i].resize(mol.NumAtoms()+1);
1897
1898
0
    OBAtom *atom;
1899
0
    std::vector<OBAtom*>::iterator j;
1900
0
    for (i = 0;i < pat->acount;++i)
1901
0
      for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
1902
0
        if (EvalAtomExpr(pat->atom[0].expr,atom))
1903
0
          ttab[i][atom->GetIdx()] = true;
1904
0
  }
1905
1906
  void OBSmartsMatcher::FastSingleMatch(OBMol &mol, const Pattern *pat,
1907
                              std::vector<std::vector<int> > &mlist)
1908
0
  {
1909
0
    OBAtom *atom,*a1,*nbr;
1910
0
    std::vector<OBAtom*>::iterator i;
1911
1912
0
    OBBitVec bv(mol.NumAtoms()+1);
1913
0
    std::vector<int> map;
1914
0
    map.resize(pat->acount);
1915
0
    std::vector<std::vector<OBBond*>::iterator> vi;
1916
0
    std::vector<bool> vif;
1917
1918
0
    if (pat->bcount)
1919
0
      {
1920
0
        vif.resize(pat->bcount);
1921
0
        vi.resize(pat->bcount);
1922
0
      }
1923
1924
0
    int bcount;
1925
0
    for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i))
1926
0
      if (EvalAtomExpr(pat->atom[0].expr,atom))
1927
0
        {
1928
0
          map[0] = atom->GetIdx();
1929
0
          if (pat->bcount)
1930
0
            vif[0] = false;
1931
0
          bv.Clear();
1932
0
          bv.SetBitOn(atom->GetIdx());
1933
1934
0
          for (bcount=0;bcount >=0;)
1935
0
            {
1936
              //***entire pattern matched***
1937
0
              if (bcount == pat->bcount) //save full match here
1938
0
                {
1939
0
                  mlist.push_back(map);
1940
0
                  bcount--;
1941
0
                  return; //found a single match
1942
0
                }
1943
1944
              //***match the next bond***
1945
0
              if (!pat->bond[bcount].grow) //just check bond here
1946
0
                {
1947
0
                  if ( !vif[bcount] )
1948
0
                    {
1949
0
                      OBBond *bond = mol.GetBond(map[pat->bond[bcount].src],
1950
0
                                                 map[pat->bond[bcount].dst]);
1951
0
                      if (bond && EvalBondExpr(pat->bond[bcount].expr,bond))
1952
0
                        {
1953
0
                          vif[bcount++] = true;
1954
0
                          if (bcount < pat->bcount)
1955
0
                            vif[bcount] = false;
1956
0
                        }
1957
0
                      else
1958
0
                        bcount--;
1959
0
                    }
1960
0
                  else //bond must have already been visited - backtrack
1961
0
                    bcount--;
1962
0
                }
1963
0
              else //need to map atom and check bond
1964
0
                {
1965
0
                  a1 = mol.GetAtom(map[pat->bond[bcount].src]);
1966
1967
0
                  if (!vif[bcount]) //figure out which nbr atom we are mapping
1968
0
                    {
1969
0
                      nbr = a1->BeginNbrAtom(vi[bcount]);
1970
0
                    }
1971
0
                  else
1972
0
                    {
1973
0
                      bv.SetBitOff(map[pat->bond[bcount].dst]);
1974
0
                      nbr = a1->NextNbrAtom(vi[bcount]);
1975
0
                    }
1976
1977
0
                  for (;nbr;nbr=a1->NextNbrAtom(vi[bcount]))
1978
0
                    if (!bv[nbr->GetIdx()])
1979
0
                      if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr)
1980
0
                          && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount])))
1981
0
                        {
1982
0
                          bv.SetBitOn(nbr->GetIdx());
1983
0
                          map[pat->bond[bcount].dst] = nbr->GetIdx();
1984
0
                          vif[bcount] = true;
1985
0
                          bcount++;
1986
0
                          if (bcount < pat->bcount)
1987
0
                            vif[bcount] = false;
1988
0
                          break;
1989
0
                        }
1990
1991
0
                  if (!nbr)//no match - time to backtrack
1992
0
                    bcount--;
1993
0
                }
1994
0
            }
1995
0
        }
1996
0
  }
1997
1998
1999
  bool OBSmartsMatcher::match(OBMol &mol, const Pattern *pat,
2000
                    std::vector<std::vector<int> > &mlist,bool single)
2001
293k
  {
2002
293k
    mlist.clear();
2003
293k
    if (!pat || pat->acount == 0)
2004
0
      return(false);//shouldn't ever happen
2005
2006
293k
    if (single && !pat->ischiral) {
2007
      // perform a fast single match (only works for non-chiral SMARTS)
2008
0
      FastSingleMatch(mol,pat,mlist);
2009
293k
    } else {
2010
      // perform normal match (chirality ignored and checked below)
2011
293k
      OBSSMatch ssm(mol,pat);
2012
293k
      ssm.Match(mlist);
2013
293k
    }
2014
2015
293k
    if (pat->ischiral) {
2016
0
      std::vector<std::vector<int> >::iterator m;
2017
0
      std::vector<std::vector<int> > tmpmlist;
2018
2019
0
      tmpmlist.clear();
2020
      // iterate over the atom mappings
2021
0
      for (m = mlist.begin();m != mlist.end();++m) {
2022
2023
0
        bool allStereoCentersMatch = true;
2024
2025
        // for each pattern atom
2026
0
        for (int j = 0; j < pat->acount; ++j) {
2027
          // skip non-chiral pattern atoms
2028
0
          if (!pat->atom[j].chiral_flag)
2029
0
            continue;
2030
          // ignore @? in smarts, parse like any other smarts
2031
0
          if (pat->atom[j].chiral_flag == AL_UNSPECIFIED)
2032
0
            continue;
2033
2034
          // use the mapping the get the chiral atom in the molecule being queried
2035
0
          OBAtom *center = mol.GetAtom((*m)[j]);
2036
2037
          // get the OBTetrahedralStereo::Config from the molecule
2038
0
          OBStereoFacade stereo(&mol);
2039
0
          OBTetrahedralStereo *ts = stereo.GetTetrahedralStereo(center->GetId());
2040
0
          if (!ts || !ts->GetConfig().specified) {
2041
            // no stereochemistry specified in molecule for the atom
2042
            // corresponding to the chiral pattern atom using the current
2043
            // mapping --> no match
2044
0
            allStereoCentersMatch = false;
2045
0
            break;
2046
0
          }
2047
2048
0
          std::vector<int> nbrs = pat->atom[j].nbrs;
2049
2050
0
          if (nbrs.size() != 4) { // 3 nbrs currently not supported. Other values are errors.
2051
            //stringstream ss;
2052
            //ss << "Ignoring stereochemistry. There are " << nbrs.size() << " connections to this atom instead of 4. Title: " << mol.GetTitle();
2053
            //obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
2054
0
            continue;
2055
0
          }
2056
2057
          // construct a OBTetrahedralStereo::Config using the smarts pattern
2058
0
          OBTetrahedralStereo::Config smartsConfig;
2059
0
          smartsConfig.center = center->GetId();
2060
0
          if (nbrs.at(0) == SmartsImplicitRef)
2061
0
            smartsConfig.from = OBStereo::ImplicitRef;
2062
0
          else
2063
0
            smartsConfig.from = mol.GetAtom( (*m)[nbrs.at(0)] )->GetId();
2064
0
          OBStereo::Ref firstref;
2065
0
          if (nbrs.at(1) == SmartsImplicitRef)
2066
0
            firstref = OBStereo::ImplicitRef;
2067
0
          else
2068
0
            firstref = mol.GetAtom( (*m)[nbrs.at(1)] )->GetId();
2069
0
          OBAtom *ra2 = mol.GetAtom( (*m)[nbrs.at(2)] );
2070
0
          OBAtom *ra3 = mol.GetAtom( (*m)[nbrs.at(3)] );
2071
0
          smartsConfig.refs = OBStereo::MakeRefs(firstref, ra2->GetId(), ra3->GetId());
2072
2073
0
          smartsConfig.view = OBStereo::ViewFrom;
2074
0
          switch (pat->atom[j].chiral_flag) {
2075
0
            case AL_CLOCKWISE:
2076
0
              smartsConfig.winding = OBStereo::Clockwise;
2077
0
              break;
2078
0
            case AL_ANTICLOCKWISE:
2079
0
              smartsConfig.winding = OBStereo::AntiClockwise;
2080
0
              break;
2081
0
            default:
2082
0
              smartsConfig.specified = false;
2083
0
          }
2084
2085
          // cout << "smarts config = " << smartsConfig << endl;
2086
          // cout << "molecule config = " << ts->GetConfig() << endl;
2087
          // cout << "match = " << (ts->GetConfig() == smartsConfig) << endl;
2088
2089
          // and save the match if the two configurations are the same
2090
0
          if (ts->GetConfig() != smartsConfig)
2091
0
            allStereoCentersMatch = false;
2092
2093
          // don't waste time checking more stereocenters using this mapping if one didn't match
2094
0
          if (!allStereoCentersMatch)
2095
0
            break;
2096
0
        }
2097
2098
        // if all the atoms in the molecule match the stereochemistry specified
2099
        // in the smarts pattern, save this mapping as a match
2100
0
        if (allStereoCentersMatch)
2101
0
          tmpmlist.push_back(*m);
2102
0
      }
2103
2104
0
      mlist = tmpmlist;
2105
0
    }
2106
2107
293k
    return(!mlist.empty());
2108
293k
  }
2109
2110
  bool OBSmartsMatcher::EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2111
46.7M
  {
2112
46.7M
    for (;;)
2113
51.8M
      switch (expr->type)
2114
51.8M
        {
2115
1.24k
        case AE_TRUE:
2116
1.24k
          return true;
2117
0
        case AE_FALSE:
2118
0
          return false;
2119
142
        case AE_AROMATIC:
2120
142
          return atom->IsAromatic();
2121
0
        case AE_ALIPHATIC:
2122
0
          return !atom->IsAromatic();
2123
0
        case AE_CYCLIC:
2124
0
          return atom->IsInRing();
2125
0
        case AE_ACYCLIC:
2126
0
          return !atom->IsInRing();
2127
2128
0
        case AE_MASS:
2129
0
          return expr->leaf.value == atom->GetIsotope();
2130
16.6M
        case AE_ELEM:
2131
16.6M
          return expr->leaf.value == (int)atom->GetAtomicNum();
2132
2.22M
        case AE_AROMELEM:
2133
2.22M
          return expr->leaf.value == (int)atom->GetAtomicNum() &&
2134
60.3k
                 atom->IsAromatic();
2135
10.0M
        case AE_ALIPHELEM:
2136
10.0M
          return expr->leaf.value == (int)atom->GetAtomicNum() &&
2137
263k
                 !atom->IsAromatic();
2138
0
        case AE_HCOUNT:
2139
0
          return expr->leaf.value == ((int)atom->ExplicitHydrogenCount() +
2140
0
                                      (int)atom->GetImplicitHCount());
2141
0
        case AE_CHARGE:
2142
0
          return expr->leaf.value == atom->GetFormalCharge();
2143
0
        case AE_CONNECT:
2144
0
          return expr->leaf.value == (int)atom->GetTotalDegree();
2145
1.76M
        case AE_DEGREE:
2146
1.76M
          return expr->leaf.value == (int)atom->GetExplicitDegree();
2147
0
        case AE_IMPLICIT:
2148
0
          return expr->leaf.value == (int)atom->GetImplicitHCount();
2149
0
        case AE_RINGS:
2150
0
          return expr->leaf.value == (int)atom->MemberOfRingCount();
2151
0
        case AE_SIZE:
2152
0
          return atom->IsInRingSize(expr->leaf.value);
2153
4.99k
        case AE_VALENCE:
2154
4.99k
          return expr->leaf.value == (int)atom->GetTotalValence();
2155
0
        case AE_CHIRAL:
2156
          // always return true (i.e. accept the match) and check later
2157
0
          return true;
2158
0
        case AE_HYB:
2159
0
          return expr->leaf.value == (int)atom->GetHyb();
2160
0
        case AE_RINGCONNECT:
2161
0
          return expr->leaf.value == (int)atom->CountRingBonds();
2162
2163
2.27k
        case AE_NOT:
2164
2.27k
          return !EvalAtomExpr(expr->mon.arg,atom);
2165
7.87M
        case AE_ANDHI: /* Same as AE_ANDLO */
2166
10.0M
        case AE_ANDLO:
2167
10.0M
          if (!EvalAtomExpr(expr->bin.lft,atom))
2168
9.95M
            return false;
2169
130k
          expr = expr->bin.rgt;
2170
130k
          break;
2171
4.98M
        case AE_OR:
2172
4.98M
          if (EvalAtomExpr(expr->bin.lft,atom))
2173
215
            return true;
2174
4.98M
          expr = expr->bin.rgt;
2175
4.98M
          break;
2176
2177
6.09M
        case AE_RECUR:
2178
6.09M
          {
2179
            //see if pattern has been matched
2180
6.09M
            std::vector<std::pair<const Pattern*,std::vector<bool> > >::iterator i;
2181
8.30M
            for (i = RSCACHE.begin();i != RSCACHE.end();++i)
2182
8.24M
              if (i->first == (Pattern*)expr->recur.recur)
2183
6.03M
                return(i->second[atom->GetIdx()]);
2184
2185
            //perceive and match pattern
2186
57.3k
            std::vector<std::vector<int> >::iterator j;
2187
57.3k
            std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1);
2188
57.3k
            std::vector<std::vector<int> > mlist;
2189
57.3k
            if (match( *((OBMol *) atom->GetParent()),
2190
57.3k
                       (Pattern*)expr->recur.recur,mlist))
2191
240
              for (j = mlist.begin();j != mlist.end();++j)
2192
141
                vb[(*j)[0]] = true;
2193
2194
57.3k
            RSCACHE.push_back(std::pair<const Pattern*,
2195
57.3k
                              std::vector<bool> > ((const Pattern*)expr->recur.recur,
2196
57.3k
                                                   vb));
2197
2198
57.3k
            return(vb[atom->GetIdx()]);
2199
6.09M
          }
2200
2201
0
        default:
2202
0
          return false;
2203
51.8M
        }
2204
46.7M
  }
2205
2206
  bool OBSmartsMatcher::EvalBondExpr(BondExpr *expr,OBBond *bond)
2207
1.37k
  {
2208
1.37k
    for (;;)
2209
1.40k
      switch( expr->type )
2210
1.40k
        {
2211
0
        case BE_ANDHI:
2212
0
        case BE_ANDLO:
2213
0
          if (!EvalBondExpr(expr->bin.lft,bond))
2214
0
            return false;
2215
0
          expr = expr->bin.rgt;
2216
0
          break;
2217
2218
46
        case BE_OR:
2219
46
          if (EvalBondExpr(expr->bin.lft,bond))
2220
22
            return true;
2221
24
          expr = expr->bin.rgt;
2222
24
          break;
2223
2224
0
        case BE_NOT:
2225
0
          return !EvalBondExpr(expr->mon.arg,bond);
2226
2227
0
        case BE_ANY:
2228
0
          return true;
2229
87
        case BE_DEFAULT:
2230
87
          return bond->GetBondOrder()==1 || bond->IsAromatic();
2231
0
        case BE_SINGLE:
2232
0
          return bond->GetBondOrder()==1 && !bond->IsAromatic();
2233
810
        case BE_DOUBLE:
2234
810
          return bond->GetBondOrder()==2 && !bond->IsAromatic();
2235
439
        case BE_TRIPLE:
2236
439
          return bond->GetBondOrder() == 3;
2237
0
        case BE_QUAD:
2238
0
          return bond->GetBondOrder() == 4;
2239
18
        case BE_AROM:
2240
18
          return bond->IsAromatic();
2241
0
        case BE_RING:
2242
0
          return bond->IsInRing();
2243
        //case BE_UP:
2244
        //  return bond->IsUp();
2245
        //case BE_DOWN:
2246
        //  return bond->IsDown();
2247
        //case BE_UPUNSPEC: // up or unspecified (i.e., not down)
2248
        //  return !bond->IsDown();
2249
        //case BE_DOWNUNSPEC: // down or unspecified (i.e., not up)
2250
        //  return !bond->IsUp();
2251
0
        default:
2252
0
          return false;
2253
1.40k
        }
2254
1.37k
  }
2255
2256
  std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList()
2257
0
  {
2258
0
    if (_mlist.empty() || _mlist.size() == 1)
2259
0
      return(_mlist);
2260
2261
0
    bool ok;
2262
0
    OBBitVec bv;
2263
0
    std::vector<OBBitVec> vbv;
2264
0
    std::vector<std::vector<int> > mlist;
2265
0
    std::vector<std::vector<int> >::iterator i;
2266
0
    std::vector<OBBitVec>::iterator j;
2267
2268
0
    for (i = _mlist.begin();i != _mlist.end();++i)
2269
0
      {
2270
0
        ok = true;
2271
0
        bv.Clear();
2272
0
        bv.FromVecInt(*i);
2273
0
        for (j = vbv.begin();j != vbv.end() && ok;++j)
2274
0
          if ((*j) == bv)
2275
0
            ok = false;
2276
2277
0
        if (ok)
2278
0
          {
2279
0
            mlist.push_back(*i);
2280
0
            vbv.push_back(bv);
2281
0
          }
2282
0
      }
2283
2284
0
    _mlist = mlist;
2285
0
    return(_mlist);
2286
0
  }
2287
2288
  void OBSmartsPattern::WriteMapList(ostream &ofs)
2289
0
  {
2290
0
    std::vector<std::vector<int> >::iterator i;
2291
0
    std::vector<int>::iterator j;
2292
2293
0
    for ( i = _mlist.begin() ; i != _mlist.end() ; ++i )
2294
0
      {
2295
0
        for (j = (*i).begin();j != (*i).end();++j)
2296
0
          ofs << *j << ' ' << ends;
2297
0
        ofs << endl;
2298
0
      }
2299
0
  }
2300
2301
  //*******************************************************************
2302
  //  The OBSSMatch class performs exhaustive matching using recursion
2303
  //  Explicit stack handling is used to find just a single match in
2304
  //  match()
2305
  //*******************************************************************
2306
2307
  OBSSMatch::OBSSMatch(OBMol &mol, const Pattern *pat)
2308
293k
  {
2309
293k
    _mol = &mol;
2310
293k
    _pat = pat;
2311
293k
    _map.resize(pat->acount);
2312
2313
293k
    if (!mol.Empty())
2314
293k
      {
2315
293k
        _uatoms = new bool [mol.NumAtoms()+1];
2316
293k
        memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1));
2317
293k
      }
2318
0
    else
2319
0
      _uatoms = nullptr;
2320
293k
  }
2321
2322
  OBSSMatch::~OBSSMatch()
2323
293k
  {
2324
293k
    delete [] _uatoms;
2325
293k
  }
2326
2327
  void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx)
2328
515k
  {
2329
515k
    OBSmartsMatcher matcher;
2330
515k
    if (bidx == -1)
2331
293k
      {
2332
293k
        OBAtom *atom;
2333
293k
        std::vector<OBAtom*>::iterator i;
2334
31.9M
        for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i))
2335
31.6M
          if (matcher.EvalAtomExpr(_pat->atom[0].expr,atom))
2336
221k
            {
2337
221k
              _map[0] = atom->GetIdx();
2338
221k
              _uatoms[atom->GetIdx()] = true;
2339
221k
              Match(mlist,0);
2340
221k
              _map[0] = 0;
2341
221k
              _uatoms[atom->GetIdx()] = false;
2342
221k
            }
2343
293k
        return;
2344
293k
      }
2345
2346
221k
    if (bidx == _pat->bcount) //save full match here
2347
74.1k
      {
2348
74.1k
        mlist.push_back(_map);
2349
74.1k
        return;
2350
74.1k
      }
2351
2352
147k
    if (_pat->bond[bidx].grow) //match the next bond
2353
147k
      {
2354
147k
        int src = _pat->bond[bidx].src;
2355
147k
        int dst = _pat->bond[bidx].dst;
2356
2357
147k
        if (_map[src] <= 0 || _map[src] > (signed)_mol->NumAtoms())
2358
0
          return;
2359
2360
147k
        AtomExpr *aexpr = _pat->atom[dst].expr;
2361
147k
        BondExpr *bexpr = _pat->bond[bidx].expr;
2362
147k
        OBAtom *atom,*nbr;
2363
147k
        std::vector<OBBond*>::iterator i;
2364
2365
147k
        atom = _mol->GetAtom(_map[src]);
2366
149k
        for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
2367
1.79k
          if (!_uatoms[nbr->GetIdx()] && matcher.EvalAtomExpr(aexpr,nbr) &&
2368
1.33k
              matcher.EvalBondExpr(bexpr,((OBBond*) *i)))
2369
206
            {
2370
206
              _map[dst] = nbr->GetIdx();
2371
206
              _uatoms[nbr->GetIdx()] = true;
2372
206
              Match(mlist,bidx+1);
2373
206
              _uatoms[nbr->GetIdx()] = false;
2374
206
              _map[dst] = 0;
2375
206
            }
2376
147k
      }
2377
0
    else //just check bond here
2378
0
      {
2379
0
        OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src],
2380
0
                                     _map[_pat->bond[bidx].dst]);
2381
0
        if (bond && matcher.EvalBondExpr(_pat->bond[bidx].expr,bond))
2382
0
          Match(mlist,bidx+1);
2383
0
      }
2384
147k
  }
2385
2386
  static int GetExprOrder(BondExpr *expr)
2387
0
  {
2388
0
    int tmp1,tmp2;
2389
2390
0
    switch( expr->type )
2391
0
      {
2392
0
      case BE_SINGLE:  return 1;
2393
0
      case BE_DOUBLE:  return 2;
2394
0
      case BE_TRIPLE:  return 3;
2395
0
      case BE_QUAD:    return 4;
2396
0
      case BE_AROM:    return 5;
2397
2398
0
      case BE_UP:
2399
0
      case BE_DOWN:
2400
0
      case BE_UPUNSPEC:
2401
0
      case BE_DOWNUNSPEC:
2402
0
        return 1;
2403
      
2404
0
      case BE_ANDHI:
2405
0
      case BE_ANDLO:
2406
0
        tmp1 = GetExprOrder(expr->bin.lft);
2407
0
        tmp2 = GetExprOrder(expr->bin.rgt);
2408
0
        if (tmp1 == 0) return tmp2;
2409
0
        if (tmp2 == 0) return tmp1;
2410
0
        if (tmp1 == tmp2) return tmp1;
2411
0
        break;
2412
2413
0
      case BE_OR:
2414
0
        tmp1 = GetExprOrder(expr->bin.lft);
2415
0
        if (tmp1 == 0) return 0;
2416
0
        tmp2 = GetExprOrder(expr->bin.rgt);
2417
0
        if (tmp2 == 0) return 0;
2418
0
        if (tmp1 == tmp2) return tmp1;
2419
0
        break;
2420
0
      }
2421
2422
0
    return 0;
2423
0
  }
2424
2425
  static int GetExprCharge(AtomExpr *expr)
2426
0
  {
2427
0
    int tmp1,tmp2;
2428
2429
0
    switch( expr->type )
2430
0
      {
2431
0
      case AE_CHARGE:
2432
0
        return expr->leaf.value;
2433
2434
0
      case AE_ANDHI:
2435
0
      case AE_ANDLO:
2436
0
        tmp1 = GetExprCharge(expr->bin.lft);
2437
0
        tmp2 = GetExprCharge(expr->bin.rgt);
2438
0
        if (tmp1 == 0) return tmp2;
2439
0
        if (tmp2 == 0) return tmp1;
2440
0
        if (tmp1 == tmp2) return tmp1;
2441
0
        break;
2442
2443
0
      case AE_OR:
2444
0
        tmp1 = GetExprCharge(expr->bin.lft);
2445
0
        if (tmp1 == 0) return 0;
2446
0
        tmp2 = GetExprCharge(expr->bin.rgt);
2447
0
        if (tmp2 == 0) return 0;
2448
0
        if (tmp1 == tmp2) return tmp1;
2449
0
        break;
2450
0
      }
2451
2452
0
    return 0;
2453
0
  }
2454
2455
  int OBSmartsPattern::GetCharge(int idx)
2456
0
  {
2457
0
    return GetExprCharge(_pat->atom[idx].expr);
2458
0
  }
2459
2460
  static int GetExprAtomicNum(AtomExpr *expr)
2461
0
  {
2462
0
    int tmp1,tmp2;
2463
2464
0
    switch( expr->type )
2465
0
      {
2466
0
      case AE_ELEM:
2467
0
      case AE_AROMELEM:
2468
0
      case AE_ALIPHELEM:
2469
0
        return expr->leaf.value;
2470
2471
0
      case AE_ANDHI:
2472
0
      case AE_ANDLO:
2473
0
        tmp1 = GetExprAtomicNum(expr->bin.lft);
2474
0
        tmp2 = GetExprAtomicNum(expr->bin.rgt);
2475
0
        if (tmp1 == 0) return tmp2;
2476
0
        if (tmp2 == 0) return tmp1;
2477
0
        if (tmp1 == tmp2) return tmp1;
2478
0
        break;
2479
2480
0
      case AE_OR:
2481
0
        tmp1 = GetExprAtomicNum(expr->bin.lft);
2482
0
        if (tmp1 == 0) return 0;
2483
0
        tmp2 = GetExprAtomicNum(expr->bin.rgt);
2484
0
        if (tmp2 == 0) return 0;
2485
0
        if (tmp1 == tmp2) return tmp1;
2486
0
        break;
2487
0
      }
2488
2489
0
    return 0;
2490
0
  }
2491
2492
  int OBSmartsPattern::GetAtomicNum(int idx)
2493
0
  {
2494
0
    return GetExprAtomicNum(_pat->atom[idx].expr);
2495
0
  }
2496
2497
  void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx)
2498
0
  {
2499
0
    src = _pat->bond[idx].src;
2500
0
    dst = _pat->bond[idx].dst;
2501
0
    ord = GetExprOrder(_pat->bond[idx].expr);
2502
0
  }
2503
2504
  void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex)
2505
0
  {
2506
0
    size_t j,pos;
2507
0
    std::string token,repstr;
2508
0
    std::vector<std::pair<std::string,std::string> >::iterator i;
2509
2510
0
    for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos))
2511
      //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos))
2512
0
      {
2513
0
        pos++;
2514
0
        for (j = pos;j < s.size();++j)
2515
0
          if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_')
2516
0
            break;
2517
0
        if (pos == j)
2518
0
          continue;
2519
2520
0
        token = s.substr(pos,j-pos);
2521
0
        for (i = vlex.begin();i != vlex.end();++i)
2522
0
          if (token == i->first)
2523
0
            {
2524
0
              repstr = "(" + i->second + ")";
2525
0
              s.replace(pos,j-pos,repstr);
2526
0
              j = 0;
2527
0
            }
2528
0
        pos = j;
2529
0
      }
2530
0
  }
2531
2532
} // end namespace OpenBabel
2533
2534
//! \file parsmart.cpp
2535
//! \brief Implementation of Daylight SMARTS parser.