Coverage Report

Created: 2026-04-28 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/chains.cpp
Line
Count
Source
1
/**********************************************************************
2
chains.cpp - Parse for macromolecule chains and residues.
3
4
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5
(original author, Roger Sayle, version 1.6, March 1998)
6
(modified by Joe Corkery, OpenEye Scientific Software, March 2001)
7
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
8
9
This file is part of the Open Babel project.
10
For more information, see <http://openbabel.org/>
11
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation version 2 of the License.
15
16
This program is distributed in the hope that it will be useful,
17
but WITHOUT ANY WARRANTY; without even the implied warranty of
18
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
GNU General Public License for more details.
20
***********************************************************************/
21
22
//////////////////////////////////////////////////////////////////////////////
23
// File Includes
24
//////////////////////////////////////////////////////////////////////////////
25
#include <openbabel/babelconfig.h>
26
27
#include <cstdlib>
28
#include <cstring>
29
#include <cstdio>
30
#include <cctype>
31
#include <map>
32
33
#include <openbabel/mol.h>
34
#include <openbabel/atom.h>
35
#include <openbabel/bond.h>
36
#include <openbabel/chains.h>
37
#include <openbabel/oberror.h>
38
#include <openbabel/obiter.h>
39
#include <openbabel/elements.h>
40
41
using namespace std;
42
43
//////////////////////////////////////////////////////////////////////////////
44
// Preprocessor Definitions
45
//////////////////////////////////////////////////////////////////////////////
46
47
//! The first available index for actual residues
48
//! 0, 1, 2 reserved for UNK, HOH, UNL
49
6
#define RESIDMIN       4
50
//! The maximum number of residue IDs for this code
51
#define RESIDMAX       64
52
53
//! An index of the residue names perceived during a run
54
//! 0, 1, and 2 reserved for UNK, HOH, LIG
55
static char ChainsResName[RESIDMAX][4] = {
56
  /*0*/ "UNK",
57
  /*1*/ "HOH",
58
  /*2*/ "UNL",
59
  /*3*/ "ACE"
60
};
61
62
5.66k
#define ATOMMINAMINO   4
63
#define ATOMMINNUCLEIC 50
64
0
#define MAXPEPTIDE     11
65
0
#define MAXNUCLEIC     15
66
//! The number of amino acids recognized by this code
67
//! Currently: ILE, VAL, ALA, ASN, ASP, ARG, CYS, GLN, GLU
68
//!  GLY, HIS, HYP, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR
69
132
#define AMINOMAX       21
70
//! The number of nucleic acids recognized by this code
71
//! Currently A, C, T, G, U, I
72
42
#define NUCLEOMAX      6
73
#define STACKSIZE      20
74
75
0
#define AI_N           0
76
0
#define AI_CA          1
77
0
#define AI_C           2
78
0
#define AI_O           3
79
0
#define AI_OXT         37
80
81
0
#define AI_P           38
82
0
#define AI_O1P         39
83
0
#define AI_O2P         40
84
0
#define AI_O5          41
85
0
#define AI_C5          42
86
0
#define AI_C4          43
87
0
#define AI_O4          44
88
0
#define AI_C3          45
89
0
#define AI_O3          46
90
0
#define AI_C2          47
91
0
#define AI_O2          48
92
0
#define AI_C1          49
93
94
0
#define BitN           0x0001
95
0
#define BitNTer        0x0002
96
#define BitNPro        0x0004
97
0
#define BitNPT         0x0008
98
#define BitCA          0x0010
99
#define BitCAGly       0x0020
100
0
#define BitC           0x0100
101
#define BitCTer        0x0200
102
#define BitCOXT        0x0400
103
#define BitO           0x1000
104
#define BitOXT         0x2000
105
106
0
#define BitNAll        0x000F
107
0
#define BitCAAll       0x0030
108
0
#define BitCAll        0x0700
109
0
#define BitOAll        0x3000
110
111
0
#define BitP           0x0001
112
0
#define BitPTer        0x0002
113
0
#define BitOP          0x0004
114
0
#define BitO5          0x0008
115
0
#define BitO5Ter       0x0010
116
0
#define BitC5          0x0020
117
0
#define BitC4          0x0040
118
0
#define BitO4          0x0080
119
0
#define BitC3          0x0100
120
#define BitO3          0x0200
121
#define BitO3Ter       0x0400
122
#define BitC2RNA       0x0800
123
#define BitC2DNA       0x1000
124
0
#define BitO2          0x2000
125
0
#define BitC1          0x4000
126
127
#define BitPAll        0x0003
128
#define Bit05All       0x0018
129
0
#define BitO3All       0x0600
130
0
#define BitC2All       0x1800
131
132
834
#define BC_ASSIGN      0x01
133
4.11k
#define BC_COUNT       0x02
134
4.14k
#define BC_ELEM        0x03
135
3.55k
#define BC_EVAL        0x04
136
2.04k
#define BC_IDENT       0x05
137
4.06k
#define BC_LOCAL       0x06
138
139
876
#define BF_SINGLE      0x01
140
102
#define BF_DOUBLE      0x02
141
0
#define BF_TRIPLE      0x04
142
162
#define BF_AROMATIC    0x08
143
144
namespace OpenBabel
145
{
146
  extern OBMessageHandler obErrorLog;
147
148
149
  // Initialize the global chainsparser - declared in chains.h
150
  OBChainsParser chainsparser;
151
152
  //////////////////////////////////////////////////////////////////////////////
153
  // Structure / Type Definitions
154
  //////////////////////////////////////////////////////////////////////////////
155
156
  //! Structure template for atomic patterns in residues for OBChainsParser
157
  typedef struct Template
158
  {
159
    int flag;        //!< binary flag representing this atom type
160
    short elem;      //!< atomic number of this element
161
    short count;     //!< expected valence for this atom type
162
    int n1;          //!< mask 1 used by ConstrainBackbone() and MatchConstraint()
163
    int n2;          //!< mask 2 used by ConstrainBackbone() and MatchConstraint()
164
    int n3;          //!< mask 3 used by ConstrainBackbone() and MatchConstraint()
165
    int n4;          //!< mask 4 used by ConstrainBackbone() and MatchConstraint()
166
  }    Template;
167
168
  /**
169
   * Generic template for peptide residue backbone. \n
170
   * col 1: bitmask \n
171
   * col 2: element number \n
172
   * col 3: neighbour count \n
173
   * col 4-7: 1-4 bitmasks for neighbour atoms (-6 means carbon)
174
   */
175
  static Template Peptide[MAXPEPTIDE] = {
176
    /* N     */    {  0x0001, 7, 2, 0x0030, 0x0100,      0, 0 }, //!< N
177
    /* NTer  */    {  0x0002, 7, 1, 0x0030,      0,      0, 0 }, //!< NTre
178
    /* NPro  */    {  0x0004, 7, 3, 0x0030, 0x0100,     -6, 0 }, //!< NPro
179
    /* NPT   */    {  0x0008, 7, 2, 0x0030,     -6,      0, 0 }, //!< NPT
180
    /* CA    */    {  0x0010, 6, 3, 0x000F, 0x0700,     -6, 0 }, //!< CA
181
    /* CAGly */    {  0x0020, 6, 2, 0x0003, 0x0700,      0, 0 }, //!< CAGly
182
    /* C     */    {  0x0100, 6, 3, 0x0030, 0x1000, 0x0005, 0 }, //!< C
183
    /* CTer  */    {  0x0200, 6, 2, 0x0030, 0x1000,      0, 0 }, //!< CTer
184
    /* COXT  */    {  0x0400, 6, 3, 0x0030, 0x1000, 0x2000, 0 }, //!< COXT
185
    /* O     */    {  0x1000, 8, 1, 0x0700,      0,      0, 0 }, //!< O
186
    /* OXT   */    {  0x2000, 8, 1, 0x0400,      0,      0, 0 }  //!< OXT
187
  };
188
189
  //! Generic template for peptide nucleotide backbone
190
  static Template Nucleotide[MAXNUCLEIC] = {
191
    /* P     */    {  0x0001, 15, 4, 0x0004, 0x0004, 0x0008, 0x0200 },
192
    /* PTer  */    {  0x0002, 15, 3, 0x0004, 0x0004, 0x0008,      0 },
193
    /* OP    */    {  0x0004,  8, 1, 0x0003,      0,      0,      0 },
194
    /* O5    */    {  0x0008,  8, 2, 0x0020, 0x0003,      0,      0 },
195
    /* O5Ter */    {  0x0010,  8, 1, 0x0020,      0,      0,      0 },
196
    /* C5    */    {  0x0020,  6, 2, 0x0018, 0x0040,      0,      0 },
197
    /* C4    */    {  0x0040,  6, 3, 0x0020, 0x0080, 0x0100,      0 },
198
    /* O4    */    {  0x0080,  8, 2, 0x0040, 0x4000,      0,      0 },
199
    /* C3    */    {  0x0100,  6, 3, 0x0040, 0x0600, 0x1800,      0 },
200
    /* O3    */    {  0x0200,  8, 2, 0x0100, 0x0001,      0,      0 },
201
    /* O3Ter */    {  0x0400,  8, 1, 0x0100,      0,      0,      0 },
202
    /* C2RNA */    {  0x0800,  6, 3, 0x0100, 0x4000, 0x2000,      0 },
203
    /* C2DNA */    {  0x1000,  6, 2, 0x0100, 0x4000,      0,      0 },
204
    /* O2    */    {  0x2000,  8, 1, 0x0800,      0,      0,      0 },
205
    /* C1    */    {  0x4000,  6, 3, 0x0080, 0x1800,     -7,      0 }
206
  };
207
208
209
  //////////////////////////////////////////////////////////////////////////////
210
  // Global Variables / Tables
211
  //////////////////////////////////////////////////////////////////////////////
212
213
  //! The number of PDB atom type names recognized by this code
214
#define ATOMMAX        68
215
216
  //! PDB atom types (i.e., columns 13-16 of a PDB file)
217
  //!  index numbers from this array are used in the pseudo-SMILES format
218
  //!  for side-chains in the AminoAcids[] & Nucleotides[] global arrays below
219
  static char ChainsAtomName[ATOMMAX][4] = {
220
    /*  0 */  { ' ', 'N', ' ', ' ' },
221
    /*  1 */  { ' ', 'C', 'A', ' ' },
222
    /*  2 */  { ' ', 'C', ' ', ' ' },
223
    /*  3 */  { ' ', 'O', ' ', ' ' },
224
    /*  4 */  { ' ', 'C', 'B', ' ' },
225
    /*  5 */  { ' ', 'S', 'G', ' ' },
226
    /*  6 */  { ' ', 'O', 'G', ' ' },
227
    /*  7 */  { ' ', 'C', 'G', ' ' },
228
    /*  8 */  { ' ', 'O', 'G', '1' },
229
    /*  9 */  { ' ', 'C', 'G', '1' },
230
    /* 10 */  { ' ', 'C', 'G', '2' },
231
    /* 11 */  { ' ', 'C', 'D', ' ' },
232
    /* 12 */  { ' ', 'O', 'D', ' ' },
233
    /* 13 */  { ' ', 'S', 'D', ' ' },
234
    /* 14 */  { ' ', 'C', 'D', '1' },
235
    /* 15 */  { ' ', 'O', 'D', '1' },
236
    /* 16 */  { ' ', 'N', 'D', '1' },
237
    /* 17 */  { ' ', 'C', 'D', '2' },
238
    /* 18 */  { ' ', 'O', 'D', '2' },
239
    /* 19 */  { ' ', 'N', 'D', '2' },
240
    /* 20 */  { ' ', 'C', 'E', ' ' },
241
    /* 21 */  { ' ', 'N', 'E', ' ' },
242
    /* 22 */  { ' ', 'C', 'E', '1' },
243
    /* 23 */  { ' ', 'O', 'E', '1' },
244
    /* 24 */  { ' ', 'N', 'E', '1' },
245
    /* 25 */  { ' ', 'C', 'E', '2' },
246
    /* 26 */  { ' ', 'O', 'E', '2' },
247
    /* 27 */  { ' ', 'N', 'E', '2' },
248
    /* 28 */  { ' ', 'C', 'E', '3' },
249
    /* 29 */  { ' ', 'C', 'Z', ' ' },
250
    /* 30 */  { ' ', 'N', 'Z', ' ' },
251
    /* 31 */  { ' ', 'C', 'Z', '2' },
252
    /* 32 */  { ' ', 'C', 'Z', '3' },
253
    /* 33 */  { ' ', 'O', 'H', ' ' },
254
    /* 34 */  { ' ', 'N', 'H', '1' },
255
    /* 35 */  { ' ', 'N', 'H', '2' },
256
    /* 36 */  { ' ', 'C', 'H', '2' },
257
    /* 37 */  { ' ', 'O', 'X', 'T' },
258
    /* 38 */  { ' ', 'P', ' ', ' ' },
259
    /* 39 */  { ' ', 'O', '1', 'P' },
260
    /* 40 */  { ' ', 'O', '2', 'P' },
261
    /* 41 */  { ' ', 'O', '5', '*' },
262
    /* 42 */  { ' ', 'C', '5', '*' },
263
    /* 43 */  { ' ', 'C', '4', '*' },
264
    /* 44 */  { ' ', 'O', '4', '*' },
265
    /* 45 */  { ' ', 'C', '3', '*' },
266
    /* 46 */  { ' ', 'O', '3', '*' },
267
    /* 47 */  { ' ', 'C', '2', '*' },
268
    /* 48 */  { ' ', 'O', '2', '*' },
269
    /* 49 */  { ' ', 'C', '1', '*' },
270
    /* 50 */  { ' ', 'N', '9', ' ' },
271
    /* 51 */  { ' ', 'C', '8', ' ' },
272
    /* 52 */  { ' ', 'N', '7', ' ' },
273
    /* 53 */  { ' ', 'C', '5', ' ' },
274
    /* 54 */  { ' ', 'C', '6', ' ' },
275
    /* 55 */  { ' ', 'O', '6', ' ' },
276
    /* 56 */  { ' ', 'N', '6', ' ' },
277
    /* 57 */  { ' ', 'N', '1', ' ' },
278
    /* 58 */  { ' ', 'C', '2', ' ' },
279
    /* 59 */  { ' ', 'O', '2', ' ' },
280
    /* 60 */  { ' ', 'N', '2', ' ' },
281
    /* 61 */  { ' ', 'N', '3', ' ' },
282
    /* 62 */  { ' ', 'C', '4', ' ' },
283
    /* 63 */  { ' ', 'O', '4', ' ' },
284
    /* 64 */  { ' ', 'N', '4', ' ' },
285
    /* 65 */  { ' ', 'C', '5', ' ' },
286
    /* 66 */  { ' ', 'C', '5', 'M' },
287
    /* 67 */  { ' ', 'C', '6', ' ' }
288
  };
289
290
  //! Definition of side chains, associating overall residue name with
291
  //!  the pseudo-SMILES pattern
292
  typedef struct
293
  {
294
    const char *name; //!< Residue name, standardized by PDB
295
    const char *data; //!< pseudo-SMILES definition of side-chain
296
  }    ResidType;
297
298
  /**
299
   * Side chains for recognized amino acids using a pseudo-SMARTS syntax
300
   * for branching and bonds. Numbers indicate atom types defined by
301
   * OpenBabel::ChainsAtomName global array above.
302
   */
303
  static ResidType AminoAcids[AMINOMAX] = {
304
    { "ILE", "1-4(-9-14)-10"                        },
305
    { "VAL", "1-4(-9)-10"                           },
306
    { "ALA", "1-4"                                  },
307
    { "ASN", "1-4-7(=15)-19"                        },
308
    { "ASP", "1-4-7(=15)-18"                        },
309
    { "ARG", "1-4-7-11-21-29(=34)-35"               },
310
    { "CYS", "1-4-5"                                },
311
    { "GLN", "1-4-7-11(=23)-27"                     },
312
    { "GLU", "1-4-7-11(=23)-26"                     },
313
    { "GLY", "1"                                    },
314
    { "HIS", "1-4-7^16~22^27^17~7"                  },
315
    { "HYP", "1-4-7(-12)-11-0"                      },
316
    { "LEU", "1-4-7(-14)-17"                        },
317
    { "LYS", "1-4-7-11-20-30"                       },
318
    { "MET", "1-4-7-13-20"                          },
319
    { "PHE", "1-4-7~14^22~29^25~17^7"               },
320
    { "PRO", "1-4-7-11-0"                           },
321
    { "SER", "1-4-6"                                },
322
    { "THR", "1-4(-8)-10"                           },
323
    { "TRP", "1-4-7~14^24^25~17(^7)^28~32^36~31^25" },
324
    { "TYR", "1-4-7~14^22~29(-33)^25~17^7"          }
325
  };
326
  // Other possible amino acid templates (less common)
327
  /* Pyroglutamate (PCA):        1-4-7-11(=" OE ")-0  PDB Example: 1CEL */
328
  /* Amino-N-Butyric Acid (ABA): 1-4-7                PDB Example: 1BBO */
329
  /* Selenic Acid (SEC):         1-4-"SEG "(-15)-18   PDB Example: 1GP1 */
330
331
  /**
332
   * Side chains for recognized nucleotides using a pseudo-SMARTS syntax
333
   * for branching and bonds. Numbers indicate atom types defined by
334
   * OpenBabel::ChainsAtomName global array above.
335
   */
336
  static ResidType Nucleotides[NUCLEOMAX] = {
337
    { "  A", "49-50-51-52-53-54(-56)-57-58-61-62(-53)-50"      },
338
    { "  C", "49-57-58(-59)-61-62(-64)-65-67-57"               },
339
    { "  G", "49-50-51-52-53-54(-55)-57-58(-60)-61-62(-53)-50" },
340
    { "  T", "49-57-58(-59)-61-62(-63)-65(-66)-67-57"          },
341
    { "  U", "49-57-58(-59)-61-62(-63)-65-67-57"               },
342
    { "  I", "49-50-51-52-53-54(-55)-57-58-61-62(-53)-50"      }
343
  };
344
345
346
  typedef struct
347
  {
348
    int atomid,elem;
349
    int bcount;
350
    int index;
351
  } MonoAtomType;
352
353
  typedef struct
354
  {
355
    int src,dst;
356
    int index;
357
    int flag;
358
  } MonoBondType;
359
360
  typedef struct
361
  {
362
    int type;
363
    union _ByteCode *next;
364
  } MonOpStruct;
365
366
  typedef struct
367
  {
368
    int type;
369
    int value;
370
    union _ByteCode *tcond;
371
    union _ByteCode *fcond;
372
  } BinOpStruct;
373
374
  //! Output array -- residue id, atom id, bond flags, etc.
375
  typedef struct
376
  {
377
    int type;
378
    int resid;
379
    int *atomid;
380
    int *bflags;
381
  } AssignStruct;
382
383
  //! Chemical graph matching virtual machine
384
  typedef union _ByteCode
385
  {
386
    int type;
387
    MonOpStruct eval;     //!< Eval - push current neighbors onto the stack
388
    BinOpStruct count;    //!< Count - test the number of eval bonds
389
    BinOpStruct elem;     //!< Element - test the element of current atom
390
    BinOpStruct ident;    //!< Ident - test the atom for backbone identity
391
    BinOpStruct local;    //!< Local - test whether the atom has been visited
392
    AssignStruct assign;  //!< Assign - assign residue name, atom name and bond type to output
393
  } ByteCode;
394
395
  typedef struct
396
  {
397
    int atom,bond;
398
    int prev;
399
  } StackType;
400
401
  static MonoAtomType MonoAtom[MaxMonoAtom];
402
  static MonoBondType MonoBond[MaxMonoBond];
403
  static int MonoAtomCount;
404
  static int MonoBondCount;
405
406
  static StackType Stack[STACKSIZE];
407
  static int StackPtr;
408
409
  static int  AtomIndex;
410
  static int  BondIndex;
411
  static bool StrictFlag = false;
412
413
  //////////////////////////////////////////////////////////////////////////////
414
  // Static Functions
415
  //////////////////////////////////////////////////////////////////////////////
416
417
  static ByteCode *AllocateByteCode(int type)
418
9.42k
  {
419
9.42k
    ByteCode *result;
420
421
9.42k
    result = new ByteCode;
422
9.42k
    if( !result )
423
0
      {
424
0
        obErrorLog.ThrowError(__FUNCTION__, "Unable to allocate byte codes for biomolecule residue perception.", obError);
425
0
        return (result);
426
0
      }
427
9.42k
    result->type = type;
428
9.42k
    result->eval.next     = nullptr;
429
9.42k
    result->count.tcond   = nullptr;
430
9.42k
    result->count.fcond   = nullptr;
431
9.42k
    result->elem.tcond    = nullptr;
432
9.42k
    result->elem.fcond    = nullptr;
433
9.42k
    result->ident.tcond   = nullptr;
434
9.42k
    result->ident.fcond   = nullptr;
435
9.42k
    result->local.tcond   = nullptr;
436
9.42k
    result->local.fcond   = nullptr;
437
9.42k
    result->assign.atomid = nullptr;
438
9.42k
    result->assign.bflags = nullptr;
439
440
9.42k
    return (result);
441
9.42k
  }
442
443
  //! Free a ByteCode and all corresponding data
444
  static void DeleteByteCode(ByteCode *node)
445
0
  {
446
0
    if (node == nullptr)
447
0
      return;
448
0
    else
449
0
      {
450
0
        switch (node->type)
451
0
          {
452
0
          case BC_ASSIGN:
453
454
0
            if (node->assign.atomid != nullptr) {
455
0
              delete [] node->assign.atomid;
456
0
              node->assign.atomid = nullptr; // prevent double-free
457
0
            }
458
0
            if (node->assign.bflags != nullptr) {
459
0
              delete [] node->assign.bflags;
460
0
              node->assign.bflags = nullptr; // prevent double-free
461
0
            }
462
463
0
            break;
464
465
0
          case BC_COUNT:
466
467
0
            DeleteByteCode(node->count.tcond);
468
0
            DeleteByteCode(node->count.fcond);
469
0
            break;
470
0
          case BC_ELEM:
471
472
0
            DeleteByteCode(node->elem.tcond);
473
0
            DeleteByteCode(node->elem.fcond);
474
0
            break;
475
476
0
          case BC_EVAL:
477
478
0
            DeleteByteCode(node->eval.next);
479
0
            break;
480
481
0
          case BC_IDENT:
482
483
0
            DeleteByteCode(node->ident.tcond);
484
0
            DeleteByteCode(node->ident.fcond);
485
0
            break;
486
487
0
          case BC_LOCAL:
488
489
0
            DeleteByteCode(node->local.tcond);
490
0
            DeleteByteCode(node->local.fcond);
491
0
            break;
492
0
          }
493
494
0
        delete node;
495
0
        node = nullptr;
496
0
      }
497
0
  }
498
499
  static void FatalMemoryError(void)
500
0
  {
501
0
    obErrorLog.ThrowError(__FUNCTION__, "Unable to allocate memory for biomolecule residue / chain perception.", obError);
502
0
  }
503
504
  void GenerateByteCodes(ByteCode **node, int resid, int curr, int prev, int bond)
505
5.82k
  {
506
5.82k
    StackType neighbour[4];
507
5.82k
    StackType original;
508
5.82k
    int count,i,j;
509
5.82k
    ByteCode *ptr;
510
5.82k
    bool done,found;
511
512
5.82k
    if( curr != prev )
513
5.66k
      {
514
5.66k
        if( MonoAtom[curr].atomid < ATOMMINAMINO )
515
18
          {
516
18
            found = false;
517
18
            while( *node && ((*node)->type==BC_IDENT) )
518
0
              {
519
0
                if( (*node)->ident.value == MonoAtom[curr].atomid )
520
0
                  {
521
0
                    node  = (ByteCode**)&(*node)->ident.tcond;
522
0
                    found = true;
523
0
                    break;
524
0
                  }
525
0
                else
526
0
                  node = (ByteCode**)&(*node)->ident.fcond;
527
0
              }
528
529
18
            if (!found)
530
18
              {
531
18
                ptr = AllocateByteCode(BC_IDENT);
532
18
                ptr->ident.tcond = nullptr;
533
18
                ptr->ident.fcond = *node;
534
18
                *node = ptr;
535
18
                node = (ByteCode**)&ptr->ident.tcond;
536
18
                ptr->ident.value = MonoAtom[curr].atomid;
537
18
              }
538
18
            MonoBond[bond].index = BondIndex++;
539
18
            done = true;
540
18
          }
541
5.64k
        else if( MonoAtom[curr].index != -1 )
542
2.14k
          {
543
2.14k
            while( *node && ((*node)->type==BC_IDENT) )
544
0
              node = (ByteCode**)&(*node)->ident.fcond;
545
546
2.14k
            found = false;
547
2.20k
            while( *node && ((*node)->type==BC_LOCAL) )
548
228
              {
549
228
                if( (*node)->local.value == MonoAtom[curr].index )
550
174
                  {
551
174
                    node = (ByteCode**)&(*node)->local.tcond;
552
174
                    found = true;
553
174
                    break;
554
174
                  }
555
54
                else
556
54
                  node = (ByteCode**)&(*node)->local.fcond;
557
228
              }
558
559
2.14k
            if (!found)
560
1.97k
              {
561
1.97k
                ptr = AllocateByteCode(BC_LOCAL);
562
1.97k
                ptr->local.tcond = nullptr;
563
1.97k
                ptr->local.fcond = *node;
564
1.97k
                *node = ptr;
565
1.97k
                node = (ByteCode**)&ptr->local.tcond;
566
1.97k
                ptr->local.value = MonoAtom[curr].index;
567
1.97k
              }
568
569
2.14k
            MonoBond[bond].index = BondIndex++;
570
2.14k
            done = true;
571
2.14k
          }
572
3.49k
        else
573
3.49k
          {
574
3.52k
            while( *node && ((*node)->type==BC_IDENT) )
575
30
              node = (ByteCode**)&(*node)->ident.fcond;
576
3.63k
            while( *node && ((*node)->type==BC_LOCAL) )
577
138
              node = (ByteCode**)&(*node)->local.fcond;
578
579
3.49k
            found = false;
580
4.14k
            while( *node && ((*node)->type==BC_ELEM) )
581
1.95k
              {
582
1.95k
                if( (*node)->elem.value == MonoAtom[curr].elem )
583
1.31k
                  {
584
1.31k
                    node = (ByteCode**)&(*node)->elem.tcond;
585
1.31k
                    found = true;
586
1.31k
                    break;
587
1.31k
                  }
588
642
                else
589
642
                  node = (ByteCode**)&(*node)->elem.fcond;
590
1.95k
              }
591
592
3.49k
            if( !found )
593
2.18k
              {
594
2.18k
                ptr = AllocateByteCode(BC_ELEM);
595
2.18k
                ptr->elem.tcond = nullptr;
596
2.18k
                ptr->elem.fcond = *node;
597
2.18k
                *node = ptr;
598
2.18k
                node = (ByteCode**)&ptr->elem.tcond;
599
2.18k
                ptr->elem.value = MonoAtom[curr].elem;
600
2.18k
              }
601
602
3.49k
            MonoAtom[curr].index = AtomIndex++;
603
3.49k
            MonoBond[bond].index = BondIndex++;
604
3.49k
            done = false;
605
3.49k
          }
606
5.66k
      }
607
162
    else
608
162
      {
609
162
        MonoAtom[curr].index = AtomIndex++;
610
162
        done = false;
611
162
      }
612
613
5.82k
    count = 0;
614
5.82k
    if (!done)
615
3.66k
      {
616
40.7k
        for( i=0; i<MonoBondCount; i++ )
617
37.0k
          {
618
37.0k
            if( MonoBond[i].src == curr )
619
3.01k
              {
620
3.01k
                if( MonoBond[i].dst != prev )
621
1.96k
                  {
622
1.96k
                    neighbour[count].atom = MonoBond[i].dst;
623
1.96k
                    neighbour[count].bond = i;
624
1.96k
                    count++;
625
1.96k
                  }
626
3.01k
              }
627
34.0k
            else if( MonoBond[i].dst == curr )
628
3.66k
              {
629
3.66k
                if( MonoBond[i].src != prev )
630
1.21k
                  {
631
1.21k
                    neighbour[count].atom = MonoBond[i].src;
632
1.21k
                    neighbour[count].bond = i;
633
1.21k
                    count++;
634
1.21k
                  }
635
3.66k
              }
636
37.0k
          }
637
638
3.66k
        if ( *node && ((*node)->type==BC_EVAL) )
639
1.44k
          {
640
1.44k
            found = false;
641
1.44k
            node  = (ByteCode**)&(*node)->eval.next;
642
2.02k
            while( *node && ((*node)->type==BC_COUNT) )
643
1.75k
              {
644
1.75k
                if( (*node)->count.value == count )
645
1.17k
                  {
646
1.17k
                    node = (ByteCode**)&(*node)->count.tcond;
647
1.17k
                    found = true;
648
1.17k
                    break;
649
1.17k
                  }
650
582
                else
651
582
                  node = (ByteCode**)&(*node)->count.fcond;
652
1.75k
              }
653
654
1.44k
            if( !found )
655
270
              {
656
270
                ptr = AllocateByteCode(BC_COUNT);
657
270
                ptr->count.tcond = nullptr;
658
270
                ptr->count.fcond = *node;
659
270
                *node = ptr;
660
270
                node = (ByteCode**)&ptr->count.tcond;
661
270
                ptr->count.value = count;
662
270
              }
663
1.44k
          }
664
2.22k
        else if( count || StrictFlag || StackPtr )
665
2.09k
          {
666
2.09k
            ptr = AllocateByteCode(BC_EVAL);
667
2.09k
            ptr->eval.next = *node;
668
2.09k
            *node = ptr;
669
2.09k
            node = (ByteCode**)&ptr->eval.next;
670
671
2.09k
            ptr = AllocateByteCode(BC_COUNT);
672
2.09k
            ptr->count.tcond = nullptr;
673
2.09k
            ptr->count.fcond = *node;
674
2.09k
            *node = ptr;
675
2.09k
            node = (ByteCode**)&ptr->count.tcond;
676
2.09k
            ptr->count.value = count;
677
2.09k
          }
678
3.66k
      }
679
680
5.82k
    if( count == 1 )
681
1.83k
      {
682
1.83k
        GenerateByteCodes(node,resid,neighbour[0].atom, curr,neighbour[0].bond);
683
1.83k
      }
684
3.99k
    else if( count == 2 )
685
672
      {
686
672
        original = Stack[StackPtr++];
687
672
        Stack[StackPtr-1] = neighbour[0];
688
672
        Stack[StackPtr-1].prev = curr;
689
672
        GenerateByteCodes(node,resid,neighbour[1].atom,
690
672
                          curr,neighbour[1].bond);
691
672
        Stack[StackPtr-1] = neighbour[1];
692
672
        Stack[StackPtr-1].prev = curr;
693
672
        GenerateByteCodes(node,resid,neighbour[0].atom,
694
672
                          curr,neighbour[0].bond);
695
672
        Stack[--StackPtr] = original;
696
672
      }
697
3.31k
    else if( count )
698
0
      {
699
0
        stringstream errorMsg;
700
0
        errorMsg << "Maximum Monomer Fanout Exceeded!" << endl;
701
0
        errorMsg << "Residue " << ChainsResName[resid] << " atom "
702
0
                 << curr << endl;
703
0
        errorMsg << "Previous = " << prev << " Fanout = " << count << endl;
704
0
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
705
0
      }
706
3.31k
    else if( StackPtr )
707
2.48k
      {
708
2.48k
        StackPtr--;
709
2.48k
        GenerateByteCodes(node,resid,Stack[StackPtr].atom,
710
2.48k
                          Stack[StackPtr].prev,Stack[StackPtr].bond);
711
2.48k
        StackPtr++;
712
2.48k
      }
713
834
    else if( !(*node) )
714
786
      {
715
786
        ptr = AllocateByteCode(BC_ASSIGN);
716
786
        ptr->assign.resid = resid;
717
786
        ptr->assign.atomid = new int[AtomIndex];
718
786
        if( !ptr->assign.atomid )
719
0
          FatalMemoryError();
720
8.42k
        for( i=0; i<MonoAtomCount; i++ )
721
7.63k
          if( (j=MonoAtom[i].index) != -1 )
722
7.62k
            ptr->assign.atomid[j] = MonoAtom[i].atomid;
723
786
        if( BondIndex )
724
780
          {
725
780
            ptr->assign.bflags = new int[BondIndex];
726
8.71k
            for( i=0; i<MonoBondCount; i++ )
727
7.93k
              if( (j=MonoBond[i].index) != -1 )
728
7.93k
                ptr->assign.bflags[j] = MonoBond[i].flag;
729
780
          }
730
786
        *node = ptr;
731
786
      }
732
48
    else if( (*node)->type == BC_ASSIGN )
733
48
      {
734
48
        if( (*node)->assign.resid != resid )
735
0
          {
736
0
            stringstream errorMsg;
737
0
            errorMsg << "Duplicated Monomer Specification!\n";
738
0
            errorMsg << "Residue " << ChainsResName[resid]
739
0
                     << " matches residue ";
740
0
            errorMsg << ChainsResName[(*node)->assign.resid] << endl;
741
0
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
742
0
          }
743
48
      }
744
745
    /* Restore State! */
746
5.82k
    if( curr != prev )
747
5.66k
      {
748
5.66k
        if( !done )
749
3.49k
          {
750
3.49k
            MonoAtom[curr].index = -1;
751
3.49k
            AtomIndex--;
752
3.49k
          }
753
5.66k
        MonoBond[bond].index = -1;
754
5.66k
        BondIndex--;
755
5.66k
      }
756
5.82k
  }
757
758
  //////////////////////////////////////////////////////////////////////////////
759
  // Constructors / Destructors
760
  //////////////////////////////////////////////////////////////////////////////
761
762
  // validated
763
  OBChainsParser::OBChainsParser(void)
764
6
  {
765
6
    int i, res = RESIDMIN;
766
767
6
    PDecisionTree = (ByteCode*)nullptr;
768
132
    for( i=0 ; i < AMINOMAX ; i++ )
769
126
      {
770
126
        strncpy(ChainsResName[res],AminoAcids[i].name, sizeof(ChainsResName[res]) - 1);
771
126
        ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
772
126
        DefineMonomer(&PDecisionTree,res,AminoAcids[i].data);
773
126
        res++;
774
126
      }
775
776
6
    NDecisionTree = (ByteCode*)nullptr;
777
42
    for( i=0 ; i< NUCLEOMAX ; i++ )
778
36
      {
779
36
        strncpy(ChainsResName[res],Nucleotides[i].name, sizeof(ChainsResName[res]) - 1);
780
36
        ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0';
781
36
        DefineMonomer(&NDecisionTree,res,Nucleotides[i].data);
782
36
        res++;
783
36
      }
784
6
  }
785
786
  OBChainsParser::~OBChainsParser(void)
787
0
  {
788
0
    DeleteByteCode((ByteCode*)PDecisionTree);
789
0
    DeleteByteCode((ByteCode*)NDecisionTree);
790
0
  }
791
792
  //////////////////////////////////////////////////////////////////////////////
793
  // Setup / Cleanup Functions
794
  //////////////////////////////////////////////////////////////////////////////
795
796
  //! Setup parsing for this molecule --
797
  void OBChainsParser::SetupMol(OBMol &mol)
798
0
  {
799
0
    CleanupMol();
800
801
0
    int i;
802
0
    int asize = mol.NumAtoms();
803
0
    int bsize = mol.NumBonds();
804
805
0
    bitmasks.resize(asize, 0);
806
0
    visits.resize(asize, 0);
807
0
    resids.resize(asize, 0);
808
0
    flags.resize(bsize, 0);
809
0
    hetflags.resize(asize, 0);
810
0
    atomids.resize(asize, 0);
811
0
    resnos.resize(asize, 0);
812
0
    sernos.resize(asize, 0);
813
0
    hcounts.resize(asize, 0);
814
0
    chains.resize(asize, ' ');
815
816
0
    for ( i = 0 ; i < asize ; i++ )
817
0
      {
818
0
        atomids[i]  = -1;
819
0
      }
820
0
  }
821
822
  //! Clean up any molecular data left in memory -- frees all memory afterwards
823
  //! Used by OBChainsParser::SetupMol()
824
  void OBChainsParser::CleanupMol(void)
825
0
  {
826
0
    bitmasks.clear();
827
0
    visits.clear();
828
0
    resids.clear();
829
0
    flags.clear();
830
0
    hetflags.clear();
831
0
    atomids.clear();
832
0
    resnos.clear();
833
0
    sernos.clear();
834
0
    hcounts.clear();
835
0
    chains.clear();
836
0
  }
837
838
  //! Clear all residue information for a supplied molecule
839
  void OBChainsParser::ClearResidueInformation(OBMol &mol)
840
0
  {
841
0
    OBResidue *residue;
842
0
    vector<OBResidue*> residues;
843
0
    vector<OBResidue*>::iterator r;
844
845
0
    if (mol.NumResidues() == 0)
846
0
      return; // Done!
847
848
0
    for (residue = mol.BeginResidue(r) ; residue ; residue = mol.NextResidue(r))
849
0
      residues.push_back(residue);
850
851
0
    for ( unsigned int i = 0 ; i < residues.size() ; i++ )
852
0
      mol.DeleteResidue(residues[i]);
853
854
0
    residues.clear();
855
0
  }
856
857
  void OBChainsParser::SetResidueInformation(OBMol &mol, bool nukeSingleResidue)
858
0
  {
859
0
    char buffer[BUFF_SIZE];
860
0
    const char *symbol;
861
0
    string atomid, name;
862
863
0
    OBAtom    *atom;
864
0
    OBResidue *residue;
865
0
    map<char, map<short, OBResidue*> > resmap;
866
0
    unsigned int numAtoms = mol.NumAtoms();
867
868
    //DumpState();
869
870
    // correct serine OG
871
0
    for ( unsigned int i = 0 ; i < numAtoms ; i++ ) {
872
0
      if (resids[i] == RESIDMIN + 17) // serine
873
0
        if (atomids[i] == -1) {
874
0
          atom = mol.GetAtom(i+1);
875
876
0
          FOR_NBORS_OF_ATOM (nbr, atom) {
877
0
            if (atomids[nbr->GetIdx()-1] == 4) // CB
878
0
              atomids[i] = 6; // OG
879
0
          }
880
0
        }
881
0
    }
882
883
0
    for ( unsigned int i = 0 ; i < numAtoms ; i++ ) {
884
0
      atom = mol.GetAtom(i+1); // WARNING: ATOM INDEX ISSUE
885
886
0
      if (atomids[i] == -1) {
887
0
        symbol = OBElements::GetSymbol(atom->GetAtomicNum());
888
0
        if ( symbol[1] ) {
889
0
          buffer[0] = symbol[0];
890
0
          buffer[1] = (char) toupper(symbol[1]);
891
0
        } else {
892
0
          buffer[0] = ' ';
893
0
          buffer[1] = symbol[0];
894
0
        }
895
0
        buffer[2] = ' ';
896
0
        buffer[3] = ' ';
897
0
        buffer[4] = '\0';
898
0
      } else if (atom->GetAtomicNum() == OBElements::Hydrogen) {
899
0
        if (hcounts[i]) {
900
0
          snprintf(buffer, BUFF_SIZE, "H%.2s%c", ChainsAtomName[atomids[i]]+2, hcounts[i]+'0');
901
0
          if (buffer[1] == ' ') {
902
0
            buffer[1] = buffer[3];
903
0
            buffer[2] = '\0';
904
0
          }
905
0
          else if (buffer[2] == ' ') {
906
0
            buffer[2] = buffer[3];
907
0
            buffer[3] = '\0';
908
0
          }
909
0
        } else {
910
0
          snprintf(buffer, BUFF_SIZE, "H%.2s", ChainsAtomName[atomids[i]]+2);
911
0
        }
912
0
      } else
913
0
        snprintf(buffer, BUFF_SIZE, "%.4s", ChainsAtomName[atomids[i]]);
914
915
0
      if (buffer[3] == ' ')
916
0
        buffer[3] = '\0';
917
918
      //cout << "  (2) --> = " << buffer << endl;
919
920
0
      atomid = (buffer[0] == ' ') ? buffer + 1 : buffer;
921
922
      //cout << "  (3) --> = " << buffer << endl;
923
924
0
      if (resmap[chains[i]].find(resnos[i]) != resmap[chains[i]].end()) {
925
0
        residue = resmap[chains[i]][resnos[i]];
926
0
        residue->AddAtom(atom);
927
0
        residue->SetAtomID(atom, atomid);
928
0
        residue->SetHetAtom(atom, hetflags[i]);
929
0
        residue->SetSerialNum(atom, sernos[i]);
930
0
      } else {
931
0
        name    = ChainsResName[resids[i]];
932
933
0
        residue = mol.NewResidue();
934
0
        residue->SetName(name);
935
0
        residue->SetNum(resnos[i]);
936
0
        residue->SetChain(chains[i]);
937
938
0
        residue->AddAtom(atom);
939
0
        residue->SetAtomID(atom, atomid);
940
0
        residue->SetHetAtom(atom, hetflags[i]);
941
0
        residue->SetSerialNum(atom, sernos[i]);
942
943
0
        resmap[chains[i]][resnos[i]] = residue;
944
0
      }
945
0
    }
946
947
0
    if (mol.NumResidues() == 1 && nukeSingleResidue)
948
0
      mol.DeleteResidue(mol.GetResidue(0));
949
0
    else if (mol.NumResidues() == 1 && (mol.GetResidue(0))->GetName() == "UNK")
950
0
      mol.DeleteResidue(mol.GetResidue(0));
951
0
  }
952
953
  //////////////////////////////////////////////////////////////////////////////
954
  // Perception Functions
955
  //////////////////////////////////////////////////////////////////////////////
956
957
  bool OBChainsParser::PerceiveChains(OBMol &mol, bool nukeSingleResidue)
958
0
  {
959
0
    bool result = true;
960
0
    unsigned int idx;
961
962
0
    SetupMol(mol);
963
0
    ClearResidueInformation(mol);
964
965
0
    result = DetermineHetAtoms(mol)          && result;
966
0
    result = DetermineConnectedChains(mol)   && result;
967
0
    result = DeterminePeptideBackbone(mol)   && result;
968
0
    result = DeterminePeptideSidechains(mol) && result;
969
0
    result = DetermineNucleicBackbone(mol)   && result;
970
0
    result = DetermineNucleicSidechains(mol) && result;
971
0
    result = DetermineHydrogens(mol)         && result;
972
973
    // Partially identified residues
974
    // example: CSD in 1LWF (CYS with two Os on the S)
975
0
    bool changed;
976
0
    vector<pair<char,short> > invalidResidues;
977
0
    do {
978
0
      changed = false;
979
980
0
      FOR_ATOMS_OF_MOL (atom, mol) {
981
0
        idx = atom->GetIdx() - 1;
982
983
0
        if (resids[idx] == 0) { // UNK
984
0
          FOR_NBORS_OF_ATOM (nbr, &*atom) {
985
0
            unsigned int idx2 = nbr->GetIdx() - 1;
986
0
            if (resids[idx2] != 0) { // !UNK
987
0
        if (atomids[idx2] == AI_N || atomids[idx2] == AI_C) {
988
    // bound to backbone-N/C
989
0
    hetflags[idx] = true;
990
0
    resids[idx] = 3; // ACE
991
0
    atomids[idx] = -1;
992
0
        } else {
993
0
    resnos[idx] = resnos[idx2];
994
0
    resids[idx] = resids[idx2];
995
0
    changed = true;
996
997
0
    bool addResidue = true;
998
0
    for (unsigned int i = 0; i < invalidResidues.size(); ++i)
999
0
      if ( (invalidResidues[i].first == chains[idx2]) &&
1000
0
           (invalidResidues[i].second == resnos[idx2]) )
1001
0
        addResidue = false;
1002
0
    if (addResidue)
1003
0
      invalidResidues.push_back(pair<char,short>(chains[idx2], resnos[idx2]));
1004
0
        }
1005
0
            }
1006
0
          }
1007
0
        }
1008
1009
0
      }
1010
0
    } while (changed);
1011
0
    for (unsigned int i = 0; i < invalidResidues.size(); ++i) {
1012
0
      FOR_ATOMS_OF_MOL (atom, mol) {
1013
0
        idx = atom->GetIdx() - 1;
1014
0
        if ( (invalidResidues[i].first == chains[idx]) &&
1015
0
             (invalidResidues[i].second == resnos[idx]) ) {
1016
0
          hetflags[idx] = true;
1017
0
          resids[idx] = 0; // UNK
1018
0
          atomids[idx] = -1;
1019
0
        }
1020
0
      }
1021
0
    }
1022
0
    invalidResidues.clear();
1023
1024
    // number the element in the ' ' chain (water, ions, ligands, ...)
1025
0
    short resno = 1;
1026
0
    FOR_ATOMS_OF_MOL (atom, mol) {
1027
0
      idx = atom->GetIdx() - 1;
1028
1029
0
      if (atom->GetHvyDegree() == 0) {
1030
0
        chains[idx] = ' ';
1031
0
        resnos[idx] = resno;
1032
0
        resno++;
1033
0
      } else {
1034
0
        if (resids[idx] != 0) // UNK
1035
0
          continue;
1036
0
        if (hetflags[idx])
1037
0
          continue;
1038
1039
0
        char chain = chains[idx];
1040
0
        FOR_ATOMS_OF_MOL (b, mol) {
1041
0
          unsigned int idx2 = b->GetIdx() - 1;
1042
0
          if (chains[idx2] == chain && !hetflags[idx2]) {
1043
0
            hetflags[idx2] = true;
1044
0
            chains[idx2] = ' ';
1045
0
            resnos[idx2] = resno;
1046
0
            resids[idx2] = 2; // unknown ligand
1047
0
          }
1048
0
        }
1049
1050
0
        resno++;
1051
0
      }
1052
1053
0
    }
1054
1055
0
    SetResidueInformation(mol, nukeSingleResidue);
1056
0
    CleanupMol();
1057
1058
0
    mol.SetChainsPerceived();
1059
1060
0
    obErrorLog.ThrowError(__FUNCTION__,
1061
0
                          "Ran OpenBabel::PerceiveChains", obAuditMsg);
1062
1063
0
    return result;
1064
0
  }
1065
1066
  /////////////////////////////////////////////////////////////////////////////
1067
  // Hetero Atom Perception
1068
  /////////////////////////////////////////////////////////////////////////////
1069
1070
  bool OBChainsParser::DetermineHetAtoms(OBMol &mol)
1071
0
  {
1072
0
    OBAtom *atom;
1073
0
    vector<OBAtom *>::iterator a;
1074
1075
    // find un-connected atoms (e.g., HOH oxygen atoms)
1076
0
    for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) {
1077
0
      if (atom->GetAtomicNum() == OBElements::Hydrogen || atom->GetHvyDegree() != 0)
1078
0
        continue;
1079
1080
0
      unsigned int idx = atom->GetIdx() - 1;
1081
1082
0
      if (atom->GetAtomicNum() == OBElements::Oxygen) {
1083
0
        resids[idx]   = 1;
1084
0
        hetflags[idx] = true;
1085
0
      }
1086
0
    }
1087
1088
0
    return true;
1089
0
  }
1090
1091
  /////////////////////////////////////////////////////////////////////////////
1092
  // Connected Chain Perception
1093
  /////////////////////////////////////////////////////////////////////////////
1094
1095
  bool OBChainsParser::DetermineConnectedChains(OBMol &mol)
1096
0
  {
1097
0
    int resid;
1098
0
    unsigned int size;
1099
0
    unsigned int i, idx;
1100
1101
0
    int resno = 1;
1102
0
    int count = 0;
1103
0
    unsigned int numAtoms = mol.NumAtoms();
1104
1105
0
    OBAtom *atom;
1106
0
    vector<OBAtom *>::iterator a;
1107
0
    for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) {
1108
0
      idx = atom->GetIdx() - 1;
1109
0
      if (!hetflags[idx] && chains[idx] == ' ' && atom->GetAtomicNum() != OBElements::Hydrogen) {
1110
0
        size = RecurseChain(mol, idx, 'A' + count);
1111
1112
        // size = number of heavy atoms in residue chain
1113
0
        if (size < 4) { // small ligand, probably
1114
0
          if (size == 1 && atom->GetAtomicNum() == OBElements::Oxygen)
1115
0
            resid = 1; /* HOH */
1116
0
          else
1117
0
            resid = 2; /* Unknown ligand */
1118
1119
0
          for (i = 0 ; i < numAtoms ; ++i) {
1120
0
            if (chains[i] == ('A' + count)) {
1121
0
              hetflags[i] = true;
1122
0
              resids[i]   = resid;
1123
0
              resnos[i]   = resno;
1124
0
              chains[i]   = ' ';
1125
0
            }
1126
0
          }
1127
0
          resno++;
1128
0
        } else {
1129
0
          count++; // number of connected chains
1130
0
          if (count > 26) // out of chain IDs
1131
0
            break;
1132
0
        }
1133
0
      }
1134
0
    }
1135
1136
    /*
1137
       if( count == 1 )
1138
       for ( i = 0 ; i < numAtoms ; i++ )
1139
       chains[i] = ' ';
1140
    */
1141
1142
0
    return true;
1143
0
  }
1144
1145
  ///@todo atom index
1146
  unsigned int OBChainsParser::RecurseChain(OBMol &mol, unsigned int i, int c)
1147
0
  {
1148
0
    OBAtom *atom, *nbr;
1149
0
    vector<OBBond *>::iterator b;
1150
0
    unsigned int result, index;
1151
1152
0
    atom = mol.GetAtom(i + 1);
1153
1154
    // ignore hydrogens
1155
0
    if (atom->GetAtomicNum() == OBElements::Hydrogen )
1156
0
      return 0;
1157
1158
0
    result    = 1;
1159
0
    chains[i] = c;
1160
1161
    // recurse till we have all atoms for this chain
1162
0
    for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b)) {
1163
0
      index = nbr->GetIdx() - 1;
1164
0
      if (chains[index] == ' ')
1165
0
        result += RecurseChain(mol, index, c);
1166
0
    }
1167
1168
    // and return how many we found
1169
0
    return result;
1170
0
  }
1171
1172
  //////////////////////////////////////////////////////////////////////////////
1173
  // Peptide Backbone Perception
1174
  //////////////////////////////////////////////////////////////////////////////
1175
1176
  bool OBChainsParser::DeterminePeptideBackbone(OBMol &mol)
1177
0
  {
1178
0
    ConstrainBackbone(mol, Peptide, MAXPEPTIDE);
1179
1180
0
    unsigned int i, numAtoms = mol.NumAtoms();
1181
1182
    // Cyclic peptides have no NTer (1SKI)
1183
    // timvdm 30/06/08
1184
0
    bool foundNTer = false;
1185
0
    for (i = 0 ; i < numAtoms; i++)
1186
0
      if (bitmasks[i] & BitNTer)
1187
0
        foundNTer = true;
1188
0
    if (!foundNTer)
1189
0
      for (i = 0 ; i < numAtoms; i++)
1190
0
        if (bitmasks[i] & BitNAll)
1191
0
          bitmasks[i] |= BitNTer;
1192
1193
1194
    /* Order Peptide Backbone */
1195
1196
0
    for ( i = 0 ; i < numAtoms ; i++ )
1197
0
      if (atomids[i] == -1)
1198
0
        {
1199
0
          if( bitmasks[i] & BitNTer )
1200
0
            {
1201
0
              atomids[i] = AI_N;
1202
0
              TracePeptideChain(mol,i,1);
1203
0
            }
1204
0
          else if( (bitmasks[i]&BitNPT) && !(bitmasks[i]&BitN) )
1205
0
            {
1206
0
              atomids[i] = AI_N;
1207
0
              TracePeptideChain(mol,i,1);
1208
0
            }
1209
0
        }
1210
1211
    /* Carbonyl Double Bond */
1212
1213
0
    OBBond *bond;
1214
0
    vector<OBBond*>::iterator b;
1215
0
    for (bond = mol.BeginBond(b) ; bond ; bond = mol.NextBond(b))
1216
0
      {
1217
0
        if ((atomids[bond->GetBeginAtomIdx()-1] == 2 && atomids[bond->GetEndAtomIdx()-1] == 3) ||
1218
0
            (atomids[bond->GetBeginAtomIdx()-1] == 3 && atomids[bond->GetEndAtomIdx()-1] == 2))
1219
0
          flags[bond->GetIdx()] |= BF_DOUBLE;
1220
0
      }
1221
1222
0
    return true;
1223
0
  }
1224
1225
  void OBChainsParser::ConstrainBackbone(OBMol &mol, Template *templ, int tmax)
1226
0
  {
1227
0
    static OBAtom *neighbour[6];
1228
0
    Template *pep;
1229
0
    OBAtom *na = nullptr;
1230
0
    OBAtom *nb = nullptr;
1231
0
    OBAtom *nc = nullptr;
1232
0
    OBAtom *nd = nullptr;
1233
0
    OBAtom *atom, *nbr;
1234
0
    bool change, result;
1235
0
    int i, count;
1236
0
    unsigned int idx;
1237
1238
0
    vector<OBAtom *>::iterator a;
1239
0
    vector<OBBond *>::iterator b;
1240
1241
    /* First Pass */
1242
1243
0
    for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1244
0
      {
1245
0
        idx = atom->GetIdx() - 1;
1246
0
        bitmasks[idx] = 0;
1247
0
        for ( i = 0 ; i < tmax ; i++ )
1248
0
          if ( (static_cast<unsigned int>(templ[i].elem)  == atom->GetAtomicNum()) &&
1249
0
               (static_cast<unsigned int>(templ[i].count) == atom->GetHvyDegree()))
1250
0
            bitmasks[idx] |= templ[i].flag;
1251
0
      }
1252
1253
    /* Second Pass */
1254
1255
0
    do
1256
0
      {
1257
0
        change = false;
1258
0
        for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1259
0
          {
1260
0
            idx = atom->GetIdx() - 1;
1261
0
            if (bitmasks[idx]) // Determine Neighbours
1262
0
              {
1263
0
                count = 0;
1264
0
                for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1265
0
                  if (nbr->GetAtomicNum() != OBElements::Hydrogen)
1266
0
                    neighbour[count++] = nbr;
1267
1268
0
                if (count >= 1)
1269
0
                  na = neighbour[0];
1270
0
                if (count >= 2)
1271
0
                  nb = neighbour[1];
1272
0
                if (count >= 3)
1273
0
                  nc = neighbour[2];
1274
0
                if (count >= 4)
1275
0
                  nd = neighbour[3];
1276
1277
0
                for ( i = 0 ; i < tmax ; i++ )
1278
0
                  if ( templ[i].flag & bitmasks[idx] )
1279
0
                    {
1280
0
                      pep    = &templ[i];
1281
0
                      result = true;
1282
1283
0
                      if (count == 4)
1284
0
                        result = Match4Constraints(pep,na,nb,nc,nd);
1285
0
                      else if (count == 3)
1286
0
                        result = Match3Constraints(pep,na,nb,nc);
1287
0
                      else if (count == 2)
1288
0
                        result = Match2Constraints(pep,na,nb);
1289
0
                      else if (count == 1)
1290
0
                        result = MatchConstraint(na,pep->n1);
1291
1292
0
                      if(result == false)
1293
0
                        {
1294
0
                          bitmasks[idx] &= ~pep->flag;
1295
0
                          change = true;
1296
0
                        }
1297
0
                    }
1298
0
              }
1299
0
          }
1300
0
      }
1301
0
    while( change );
1302
0
  }
1303
1304
  bool OBChainsParser::MatchConstraint(OBAtom *atom, int mask)
1305
0
  {
1306
0
    if (atom == nullptr)
1307
0
      return (false);
1308
1309
0
    if( mask < 0 )
1310
0
      return(atom->GetAtomicNum() == static_cast<unsigned int>(-mask));
1311
0
    else
1312
0
      return(((bitmasks[atom->GetIdx()-1]&mask) == 0) ? false : true);
1313
0
  }
1314
1315
  bool OBChainsParser::Match2Constraints(Template *tmpl, OBAtom *na, OBAtom *nb)
1316
0
  {
1317
0
    if (na == nullptr || nb == nullptr)
1318
0
      return (false); // don't even try to evaluate it
1319
1320
0
    if( MatchConstraint(na,tmpl->n2) )
1321
0
      if( MatchConstraint(nb,tmpl->n1) )
1322
0
        return( true );
1323
0
    if( MatchConstraint(nb,tmpl->n2) )
1324
0
      if( MatchConstraint(na,tmpl->n1) )
1325
0
        return( true );
1326
0
    return( false );
1327
0
  }
1328
1329
  bool OBChainsParser::Match3Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc)
1330
0
  {
1331
0
    if (na == nullptr || nb == nullptr || nc == nullptr)
1332
0
      return (false); // don't even try to evaluate it
1333
1334
0
    if( MatchConstraint(na,tmpl->n3) )
1335
0
      if( Match2Constraints(tmpl,nb,nc) )
1336
0
        return( true );
1337
0
    if( MatchConstraint(nb,tmpl->n3) )
1338
0
      if( Match2Constraints(tmpl,na,nc) )
1339
0
        return( true );
1340
0
    if( MatchConstraint(nc,tmpl->n3) )
1341
0
      if( Match2Constraints(tmpl,na,nb) )
1342
0
        return( true );
1343
0
    return( false );
1344
0
  }
1345
1346
  bool OBChainsParser::Match4Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc, OBAtom *nd)
1347
0
  {
1348
0
    if (na == nullptr || nb == nullptr || nc == nullptr || nd == nullptr)
1349
0
      return (false); // don't even try to evaluate it
1350
1351
0
    if( MatchConstraint(na,tmpl->n4) )
1352
0
      if( Match3Constraints(tmpl,nb,nc,nd) )
1353
0
        return( true );
1354
0
    if( MatchConstraint(nb,tmpl->n4) )
1355
0
      if( Match3Constraints(tmpl,na,nc,nd) )
1356
0
        return( true );
1357
0
    if( MatchConstraint(nc,tmpl->n4) )
1358
0
      if( Match3Constraints(tmpl,na,nb,nd) )
1359
0
        return( true );
1360
0
    if( MatchConstraint(nd,tmpl->n4) )
1361
0
      if( Match3Constraints(tmpl,na,nb,nc) )
1362
0
        return( true );
1363
0
    return( false );
1364
0
  }
1365
1366
  void OBChainsParser::TracePeptideChain(OBMol &mol, unsigned int i, int r)
1367
0
  {
1368
0
    unsigned int neighbour[4];
1369
0
    unsigned int na = 0;
1370
0
    unsigned int nb = 0;
1371
0
    unsigned int nc = 0;
1372
0
    OBAtom *atom, *nbr;
1373
0
    int count;
1374
0
    int j,k;
1375
0
    unsigned int idx;
1376
1377
0
    j = k = 0; // ignore warning
1378
1379
0
    vector<OBBond *>::iterator b;
1380
1381
    /* Determine Neighbours */
1382
1383
0
    atom = mol.GetAtom(i+1);
1384
0
    idx  = atom->GetIdx() - 1;
1385
0
    if (visits[i])
1386
0
      return;
1387
0
    visits[i] = true;
1388
1389
0
    count = 0;
1390
0
    for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1391
0
      if (nbr->GetAtomicNum() != OBElements::Hydrogen)
1392
0
        neighbour[count++] = nbr->GetIdx()-1;
1393
1394
0
    resnos[idx] = r;
1395
1396
0
    if (count >= 1)
1397
0
      na = neighbour[0];
1398
0
    if (count >= 2)
1399
0
      nb = neighbour[1];
1400
0
    if (count >= 3)
1401
0
      nc = neighbour[2];
1402
1403
0
    switch( atomids[i] )
1404
0
      {
1405
0
      case(AI_N):
1406
0
        for( j=0; j<count; j++ )
1407
0
          if (bitmasks[neighbour[j]] & BitCAAll)
1408
0
            {
1409
0
              atomids[neighbour[j]] = AI_CA;
1410
0
              if (!visits[neighbour[j]])
1411
0
                TracePeptideChain(mol,neighbour[j],r);
1412
0
            }
1413
0
        break;
1414
1415
0
      case(AI_CA):
1416
0
        if( count == 3 )
1417
0
          {
1418
0
            if ( bitmasks[na] & BitNAll )
1419
0
              na = nc;
1420
0
            else if ( bitmasks[nb] & BitNAll )
1421
0
              nb = nc;
1422
1423
0
            if ( bitmasks[na] & BitC )
1424
0
              {
1425
0
                j = na;
1426
0
                k = nb;
1427
0
              }
1428
0
            else if ( bitmasks[nb] & BitC )
1429
0
              {
1430
0
                j = nb;
1431
0
                k = na;
1432
0
              }
1433
0
            else if( bitmasks[na] & BitCAll )
1434
0
              {
1435
0
                j = na;
1436
0
                k = nb;
1437
0
              }
1438
0
            else if (bitmasks[nb] & BitCAll )
1439
0
              {
1440
0
                j = nb;
1441
0
                k = na;
1442
0
              }
1443
1444
0
            atomids[j]  = AI_C;
1445
0
            bitmasks[k] = 0;
1446
1447
0
            if (!visits[j])
1448
0
              TracePeptideChain(mol,j,r);
1449
0
          }
1450
0
        else if (count == 2)
1451
0
          {
1452
0
            if ( bitmasks[na] & BitCAll )
1453
0
              {
1454
0
                atomids[na] = AI_C;
1455
0
                if (!visits[na])
1456
0
                  TracePeptideChain(mol,na,r);
1457
0
              }
1458
0
            else if ( bitmasks[nb] & BitCAll )
1459
0
              {
1460
0
                atomids[nb] = AI_C;
1461
0
                if (!visits[nb])
1462
0
                  TracePeptideChain(mol,nb,r);
1463
0
              }
1464
0
          }
1465
0
        break;
1466
1467
0
      case(AI_C):
1468
0
        k = AI_O;
1469
0
        for ( j = 0; j < count; j++ )
1470
0
          {
1471
0
            if ( bitmasks[neighbour[j]] & BitNAll )
1472
0
              {
1473
0
                atomids[neighbour[j]] = AI_N;
1474
0
                if (!visits[neighbour[j]])
1475
0
                  TracePeptideChain(mol,neighbour[j],r+1);
1476
0
              }
1477
0
            else if( bitmasks[neighbour[j]] & BitOAll )
1478
0
              {
1479
0
                atomids[neighbour[j]] = k;
1480
0
                resnos[neighbour[j]]  = r;
1481
0
                k = AI_OXT;  /* OXT */
1482
0
              }
1483
0
          }
1484
0
        break;
1485
0
      }
1486
0
  }
1487
1488
  //////////////////////////////////////////////////////////////////////////////
1489
  // Peptide Sidechains Perception
1490
  //////////////////////////////////////////////////////////////////////////////
1491
1492
  bool OBChainsParser::DeterminePeptideSidechains(OBMol &mol)
1493
0
  {
1494
0
    int resid;
1495
0
    int max = mol.NumAtoms();
1496
1497
0
    for (int i = 0 ; i < max ; ++i)
1498
0
      if (atomids[i] == AI_CA)
1499
0
        {
1500
0
          resid = IdentifyResidue(PDecisionTree, mol, i, resnos[i]);
1501
0
          AssignResidue(mol,resnos[i],chains[i],resid);
1502
0
        }
1503
1504
0
    return true;
1505
0
  }
1506
1507
  void OBChainsParser::AssignResidue(OBMol &mol, int r, int c, int i)
1508
0
  {
1509
0
    int max = mol.NumAtoms();
1510
1511
0
    for (int j = 0 ; j < max ; ++j)
1512
0
      if ((resnos[j] == r) && (chains[j] == c) && !hetflags[j])
1513
0
        resids[j] = i;
1514
0
  }
1515
1516
  int OBChainsParser::IdentifyResidue(void *tree, OBMol &mol, unsigned int seed,
1517
                                      int resno)
1518
0
  {
1519
0
    ByteCode *ptr;
1520
1521
0
    int AtomCount, BondCount;
1522
0
    int curr,prev,bond;
1523
0
    int bcount;
1524
0
    int i,j;
1525
1526
0
    ptr    = (ByteCode *) tree;
1527
0
    bcount = 0;
1528
1529
0
    Stack[0].atom = seed;
1530
0
    Stack[0].prev = seed;
1531
0
    StackPtr = 0;
1532
1533
0
    ResMonoAtom[0] = seed;
1534
0
    AtomCount = 1;
1535
0
    BondCount = 0;
1536
1537
0
    OBAtom *atom, *nbr;
1538
0
    vector<OBBond *>::iterator b;
1539
1540
0
    while( ptr ) {
1541
0
      switch(ptr->type)
1542
0
        {
1543
0
        case(BC_IDENT):  curr = Stack[StackPtr-1].atom;
1544
0
          if( atomids[curr] == ptr->ident.value )
1545
0
            {
1546
0
              bond = Stack[StackPtr-1].bond;
1547
0
              ResMonoBond[BondCount++] = bond;
1548
0
              ptr = ptr->ident.tcond;
1549
0
              StackPtr--;
1550
0
            }
1551
0
          else
1552
0
            ptr = ptr->ident.fcond;
1553
0
          break;
1554
1555
0
        case(BC_LOCAL):  curr = Stack[StackPtr-1].atom;
1556
0
          if( curr == ResMonoAtom[ptr->local.value] )
1557
0
            {
1558
0
              bond = Stack[StackPtr-1].bond;
1559
0
              ResMonoBond[BondCount++] = bond;
1560
0
              ptr = ptr->local.tcond;
1561
0
              StackPtr--;
1562
0
            }
1563
0
          else
1564
0
            ptr = ptr->local.fcond;
1565
0
          break;
1566
1567
0
        case(BC_ELEM):   curr = Stack[StackPtr-1].atom;
1568
0
          if( mol.GetAtom(curr+1)->GetAtomicNum() == static_cast<unsigned int>(ptr->elem.value) )
1569
0
            {
1570
0
              bond = Stack[StackPtr-1].bond;
1571
0
              ResMonoAtom[AtomCount++] = curr;
1572
0
              ResMonoBond[BondCount++] = bond;
1573
0
              resnos[curr] = resno;
1574
0
              ptr = ptr->elem.tcond;
1575
0
              StackPtr--;
1576
0
            }
1577
0
          else
1578
0
            ptr = ptr->elem.fcond;
1579
0
          break;
1580
1581
0
        case(BC_EVAL):   bcount = 0;
1582
0
          curr = Stack[StackPtr].atom;
1583
0
          prev = Stack[StackPtr].prev;
1584
1585
0
          atom = mol.GetAtom(curr+1); // WARNING, potential atom index issue
1586
0
          for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b))
1587
0
            {
1588
0
              if (nbr->GetAtomicNum() == OBElements::Hydrogen)
1589
0
                continue;
1590
1591
0
              j = nbr->GetIdx() - 1;
1592
0
              if (!((curr == prev) && bitmasks[j]) && (j != prev))
1593
0
                {
1594
0
                  Stack[StackPtr].prev = curr;
1595
0
                  Stack[StackPtr].atom = j;
1596
0
                  Stack[StackPtr].bond = (*b)->GetIdx();
1597
0
                  StackPtr++;
1598
0
                  bcount++;
1599
0
                }
1600
0
            }
1601
1602
0
          ptr = ptr->eval.next;
1603
0
          break;
1604
1605
0
        case(BC_COUNT):
1606
0
          if( bcount == ptr->count.value )
1607
0
            {
1608
0
              ptr = ptr->count.tcond;
1609
0
            }
1610
0
          else
1611
0
            ptr = ptr->count.fcond;
1612
0
          break;
1613
1614
0
        case(BC_ASSIGN):
1615
0
          for( i=0; i<AtomCount; i++ ) {
1616
0
            if( !bitmasks[ResMonoAtom[i]])
1617
0
              {
1618
0
                j = ptr->assign.atomid[i];
1619
0
                atomids[ResMonoAtom[i]] = j;
1620
0
              }
1621
0
          }
1622
0
          for( i=0; i<BondCount; i++ )
1623
0
            {
1624
0
              j = ptr->assign.bflags[i];
1625
0
              flags[ResMonoBond[i]] = j;
1626
0
            }
1627
0
          return( ptr->assign.resid );
1628
0
          break;
1629
1630
0
        default:  /* Illegal Instruction! */
1631
0
          return( 0 );
1632
0
        } // (switch)
1633
0
    } // while (loop through atoms)
1634
0
    return 0;
1635
0
  }
1636
1637
  //////////////////////////////////////////////////////////////////////////////
1638
  // Nucleic Backbone Perception
1639
  /////////////////////////////////////////////////////////////////////////////
1640
1641
  bool OBChainsParser::DetermineNucleicBackbone(OBMol &mol)
1642
0
  {
1643
0
    ConstrainBackbone(mol, Nucleotide, MAXNUCLEIC);
1644
1645
0
    int i, max = mol.NumAtoms();
1646
1647
    /* Order Nucleic Backbone */
1648
1649
0
    for( i = 0 ; i < max ; i++ )
1650
0
      if( atomids[i] == -1 )
1651
0
        {
1652
0
          if( bitmasks[i] & BitPTer )
1653
0
            {
1654
0
              atomids[i] = AI_P;
1655
0
              TraceNucleicChain(mol,i,1);
1656
0
            }
1657
0
          else if( bitmasks[i] & BitO5Ter )
1658
0
            {
1659
0
              atomids[i] = AI_O5;
1660
0
              TraceNucleicChain(mol,i,1);
1661
0
            }
1662
0
        }
1663
1664
0
    return true;
1665
0
  }
1666
1667
  void OBChainsParser::TraceNucleicChain(OBMol &mol, unsigned int i, int r)
1668
0
  {
1669
0
    unsigned int neighbour[4];
1670
0
    int count;
1671
0
    int j,k;
1672
1673
0
    OBAtom *atom, *nbr;
1674
0
    vector<OBBond *>::iterator b;
1675
1676
0
    if (visits[i])
1677
0
      return;
1678
0
    visits[i] = true;
1679
1680
0
    count = 0;
1681
0
    atom  = mol.GetAtom(i + 1);
1682
0
    for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b))
1683
0
      if (nbr->GetAtomicNum() != OBElements::Hydrogen)
1684
0
        neighbour[count++] = nbr->GetIdx() - 1;
1685
1686
0
    resnos[i] = r;
1687
1688
0
    switch( atomids[i] )
1689
0
      {
1690
0
      case(AI_P):
1691
0
        k = AI_O1P;  /* O1P */
1692
0
        for( j=0; j<count; j++ )
1693
0
          {
1694
0
            if( bitmasks[neighbour[j]] & BitO5 )
1695
0
              {
1696
0
                atomids[neighbour[j]] = AI_O5;
1697
0
                if (!visits[neighbour[j]])
1698
0
                  TraceNucleicChain(mol,neighbour[j],r);
1699
0
              }
1700
0
            else if( bitmasks[neighbour[j]] & BitOP )
1701
0
              {
1702
0
                atomids[neighbour[j]] = k;
1703
0
                resnos[neighbour[j]]  = r;
1704
0
                k = AI_O2P;  /* O2P */
1705
0
              }
1706
0
          }
1707
1708
0
        break;
1709
1710
0
      case(AI_O5):
1711
0
        for( j=0; j<count; j++ )
1712
0
          if( bitmasks[neighbour[j]] & BitC5 )
1713
0
            {
1714
0
              atomids[neighbour[j]] = AI_C5;
1715
0
              if (!visits[neighbour[j]])
1716
0
                TraceNucleicChain(mol,neighbour[j],r);
1717
0
            }
1718
1719
0
        break;
1720
1721
0
      case(AI_C5):
1722
0
        for( j=0 ; j<count; j++ )
1723
0
          if( bitmasks[neighbour[j]] & BitC4 )
1724
0
            {
1725
0
              atomids[neighbour[j]] = AI_C4;
1726
0
              if (!visits[neighbour[j]])
1727
0
                TraceNucleicChain(mol,neighbour[j],r);
1728
0
            }
1729
1730
0
        break;
1731
1732
0
      case(AI_C4):
1733
0
        for( j=0; j<count; j++ )
1734
0
          {
1735
0
            if( bitmasks[neighbour[j]] & BitC3 )
1736
0
              {
1737
0
                atomids[neighbour[j]] = AI_C3;
1738
0
                if (!visits[neighbour[j]])
1739
0
                  TraceNucleicChain(mol,neighbour[j],r);
1740
0
              }
1741
0
            else if( bitmasks[neighbour[j]] & BitO4 )
1742
0
              {
1743
0
                atomids[neighbour[j]] = AI_O4;
1744
0
                resnos[neighbour[j]]  = r;
1745
0
              }
1746
0
          }
1747
1748
0
        break;
1749
1750
0
      case(AI_C3):
1751
0
        for( j=0; j<count; j++ )
1752
0
          {
1753
0
            if( bitmasks[neighbour[j]] & BitO3All )
1754
0
              {
1755
0
                atomids[neighbour[j]] = AI_O3;
1756
0
                if (!visits[neighbour[j]])
1757
0
                  TraceNucleicChain(mol,neighbour[j],r);
1758
0
              }
1759
0
            else if( bitmasks[neighbour[j]] & BitC2All )
1760
0
              {
1761
0
                atomids[neighbour[j]] = AI_C2;
1762
0
                if (!visits[neighbour[j]])
1763
0
                  TraceNucleicChain(mol,neighbour[j],r);
1764
0
              }
1765
0
          }
1766
1767
0
        break;
1768
1769
0
      case(AI_O3):
1770
0
        for( j=0; j<count; j++ )
1771
0
          if( bitmasks[neighbour[j]] & BitP )
1772
0
            {
1773
0
              atomids[neighbour[j]] = AI_P;
1774
0
              if (!visits[neighbour[j]])
1775
0
                TraceNucleicChain(mol,neighbour[j],r+1);
1776
0
            }
1777
1778
0
        break;
1779
1780
0
      case(AI_C2):
1781
0
        for( j=0; j<count; j++ )
1782
0
          {
1783
0
            if( bitmasks[neighbour[j]] & BitC1 )
1784
0
              {
1785
0
                atomids[neighbour[j]] = AI_C1;
1786
0
                resnos[neighbour[j]]  = r;
1787
0
              }
1788
0
            else if( bitmasks[neighbour[j]] & BitO2 )
1789
0
              {
1790
0
                atomids[neighbour[j]] = AI_O2;
1791
0
                resnos[neighbour[j]]  = r;
1792
0
              }
1793
0
          }
1794
1795
0
        break;
1796
0
      }
1797
0
  }
1798
1799
  //////////////////////////////////////////////////////////////////////////////
1800
  // Nucleic Sidechains Perception
1801
  //////////////////////////////////////////////////////////////////////////////
1802
1803
  bool OBChainsParser::DetermineNucleicSidechains(OBMol &mol)
1804
0
  {
1805
0
    for( unsigned int i = 0 ; i < mol.NumAtoms() ; i++ )
1806
0
      if( atomids[i] == 49 )
1807
0
        {
1808
0
          int resid = IdentifyResidue(NDecisionTree,mol,i,resnos[i]);
1809
0
          AssignResidue(mol,resnos[i],chains[i],resid);
1810
0
        }
1811
1812
0
    return true;
1813
0
  }
1814
1815
  //////////////////////////////////////////////////////////////////////////////
1816
  // Hydrogens Perception
1817
  //////////////////////////////////////////////////////////////////////////////
1818
1819
  bool OBChainsParser::DetermineHydrogens(OBMol &mol)
1820
0
  {
1821
0
    OBAtom *atom, *nbr;
1822
0
    int idx,sidx;
1823
1824
0
    int max = mol.NumAtoms();
1825
0
    for ( int i = 0 ; i < max ; i++ )
1826
0
      hcounts[i] = 0;
1827
1828
    /* First Pass */
1829
1830
0
    vector<OBAtom*>::iterator a;
1831
0
    vector<OBBond*>::iterator b;
1832
1833
0
    for(atom = mol.BeginAtom(a); atom ; atom = mol.NextAtom(a))
1834
0
      if(atom->GetAtomicNum() == OBElements::Hydrogen)
1835
0
        {
1836
0
          nbr = atom->BeginNbrAtom(b);
1837
0
          if (nbr != nullptr)
1838
0
            {
1839
0
              idx  = atom->GetIdx() - 1;
1840
0
              sidx = nbr->GetIdx() - 1;
1841
1842
0
              hcounts[idx]  = ++hcounts[sidx];
1843
0
              hetflags[idx] = hetflags[sidx];
1844
0
              atomids[idx]  = atomids[sidx];
1845
0
              resids[idx]   = resids[sidx];
1846
0
              resnos[idx]   = resnos[sidx];
1847
0
              chains[idx]   = chains[sidx];
1848
0
            }
1849
0
        }
1850
1851
    /* Second Pass */
1852
1853
0
    for(atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a))
1854
0
      if (atom->GetAtomicNum() == OBElements::Hydrogen)
1855
0
        {
1856
0
          nbr = atom->BeginNbrAtom(b);
1857
0
          if (nbr != nullptr && hcounts[nbr->GetIdx()-1] == 1)
1858
0
            hcounts[atom->GetIdx()-1] = 0;
1859
0
        }
1860
1861
0
    return true;
1862
0
  }
1863
1864
  //////////////////////////////////////////////////////////////////////////////
1865
  // Utility Functions
1866
  //////////////////////////////////////////////////////////////////////////////
1867
1868
  // validated
1869
  void OBChainsParser::DefineMonomer(void **tree, int resid, const char *smiles)
1870
162
  {
1871
162
    int i;
1872
1873
162
    MonoAtomCount = 0;
1874
162
    MonoBondCount = 0;
1875
1876
162
    ParseSmiles(smiles,-1);
1877
1878
1.14k
    for( i=0; i<MonoBondCount; i++ )
1879
978
      MonoBond[i].index = -1;
1880
1.21k
    for( i=0; i<MonoAtomCount; i++ )
1881
1.05k
      MonoAtom[i].index = -1;
1882
162
    AtomIndex = BondIndex = 0;
1883
1884
162
    StackPtr = 0;
1885
162
    GenerateByteCodes((ByteCode**)tree, resid, 0, 0, 0 );
1886
162
  }
1887
1888
  int OBChainsParser::IdentifyElement(char *ptr)
1889
1.05k
  {
1890
1.05k
    int ch;
1891
1892
1.05k
    ch = toupper(ptr[1]);
1893
1.05k
    switch( toupper(ptr[0]) )
1894
1.05k
      {
1895
1.05k
      case(' '):  switch( ch )
1896
1.05k
          {
1897
0
          case('B'):  return(  5 );
1898
750
          case('C'):  return(  6 );
1899
0
          case('D'):  return(  1 );
1900
0
          case('F'):  return(  9 );
1901
0
          case('H'):  return(  1 );
1902
0
          case('I'):  return( 53 );
1903
0
          case('K'):  return( 19 );
1904
0
          case('L'):  return(  1 );
1905
192
          case('N'):  return(  7 );
1906
102
          case('O'):  return(  8 );
1907
0
          case('P'):  return( 15 );
1908
12
          case('S'):  return( 16 );
1909
0
          case('U'):  return( 92 );
1910
0
          case('V'):  return( 23 );
1911
0
          case('W'):  return( 74 );
1912
0
          case('Y'):  return( 39 );
1913
1.05k
          }
1914
0
        break;
1915
1916
0
      case('A'):  switch( ch )
1917
0
          {
1918
0
          case('C'):  return( 89 );
1919
0
          case('G'):  return( 47 );
1920
0
          case('L'):  return( 13 );
1921
0
          case('M'):  return( 95 );
1922
0
          case('R'):  return( 18 );
1923
0
          case('S'):  return( 33 );
1924
0
          case('T'):  return( 85 );
1925
0
          case('U'):  return( 79 );
1926
0
          }
1927
0
        break;
1928
1929
0
      case('B'):  switch( ch )
1930
0
          {
1931
0
          case('A'):  return( 56 );
1932
0
          case('E'):  return(  4 );
1933
0
          case('I'):  return( 83 );
1934
0
          case('K'):  return( 97 );
1935
0
          case('R'):  return( 35 );
1936
0
          case(' '):  return(  5 );
1937
0
          }
1938
0
        break;
1939
1940
0
      case('C'):  switch( ch )
1941
0
          {
1942
0
          case('A'):  return( 20 );
1943
0
          case('D'):  return( 48 );
1944
0
          case('E'):  return( 58 );
1945
0
          case('F'):  return( 98 );
1946
0
          case('L'):  return( 17 );
1947
0
          case('M'):  return( 96 );
1948
0
          case('O'):  return( 27 );
1949
0
          case('R'):  return( 24 );
1950
0
          case('S'):  return( 55 );
1951
0
          case('U'):  return( 29 );
1952
0
          case(' '):  return(  6 );
1953
0
          }
1954
0
        break;
1955
1956
0
      case('D'):  if( ch=='Y' )
1957
0
          {
1958
0
            return( 66 );
1959
0
          }
1960
0
        else if( ch==' ' )
1961
0
          return( 1 );
1962
0
        break;
1963
1964
0
      case('E'):  if( ch=='R' )
1965
0
          {
1966
0
            return( 68 );
1967
0
          }
1968
0
        else if( ch=='S' )
1969
0
          {
1970
0
            return( 99 );
1971
0
          }
1972
0
        else if( ch=='U' )
1973
0
          return( 63 );
1974
0
        break;
1975
1976
0
      case('F'):  if( ch=='E' )
1977
0
          {
1978
0
            return(  26 );
1979
0
          }
1980
0
        else if( ch=='M' )
1981
0
          {
1982
0
            return( 100 );
1983
0
          }
1984
0
        else if( ch=='R' )
1985
0
          {
1986
0
            return(  87 );
1987
0
          }
1988
0
        else if( ch=='F' )
1989
0
          return(   9 );
1990
0
        break;
1991
1992
0
      case('G'):  if( ch=='A' )
1993
0
          {
1994
0
            return( 31 );
1995
0
          }
1996
0
        else if( ch=='D' )
1997
0
          {
1998
0
            return( 64 );
1999
0
          }
2000
0
        else if( ch=='E' )
2001
0
          return( 32 );
2002
0
        break;
2003
2004
0
      case('H'):  if( ch=='E' )
2005
0
          {
2006
0
            return(  2 );
2007
0
          }
2008
0
        else if( ch=='F' )
2009
0
          {
2010
0
            return( 72 );
2011
0
          }
2012
0
        else if( ch=='G' )
2013
0
          {
2014
0
            return( 80 );
2015
0
          }
2016
0
        else if( ch=='O' )
2017
0
          {
2018
0
            return( 67 );
2019
0
          }
2020
0
        else if( ch==' ' )
2021
0
          return(  1 );
2022
0
        break;
2023
2024
0
      case('I'):  if( ch=='N' )
2025
0
          {
2026
0
            return( 49 );
2027
0
          }
2028
0
        else if( ch=='R' )
2029
0
          {
2030
0
            return( 77 );
2031
0
          }
2032
0
        else if( ch==' ' )
2033
0
          return( 53 );
2034
0
        break;
2035
2036
0
      case('K'):  if( ch=='R' )
2037
0
          {
2038
0
            return( 36 );
2039
0
          }
2040
0
        else if( ch==' ' )
2041
0
          return( 19 );
2042
0
        break;
2043
2044
0
      case('L'):  if( ch=='A' )
2045
0
          {
2046
0
            return(  57 );
2047
0
          }
2048
0
        else if( ch=='I' )
2049
0
          {
2050
0
            return(   3 );
2051
0
          }
2052
0
        else if( (ch=='R') || (ch=='W') )
2053
0
          {
2054
0
            return( 103 );
2055
0
          }
2056
0
        else if( ch=='U' )
2057
0
          {
2058
0
            return(  71 );
2059
0
          }
2060
0
        else if( ch==' ' )
2061
0
          return(   1 );
2062
0
        break;
2063
2064
0
      case('M'):  if( ch=='D' )
2065
0
          {
2066
0
            return( 101 );
2067
0
          }
2068
0
        else if( ch=='G' )
2069
0
          {
2070
0
            return(  12 );
2071
0
          }
2072
0
        else if( ch=='N' )
2073
0
          {
2074
0
            return(  25 );
2075
0
          }
2076
0
        else if( ch=='O' )
2077
0
          return(  42 );
2078
0
        break;
2079
2080
0
      case('N'):  switch( ch )
2081
0
          {
2082
0
          case('A'):  return(  11 );
2083
0
          case('B'):  return(  41 );
2084
0
          case('D'):  return(  60 );
2085
0
          case('E'):  return(  10 );
2086
0
          case('I'):  return(  28 );
2087
0
          case('O'):  return( 102 );
2088
0
          case('P'):  return(  93 );
2089
0
          case(' '):  return(   7 );
2090
0
          }
2091
0
        break;
2092
2093
0
      case('O'):  if( ch=='S' )
2094
0
          {
2095
0
            return( 76 );
2096
0
          }
2097
0
        else if( ch==' ' )
2098
0
          return( 8 );
2099
0
        break;
2100
2101
0
      case('P'):  switch( ch )
2102
0
          {
2103
0
          case('A'):  return( 91 );
2104
0
          case('B'):  return( 82 );
2105
0
          case('D'):  return( 46 );
2106
0
          case('M'):  return( 61 );
2107
0
          case('O'):  return( 84 );
2108
0
          case('R'):  return( 59 );
2109
0
          case('T'):  return( 78 );
2110
0
          case('U'):  return( 94 );
2111
0
          case(' '):  return( 15 );
2112
0
          }
2113
0
        break;
2114
2115
0
      case('R'):  switch( ch )
2116
0
          {
2117
0
          case('A'):  return( 88 );
2118
0
          case('B'):  return( 37 );
2119
0
          case('E'):  return( 75 );
2120
0
          case('H'):  return( 45 );
2121
0
          case('N'):  return( 86 );
2122
0
          case('U'):  return( 44 );
2123
0
          }
2124
0
        break;
2125
2126
0
      case('S'):  switch( ch )
2127
0
          {
2128
0
          case('B'):  return( 51 );
2129
0
          case('C'):  return( 21 );
2130
0
          case('E'):  return( 34 );
2131
0
          case('I'):  return( 14 );
2132
0
          case('M'):  return( 62 );
2133
0
          case('N'):  return( 50 );
2134
0
          case('R'):  return( 38 );
2135
0
          case(' '):  return( 16 );
2136
0
          }
2137
0
        break;
2138
2139
0
      case('T'):  switch( ch )
2140
0
          {
2141
0
          case('A'):  return( 73 );
2142
0
          case('B'):  return( 65 );
2143
0
          case('C'):  return( 43 );
2144
0
          case('E'):  return( 52 );
2145
0
          case('H'):  return( 90 );
2146
0
          case('I'):  return( 22 );
2147
0
          case('L'):  return( 81 );
2148
0
          case('M'):  return( 69 );
2149
0
          }
2150
0
        break;
2151
2152
0
      case('U'):  if( ch==' ' )
2153
0
          return( 92 );
2154
0
        break;
2155
2156
0
      case('V'):  if( ch==' ' )
2157
0
          return( 23 );
2158
0
        break;
2159
2160
0
      case('W'):  if( ch==' ' )
2161
0
          return( 74 );
2162
0
        break;
2163
2164
0
      case('X'):  if( ch=='E' )
2165
0
          return( 54 );
2166
0
        break;
2167
2168
0
      case('Y'):  if( ch=='B' )
2169
0
          {
2170
0
            return( 70 );
2171
0
          }
2172
0
        else if( ch==' ' )
2173
0
          return( 39 );
2174
0
        break;
2175
2176
0
      case('Z'):  if( ch=='N' )
2177
0
          {
2178
0
            return( 30 );
2179
0
          }
2180
0
        else if( ch=='R' )
2181
0
          return( 40 );
2182
0
        break;
2183
1.05k
      }
2184
2185
0
    if( (*ptr>='0') && (*ptr<='9') )
2186
0
      if( (ch=='H') || (ch=='D') )
2187
0
        return( 1 ); /* Hydrogen */
2188
2189
0
    return( 0 );
2190
0
  }
2191
2192
  const char *OBChainsParser::ParseSmiles(const char *ptr, int prev)
2193
318
  {
2194
318
    char *name;
2195
318
    int atomid;
2196
318
    int next;
2197
318
    int type;
2198
318
    int ch;
2199
2200
318
    type = 0;
2201
2.59k
    while( (ch = *ptr++) )
2202
2.43k
      {
2203
2.43k
        switch( ch )
2204
2.43k
          {
2205
786
          case('-'): type = BF_SINGLE;
2206
786
            break;
2207
30
          case('='): type = BF_DOUBLE;
2208
30
            break;
2209
0
          case('#'): type = BF_TRIPLE;
2210
0
            break;
2211
90
          case('^'): type = BF_SINGLE|BF_AROMATIC;
2212
90
            break;
2213
72
          case('~'): type = BF_DOUBLE|BF_AROMATIC;
2214
72
            break;
2215
2216
156
          case(')'): return( ptr );
2217
0
          case('.'): prev = -1;
2218
0
            break;
2219
156
          case('('): ptr = ParseSmiles(ptr,prev);
2220
156
            break;
2221
2222
1.14k
          default:
2223
1.14k
            atomid = ch-'0';
2224
1.88k
            while( isdigit(*ptr) )
2225
744
              atomid = (atomid*10)+(*ptr++)-'0';
2226
2227
4.92k
            for( next=0; next<MonoAtomCount; next++ )
2228
3.87k
              if( MonoAtom[next].atomid == atomid )
2229
84
                break;
2230
2231
1.14k
            if( next == MonoAtomCount )
2232
1.05k
              {
2233
1.05k
                name = ChainsAtomName[atomid];
2234
1.05k
                MonoAtom[next].elem = IdentifyElement(name);
2235
1.05k
                MonoAtom[next].atomid = atomid;
2236
1.05k
                MonoAtom[next].bcount = 0;
2237
1.05k
                MonoAtomCount++;
2238
1.05k
              }
2239
2240
1.14k
            if( prev != -1 )
2241
978
              {
2242
978
                MonoBond[MonoBondCount].flag = type;
2243
978
                MonoBond[MonoBondCount].src = prev;
2244
978
                MonoBond[MonoBondCount].dst = next;
2245
978
                MonoBondCount++;
2246
2247
978
                MonoAtom[prev].bcount++;
2248
978
                MonoAtom[next].bcount++;
2249
978
              }
2250
1.14k
            prev = next;
2251
2.43k
          }
2252
2.43k
      }
2253
162
    return( ptr-1 );
2254
318
  }
2255
2256
} // end namespace OpenBabel
2257
2258
//! \file chains.cpp
2259
//! \brief Parse for macromolecule chains and residues.