Coverage Report

Created: 2025-11-09 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/residue.cpp
Line
Count
Source
1
/**********************************************************************
2
residue.cpp - Handle macromolecule residues.
3
4
Copyright (C) 2001, 2002  OpenEye Scientific Software, Inc.
5
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
6
7
This file is part of the Open Babel project.
8
For more information, see <http://openbabel.org/>
9
10
This program is free software; you can redistribute it and/or modify
11
it under the terms of the GNU General Public License as published by
12
the Free Software Foundation version 2 of the License.
13
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
***********************************************************************/
19
20
/**********************************************************************
21
Global arrays Residue, ElemDesc and function GetResidueNumber were
22
obtained in part or whole from RasMol2 by Roger Sayle.
23
***********************************************************************/
24
25
///////////////////////////////////////////////////////////////////////////////
26
// File Includes
27
///////////////////////////////////////////////////////////////////////////////
28
29
#include <cstring>
30
31
#include <openbabel/babelconfig.h>
32
33
#include <openbabel/residue.h>
34
#include <openbabel/atom.h>
35
#include <openbabel/oberror.h>
36
#include <openbabel/bitvec.h>
37
#include <openbabel/bond.h>
38
#include <openbabel/elements.h>
39
#include <cstring>
40
#include <cstdlib>
41
42
using namespace std;
43
44
namespace OpenBabel
45
{
46
   ////////////////////////////////////////////////////////////////////////////////
47
  // Global Variables
48
  ////////////////////////////////////////////////////////////////////////////////
49
50
  // Three-letter residue names. The position of a name in this table is the
  // residue key returned by GetResidueNumber() further down in this file.
  char Residue[MAXRES][4] = {
    // ---- Amino acids (0-19) ------------------------------------------
    // Ordered by cumulative frequency in the Brookhaven Protein Databank,
    // December 1991.
    "ALA", "GLY",   //  0,  1   (8.4%, 8.3%)
    "LEU", "SER",   //  2,  3   (8.0%, 7.5%)
    "VAL", "THR",   //  4,  5   (7.1%, 6.4%)
    "LYS", "ASP",   //  6,  7   (5.8%, 5.5%)
    "ILE", "ASN",   //  8,  9   (5.2%, 4.9%)
    "GLU", "PRO",   // 10, 11   (4.9%, 4.4%)
    "ARG", "PHE",   // 12, 13   (3.8%, 3.7%)
    "GLN", "TYR",   // 14, 15   (3.5%, 3.5%)
    "HIS", "CYS",   // 16, 17   (2.3%, 2.0%)
    "MET", "TRP",   // 18, 19   (1.8%, 1.4%)

    "ASX", "GLX", "PCA", "HYP",   // 20-23

    // ---- DNA nucleotides (24-27) -------------------------------------
    "  A", "  C", "  G", "  T",

    // ---- RNA nucleotides (28-42) -------------------------------------
    "  U", " +U", "  I", "1MA",   // 28-31
    "5MC", "OMC", "1MG", "2MG",   // 32-35
    "M2G", "7MG", "OMG", " YG",   // 36-39
    "H2U", "5MU", "PSU",          // 40-42

    // ---- Miscellaneous (43-53) ---------------------------------------
    "UNK", "ACE", "FOR", "HOH",   // 43-46
    "DOD", "SO4", "PO4", "NAD",   // 47-50
    "COA", "NAP", "NDP"           // 51-53
  };
91
92
  /* Avoid SGI Compiler Warnings! */
93
    char ElemDesc[MAXELEM][4] = {
    // Four-character atom labels, written as explicit character lists
    // because a 4-character string literal plus its terminator would not
    // fit in char[4]. The row number is the value GetAtomIDNumber() yields.
    { ' ', 'N', ' ', ' ' },   //  0
    { ' ', 'C', 'A', ' ' },   //  1
    { ' ', 'C', ' ', ' ' },   //  2
    { ' ', 'O', ' ', ' ' },   //  3    0-3   Amino Acid Backbone
    { ' ', 'C', '\'', ' ' },  //  4
    { ' ', 'O', 'T', ' ' },   //  5
    { ' ', 'S', ' ', ' ' },   //  6
    { ' ', 'P', ' ', ' ' },   //  7    4-7   Shapely Amino Backbone
    { ' ', 'O', '1', 'P' },   //  8
    { ' ', 'O', '2', 'P' },   //  9
    { ' ', 'O', '5', '*' },   // 10
    { ' ', 'C', '5', '*' },   // 11
    { ' ', 'C', '4', '*' },   // 12
    { ' ', 'O', '4', '*' },   // 13
    { ' ', 'C', '3', '*' },   // 14
    { ' ', 'O', '3', '*' },   // 15
    { ' ', 'C', '2', '*' },   // 16
    { ' ', 'O', '2', '*' },   // 17
    { ' ', 'C', '1', '*' },   // 18    7-18  Nucleic Acid Backbone
    { ' ', 'C', 'A', '2' },   // 19    19    Shapely Special
    { ' ', 'S', 'G', ' ' },   // 20    20    Cysteine Sulphur
    { ' ', 'N', '1', ' ' },   // 21
    { ' ', 'N', '2', ' ' },   // 22
    { ' ', 'N', '3', ' ' },   // 23
    { ' ', 'N', '4', ' ' },   // 24
    { ' ', 'N', '6', ' ' },   // 25
    { ' ', 'O', '2', ' ' },   // 26
    { ' ', 'O', '4', ' ' },   // 27
    { ' ', 'O', '6', ' ' }    // 28    21-28 Nucleic Acid H-Bonding
  };
124
125
 /** \class OBResidue residue.h <openbabel/residue.h>
126
      \brief Residue information
127
128
      The residue information is drawn from PDB or MOL2 files (or similar), which
129
      track biomolecule information,
130
      and are stored in the OBResidue class. OBResidues are stored inside the
131
      OBAtom class and OBMol classes.
132
      The residue information for an atom can be requested in
133
      the following way:
134
      \code
135
      OBAtom *atom;
136
      OBResidue *r;
137
      atom = mol.GetAtom(1);
138
      r = atom->GetResidue();
139
      \endcode
140
      The residue information for a molecule can be manipulated too:
141
      \code
142
      cout << "This molecule has " << mol.NumResidues() << " residues." << endl;
143
      OBResidue *r;
144
      r = mol.GetResidue(1);
145
      \endcode
146
  */
147
148
  ///////////////////////////////////////////////////////////////////////////////
149
  // Residue Functions
150
  ///////////////////////////////////////////////////////////////////////////////
151
152
  static unsigned int GetAtomIDNumber(const char *atomid)
153
0
  {
154
0
    if (atomid != nullptr)
155
0
      {
156
0
        int ch1 = toupper(atomid[0]);
157
0
        int ch2 = toupper(atomid[1]);
158
0
        int ch3 = toupper(atomid[2]);
159
0
        int ch4 = toupper(atomid[3]);
160
161
0
        if (ch1 == ' ')
162
0
          {
163
0
            switch(ch2)
164
0
              {
165
0
              case 'C':
166
167
0
                switch(ch3)
168
0
                  {
169
0
                  case 'A':
170
171
0
                    if (ch4 == ' ')
172
0
                      return 1;
173
0
                    else if (ch4 == '2')
174
0
                      return 19;
175
176
0
                    break;
177
178
0
                  case ' ':
179
0
                    if (ch4 == ' ')
180
0
                      return 2;
181
0
                    break;
182
183
0
                  case '\'':
184
0
                    if (ch4 == ' ')
185
0
                      return 4;
186
0
                    break;
187
188
0
                  case '1':
189
0
                    if (ch4 == '*')
190
0
                      return 18;
191
0
                    break;
192
193
0
                  case '2':
194
0
                    if (ch4 == '*')
195
0
                      return 16;
196
0
                    break;
197
198
0
                  case '3':
199
0
                    if (ch4 == '*')
200
0
                      return 14;
201
0
                    break;
202
203
0
                  case '4':
204
0
                    if (ch4 == '*')
205
0
                      return 12;
206
0
                    break;
207
208
0
                  case '5':
209
0
                    if (ch4 == '*')
210
0
                      return 11;
211
0
                    break;
212
213
0
                  }
214
215
0
                break;
216
217
0
              case 'N':
218
219
0
                if (ch4 == ' ')
220
0
                  {
221
0
                    switch (ch3)
222
0
                      {
223
0
                      case ' ':
224
0
                        return 0;
225
0
                      case '1':
226
0
                        return 21;
227
0
                      case '2':
228
0
                        return 22;
229
0
                      case '3':
230
0
                        return 23;
231
0
                      case '4':
232
0
                        return 24;
233
0
                      case '6':
234
0
                        return 25;
235
0
                      }
236
0
                  }
237
238
0
                break;
239
240
0
              case 'O':
241
242
0
                switch(ch3)
243
0
                  {
244
0
                  case ' ':
245
246
0
                    if (ch4 == ' ')
247
0
                      return 3;
248
249
0
                    break;
250
251
0
                  case 'T':
252
253
0
                    if (ch4 == ' ')
254
0
                      return 5;
255
256
0
                    break;
257
258
0
                  case '1':
259
260
0
                    if (ch4 == 'P')
261
0
                      return 8;
262
263
0
                    break;
264
265
0
                  case '2':
266
267
0
                    if (ch4 == 'P')
268
0
                      return 9;
269
0
                    else if (ch4 == '*')
270
0
                      return 17;
271
0
                    else if (ch4 == ' ')
272
0
                      return 26;
273
274
0
                    break;
275
276
0
                  case '3':
277
278
0
                    if (ch4 == '*')
279
0
                      return 15;
280
281
0
                    break;
282
283
0
                  case '4':
284
285
0
                    if (ch4 == '*')
286
0
                      return 13;
287
0
                    else if (ch4 == ' ')
288
0
                      return 27;
289
290
0
                    break;
291
292
0
                  case '5':
293
294
0
                    if (ch4 == '*')
295
0
                      return 10;
296
297
0
                    break;
298
299
0
                  case '6':
300
301
0
                    if (ch4 == ' ')
302
0
                      return 28;
303
304
0
                    break;
305
0
                  }
306
307
0
                break;
308
309
0
              case 'P':
310
311
0
                if ((ch3 == ' ') && (ch4 == ' '))
312
0
                  return 7;
313
314
0
                break;
315
316
0
              case 'S':
317
318
0
                if (ch4 == ' ')
319
0
                  {
320
0
                    if (ch3 == ' ')
321
0
                      return 6;
322
0
                    else if (ch3 == 'G')
323
0
                      return 20;
324
0
                  }
325
326
0
                break;
327
0
              }
328
0
          }
329
330
0
        return MAXELEM;
331
0
      }
332
333
0
    else
334
0
      {
335
0
        obErrorLog.ThrowError(__FUNCTION__, "NULL Atom IDs specified", obWarning);
336
0
        return MAXELEM;
337
0
      }
338
0
  }
339
340
  static unsigned int GetResidueNumber(const char *res)
341
392k
  {
342
392k
    if (res != nullptr && strlen(res) > 2)
343
383k
      {
344
383k
        int ch1 = toupper(res[0]);
345
383k
        int ch2 = toupper(res[1]);
346
383k
        int ch3 = toupper(res[2]);
347
348
383k
        switch( ch1 )
349
383k
          {
350
1
          case(' '):
351
352
1
            if( ch2 == ' ' )
353
1
              {
354
1
                switch( ch3 )
355
1
                  {
356
0
                  case('A'):  return( 24 );
357
0
                  case('C'):  return( 25 );
358
0
                  case('G'):  return( 26 );
359
1
                  case('T'):  return( 27 );
360
0
                  case('U'):  return( 28 );
361
0
                  case('I'):  return( 30 );
362
1
                  }
363
1
              }
364
0
            else if( ch2 == '+' )
365
0
              {
366
0
                if( ch3 == 'U' )
367
0
                  return( 29 );
368
0
              }
369
0
            else if( ch2 == 'Y' )
370
0
              {
371
0
                if( ch3 == 'G' )
372
0
                  return( 39 );
373
0
              }
374
375
0
            break;
376
377
2
          case('0'):
378
379
2
            if( ch2 == 'M' )
380
0
              {
381
0
                if( ch3 == 'C' )
382
0
                  return( 33 );
383
0
                else if( ch3 == 'G' )
384
0
                  return( 38 );
385
0
              }
386
387
2
            break;
388
389
8
          case('1'):
390
391
8
            if( ch2 == 'M' )
392
0
              {
393
0
                if( ch3 == 'A' )
394
0
                  return( 31 );
395
0
                else if( ch3 == 'G' )
396
0
                  return( 34 );
397
0
              }
398
399
8
            break;
400
401
8
          case('2'):
402
403
5
            if( ch2 == 'M' )
404
0
              if( ch3 == 'G' )
405
0
                return( 35 );
406
407
5
            break;
408
409
5
          case('5'):
410
411
0
            if( ch2 == 'M' )
412
0
              {
413
0
                if( ch3 == 'C' )
414
0
                  return( 32 );
415
0
                else if( ch3 == 'U' )
416
0
                  return( 41 );
417
0
              }
418
419
0
            break;
420
421
1
          case('7'):
422
423
1
            if( ch2 == 'M' )
424
0
              if( ch3 == 'G' )
425
0
                return( 37 );
426
427
1
            break;
428
429
240k
          case('A'):
430
431
240k
            if( ch2 == 'L' )
432
224k
              {
433
224k
                if( ch3 == 'A' )
434
224k
                  return(  0 );
435
224k
              }
436
15.9k
            else if( ch2 == 'S' )
437
8.15k
              {
438
8.15k
                if( ch3 == 'P' )
439
397
                  return(  7 );
440
7.75k
                else if( ch3 == 'N' )
441
1.23k
                  return(  9 );
442
6.51k
                else if( ch3 == 'X' )
443
6.51k
                  return( 20 );
444
8.15k
              }
445
7.81k
            else if( ch2 == 'R' )
446
7.73k
              {
447
7.73k
                if( ch3 == 'G' )
448
7.66k
                  return( 12 );
449
7.73k
              }
450
74
            else if( ch2 == 'C' )
451
55
              {
452
55
                if( ch3 == 'E' )
453
0
                  return( 44 );
454
55
              }
455
19
            else if( ch2 == 'D' )
456
0
              {
457
0
                if( ch3 == 'E' )
458
0
                  return( 24 );    /* "ADE" -> "  A" */
459
0
              }
460
461
148
            break;
462
463
411
          case('C'):
464
465
411
            if( ch2 == 'Y' )
466
409
              {
467
409
                if( ch3 == 'S' )
468
409
                  return( 17 );
469
0
                else if( ch3 == 'H' )
470
0
                  return( 17 );    /* "CYH" -> "CYS" */
471
0
                else if( ch3 == 'T' )
472
0
                  return( 25 );    /* "CYT" -> "  C" */
473
409
              }
474
2
            else if( ch2 == 'O' )
475
0
              {
476
0
                if( ch3 == 'A' )
477
0
                  return( 51 );
478
0
              }
479
2
            else if( ch2 == 'P' )
480
0
              {
481
0
                if( ch3 == 'R' )
482
0
                  return( 11 );    /* "CPR" -> "PRO" */
483
0
              }
484
2
            else if( ch2 == 'S' )
485
0
              {
486
0
                if( ch3 == 'H' )
487
0
                  return( 17 );    /* "CSH" -> "CYS" */
488
0
                else if( ch3 == 'M' )
489
0
                  return( 17 );    /* "CSM" -> "CYS" */
490
0
              }
491
492
2
            break;
493
494
2
          case('D'):
495
496
0
            if( ch2 == 'O' )
497
0
              {
498
0
                if( ch3 == 'D' )
499
0
                  return( 47 );
500
0
              }
501
0
            else if( ch2 == '2' )
502
0
              {
503
0
                if( ch3 == 'O' )
504
0
                  return( 47 );    /* "D2O" -> "DOD" */
505
0
              }
506
507
0
            break;
508
509
3
          case('F'):
510
511
3
            if( ch2 == 'O' )
512
1
              if( ch3 == 'R' )
513
1
                return( 45 );
514
515
2
            break;
516
517
7.80k
          case('G'):
518
519
7.80k
            if( ch2 == 'L' )
520
7.40k
              {
521
7.40k
                if( ch3 == 'Y' )
522
4.15k
                  return(  1 );
523
3.25k
                else if( ch3 == 'U' )
524
1.54k
                  return( 10 );
525
1.70k
                else if( ch3 == 'N' )
526
1.41k
                  return( 14 );
527
298
                else if( ch3 == 'X' )
528
298
                  return( 21 );
529
7.40k
              }
530
394
            else if( ch2 == 'U' )
531
0
              {
532
0
                if( ch3 == 'A' )
533
0
                  return( 26 );    /* "GUA" -> "  G" */
534
0
              }
535
536
394
            break;
537
538
398
          case('H'):
539
540
398
            if( ch2 == 'I' )
541
352
              {
542
352
                if( ch3 == 'S' )
543
351
                  return( 16 );
544
352
              }
545
46
            else if( ch2 == 'O' )
546
0
              {
547
0
                if( ch3 == 'H' )
548
0
                  return( 46 );
549
0
              }
550
46
            else if( ch2 == 'Y' )
551
0
              {
552
0
                if( ch3 == 'P' )
553
0
                  return( 23 );
554
0
              }
555
46
            else if( ch2 == '2' )
556
0
              {
557
0
                if( ch3 == 'O' )
558
0
                  return( 46 );    /* "H20" -> "HOH" */
559
0
                else if( ch3 == 'U' )
560
0
                  return( 40 );
561
0
              }
562
563
47
            break;
564
565
421
          case('I'):
566
567
421
            if( ch2 == 'L' )
568
409
              if( ch3 == 'E' )
569
409
                return(  8 );
570
571
12
            break;
572
573
17.9k
          case('L'):
574
575
17.9k
            if( ch2 == 'E' )
576
17.7k
              {
577
17.7k
                if( ch3 == 'U' )
578
17.7k
                  return(  2 );
579
17.7k
              }
580
214
            else if( ch2 == 'Y' )
581
214
              {
582
214
                if( ch3 == 'S' )
583
214
                  return(  6 );
584
214
              }
585
586
0
            break;
587
588
29.0k
          case('M'):
589
590
29.0k
            if( ch2 == 'E' )
591
29.0k
              {
592
29.0k
                if( ch3 == 'T' )
593
29.0k
                  return( 18 );
594
29.0k
              }
595
0
            else if( ch2 == '2' )
596
0
              {
597
0
                if( ch3 == 'G' )
598
0
                  return( 36 );
599
0
              }
600
601
4
            break;
602
603
6
          case('N'):
604
605
6
            if( ch2 == 'A' )
606
1
              {
607
1
                if( ch3 == 'D' )
608
0
                  return( 50 );
609
1
                else if( ch3 == 'P' )
610
0
                  return( 52 );
611
1
              }
612
5
            else if( ch2 == 'D' )
613
1
              {
614
1
                if( ch3 == 'P' )
615
1
                  return( 53 );
616
1
              }
617
618
5
            break;
619
620
58.6k
          case('P'):
621
622
58.6k
            if( ch2 == 'R' )
623
6.67k
              {
624
6.67k
                if( ch3 == 'O' )
625
6.66k
                  return( 11 );
626
6.67k
              }
627
51.9k
            else if( ch2 == 'H' )
628
50.8k
              {
629
50.8k
                if( ch3 == 'E' )
630
50.8k
                  return( 13 );
631
50.8k
              }
632
1.05k
            else if( ch2 == 'C' )
633
0
              {
634
0
                if( ch3 == 'A' )
635
0
                  return( 22 );
636
0
              }
637
1.05k
            else if( ch2 == 'O' )
638
0
              {
639
0
                if( ch3 == '4' )
640
0
                  return( 49 );
641
0
              }
642
1.05k
            else if( ch2 == 'S' )
643
1
              {
644
1
                if( ch3 == 'U' )
645
0
                  return( 42 );
646
1
              }
647
648
1.05k
            break;
649
650
1.05k
          case('S'):
651
652
752
            if( ch2 == 'E' )
653
750
              {
654
750
                if( ch3 == 'R' )
655
234
                  return(  3 );
656
750
              }
657
2
            else if( ch2 == 'O' )
658
0
              {
659
0
                if( ch3 == '4' )
660
0
                  return( 48 );
661
0
                else if( ch3 == 'L' )
662
0
                  return( 46 );    /* "SOL" -> "HOH" */
663
0
              }
664
2
            else if( ch2 == 'U' )
665
0
              {
666
0
                if( ch3 == 'L' )
667
0
                  return( 48 );    /* "SUL" -> "SO4" */
668
0
              }
669
670
518
            break;
671
672
16.4k
          case('T'):
673
674
16.4k
            if( ch2 == 'H' )
675
14.2k
              {
676
14.2k
                if( ch3 == 'R' )
677
14.2k
                  return(  5 );
678
0
                else if( ch3 == 'Y' )
679
0
                  return( 27 );    /* "THY" -> "  T" */
680
14.2k
              }
681
2.19k
            else if( ch2 == 'Y' )
682
1.54k
              {
683
1.54k
                if( ch3 == 'R' )
684
1.54k
                  return( 15 );
685
1.54k
              }
686
651
            else if( ch2 == 'R' )
687
643
              {
688
643
                if( ch3 == 'P' )
689
160
                  return( 19 );
690
483
                else if( ch3 == 'Y' )
691
0
                  return( 19 );    /* "TRY" -> "TRP" */
692
643
              }
693
8
            else if( ch2 == 'I' )
694
6
              {
695
6
                if( ch3 == 'P' )
696
1
                  return( 46 );    /* "TIP" -> "HOH" */
697
6
              }
698
699
493
            break;
700
701
5.49k
          case('U'):
702
703
5.49k
            if( ch2 == 'N' )
704
5.48k
              {
705
5.48k
                if( ch3 == 'K' )
706
385
                  return( 43 );
707
5.48k
              }
708
1
            else if( ch2 == 'R' )
709
1
              {
710
1
                if( ch3 == 'A' )
711
0
                  return( 28 );    /* "URA" -> "  U" */
712
1
                else if( ch3 == 'I' )
713
0
                  return( 28 );    /* "URI" -> "  U" */
714
1
              }
715
716
5.10k
            break;
717
718
5.10k
          case('V'):
719
720
388
            if( ch2 == 'A' )
721
388
              if( ch3 == 'L' )
722
388
                return(  4 );
723
724
0
            break;
725
726
1
          case('W'):
727
728
1
            if( ch2 == 'A' )
729
0
              if( ch3 == 'T' )
730
0
                return( 46 );    /* "WAT" -> "HOH" */
731
732
1
            break;
733
383k
          }
734
735
383k
      }
736
737
21.6k
    return OBResidueIndex::UNK;
738
392k
  }
739
740
1.15k
  void    OBResidue::SetInsertionCode(const char insertioncode) {
741
1.15k
  _insertioncode=insertioncode;
742
1.15k
  }
743
744
745
  static void SetResidueKeys(const char   *residue,
746
                             unsigned int &reskey,
747
                             unsigned int &aakey)
748
392k
  {
749
392k
    reskey = GetResidueNumber(residue);
750
392k
    switch(reskey)
751
392k
      {
752
224k
      case OBResidueIndex::ALA:
753
224k
        aakey = AA_ALA;
754
224k
        break;
755
4.15k
      case OBResidueIndex::GLY:
756
4.15k
        aakey = AA_GLY;
757
4.15k
        break;
758
17.7k
      case OBResidueIndex::LEU:
759
17.7k
        aakey = AA_LEU;
760
17.7k
        break;
761
234
      case OBResidueIndex::SER:
762
234
        aakey = AA_SER;
763
234
        break;
764
388
      case OBResidueIndex::VAL:
765
388
        aakey = AA_VAL;
766
388
        break;
767
14.2k
      case OBResidueIndex::THR:
768
14.2k
        aakey = AA_THR;
769
14.2k
        break;
770
214
      case OBResidueIndex::LYS:
771
214
        aakey = AA_LYS;
772
214
        break;
773
397
      case OBResidueIndex::ASP:
774
397
        aakey = AA_ASP;
775
397
        break;
776
409
      case OBResidueIndex::ILE:
777
409
        aakey = AA_ILE;
778
409
        break;
779
1.23k
      case OBResidueIndex::ASN:
780
1.23k
        aakey = AA_ASN;
781
1.23k
        break;
782
1.54k
      case OBResidueIndex::GLU:
783
1.54k
        aakey = AA_GLU;
784
1.54k
        break;
785
6.66k
      case OBResidueIndex::PRO:
786
6.66k
        aakey = AA_PRO;
787
6.66k
        break;
788
7.66k
      case OBResidueIndex::ARG:
789
7.66k
        aakey = AA_ARG;
790
7.66k
        break;
791
50.8k
      case OBResidueIndex::PHE:
792
50.8k
        aakey = AA_PHE;
793
50.8k
        break;
794
1.41k
      case OBResidueIndex::GLN:
795
1.41k
        aakey = AA_GLN;
796
1.41k
        break;
797
1.54k
      case OBResidueIndex::TYR:
798
1.54k
        aakey = AA_TYR;
799
1.54k
        break;
800
351
      case OBResidueIndex::HIS:
801
351
        aakey = AA_HIS;
802
351
        break;
803
409
      case OBResidueIndex::CYS:
804
409
        aakey = AA_CYS;
805
409
        break;
806
29.0k
      case OBResidueIndex::MET:
807
29.0k
        aakey = AA_MET;
808
29.0k
        break;
809
160
      case OBResidueIndex::TRP:
810
160
        aakey = AA_TRP;
811
160
        break;
812
28.8k
      default:
813
28.8k
        aakey = 0;
814
28.8k
        break;
815
392k
      }
816
392k
  }
817
818
  ///////////////////////////////////////////////////////////////////////////////
819
  // OBResidue: Constructors / Destructor
820
  ///////////////////////////////////////////////////////////////////////////////
821
822
  // Build an empty residue: chain 'A', index 0, unknown residue key, no
  // amino-acid key, empty name and number, no insertion code.
  OBResidue::OBResidue()
  {
    // textual fields
    _resname.clear();
    _resnum.clear();

    // scalar fields
    _chain         = 'A';
    _insertioncode = 0;
    _idx           = 0;
    _reskey        = OBResidueIndex::UNK;
    _aakey         = 0;

    _vdata.clear();
  }
833
834
  // Copy constructor. Copies the descriptive fields of src (chain, keys,
  // number, name, segment name, insertion code) and its per-atom id, hetatm
  // and serial-number vectors. The atom pointer list is not copied, and the
  // OBBase part is default-constructed rather than copied.
  //
  // NOTE(review): because _atoms is left empty while _atomid/_hetatm/_sernum
  // are copied, the per-atom vectors are longer than _atoms until atoms are
  // re-added - confirm callers rely on this.
  OBResidue::OBResidue(const OBResidue &src) :
    OBBase()
  {
    // The index is not taken from src; give it the same defined starting
    // value as the default constructor instead of leaving it unset.
    _idx      = 0;

    _chain    = src._chain;
    _aakey    = src._aakey;
    _reskey   = src._reskey;
    _resnum   = src._resnum;
    _resname  = src._resname;
    _segname  = src._segname;   // was silently dropped before
    _atomid   = src._atomid;
    _hetatm   = src._hetatm;
    _sernum   = src._sernum;
    _insertioncode = src._insertioncode;
  }
848
849
  // Detach every atom still registered with this residue so that none keeps
  // pointing at it, then drop the list.
  OBResidue::~OBResidue()
  {
    for (OBAtom *member : _atoms)
      member->SetResidue(nullptr);

    _atoms.clear();
  }
857
858
  ///////////////////////////////////////////////////////////////////////////////
859
  // OBResidue: Operator Overloads
860
  ///////////////////////////////////////////////////////////////////////////////
861
862
  // Copy residue information from src: chain, keys, number, name, segment
  // name, insertion code and the per-atom id / hetatm / serial vectors.
  // Not copied: the residue index, the atom pointer list and vdata.
  // Self-assignment is a no-op.
  OBResidue &OBResidue::operator = (const OBResidue &src)
  {
    if (this == &src)
      return *this;

    _chain    = src._chain;
    _aakey    = src._aakey;
    _reskey   = src._reskey;
    _resnum   = src._resnum;
    _resname  = src._resname;
    _segname  = src._segname;   // previously lost on assignment
    _atomid   = src._atomid;
    _hetatm   = src._hetatm;
    _sernum   = src._sernum;
    _insertioncode = src._insertioncode;

    return *this;
  }
881
882
  ///////////////////////////////////////////////////////////////////////////////
883
  // OBResidue: Data Access / Manipulation
884
  ///////////////////////////////////////////////////////////////////////////////
885
886
  // Register atom with this residue. The atom's residue pointer is set to
  // this object and one default slot (empty id, not hetatm, serial 0) is
  // appended to each per-atom vector. A nullptr argument is ignored.
  void OBResidue::AddAtom(OBAtom *atom)
  {
    if (atom == nullptr)
      return;

    atom->SetResidue(this);

    _atoms.push_back(atom);
    _atomid.push_back("");
    _hetatm.push_back(false);
    _sernum.push_back(0);
  }
898
899
  // Same as AddAtom(): simply forwards to it.
  void OBResidue::InsertAtom(OBAtom *atom)
  {
    this->AddAtom(atom);
  }
903
904
  // Unregister atom from this residue. Every slot holding atom is removed
  // from the atom list and from the parallel id / hetatm / serial vectors,
  // and the atom's residue pointer is reset to nullptr. A nullptr argument
  // or an atom that is not registered leaves the residue unchanged.
  void OBResidue::RemoveAtom(OBAtom *atom)
  {
    if (atom == nullptr)
      return;

    unsigned int i = 0;
    while (i < _atoms.size())
      {
        if (_atoms[i] != atom)
          {
            ++i;
            continue;
          }

        atom->SetResidue(nullptr);
        _atoms.erase(_atoms.begin() + i);
        _atomid.erase(_atomid.begin() + i);
        _hetatm.erase(_hetatm.begin() + i);
        _sernum.erase(_sernum.begin() + i);
        // Do not advance: the next element has moved into slot i and still
        // has to be examined (the old loop skipped it).
      }
  }
921
922
  bool OBResidue::Clear(void)
923
0
  {
924
0
    for (unsigned int i = 0 ; i < _atoms.size() ; ++i)
925
0
      _atoms[i]->SetResidue(nullptr);
926
927
0
    _chain   = 'A';
928
0
    _idx     = 0;
929
0
    _aakey   = 0;
930
0
    _reskey  = OBResidueIndex::UNK;
931
0
    _resnum  = "";
932
0
    _resname = "";
933
0
    _insertioncode=0;
934
935
0
    _atoms.clear();
936
0
    _atomid.clear();
937
0
    _hetatm.clear();
938
0
    _sernum.clear();
939
940
0
    return (OBBase::Clear());
941
0
  }
942
943
  void OBResidue::SetChain(char chain)
944
7.55k
  {
945
7.55k
    _chain = chain;
946
7.55k
  }
947
948
  void OBResidue::SetChainNum(unsigned int chainnum)
949
384k
  {
950
384k
    _chain = (char) ('A' + chainnum - 1);
951
384k
  }
952
953
  void OBResidue::SetIdx(unsigned int idx)
954
956k
  {
955
956k
    _idx = idx;
956
956k
  }
957
958
  void OBResidue::SetName(const string &name)
959
392k
  {
960
392k
    _resname = name;
961
392k
    SetResidueKeys(_resname.c_str(), _reskey, _aakey);
962
392k
  }
963
964
  void OBResidue::SetSegName(const string &name)
965
175
  {
966
175
    _segname = name;
967
175
  }  
968
969
  void OBResidue::SetNum(const unsigned int resnum)
970
390k
  {
971
390k
    stringstream temp;
972
390k
    temp << resnum;
973
390k
    _resnum = temp.str();
974
390k
  }
975
976
  void OBResidue::SetNum(const string resnum)
977
2.41k
  {
978
2.41k
    _resnum = resnum;
979
2.41k
  }
980
981
  // Assign id to every slot whose atom pointer equals atom. Nothing happens
  // when atom is not registered with this residue.
  void OBResidue::SetAtomID(OBAtom *atom, const string &id)
  {
    for (vector<OBAtom*>::size_type slot = 0 ; slot < _atoms.size() ; ++slot)
      {
        if (_atoms[slot] != atom)
          continue;
        _atomid[slot] = id;
      }
  }
987
988
  // Assign the hetatm flag to every slot whose atom pointer equals atom.
  // Nothing happens when atom is not registered with this residue.
  void OBResidue::SetHetAtom(OBAtom *atom, bool hetatm)
  {
    for (vector<OBAtom*>::size_type slot = 0 ; slot < _atoms.size() ; ++slot)
      {
        if (_atoms[slot] != atom)
          continue;
        _hetatm[slot] = hetatm;
      }
  }
994
995
  // Assign the serial number to every slot whose atom pointer equals atom.
  // Nothing happens when atom is not registered with this residue.
  void OBResidue::SetSerialNum(OBAtom *atom, unsigned int sernum)
  {
    for (vector<OBAtom*>::size_type slot = 0 ; slot < _atoms.size() ; ++slot)
      {
        if (_atoms[slot] != atom)
          continue;
        _sernum[slot] = sernum;
      }
  }
1001
1002
  // Return a copy of the list of atom pointers registered with this residue.
  vector<OBAtom*> OBResidue::GetAtoms(void) const
  {
    vector<OBAtom*> members(_atoms);
    return members;
  }
1006
1007
  // Collect the bonds attached to this residue's atoms, each bond once.
  //
  // \param exterior  when true, every bond of every atom is returned; when
  //                  false, only bonds whose other end also belongs to this
  //                  residue are returned.
  // \return the bonds in the order they are first encountered.
  vector<OBBond*> OBResidue::GetBonds(bool exterior) const
  {
    vector<OBBond*> found;
    OBBitVec        visited;   // bond indices already examined

    const unsigned int count = (unsigned int) _atoms.size();
    for ( unsigned int n = 0 ; n < count ; ++n )
      {
        OBAtom *owner = _atoms[n];
        vector<OBBond*>::iterator cursor;

        for (OBBond *bond = owner->BeginBond(cursor) ; bond ; bond = owner->NextBond(cursor))
          {
            if (visited.BitIsSet(bond->GetIdx()))
              continue;

            if (exterior || bond->GetNbrAtom(owner)->GetResidue() == this)
              found.push_back(bond);

            visited.SetBitOn(bond->GetIdx());
          }
      }

    return found;
  }
1039
1040
  //! \return A copy of the stored residue name.
  string OBResidue::GetName() const
  {
    string nameCopy(_resname);
    return nameCopy;
  }
1044
1045
  //! \return A copy of the stored segment name.
  string OBResidue::GetSegName() const
  {
    string segmentCopy(_segname);
    return segmentCopy;
  }
1049
1050
  std::string OBResidue::GetNumString(void)
1051
218k
  {
1052
218k
    return _resnum;
1053
218k
  }
1054
1055
  int OBResidue::GetNum(void)
1056
25.8k
  {
1057
25.8k
    return atoi(_resnum.c_str());
1058
25.8k
  }
1059
1060
  unsigned int OBResidue::GetNumAtoms(void) const
1061
0
  {
1062
0
    return (unsigned int)_atoms.size();
1063
0
  }
1064
1065
  unsigned int OBResidue::GetNumHvyAtoms(void) const
1066
0
  {
1067
0
    unsigned int num_hvy_atoms = 0;
1068
1069
0
    for (auto atom = this->CBeginAtoms(); atom != this->CEndAtoms(); atom++)
1070
0
    {
1071
0
      if ((*atom)->GetAtomicNum() != OBElements::Hydrogen)
1072
0
      {
1073
0
        num_hvy_atoms++;
1074
0
      }
1075
0
    }
1076
1077
0
    return num_hvy_atoms;
1078
0
  }
1079
1080
  char OBResidue::GetChain(void) const
1081
152k
  {
1082
152k
    return _chain;
1083
152k
  }
1084
1085
  unsigned int OBResidue::GetChainNum(void) const
1086
33
  {
1087
33
    if (isdigit(_chain))
1088
0
      return (_chain - '0');
1089
33
    else
1090
33
      return (_chain - 'A' + 1);
1091
33
  }
1092
1093
  unsigned int OBResidue::GetIdx(void) const
1094
1.06k
  {
1095
1.06k
    return(_idx);
1096
1.06k
  }
1097
1098
  unsigned int OBResidue::GetResKey(void) const
1099
0
  {
1100
0
    return(_reskey);
1101
0
  }
1102
1103
  char OBResidue::GetInsertionCode(void) const
1104
34.9k
  {
1105
34.9k
    return(_insertioncode);
1106
34.9k
  }
1107
1108
  //! Look up the atom ID recorded for \p atom.
  //! \return The ID stored alongside the first entry whose pointer equals
  //!         \p atom, or an empty string when the atom is not in this residue.
  string OBResidue::GetAtomID(OBAtom *atom) const
  {
    vector<OBAtom*>::size_type slot = 0;
    while (slot < _atoms.size() && _atoms[slot] != atom)
      ++slot;

    if (slot == _atoms.size())
      return "";

    return _atomid[slot];
  }
1115
1116
  //! Look up the serial number recorded for \p atom.
  //! \return The serial number stored alongside the first entry whose pointer
  //!         equals \p atom, or 0 when the atom is not in this residue.
  unsigned int OBResidue::GetSerialNum(OBAtom *atom) const
  {
    vector<OBAtom*>::size_type slot = 0;
    while (slot < _atoms.size() && _atoms[slot] != atom)
      ++slot;

    if (slot == _atoms.size())
      return 0;

    return _sernum[slot];
  }
1123
1124
  //! Look up the het-atom flag recorded for \p atom.
  //! \return The flag stored alongside the first entry whose pointer equals
  //!         \p atom, or false when the atom is not in this residue.
  bool OBResidue::IsHetAtom(OBAtom *atom) const
  {
    vector<OBAtom*>::size_type slot = 0;
    while (slot < _atoms.size() && _atoms[slot] != atom)
      ++slot;

    if (slot == _atoms.size())
      return false;

    return _hetatm[slot];
  }
1131
1132
  ///////////////////////////////////////////////////////////////////////////////
1133
  // OBResidue: Iteration Utilities
1134
  ///////////////////////////////////////////////////////////////////////////////
1135
1136
  //! Reset \p i to the start of this residue's atom list.
  //! \return The first atom, or nullptr when the residue holds no atoms.
  OBAtom *OBResidue::BeginAtom(vector<OBAtom*>::iterator &i)
  {
    i = _atoms.begin();

    if (i == _atoms.end())
      return nullptr;

    return *i;
  }
1141
1142
  //! Advance \p i by one position in this residue's atom list.
  //! The iterator is incremented unconditionally, so it must not already be
  //! at the end of the list when this is called.
  //! \return The atom now referenced by \p i, or nullptr once the end of the
  //!         list has been reached.
  OBAtom *OBResidue::NextAtom(vector<OBAtom*>::iterator &i)
  {
    ++i;

    if (i == _atoms.end())
      return nullptr;

    return *i;
  }
1147
1148
  ///////////////////////////////////////////////////////////////////////////////
1149
  // OBResidue: Information Functions
1150
  ///////////////////////////////////////////////////////////////////////////////
1151
1152
  bool OBResidue::GetAminoAcidProperty(int property) const
1153
0
  {
1154
0
    switch(property)
1155
0
      {
1156
0
      case OBAminoAcidProperty::ACIDIC:
1157
0
        return IS_ACIDIC(_aakey)      != 0;
1158
0
      case OBAminoAcidProperty::ACYCLIC:
1159
0
        return IS_ACYCLIC(_aakey)     != 0;
1160
0
      case OBAminoAcidProperty::ALIPHATIC:
1161
0
        return IS_ALIPHATIC(_aakey)   != 0;
1162
0
      case OBAminoAcidProperty::AROMATIC:
1163
0
        return IS_AROMATIC(_aakey)    != 0;
1164
0
      case OBAminoAcidProperty::BASIC:
1165
0
        return IS_BASIC(_aakey)       != 0;
1166
0
      case OBAminoAcidProperty::BURIED:
1167
0
        return IS_BURIED(_aakey)      != 0;
1168
0
      case OBAminoAcidProperty::CHARGED:
1169
0
        return IS_CHARGED(_aakey)     != 0;
1170
0
      case OBAminoAcidProperty::CYCLIC:
1171
0
        return IS_CYCLIC(_aakey)      != 0;
1172
0
      case OBAminoAcidProperty::HYDROPHOBIC:
1173
0
        return IS_HYDROPHOBIC(_aakey) != 0;
1174
0
      case OBAminoAcidProperty::LARGE:
1175
0
        return IS_LARGE(_aakey)       != 0;
1176
0
      case OBAminoAcidProperty::MEDIUM:
1177
0
        return IS_MEDIUM(_aakey)      != 0;
1178
0
      case OBAminoAcidProperty::NEGATIVE:
1179
0
        return IS_NEGATIVE(_aakey)    != 0;
1180
0
      case OBAminoAcidProperty::NEUTRAL:
1181
0
        return IS_NEUTRAL(_aakey)     != 0;
1182
0
      case OBAminoAcidProperty::POLAR:
1183
0
        return IS_POLAR(_aakey)       != 0;
1184
0
      case OBAminoAcidProperty::POSITIVE:
1185
0
        return IS_POSITIVE(_aakey)    != 0;
1186
0
      case OBAminoAcidProperty::SMALL:
1187
0
        return IS_SMALL(_aakey)       != 0;
1188
0
      case OBAminoAcidProperty::SURFACE:
1189
0
        return IS_SURFACE(_aakey)     != 0;
1190
0
      default:
1191
0
        return false;
1192
0
      }
1193
0
  }
1194
1195
  //! Test one atom of this residue against an atom-level property.
  //!
  //! The atom's ID string (from GetAtomID) is converted to a number with
  //! GetAtomIDNumber, and most properties are decided by comparing that
  //! number against fixed thresholds. LIGAND and SIDECHAIN additionally
  //! consult IsHetAtom / GetResidueProperty.
  //!
  //! \param atom     Atom to test; a null pointer always yields false.
  //! \param property One of the OBResidueAtomProperty constants handled below.
  //! \return Whether the property holds; false for any property value not
  //!         listed in the switch.
  bool OBResidue::GetAtomProperty(OBAtom *atom, int property) const
  {
    if (atom == nullptr)
      return false;

    const unsigned int idNumber = GetAtomIDNumber(GetAtomID(atom).c_str());

    switch (property)
      {
      case OBResidueAtomProperty::ALPHA_CARBON:
        return idNumber == 1;

      case OBResidueAtomProperty::AMINO_BACKBONE:
        return idNumber <= 3;

      case OBResidueAtomProperty::BACKBONE:
        return idNumber <= 18;

      case OBResidueAtomProperty::CYSTEINE_SULPHUR:
        return idNumber == 20;

      case OBResidueAtomProperty::LIGAND:
        // A het atom counts as ligand unless the residue is solvent.
        if (!IsHetAtom(atom))
          return false;
        return !GetResidueProperty(OBResidueProperty::SOLVENT);

      case OBResidueAtomProperty::NUCLEIC_BACKBONE:
        return (7 <= idNumber) && (idNumber <= 18);

      case OBResidueAtomProperty::SHAPELY_BACKBONE:
        return idNumber <= 7;

      case OBResidueAtomProperty::SHAPELY_SPECIAL:
        return idNumber == 19;

      case OBResidueAtomProperty::SIDECHAIN:
        if (!GetResidueProperty(OBResidueProperty::AMINO_NUCLEO))
          return false;
        return idNumber > 18;

      case OBResidueAtomProperty::SUGAR_PHOSPHATE:
        return idNumber == 7;

      default:
        break;
      }

    return false;
  }
1239
1240
  bool OBResidue::GetResidueProperty(int property) const
1241
0
  {
1242
0
    switch(property)
1243
0
      {
1244
0
      case OBResidueProperty::AMINO:
1245
0
        return (_reskey <= OBResidueIndex::HYP);
1246
1247
0
      case OBResidueProperty::AMINO_NUCLEO:
1248
0
        return (_reskey <= OBResidueIndex::PSU);
1249
1250
0
      case OBResidueProperty::COENZYME:
1251
0
        return (_reskey >= OBResidueIndex::NAD) &&
1252
0
          (_reskey <= OBResidueIndex::NDP);
1253
1254
0
      case OBResidueProperty::ION:
1255
0
        return (_reskey == OBResidueIndex::SO4) ||
1256
0
          (_reskey == OBResidueIndex::PO4);
1257
1258
0
      case OBResidueProperty::NUCLEO:
1259
0
        return (_reskey >= OBResidueIndex::A) &&
1260
0
          (_reskey <= OBResidueIndex::PSU);
1261
1262
0
      case OBResidueProperty::PROTEIN:
1263
0
        return (_reskey <= OBResidueIndex::HYP) ||
1264
0
          ((_reskey >= OBResidueIndex::UNK) &&
1265
0
           (_reskey <= OBResidueIndex::FOR));
1266
1267
0
      case OBResidueProperty::PURINE:
1268
0
        return (_reskey == OBResidueIndex::A) ||
1269
0
          (_reskey == OBResidueIndex::G);
1270
1271
0
      case OBResidueProperty::PYRIMIDINE:
1272
0
        return (_reskey == OBResidueIndex::C) ||
1273
0
          (_reskey == OBResidueIndex::T);
1274
1275
0
      case OBResidueProperty::SOLVENT:
1276
0
        return (_reskey >= OBResidueIndex::HOH) &&
1277
0
          (_reskey <= OBResidueIndex::PO4);
1278
1279
0
      case OBResidueProperty::WATER:
1280
0
        return (_reskey == OBResidueIndex::HOH) ||
1281
0
          (_reskey == OBResidueIndex::DOD);
1282
1283
0
      default:
1284
0
        return false;
1285
0
      }
1286
0
  }
1287
1288
  bool OBResidue::IsResidueType(int restype) const
1289
0
  {
1290
0
    return ((int)_reskey == restype);
1291
0
  }
1292
1293
} // end namespace OpenBabel
1294
1295
//! \file residue.cpp
1296
//! \brief Handle macromolecule residues.