/src/openbabel/src/chains.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | chains.cpp - Parse for macromolecule chains and residues. |
3 | | |
4 | | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 | | (original author, Roger Sayle, version 1.6, March 1998) |
6 | | (modified by Joe Corkery, OpenEye Scientific Software, March 2001) |
7 | | Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison |
8 | | |
9 | | This file is part of the Open Babel project. |
10 | | For more information, see <http://openbabel.org/> |
11 | | |
12 | | This program is free software; you can redistribute it and/or modify |
13 | | it under the terms of the GNU General Public License as published by |
14 | | the Free Software Foundation version 2 of the License. |
15 | | |
16 | | This program is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | GNU General Public License for more details. |
20 | | ***********************************************************************/ |
21 | | |
22 | | ////////////////////////////////////////////////////////////////////////////// |
23 | | // File Includes |
24 | | ////////////////////////////////////////////////////////////////////////////// |
25 | | #include <openbabel/babelconfig.h> |
26 | | |
27 | | #include <cstdlib> |
28 | | #include <cstring> |
29 | | #include <cstdio> |
30 | | #include <cctype> |
31 | | #include <map> |
32 | | |
33 | | #include <openbabel/mol.h> |
34 | | #include <openbabel/atom.h> |
35 | | #include <openbabel/bond.h> |
36 | | #include <openbabel/chains.h> |
37 | | #include <openbabel/oberror.h> |
38 | | #include <openbabel/obiter.h> |
39 | | #include <openbabel/elements.h> |
40 | | |
41 | | using namespace std; |
42 | | |
43 | | ////////////////////////////////////////////////////////////////////////////// |
44 | | // Preprocessor Definitions |
45 | | ////////////////////////////////////////////////////////////////////////////// |
46 | | |
47 | | //! The first available index for actual residues |
48 | | //! 0, 1, 2 reserved for UNK, HOH, UNL |
49 | 6 | #define RESIDMIN 4 |
50 | | //! The maximum number of residue IDs for this code |
51 | | #define RESIDMAX 64 |
52 | | |
53 | | //! An index of the residue names perceived during a run |
54 | | //! 0, 1, and 2 reserved for UNK, HOH, LIG |
55 | | static char ChainsResName[RESIDMAX][4] = { |
56 | | /*0*/ "UNK", |
57 | | /*1*/ "HOH", |
58 | | /*2*/ "UNL", |
59 | | /*3*/ "ACE" |
60 | | }; |
61 | | |
62 | 5.66k | #define ATOMMINAMINO 4 |
63 | | #define ATOMMINNUCLEIC 50 |
64 | 0 | #define MAXPEPTIDE 11 |
65 | 0 | #define MAXNUCLEIC 15 |
66 | | //! The number of amino acids recognized by this code |
67 | | //! Currently: ILE, VAL, ALA, ASN, ASP, ARG, CYS, GLN, GLU |
68 | | //! GLY, HIS, HYP, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR |
69 | 132 | #define AMINOMAX 21 |
70 | | //! The number of nucleic acids recognized by this code |
71 | | //! Currently A, C, T, G, U, I |
72 | 42 | #define NUCLEOMAX 6 |
73 | | #define STACKSIZE 20 |
74 | | |
75 | 0 | #define AI_N 0 |
76 | 0 | #define AI_CA 1 |
77 | 0 | #define AI_C 2 |
78 | 0 | #define AI_O 3 |
79 | 0 | #define AI_OXT 37 |
80 | | |
81 | 0 | #define AI_P 38 |
82 | 0 | #define AI_O1P 39 |
83 | 0 | #define AI_O2P 40 |
84 | 0 | #define AI_O5 41 |
85 | 0 | #define AI_C5 42 |
86 | 0 | #define AI_C4 43 |
87 | 0 | #define AI_O4 44 |
88 | 0 | #define AI_C3 45 |
89 | 0 | #define AI_O3 46 |
90 | 0 | #define AI_C2 47 |
91 | 0 | #define AI_O2 48 |
92 | 0 | #define AI_C1 49 |
93 | | |
94 | 0 | #define BitN 0x0001 |
95 | 0 | #define BitNTer 0x0002 |
96 | | #define BitNPro 0x0004 |
97 | 0 | #define BitNPT 0x0008 |
98 | | #define BitCA 0x0010 |
99 | | #define BitCAGly 0x0020 |
100 | 0 | #define BitC 0x0100 |
101 | | #define BitCTer 0x0200 |
102 | | #define BitCOXT 0x0400 |
103 | | #define BitO 0x1000 |
104 | | #define BitOXT 0x2000 |
105 | | |
106 | 0 | #define BitNAll 0x000F |
107 | 0 | #define BitCAAll 0x0030 |
108 | 0 | #define BitCAll 0x0700 |
109 | 0 | #define BitOAll 0x3000 |
110 | | |
111 | 0 | #define BitP 0x0001 |
112 | 0 | #define BitPTer 0x0002 |
113 | 0 | #define BitOP 0x0004 |
114 | 0 | #define BitO5 0x0008 |
115 | 0 | #define BitO5Ter 0x0010 |
116 | 0 | #define BitC5 0x0020 |
117 | 0 | #define BitC4 0x0040 |
118 | 0 | #define BitO4 0x0080 |
119 | 0 | #define BitC3 0x0100 |
120 | | #define BitO3 0x0200 |
121 | | #define BitO3Ter 0x0400 |
122 | | #define BitC2RNA 0x0800 |
123 | | #define BitC2DNA 0x1000 |
124 | 0 | #define BitO2 0x2000 |
125 | 0 | #define BitC1 0x4000 |
126 | | |
127 | | #define BitPAll 0x0003 |
128 | | #define Bit05All 0x0018 |
129 | 0 | #define BitO3All 0x0600 |
130 | 0 | #define BitC2All 0x1800 |
131 | | |
132 | 834 | #define BC_ASSIGN 0x01 |
133 | 4.11k | #define BC_COUNT 0x02 |
134 | 4.14k | #define BC_ELEM 0x03 |
135 | 3.55k | #define BC_EVAL 0x04 |
136 | 2.04k | #define BC_IDENT 0x05 |
137 | 4.06k | #define BC_LOCAL 0x06 |
138 | | |
139 | 876 | #define BF_SINGLE 0x01 |
140 | 102 | #define BF_DOUBLE 0x02 |
141 | 0 | #define BF_TRIPLE 0x04 |
142 | 162 | #define BF_AROMATIC 0x08 |
143 | | |
144 | | namespace OpenBabel |
145 | | { |
146 | | extern OBMessageHandler obErrorLog; |
147 | | |
148 | | |
149 | | // Initialize the global chainsparser - declared in chains.h |
150 | | OBChainsParser chainsparser; |
151 | | |
152 | | ////////////////////////////////////////////////////////////////////////////// |
153 | | // Structure / Type Definitions |
154 | | ////////////////////////////////////////////////////////////////////////////// |
155 | | |
156 | | //! Structure template for atomic patterns in residues for OBChainsParser |
157 | | typedef struct Template |
158 | | { |
159 | | int flag; //!< binary flag representing this atom type |
160 | | short elem; //!< atomic number of this element |
161 | | short count; //!< expected valence for this atom type |
162 | | int n1; //!< mask 1 used by ConstrainBackbone() and MatchConstraint() |
163 | | int n2; //!< mask 2 used by ConstrainBackbone() and MatchConstraint() |
164 | | int n3; //!< mask 3 used by ConstrainBackbone() and MatchConstraint() |
165 | | int n4; //!< mask 4 used by ConstrainBackbone() and MatchConstraint() |
166 | | } Template; |
167 | | |
168 | | /** |
169 | | * Generic template for peptide residue backbone. \n |
170 | | * col 1: bitmask \n |
171 | | * col 2: element number \n |
172 | | * col 3: neighbour count \n |
173 | | * col 4-7: 1-4 bitmasks for neighbour atoms (-6 means carbon) |
174 | | */ |
175 | | static Template Peptide[MAXPEPTIDE] = { |
176 | | /* N */ { 0x0001, 7, 2, 0x0030, 0x0100, 0, 0 }, //!< N |
177 | | /* NTer */ { 0x0002, 7, 1, 0x0030, 0, 0, 0 }, //!< NTre |
178 | | /* NPro */ { 0x0004, 7, 3, 0x0030, 0x0100, -6, 0 }, //!< NPro |
179 | | /* NPT */ { 0x0008, 7, 2, 0x0030, -6, 0, 0 }, //!< NPT |
180 | | /* CA */ { 0x0010, 6, 3, 0x000F, 0x0700, -6, 0 }, //!< CA |
181 | | /* CAGly */ { 0x0020, 6, 2, 0x0003, 0x0700, 0, 0 }, //!< CAGly |
182 | | /* C */ { 0x0100, 6, 3, 0x0030, 0x1000, 0x0005, 0 }, //!< C |
183 | | /* CTer */ { 0x0200, 6, 2, 0x0030, 0x1000, 0, 0 }, //!< CTer |
184 | | /* COXT */ { 0x0400, 6, 3, 0x0030, 0x1000, 0x2000, 0 }, //!< COXT |
185 | | /* O */ { 0x1000, 8, 1, 0x0700, 0, 0, 0 }, //!< O |
186 | | /* OXT */ { 0x2000, 8, 1, 0x0400, 0, 0, 0 } //!< OXT |
187 | | }; |
188 | | |
189 | | //! Generic template for peptide nucleotide backbone |
190 | | static Template Nucleotide[MAXNUCLEIC] = { |
191 | | /* P */ { 0x0001, 15, 4, 0x0004, 0x0004, 0x0008, 0x0200 }, |
192 | | /* PTer */ { 0x0002, 15, 3, 0x0004, 0x0004, 0x0008, 0 }, |
193 | | /* OP */ { 0x0004, 8, 1, 0x0003, 0, 0, 0 }, |
194 | | /* O5 */ { 0x0008, 8, 2, 0x0020, 0x0003, 0, 0 }, |
195 | | /* O5Ter */ { 0x0010, 8, 1, 0x0020, 0, 0, 0 }, |
196 | | /* C5 */ { 0x0020, 6, 2, 0x0018, 0x0040, 0, 0 }, |
197 | | /* C4 */ { 0x0040, 6, 3, 0x0020, 0x0080, 0x0100, 0 }, |
198 | | /* O4 */ { 0x0080, 8, 2, 0x0040, 0x4000, 0, 0 }, |
199 | | /* C3 */ { 0x0100, 6, 3, 0x0040, 0x0600, 0x1800, 0 }, |
200 | | /* O3 */ { 0x0200, 8, 2, 0x0100, 0x0001, 0, 0 }, |
201 | | /* O3Ter */ { 0x0400, 8, 1, 0x0100, 0, 0, 0 }, |
202 | | /* C2RNA */ { 0x0800, 6, 3, 0x0100, 0x4000, 0x2000, 0 }, |
203 | | /* C2DNA */ { 0x1000, 6, 2, 0x0100, 0x4000, 0, 0 }, |
204 | | /* O2 */ { 0x2000, 8, 1, 0x0800, 0, 0, 0 }, |
205 | | /* C1 */ { 0x4000, 6, 3, 0x0080, 0x1800, -7, 0 } |
206 | | }; |
207 | | |
208 | | |
209 | | ////////////////////////////////////////////////////////////////////////////// |
210 | | // Global Variables / Tables |
211 | | ////////////////////////////////////////////////////////////////////////////// |
212 | | |
213 | | //! The number of PDB atom type names recognized by this code |
214 | | #define ATOMMAX 68 |
215 | | |
216 | | //! PDB atom types (i.e., columns 13-16 of a PDB file) |
217 | | //! index numbers from this array are used in the pseudo-SMILES format |
218 | | //! for side-chains in the AminoAcids[] & Nucleotides[] global arrays below |
219 | | static char ChainsAtomName[ATOMMAX][4] = { |
220 | | /* 0 */ { ' ', 'N', ' ', ' ' }, |
221 | | /* 1 */ { ' ', 'C', 'A', ' ' }, |
222 | | /* 2 */ { ' ', 'C', ' ', ' ' }, |
223 | | /* 3 */ { ' ', 'O', ' ', ' ' }, |
224 | | /* 4 */ { ' ', 'C', 'B', ' ' }, |
225 | | /* 5 */ { ' ', 'S', 'G', ' ' }, |
226 | | /* 6 */ { ' ', 'O', 'G', ' ' }, |
227 | | /* 7 */ { ' ', 'C', 'G', ' ' }, |
228 | | /* 8 */ { ' ', 'O', 'G', '1' }, |
229 | | /* 9 */ { ' ', 'C', 'G', '1' }, |
230 | | /* 10 */ { ' ', 'C', 'G', '2' }, |
231 | | /* 11 */ { ' ', 'C', 'D', ' ' }, |
232 | | /* 12 */ { ' ', 'O', 'D', ' ' }, |
233 | | /* 13 */ { ' ', 'S', 'D', ' ' }, |
234 | | /* 14 */ { ' ', 'C', 'D', '1' }, |
235 | | /* 15 */ { ' ', 'O', 'D', '1' }, |
236 | | /* 16 */ { ' ', 'N', 'D', '1' }, |
237 | | /* 17 */ { ' ', 'C', 'D', '2' }, |
238 | | /* 18 */ { ' ', 'O', 'D', '2' }, |
239 | | /* 19 */ { ' ', 'N', 'D', '2' }, |
240 | | /* 20 */ { ' ', 'C', 'E', ' ' }, |
241 | | /* 21 */ { ' ', 'N', 'E', ' ' }, |
242 | | /* 22 */ { ' ', 'C', 'E', '1' }, |
243 | | /* 23 */ { ' ', 'O', 'E', '1' }, |
244 | | /* 24 */ { ' ', 'N', 'E', '1' }, |
245 | | /* 25 */ { ' ', 'C', 'E', '2' }, |
246 | | /* 26 */ { ' ', 'O', 'E', '2' }, |
247 | | /* 27 */ { ' ', 'N', 'E', '2' }, |
248 | | /* 28 */ { ' ', 'C', 'E', '3' }, |
249 | | /* 29 */ { ' ', 'C', 'Z', ' ' }, |
250 | | /* 30 */ { ' ', 'N', 'Z', ' ' }, |
251 | | /* 31 */ { ' ', 'C', 'Z', '2' }, |
252 | | /* 32 */ { ' ', 'C', 'Z', '3' }, |
253 | | /* 33 */ { ' ', 'O', 'H', ' ' }, |
254 | | /* 34 */ { ' ', 'N', 'H', '1' }, |
255 | | /* 35 */ { ' ', 'N', 'H', '2' }, |
256 | | /* 36 */ { ' ', 'C', 'H', '2' }, |
257 | | /* 37 */ { ' ', 'O', 'X', 'T' }, |
258 | | /* 38 */ { ' ', 'P', ' ', ' ' }, |
259 | | /* 39 */ { ' ', 'O', '1', 'P' }, |
260 | | /* 40 */ { ' ', 'O', '2', 'P' }, |
261 | | /* 41 */ { ' ', 'O', '5', '*' }, |
262 | | /* 42 */ { ' ', 'C', '5', '*' }, |
263 | | /* 43 */ { ' ', 'C', '4', '*' }, |
264 | | /* 44 */ { ' ', 'O', '4', '*' }, |
265 | | /* 45 */ { ' ', 'C', '3', '*' }, |
266 | | /* 46 */ { ' ', 'O', '3', '*' }, |
267 | | /* 47 */ { ' ', 'C', '2', '*' }, |
268 | | /* 48 */ { ' ', 'O', '2', '*' }, |
269 | | /* 49 */ { ' ', 'C', '1', '*' }, |
270 | | /* 50 */ { ' ', 'N', '9', ' ' }, |
271 | | /* 51 */ { ' ', 'C', '8', ' ' }, |
272 | | /* 52 */ { ' ', 'N', '7', ' ' }, |
273 | | /* 53 */ { ' ', 'C', '5', ' ' }, |
274 | | /* 54 */ { ' ', 'C', '6', ' ' }, |
275 | | /* 55 */ { ' ', 'O', '6', ' ' }, |
276 | | /* 56 */ { ' ', 'N', '6', ' ' }, |
277 | | /* 57 */ { ' ', 'N', '1', ' ' }, |
278 | | /* 58 */ { ' ', 'C', '2', ' ' }, |
279 | | /* 59 */ { ' ', 'O', '2', ' ' }, |
280 | | /* 60 */ { ' ', 'N', '2', ' ' }, |
281 | | /* 61 */ { ' ', 'N', '3', ' ' }, |
282 | | /* 62 */ { ' ', 'C', '4', ' ' }, |
283 | | /* 63 */ { ' ', 'O', '4', ' ' }, |
284 | | /* 64 */ { ' ', 'N', '4', ' ' }, |
285 | | /* 65 */ { ' ', 'C', '5', ' ' }, |
286 | | /* 66 */ { ' ', 'C', '5', 'M' }, |
287 | | /* 67 */ { ' ', 'C', '6', ' ' } |
288 | | }; |
289 | | |
290 | | //! Definition of side chains, associating overall residue name with |
291 | | //! the pseudo-SMILES pattern |
292 | | typedef struct |
293 | | { |
294 | | const char *name; //!< Residue name, standardized by PDB |
295 | | const char *data; //!< pseudo-SMILES definition of side-chain |
296 | | } ResidType; |
297 | | |
298 | | /** |
299 | | * Side chains for recognized amino acids using a pseudo-SMARTS syntax |
300 | | * for branching and bonds. Numbers indicate atom types defined by |
301 | | * OpenBabel::ChainsAtomName global array above. |
302 | | */ |
303 | | static ResidType AminoAcids[AMINOMAX] = { |
304 | | { "ILE", "1-4(-9-14)-10" }, |
305 | | { "VAL", "1-4(-9)-10" }, |
306 | | { "ALA", "1-4" }, |
307 | | { "ASN", "1-4-7(=15)-19" }, |
308 | | { "ASP", "1-4-7(=15)-18" }, |
309 | | { "ARG", "1-4-7-11-21-29(=34)-35" }, |
310 | | { "CYS", "1-4-5" }, |
311 | | { "GLN", "1-4-7-11(=23)-27" }, |
312 | | { "GLU", "1-4-7-11(=23)-26" }, |
313 | | { "GLY", "1" }, |
314 | | { "HIS", "1-4-7^16~22^27^17~7" }, |
315 | | { "HYP", "1-4-7(-12)-11-0" }, |
316 | | { "LEU", "1-4-7(-14)-17" }, |
317 | | { "LYS", "1-4-7-11-20-30" }, |
318 | | { "MET", "1-4-7-13-20" }, |
319 | | { "PHE", "1-4-7~14^22~29^25~17^7" }, |
320 | | { "PRO", "1-4-7-11-0" }, |
321 | | { "SER", "1-4-6" }, |
322 | | { "THR", "1-4(-8)-10" }, |
323 | | { "TRP", "1-4-7~14^24^25~17(^7)^28~32^36~31^25" }, |
324 | | { "TYR", "1-4-7~14^22~29(-33)^25~17^7" } |
325 | | }; |
326 | | // Other possible amino acid templates (less common) |
327 | | /* Pyroglutamate (PCA): 1-4-7-11(=" OE ")-0 PDB Example: 1CEL */ |
328 | | /* Amino-N-Butyric Acid (ABA): 1-4-7 PDB Example: 1BBO */ |
329 | | /* Selenic Acid (SEC): 1-4-"SEG "(-15)-18 PDB Example: 1GP1 */ |
330 | | |
331 | | /** |
332 | | * Side chains for recognized nucleotides using a pseudo-SMARTS syntax |
333 | | * for branching and bonds. Numbers indicate atom types defined by |
334 | | * OpenBabel::ChainsAtomName global array above. |
335 | | */ |
336 | | static ResidType Nucleotides[NUCLEOMAX] = { |
337 | | { " A", "49-50-51-52-53-54(-56)-57-58-61-62(-53)-50" }, |
338 | | { " C", "49-57-58(-59)-61-62(-64)-65-67-57" }, |
339 | | { " G", "49-50-51-52-53-54(-55)-57-58(-60)-61-62(-53)-50" }, |
340 | | { " T", "49-57-58(-59)-61-62(-63)-65(-66)-67-57" }, |
341 | | { " U", "49-57-58(-59)-61-62(-63)-65-67-57" }, |
342 | | { " I", "49-50-51-52-53-54(-55)-57-58-61-62(-53)-50" } |
343 | | }; |
344 | | |
345 | | |
346 | | typedef struct |
347 | | { |
348 | | int atomid,elem; |
349 | | int bcount; |
350 | | int index; |
351 | | } MonoAtomType; |
352 | | |
353 | | typedef struct |
354 | | { |
355 | | int src,dst; |
356 | | int index; |
357 | | int flag; |
358 | | } MonoBondType; |
359 | | |
360 | | typedef struct |
361 | | { |
362 | | int type; |
363 | | union _ByteCode *next; |
364 | | } MonOpStruct; |
365 | | |
366 | | typedef struct |
367 | | { |
368 | | int type; |
369 | | int value; |
370 | | union _ByteCode *tcond; |
371 | | union _ByteCode *fcond; |
372 | | } BinOpStruct; |
373 | | |
374 | | //! Output array -- residue id, atom id, bond flags, etc. |
375 | | typedef struct |
376 | | { |
377 | | int type; |
378 | | int resid; |
379 | | int *atomid; |
380 | | int *bflags; |
381 | | } AssignStruct; |
382 | | |
383 | | //! Chemical graph matching virtual machine |
384 | | typedef union _ByteCode |
385 | | { |
386 | | int type; |
387 | | MonOpStruct eval; //!< Eval - push current neighbors onto the stack |
388 | | BinOpStruct count; //!< Count - test the number of eval bonds |
389 | | BinOpStruct elem; //!< Element - test the element of current atom |
390 | | BinOpStruct ident; //!< Ident - test the atom for backbone identity |
391 | | BinOpStruct local; //!< Local - test whether the atom has been visited |
392 | | AssignStruct assign; //!< Assign - assign residue name, atom name and bond type to output |
393 | | } ByteCode; |
394 | | |
395 | | typedef struct |
396 | | { |
397 | | int atom,bond; |
398 | | int prev; |
399 | | } StackType; |
400 | | |
401 | | static MonoAtomType MonoAtom[MaxMonoAtom]; |
402 | | static MonoBondType MonoBond[MaxMonoBond]; |
403 | | static int MonoAtomCount; |
404 | | static int MonoBondCount; |
405 | | |
406 | | static StackType Stack[STACKSIZE]; |
407 | | static int StackPtr; |
408 | | |
409 | | static int AtomIndex; |
410 | | static int BondIndex; |
411 | | static bool StrictFlag = false; |
412 | | |
413 | | ////////////////////////////////////////////////////////////////////////////// |
414 | | // Static Functions |
415 | | ////////////////////////////////////////////////////////////////////////////// |
416 | | |
417 | | static ByteCode *AllocateByteCode(int type) |
418 | 9.42k | { |
419 | 9.42k | ByteCode *result; |
420 | | |
421 | 9.42k | result = new ByteCode; |
422 | 9.42k | if( !result ) |
423 | 0 | { |
424 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Unable to allocate byte codes for biomolecule residue perception.", obError); |
425 | 0 | return (result); |
426 | 0 | } |
427 | 9.42k | result->type = type; |
428 | 9.42k | result->eval.next = nullptr; |
429 | 9.42k | result->count.tcond = nullptr; |
430 | 9.42k | result->count.fcond = nullptr; |
431 | 9.42k | result->elem.tcond = nullptr; |
432 | 9.42k | result->elem.fcond = nullptr; |
433 | 9.42k | result->ident.tcond = nullptr; |
434 | 9.42k | result->ident.fcond = nullptr; |
435 | 9.42k | result->local.tcond = nullptr; |
436 | 9.42k | result->local.fcond = nullptr; |
437 | 9.42k | result->assign.atomid = nullptr; |
438 | 9.42k | result->assign.bflags = nullptr; |
439 | | |
440 | 9.42k | return (result); |
441 | 9.42k | } |
442 | | |
443 | | //! Free a ByteCode and all corresponding data |
444 | | static void DeleteByteCode(ByteCode *node) |
445 | 0 | { |
446 | 0 | if (node == nullptr) |
447 | 0 | return; |
448 | 0 | else |
449 | 0 | { |
450 | 0 | switch (node->type) |
451 | 0 | { |
452 | 0 | case BC_ASSIGN: |
453 | |
|
454 | 0 | if (node->assign.atomid != nullptr) { |
455 | 0 | delete [] node->assign.atomid; |
456 | 0 | node->assign.atomid = nullptr; // prevent double-free |
457 | 0 | } |
458 | 0 | if (node->assign.bflags != nullptr) { |
459 | 0 | delete [] node->assign.bflags; |
460 | 0 | node->assign.bflags = nullptr; // prevent double-free |
461 | 0 | } |
462 | |
|
463 | 0 | break; |
464 | | |
465 | 0 | case BC_COUNT: |
466 | |
|
467 | 0 | DeleteByteCode(node->count.tcond); |
468 | 0 | DeleteByteCode(node->count.fcond); |
469 | 0 | break; |
470 | 0 | case BC_ELEM: |
471 | |
|
472 | 0 | DeleteByteCode(node->elem.tcond); |
473 | 0 | DeleteByteCode(node->elem.fcond); |
474 | 0 | break; |
475 | | |
476 | 0 | case BC_EVAL: |
477 | |
|
478 | 0 | DeleteByteCode(node->eval.next); |
479 | 0 | break; |
480 | | |
481 | 0 | case BC_IDENT: |
482 | |
|
483 | 0 | DeleteByteCode(node->ident.tcond); |
484 | 0 | DeleteByteCode(node->ident.fcond); |
485 | 0 | break; |
486 | | |
487 | 0 | case BC_LOCAL: |
488 | |
|
489 | 0 | DeleteByteCode(node->local.tcond); |
490 | 0 | DeleteByteCode(node->local.fcond); |
491 | 0 | break; |
492 | 0 | } |
493 | | |
494 | 0 | delete node; |
495 | 0 | node = nullptr; |
496 | 0 | } |
497 | 0 | } |
498 | | |
499 | | static void FatalMemoryError(void) |
500 | 0 | { |
501 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Unable to allocate memory for biomolecule residue / chain perception.", obError); |
502 | 0 | } |
503 | | |
504 | | void GenerateByteCodes(ByteCode **node, int resid, int curr, int prev, int bond) |
505 | 5.82k | { |
506 | 5.82k | StackType neighbour[4]; |
507 | 5.82k | StackType original; |
508 | 5.82k | int count,i,j; |
509 | 5.82k | ByteCode *ptr; |
510 | 5.82k | bool done,found; |
511 | | |
512 | 5.82k | if( curr != prev ) |
513 | 5.66k | { |
514 | 5.66k | if( MonoAtom[curr].atomid < ATOMMINAMINO ) |
515 | 18 | { |
516 | 18 | found = false; |
517 | 18 | while( *node && ((*node)->type==BC_IDENT) ) |
518 | 0 | { |
519 | 0 | if( (*node)->ident.value == MonoAtom[curr].atomid ) |
520 | 0 | { |
521 | 0 | node = (ByteCode**)&(*node)->ident.tcond; |
522 | 0 | found = true; |
523 | 0 | break; |
524 | 0 | } |
525 | 0 | else |
526 | 0 | node = (ByteCode**)&(*node)->ident.fcond; |
527 | 0 | } |
528 | | |
529 | 18 | if (!found) |
530 | 18 | { |
531 | 18 | ptr = AllocateByteCode(BC_IDENT); |
532 | 18 | ptr->ident.tcond = nullptr; |
533 | 18 | ptr->ident.fcond = *node; |
534 | 18 | *node = ptr; |
535 | 18 | node = (ByteCode**)&ptr->ident.tcond; |
536 | 18 | ptr->ident.value = MonoAtom[curr].atomid; |
537 | 18 | } |
538 | 18 | MonoBond[bond].index = BondIndex++; |
539 | 18 | done = true; |
540 | 18 | } |
541 | 5.64k | else if( MonoAtom[curr].index != -1 ) |
542 | 2.14k | { |
543 | 2.14k | while( *node && ((*node)->type==BC_IDENT) ) |
544 | 0 | node = (ByteCode**)&(*node)->ident.fcond; |
545 | | |
546 | 2.14k | found = false; |
547 | 2.20k | while( *node && ((*node)->type==BC_LOCAL) ) |
548 | 228 | { |
549 | 228 | if( (*node)->local.value == MonoAtom[curr].index ) |
550 | 174 | { |
551 | 174 | node = (ByteCode**)&(*node)->local.tcond; |
552 | 174 | found = true; |
553 | 174 | break; |
554 | 174 | } |
555 | 54 | else |
556 | 54 | node = (ByteCode**)&(*node)->local.fcond; |
557 | 228 | } |
558 | | |
559 | 2.14k | if (!found) |
560 | 1.97k | { |
561 | 1.97k | ptr = AllocateByteCode(BC_LOCAL); |
562 | 1.97k | ptr->local.tcond = nullptr; |
563 | 1.97k | ptr->local.fcond = *node; |
564 | 1.97k | *node = ptr; |
565 | 1.97k | node = (ByteCode**)&ptr->local.tcond; |
566 | 1.97k | ptr->local.value = MonoAtom[curr].index; |
567 | 1.97k | } |
568 | | |
569 | 2.14k | MonoBond[bond].index = BondIndex++; |
570 | 2.14k | done = true; |
571 | 2.14k | } |
572 | 3.49k | else |
573 | 3.49k | { |
574 | 3.52k | while( *node && ((*node)->type==BC_IDENT) ) |
575 | 30 | node = (ByteCode**)&(*node)->ident.fcond; |
576 | 3.63k | while( *node && ((*node)->type==BC_LOCAL) ) |
577 | 138 | node = (ByteCode**)&(*node)->local.fcond; |
578 | | |
579 | 3.49k | found = false; |
580 | 4.14k | while( *node && ((*node)->type==BC_ELEM) ) |
581 | 1.95k | { |
582 | 1.95k | if( (*node)->elem.value == MonoAtom[curr].elem ) |
583 | 1.31k | { |
584 | 1.31k | node = (ByteCode**)&(*node)->elem.tcond; |
585 | 1.31k | found = true; |
586 | 1.31k | break; |
587 | 1.31k | } |
588 | 642 | else |
589 | 642 | node = (ByteCode**)&(*node)->elem.fcond; |
590 | 1.95k | } |
591 | | |
592 | 3.49k | if( !found ) |
593 | 2.18k | { |
594 | 2.18k | ptr = AllocateByteCode(BC_ELEM); |
595 | 2.18k | ptr->elem.tcond = nullptr; |
596 | 2.18k | ptr->elem.fcond = *node; |
597 | 2.18k | *node = ptr; |
598 | 2.18k | node = (ByteCode**)&ptr->elem.tcond; |
599 | 2.18k | ptr->elem.value = MonoAtom[curr].elem; |
600 | 2.18k | } |
601 | | |
602 | 3.49k | MonoAtom[curr].index = AtomIndex++; |
603 | 3.49k | MonoBond[bond].index = BondIndex++; |
604 | 3.49k | done = false; |
605 | 3.49k | } |
606 | 5.66k | } |
607 | 162 | else |
608 | 162 | { |
609 | 162 | MonoAtom[curr].index = AtomIndex++; |
610 | 162 | done = false; |
611 | 162 | } |
612 | | |
613 | 5.82k | count = 0; |
614 | 5.82k | if (!done) |
615 | 3.66k | { |
616 | 40.7k | for( i=0; i<MonoBondCount; i++ ) |
617 | 37.0k | { |
618 | 37.0k | if( MonoBond[i].src == curr ) |
619 | 3.01k | { |
620 | 3.01k | if( MonoBond[i].dst != prev ) |
621 | 1.96k | { |
622 | 1.96k | neighbour[count].atom = MonoBond[i].dst; |
623 | 1.96k | neighbour[count].bond = i; |
624 | 1.96k | count++; |
625 | 1.96k | } |
626 | 3.01k | } |
627 | 34.0k | else if( MonoBond[i].dst == curr ) |
628 | 3.66k | { |
629 | 3.66k | if( MonoBond[i].src != prev ) |
630 | 1.21k | { |
631 | 1.21k | neighbour[count].atom = MonoBond[i].src; |
632 | 1.21k | neighbour[count].bond = i; |
633 | 1.21k | count++; |
634 | 1.21k | } |
635 | 3.66k | } |
636 | 37.0k | } |
637 | | |
638 | 3.66k | if ( *node && ((*node)->type==BC_EVAL) ) |
639 | 1.44k | { |
640 | 1.44k | found = false; |
641 | 1.44k | node = (ByteCode**)&(*node)->eval.next; |
642 | 2.02k | while( *node && ((*node)->type==BC_COUNT) ) |
643 | 1.75k | { |
644 | 1.75k | if( (*node)->count.value == count ) |
645 | 1.17k | { |
646 | 1.17k | node = (ByteCode**)&(*node)->count.tcond; |
647 | 1.17k | found = true; |
648 | 1.17k | break; |
649 | 1.17k | } |
650 | 582 | else |
651 | 582 | node = (ByteCode**)&(*node)->count.fcond; |
652 | 1.75k | } |
653 | | |
654 | 1.44k | if( !found ) |
655 | 270 | { |
656 | 270 | ptr = AllocateByteCode(BC_COUNT); |
657 | 270 | ptr->count.tcond = nullptr; |
658 | 270 | ptr->count.fcond = *node; |
659 | 270 | *node = ptr; |
660 | 270 | node = (ByteCode**)&ptr->count.tcond; |
661 | 270 | ptr->count.value = count; |
662 | 270 | } |
663 | 1.44k | } |
664 | 2.22k | else if( count || StrictFlag || StackPtr ) |
665 | 2.09k | { |
666 | 2.09k | ptr = AllocateByteCode(BC_EVAL); |
667 | 2.09k | ptr->eval.next = *node; |
668 | 2.09k | *node = ptr; |
669 | 2.09k | node = (ByteCode**)&ptr->eval.next; |
670 | | |
671 | 2.09k | ptr = AllocateByteCode(BC_COUNT); |
672 | 2.09k | ptr->count.tcond = nullptr; |
673 | 2.09k | ptr->count.fcond = *node; |
674 | 2.09k | *node = ptr; |
675 | 2.09k | node = (ByteCode**)&ptr->count.tcond; |
676 | 2.09k | ptr->count.value = count; |
677 | 2.09k | } |
678 | 3.66k | } |
679 | | |
680 | 5.82k | if( count == 1 ) |
681 | 1.83k | { |
682 | 1.83k | GenerateByteCodes(node,resid,neighbour[0].atom, curr,neighbour[0].bond); |
683 | 1.83k | } |
684 | 3.99k | else if( count == 2 ) |
685 | 672 | { |
686 | 672 | original = Stack[StackPtr++]; |
687 | 672 | Stack[StackPtr-1] = neighbour[0]; |
688 | 672 | Stack[StackPtr-1].prev = curr; |
689 | 672 | GenerateByteCodes(node,resid,neighbour[1].atom, |
690 | 672 | curr,neighbour[1].bond); |
691 | 672 | Stack[StackPtr-1] = neighbour[1]; |
692 | 672 | Stack[StackPtr-1].prev = curr; |
693 | 672 | GenerateByteCodes(node,resid,neighbour[0].atom, |
694 | 672 | curr,neighbour[0].bond); |
695 | 672 | Stack[--StackPtr] = original; |
696 | 672 | } |
697 | 3.31k | else if( count ) |
698 | 0 | { |
699 | 0 | stringstream errorMsg; |
700 | 0 | errorMsg << "Maximum Monomer Fanout Exceeded!" << endl; |
701 | 0 | errorMsg << "Residue " << ChainsResName[resid] << " atom " |
702 | 0 | << curr << endl; |
703 | 0 | errorMsg << "Previous = " << prev << " Fanout = " << count << endl; |
704 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); |
705 | 0 | } |
706 | 3.31k | else if( StackPtr ) |
707 | 2.48k | { |
708 | 2.48k | StackPtr--; |
709 | 2.48k | GenerateByteCodes(node,resid,Stack[StackPtr].atom, |
710 | 2.48k | Stack[StackPtr].prev,Stack[StackPtr].bond); |
711 | 2.48k | StackPtr++; |
712 | 2.48k | } |
713 | 834 | else if( !(*node) ) |
714 | 786 | { |
715 | 786 | ptr = AllocateByteCode(BC_ASSIGN); |
716 | 786 | ptr->assign.resid = resid; |
717 | 786 | ptr->assign.atomid = new int[AtomIndex]; |
718 | 786 | if( !ptr->assign.atomid ) |
719 | 0 | FatalMemoryError(); |
720 | 8.42k | for( i=0; i<MonoAtomCount; i++ ) |
721 | 7.63k | if( (j=MonoAtom[i].index) != -1 ) |
722 | 7.62k | ptr->assign.atomid[j] = MonoAtom[i].atomid; |
723 | 786 | if( BondIndex ) |
724 | 780 | { |
725 | 780 | ptr->assign.bflags = new int[BondIndex]; |
726 | 8.71k | for( i=0; i<MonoBondCount; i++ ) |
727 | 7.93k | if( (j=MonoBond[i].index) != -1 ) |
728 | 7.93k | ptr->assign.bflags[j] = MonoBond[i].flag; |
729 | 780 | } |
730 | 786 | *node = ptr; |
731 | 786 | } |
732 | 48 | else if( (*node)->type == BC_ASSIGN ) |
733 | 48 | { |
734 | 48 | if( (*node)->assign.resid != resid ) |
735 | 0 | { |
736 | 0 | stringstream errorMsg; |
737 | 0 | errorMsg << "Duplicated Monomer Specification!\n"; |
738 | 0 | errorMsg << "Residue " << ChainsResName[resid] |
739 | 0 | << " matches residue "; |
740 | 0 | errorMsg << ChainsResName[(*node)->assign.resid] << endl; |
741 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); |
742 | 0 | } |
743 | 48 | } |
744 | | |
745 | | /* Restore State! */ |
746 | 5.82k | if( curr != prev ) |
747 | 5.66k | { |
748 | 5.66k | if( !done ) |
749 | 3.49k | { |
750 | 3.49k | MonoAtom[curr].index = -1; |
751 | 3.49k | AtomIndex--; |
752 | 3.49k | } |
753 | 5.66k | MonoBond[bond].index = -1; |
754 | 5.66k | BondIndex--; |
755 | 5.66k | } |
756 | 5.82k | } |
757 | | |
758 | | ////////////////////////////////////////////////////////////////////////////// |
759 | | // Constructors / Destructors |
760 | | ////////////////////////////////////////////////////////////////////////////// |
761 | | |
762 | | // validated |
763 | | OBChainsParser::OBChainsParser(void) |
764 | 6 | { |
765 | 6 | int i, res = RESIDMIN; |
766 | | |
767 | 6 | PDecisionTree = (ByteCode*)nullptr; |
768 | 132 | for( i=0 ; i < AMINOMAX ; i++ ) |
769 | 126 | { |
770 | 126 | strncpy(ChainsResName[res],AminoAcids[i].name, sizeof(ChainsResName[res]) - 1); |
771 | 126 | ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0'; |
772 | 126 | DefineMonomer(&PDecisionTree,res,AminoAcids[i].data); |
773 | 126 | res++; |
774 | 126 | } |
775 | | |
776 | 6 | NDecisionTree = (ByteCode*)nullptr; |
777 | 42 | for( i=0 ; i< NUCLEOMAX ; i++ ) |
778 | 36 | { |
779 | 36 | strncpy(ChainsResName[res],Nucleotides[i].name, sizeof(ChainsResName[res]) - 1); |
780 | 36 | ChainsResName[res][sizeof(ChainsResName[res]) - 1] = '\0'; |
781 | 36 | DefineMonomer(&NDecisionTree,res,Nucleotides[i].data); |
782 | 36 | res++; |
783 | 36 | } |
784 | 6 | } |
785 | | |
786 | | OBChainsParser::~OBChainsParser(void) |
787 | 0 | { |
788 | 0 | DeleteByteCode((ByteCode*)PDecisionTree); |
789 | 0 | DeleteByteCode((ByteCode*)NDecisionTree); |
790 | 0 | } |
791 | | |
792 | | ////////////////////////////////////////////////////////////////////////////// |
793 | | // Setup / Cleanup Functions |
794 | | ////////////////////////////////////////////////////////////////////////////// |
795 | | |
796 | | //! Setup parsing for this molecule -- |
797 | | void OBChainsParser::SetupMol(OBMol &mol) |
798 | 0 | { |
799 | 0 | CleanupMol(); |
800 | |
|
801 | 0 | int i; |
802 | 0 | int asize = mol.NumAtoms(); |
803 | 0 | int bsize = mol.NumBonds(); |
804 | |
|
805 | 0 | bitmasks.resize(asize, 0); |
806 | 0 | visits.resize(asize, 0); |
807 | 0 | resids.resize(asize, 0); |
808 | 0 | flags.resize(bsize, 0); |
809 | 0 | hetflags.resize(asize, 0); |
810 | 0 | atomids.resize(asize, 0); |
811 | 0 | resnos.resize(asize, 0); |
812 | 0 | sernos.resize(asize, 0); |
813 | 0 | hcounts.resize(asize, 0); |
814 | 0 | chains.resize(asize, ' '); |
815 | |
|
816 | 0 | for ( i = 0 ; i < asize ; i++ ) |
817 | 0 | { |
818 | 0 | atomids[i] = -1; |
819 | 0 | } |
820 | 0 | } |
821 | | |
822 | | //! Clean up any molecular data left in memory -- frees all memory afterwards |
823 | | //! Used by OBChainsParser::SetupMol() |
824 | | void OBChainsParser::CleanupMol(void) |
825 | 0 | { |
826 | 0 | bitmasks.clear(); |
827 | 0 | visits.clear(); |
828 | 0 | resids.clear(); |
829 | 0 | flags.clear(); |
830 | 0 | hetflags.clear(); |
831 | 0 | atomids.clear(); |
832 | 0 | resnos.clear(); |
833 | 0 | sernos.clear(); |
834 | 0 | hcounts.clear(); |
835 | 0 | chains.clear(); |
836 | 0 | } |
837 | | |
838 | | //! Clear all residue information for a supplied molecule |
839 | | void OBChainsParser::ClearResidueInformation(OBMol &mol) |
840 | 0 | { |
841 | 0 | OBResidue *residue; |
842 | 0 | vector<OBResidue*> residues; |
843 | 0 | vector<OBResidue*>::iterator r; |
844 | |
|
845 | 0 | if (mol.NumResidues() == 0) |
846 | 0 | return; // Done! |
847 | | |
848 | 0 | for (residue = mol.BeginResidue(r) ; residue ; residue = mol.NextResidue(r)) |
849 | 0 | residues.push_back(residue); |
850 | |
|
851 | 0 | for ( unsigned int i = 0 ; i < residues.size() ; i++ ) |
852 | 0 | mol.DeleteResidue(residues[i]); |
853 | |
|
854 | 0 | residues.clear(); |
855 | 0 | } |
856 | | |
857 | | void OBChainsParser::SetResidueInformation(OBMol &mol, bool nukeSingleResidue) |
858 | 0 | { |
859 | 0 | char buffer[BUFF_SIZE]; |
860 | 0 | const char *symbol; |
861 | 0 | string atomid, name; |
862 | |
|
863 | 0 | OBAtom *atom; |
864 | 0 | OBResidue *residue; |
865 | 0 | map<char, map<short, OBResidue*> > resmap; |
866 | 0 | unsigned int numAtoms = mol.NumAtoms(); |
867 | | |
868 | | //DumpState(); |
869 | | |
870 | | // correct serine OG |
871 | 0 | for ( unsigned int i = 0 ; i < numAtoms ; i++ ) { |
872 | 0 | if (resids[i] == RESIDMIN + 17) // serine |
873 | 0 | if (atomids[i] == -1) { |
874 | 0 | atom = mol.GetAtom(i+1); |
875 | |
|
876 | 0 | FOR_NBORS_OF_ATOM (nbr, atom) { |
877 | 0 | if (atomids[nbr->GetIdx()-1] == 4) // CB |
878 | 0 | atomids[i] = 6; // OG |
879 | 0 | } |
880 | 0 | } |
881 | 0 | } |
882 | |
|
883 | 0 | for ( unsigned int i = 0 ; i < numAtoms ; i++ ) { |
884 | 0 | atom = mol.GetAtom(i+1); // WARNING: ATOM INDEX ISSUE |
885 | |
|
886 | 0 | if (atomids[i] == -1) { |
887 | 0 | symbol = OBElements::GetSymbol(atom->GetAtomicNum()); |
888 | 0 | if ( symbol[1] ) { |
889 | 0 | buffer[0] = symbol[0]; |
890 | 0 | buffer[1] = (char) toupper(symbol[1]); |
891 | 0 | } else { |
892 | 0 | buffer[0] = ' '; |
893 | 0 | buffer[1] = symbol[0]; |
894 | 0 | } |
895 | 0 | buffer[2] = ' '; |
896 | 0 | buffer[3] = ' '; |
897 | 0 | buffer[4] = '\0'; |
898 | 0 | } else if (atom->GetAtomicNum() == OBElements::Hydrogen) { |
899 | 0 | if (hcounts[i]) { |
900 | 0 | snprintf(buffer, BUFF_SIZE, "H%.2s%c", ChainsAtomName[atomids[i]]+2, hcounts[i]+'0'); |
901 | 0 | if (buffer[1] == ' ') { |
902 | 0 | buffer[1] = buffer[3]; |
903 | 0 | buffer[2] = '\0'; |
904 | 0 | } |
905 | 0 | else if (buffer[2] == ' ') { |
906 | 0 | buffer[2] = buffer[3]; |
907 | 0 | buffer[3] = '\0'; |
908 | 0 | } |
909 | 0 | } else { |
910 | 0 | snprintf(buffer, BUFF_SIZE, "H%.2s", ChainsAtomName[atomids[i]]+2); |
911 | 0 | } |
912 | 0 | } else |
913 | 0 | snprintf(buffer, BUFF_SIZE, "%.4s", ChainsAtomName[atomids[i]]); |
914 | |
|
915 | 0 | if (buffer[3] == ' ') |
916 | 0 | buffer[3] = '\0'; |
917 | | |
918 | | //cout << " (2) --> = " << buffer << endl; |
919 | |
|
920 | 0 | atomid = (buffer[0] == ' ') ? buffer + 1 : buffer; |
921 | | |
922 | | //cout << " (3) --> = " << buffer << endl; |
923 | |
|
924 | 0 | if (resmap[chains[i]].find(resnos[i]) != resmap[chains[i]].end()) { |
925 | 0 | residue = resmap[chains[i]][resnos[i]]; |
926 | 0 | residue->AddAtom(atom); |
927 | 0 | residue->SetAtomID(atom, atomid); |
928 | 0 | residue->SetHetAtom(atom, hetflags[i]); |
929 | 0 | residue->SetSerialNum(atom, sernos[i]); |
930 | 0 | } else { |
931 | 0 | name = ChainsResName[resids[i]]; |
932 | |
|
933 | 0 | residue = mol.NewResidue(); |
934 | 0 | residue->SetName(name); |
935 | 0 | residue->SetNum(resnos[i]); |
936 | 0 | residue->SetChain(chains[i]); |
937 | |
|
938 | 0 | residue->AddAtom(atom); |
939 | 0 | residue->SetAtomID(atom, atomid); |
940 | 0 | residue->SetHetAtom(atom, hetflags[i]); |
941 | 0 | residue->SetSerialNum(atom, sernos[i]); |
942 | |
|
943 | 0 | resmap[chains[i]][resnos[i]] = residue; |
944 | 0 | } |
945 | 0 | } |
946 | |
|
947 | 0 | if (mol.NumResidues() == 1 && nukeSingleResidue) |
948 | 0 | mol.DeleteResidue(mol.GetResidue(0)); |
949 | 0 | else if (mol.NumResidues() == 1 && (mol.GetResidue(0))->GetName() == "UNK") |
950 | 0 | mol.DeleteResidue(mol.GetResidue(0)); |
951 | 0 | } |
952 | | |
953 | | ////////////////////////////////////////////////////////////////////////////// |
954 | | // Perception Functions |
955 | | ////////////////////////////////////////////////////////////////////////////// |
956 | | |
957 | | bool OBChainsParser::PerceiveChains(OBMol &mol, bool nukeSingleResidue) |
958 | 0 | { |
959 | 0 | bool result = true; |
960 | 0 | unsigned int idx; |
961 | |
|
962 | 0 | SetupMol(mol); |
963 | 0 | ClearResidueInformation(mol); |
964 | |
|
965 | 0 | result = DetermineHetAtoms(mol) && result; |
966 | 0 | result = DetermineConnectedChains(mol) && result; |
967 | 0 | result = DeterminePeptideBackbone(mol) && result; |
968 | 0 | result = DeterminePeptideSidechains(mol) && result; |
969 | 0 | result = DetermineNucleicBackbone(mol) && result; |
970 | 0 | result = DetermineNucleicSidechains(mol) && result; |
971 | 0 | result = DetermineHydrogens(mol) && result; |
972 | | |
973 | | // Partially identified residues |
974 | | // example: CSD in 1LWF (CYS with two Os on the S) |
975 | 0 | bool changed; |
976 | 0 | vector<pair<char,short> > invalidResidues; |
977 | 0 | do { |
978 | 0 | changed = false; |
979 | |
|
980 | 0 | FOR_ATOMS_OF_MOL (atom, mol) { |
981 | 0 | idx = atom->GetIdx() - 1; |
982 | |
|
983 | 0 | if (resids[idx] == 0) { // UNK |
984 | 0 | FOR_NBORS_OF_ATOM (nbr, &*atom) { |
985 | 0 | unsigned int idx2 = nbr->GetIdx() - 1; |
986 | 0 | if (resids[idx2] != 0) { // !UNK |
987 | 0 | if (atomids[idx2] == AI_N || atomids[idx2] == AI_C) { |
988 | | // bound to backbone-N/C |
989 | 0 | hetflags[idx] = true; |
990 | 0 | resids[idx] = 3; // ACE |
991 | 0 | atomids[idx] = -1; |
992 | 0 | } else { |
993 | 0 | resnos[idx] = resnos[idx2]; |
994 | 0 | resids[idx] = resids[idx2]; |
995 | 0 | changed = true; |
996 | |
|
997 | 0 | bool addResidue = true; |
998 | 0 | for (unsigned int i = 0; i < invalidResidues.size(); ++i) |
999 | 0 | if ( (invalidResidues[i].first == chains[idx2]) && |
1000 | 0 | (invalidResidues[i].second == resnos[idx2]) ) |
1001 | 0 | addResidue = false; |
1002 | 0 | if (addResidue) |
1003 | 0 | invalidResidues.push_back(pair<char,short>(chains[idx2], resnos[idx2])); |
1004 | 0 | } |
1005 | 0 | } |
1006 | 0 | } |
1007 | 0 | } |
1008 | |
|
1009 | 0 | } |
1010 | 0 | } while (changed); |
1011 | 0 | for (unsigned int i = 0; i < invalidResidues.size(); ++i) { |
1012 | 0 | FOR_ATOMS_OF_MOL (atom, mol) { |
1013 | 0 | idx = atom->GetIdx() - 1; |
1014 | 0 | if ( (invalidResidues[i].first == chains[idx]) && |
1015 | 0 | (invalidResidues[i].second == resnos[idx]) ) { |
1016 | 0 | hetflags[idx] = true; |
1017 | 0 | resids[idx] = 0; // UNK |
1018 | 0 | atomids[idx] = -1; |
1019 | 0 | } |
1020 | 0 | } |
1021 | 0 | } |
1022 | 0 | invalidResidues.clear(); |
1023 | | |
1024 | | // number the element in the ' ' chain (water, ions, ligands, ...) |
1025 | 0 | short resno = 1; |
1026 | 0 | FOR_ATOMS_OF_MOL (atom, mol) { |
1027 | 0 | idx = atom->GetIdx() - 1; |
1028 | |
|
1029 | 0 | if (atom->GetHvyDegree() == 0) { |
1030 | 0 | chains[idx] = ' '; |
1031 | 0 | resnos[idx] = resno; |
1032 | 0 | resno++; |
1033 | 0 | } else { |
1034 | 0 | if (resids[idx] != 0) // UNK |
1035 | 0 | continue; |
1036 | 0 | if (hetflags[idx]) |
1037 | 0 | continue; |
1038 | | |
1039 | 0 | char chain = chains[idx]; |
1040 | 0 | FOR_ATOMS_OF_MOL (b, mol) { |
1041 | 0 | unsigned int idx2 = b->GetIdx() - 1; |
1042 | 0 | if (chains[idx2] == chain && !hetflags[idx2]) { |
1043 | 0 | hetflags[idx2] = true; |
1044 | 0 | chains[idx2] = ' '; |
1045 | 0 | resnos[idx2] = resno; |
1046 | 0 | resids[idx2] = 2; // unknown ligand |
1047 | 0 | } |
1048 | 0 | } |
1049 | |
|
1050 | 0 | resno++; |
1051 | 0 | } |
1052 | |
|
1053 | 0 | } |
1054 | |
|
1055 | 0 | SetResidueInformation(mol, nukeSingleResidue); |
1056 | 0 | CleanupMol(); |
1057 | |
|
1058 | 0 | mol.SetChainsPerceived(); |
1059 | |
|
1060 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
1061 | 0 | "Ran OpenBabel::PerceiveChains", obAuditMsg); |
1062 | |
|
1063 | 0 | return result; |
1064 | 0 | } |
1065 | | |
1066 | | ///////////////////////////////////////////////////////////////////////////// |
1067 | | // Hetero Atom Perception |
1068 | | ///////////////////////////////////////////////////////////////////////////// |
1069 | | |
1070 | | bool OBChainsParser::DetermineHetAtoms(OBMol &mol) |
1071 | 0 | { |
1072 | 0 | OBAtom *atom; |
1073 | 0 | vector<OBAtom *>::iterator a; |
1074 | | |
1075 | | // find un-connected atoms (e.g., HOH oxygen atoms) |
1076 | 0 | for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) { |
1077 | 0 | if (atom->GetAtomicNum() == OBElements::Hydrogen || atom->GetHvyDegree() != 0) |
1078 | 0 | continue; |
1079 | | |
1080 | 0 | unsigned int idx = atom->GetIdx() - 1; |
1081 | |
|
1082 | 0 | if (atom->GetAtomicNum() == OBElements::Oxygen) { |
1083 | 0 | resids[idx] = 1; |
1084 | 0 | hetflags[idx] = true; |
1085 | 0 | } |
1086 | 0 | } |
1087 | |
|
1088 | 0 | return true; |
1089 | 0 | } |
1090 | | |
1091 | | ///////////////////////////////////////////////////////////////////////////// |
1092 | | // Connected Chain Perception |
1093 | | ///////////////////////////////////////////////////////////////////////////// |
1094 | | |
1095 | | bool OBChainsParser::DetermineConnectedChains(OBMol &mol) |
1096 | 0 | { |
1097 | 0 | int resid; |
1098 | 0 | unsigned int size; |
1099 | 0 | unsigned int i, idx; |
1100 | |
|
1101 | 0 | int resno = 1; |
1102 | 0 | int count = 0; |
1103 | 0 | unsigned int numAtoms = mol.NumAtoms(); |
1104 | |
|
1105 | 0 | OBAtom *atom; |
1106 | 0 | vector<OBAtom *>::iterator a; |
1107 | 0 | for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) { |
1108 | 0 | idx = atom->GetIdx() - 1; |
1109 | 0 | if (!hetflags[idx] && chains[idx] == ' ' && atom->GetAtomicNum() != OBElements::Hydrogen) { |
1110 | 0 | size = RecurseChain(mol, idx, 'A' + count); |
1111 | | |
1112 | | // size = number of heavy atoms in residue chain |
1113 | 0 | if (size < 4) { // small ligand, probably |
1114 | 0 | if (size == 1 && atom->GetAtomicNum() == OBElements::Oxygen) |
1115 | 0 | resid = 1; /* HOH */ |
1116 | 0 | else |
1117 | 0 | resid = 2; /* Unknown ligand */ |
1118 | |
|
1119 | 0 | for (i = 0 ; i < numAtoms ; ++i) { |
1120 | 0 | if (chains[i] == ('A' + count)) { |
1121 | 0 | hetflags[i] = true; |
1122 | 0 | resids[i] = resid; |
1123 | 0 | resnos[i] = resno; |
1124 | 0 | chains[i] = ' '; |
1125 | 0 | } |
1126 | 0 | } |
1127 | 0 | resno++; |
1128 | 0 | } else { |
1129 | 0 | count++; // number of connected chains |
1130 | 0 | if (count > 26) // out of chain IDs |
1131 | 0 | break; |
1132 | 0 | } |
1133 | 0 | } |
1134 | 0 | } |
1135 | | |
1136 | | /* |
1137 | | if( count == 1 ) |
1138 | | for ( i = 0 ; i < numAtoms ; i++ ) |
1139 | | chains[i] = ' '; |
1140 | | */ |
1141 | |
|
1142 | 0 | return true; |
1143 | 0 | } |
1144 | | |
1145 | | ///@todo atom index |
1146 | | unsigned int OBChainsParser::RecurseChain(OBMol &mol, unsigned int i, int c) |
1147 | 0 | { |
1148 | 0 | OBAtom *atom, *nbr; |
1149 | 0 | vector<OBBond *>::iterator b; |
1150 | 0 | unsigned int result, index; |
1151 | |
|
1152 | 0 | atom = mol.GetAtom(i + 1); |
1153 | | |
1154 | | // ignore hydrogens |
1155 | 0 | if (atom->GetAtomicNum() == OBElements::Hydrogen ) |
1156 | 0 | return 0; |
1157 | | |
1158 | 0 | result = 1; |
1159 | 0 | chains[i] = c; |
1160 | | |
1161 | | // recurse till we have all atoms for this chain |
1162 | 0 | for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b)) { |
1163 | 0 | index = nbr->GetIdx() - 1; |
1164 | 0 | if (chains[index] == ' ') |
1165 | 0 | result += RecurseChain(mol, index, c); |
1166 | 0 | } |
1167 | | |
1168 | | // and return how many we found |
1169 | 0 | return result; |
1170 | 0 | } |
1171 | | |
1172 | | ////////////////////////////////////////////////////////////////////////////// |
1173 | | // Peptide Backbone Perception |
1174 | | ////////////////////////////////////////////////////////////////////////////// |
1175 | | |
1176 | | bool OBChainsParser::DeterminePeptideBackbone(OBMol &mol) |
1177 | 0 | { |
1178 | 0 | ConstrainBackbone(mol, Peptide, MAXPEPTIDE); |
1179 | |
|
1180 | 0 | unsigned int i, numAtoms = mol.NumAtoms(); |
1181 | | |
1182 | | // Cyclic peptides have no NTer (1SKI) |
1183 | | // timvdm 30/06/08 |
1184 | 0 | bool foundNTer = false; |
1185 | 0 | for (i = 0 ; i < numAtoms; i++) |
1186 | 0 | if (bitmasks[i] & BitNTer) |
1187 | 0 | foundNTer = true; |
1188 | 0 | if (!foundNTer) |
1189 | 0 | for (i = 0 ; i < numAtoms; i++) |
1190 | 0 | if (bitmasks[i] & BitNAll) |
1191 | 0 | bitmasks[i] |= BitNTer; |
1192 | | |
1193 | | |
1194 | | /* Order Peptide Backbone */ |
1195 | |
|
1196 | 0 | for ( i = 0 ; i < numAtoms ; i++ ) |
1197 | 0 | if (atomids[i] == -1) |
1198 | 0 | { |
1199 | 0 | if( bitmasks[i] & BitNTer ) |
1200 | 0 | { |
1201 | 0 | atomids[i] = AI_N; |
1202 | 0 | TracePeptideChain(mol,i,1); |
1203 | 0 | } |
1204 | 0 | else if( (bitmasks[i]&BitNPT) && !(bitmasks[i]&BitN) ) |
1205 | 0 | { |
1206 | 0 | atomids[i] = AI_N; |
1207 | 0 | TracePeptideChain(mol,i,1); |
1208 | 0 | } |
1209 | 0 | } |
1210 | | |
1211 | | /* Carbonyl Double Bond */ |
1212 | |
|
1213 | 0 | OBBond *bond; |
1214 | 0 | vector<OBBond*>::iterator b; |
1215 | 0 | for (bond = mol.BeginBond(b) ; bond ; bond = mol.NextBond(b)) |
1216 | 0 | { |
1217 | 0 | if ((atomids[bond->GetBeginAtomIdx()-1] == 2 && atomids[bond->GetEndAtomIdx()-1] == 3) || |
1218 | 0 | (atomids[bond->GetBeginAtomIdx()-1] == 3 && atomids[bond->GetEndAtomIdx()-1] == 2)) |
1219 | 0 | flags[bond->GetIdx()] |= BF_DOUBLE; |
1220 | 0 | } |
1221 | |
|
1222 | 0 | return true; |
1223 | 0 | } |
1224 | | |
1225 | | void OBChainsParser::ConstrainBackbone(OBMol &mol, Template *templ, int tmax) |
1226 | 0 | { |
1227 | 0 | static OBAtom *neighbour[6]; |
1228 | 0 | Template *pep; |
1229 | 0 | OBAtom *na = nullptr; |
1230 | 0 | OBAtom *nb = nullptr; |
1231 | 0 | OBAtom *nc = nullptr; |
1232 | 0 | OBAtom *nd = nullptr; |
1233 | 0 | OBAtom *atom, *nbr; |
1234 | 0 | bool change, result; |
1235 | 0 | int i, count; |
1236 | 0 | unsigned int idx; |
1237 | |
|
1238 | 0 | vector<OBAtom *>::iterator a; |
1239 | 0 | vector<OBBond *>::iterator b; |
1240 | | |
1241 | | /* First Pass */ |
1242 | |
|
1243 | 0 | for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) |
1244 | 0 | { |
1245 | 0 | idx = atom->GetIdx() - 1; |
1246 | 0 | bitmasks[idx] = 0; |
1247 | 0 | for ( i = 0 ; i < tmax ; i++ ) |
1248 | 0 | if ( (static_cast<unsigned int>(templ[i].elem) == atom->GetAtomicNum()) && |
1249 | 0 | (static_cast<unsigned int>(templ[i].count) == atom->GetHvyDegree())) |
1250 | 0 | bitmasks[idx] |= templ[i].flag; |
1251 | 0 | } |
1252 | | |
1253 | | /* Second Pass */ |
1254 | |
|
1255 | 0 | do |
1256 | 0 | { |
1257 | 0 | change = false; |
1258 | 0 | for (atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) |
1259 | 0 | { |
1260 | 0 | idx = atom->GetIdx() - 1; |
1261 | 0 | if (bitmasks[idx]) // Determine Neighbours |
1262 | 0 | { |
1263 | 0 | count = 0; |
1264 | 0 | for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b)) |
1265 | 0 | if (nbr->GetAtomicNum() != OBElements::Hydrogen) |
1266 | 0 | neighbour[count++] = nbr; |
1267 | |
|
1268 | 0 | if (count >= 1) |
1269 | 0 | na = neighbour[0]; |
1270 | 0 | if (count >= 2) |
1271 | 0 | nb = neighbour[1]; |
1272 | 0 | if (count >= 3) |
1273 | 0 | nc = neighbour[2]; |
1274 | 0 | if (count >= 4) |
1275 | 0 | nd = neighbour[3]; |
1276 | |
|
1277 | 0 | for ( i = 0 ; i < tmax ; i++ ) |
1278 | 0 | if ( templ[i].flag & bitmasks[idx] ) |
1279 | 0 | { |
1280 | 0 | pep = &templ[i]; |
1281 | 0 | result = true; |
1282 | |
|
1283 | 0 | if (count == 4) |
1284 | 0 | result = Match4Constraints(pep,na,nb,nc,nd); |
1285 | 0 | else if (count == 3) |
1286 | 0 | result = Match3Constraints(pep,na,nb,nc); |
1287 | 0 | else if (count == 2) |
1288 | 0 | result = Match2Constraints(pep,na,nb); |
1289 | 0 | else if (count == 1) |
1290 | 0 | result = MatchConstraint(na,pep->n1); |
1291 | |
|
1292 | 0 | if(result == false) |
1293 | 0 | { |
1294 | 0 | bitmasks[idx] &= ~pep->flag; |
1295 | 0 | change = true; |
1296 | 0 | } |
1297 | 0 | } |
1298 | 0 | } |
1299 | 0 | } |
1300 | 0 | } |
1301 | 0 | while( change ); |
1302 | 0 | } |
1303 | | |
1304 | | bool OBChainsParser::MatchConstraint(OBAtom *atom, int mask) |
1305 | 0 | { |
1306 | 0 | if (atom == nullptr) |
1307 | 0 | return (false); |
1308 | | |
1309 | 0 | if( mask < 0 ) |
1310 | 0 | return(atom->GetAtomicNum() == static_cast<unsigned int>(-mask)); |
1311 | 0 | else |
1312 | 0 | return(((bitmasks[atom->GetIdx()-1]&mask) == 0) ? false : true); |
1313 | 0 | } |
1314 | | |
1315 | | bool OBChainsParser::Match2Constraints(Template *tmpl, OBAtom *na, OBAtom *nb) |
1316 | 0 | { |
1317 | 0 | if (na == nullptr || nb == nullptr) |
1318 | 0 | return (false); // don't even try to evaluate it |
1319 | | |
1320 | 0 | if( MatchConstraint(na,tmpl->n2) ) |
1321 | 0 | if( MatchConstraint(nb,tmpl->n1) ) |
1322 | 0 | return( true ); |
1323 | 0 | if( MatchConstraint(nb,tmpl->n2) ) |
1324 | 0 | if( MatchConstraint(na,tmpl->n1) ) |
1325 | 0 | return( true ); |
1326 | 0 | return( false ); |
1327 | 0 | } |
1328 | | |
1329 | | bool OBChainsParser::Match3Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc) |
1330 | 0 | { |
1331 | 0 | if (na == nullptr || nb == nullptr || nc == nullptr) |
1332 | 0 | return (false); // don't even try to evaluate it |
1333 | | |
1334 | 0 | if( MatchConstraint(na,tmpl->n3) ) |
1335 | 0 | if( Match2Constraints(tmpl,nb,nc) ) |
1336 | 0 | return( true ); |
1337 | 0 | if( MatchConstraint(nb,tmpl->n3) ) |
1338 | 0 | if( Match2Constraints(tmpl,na,nc) ) |
1339 | 0 | return( true ); |
1340 | 0 | if( MatchConstraint(nc,tmpl->n3) ) |
1341 | 0 | if( Match2Constraints(tmpl,na,nb) ) |
1342 | 0 | return( true ); |
1343 | 0 | return( false ); |
1344 | 0 | } |
1345 | | |
1346 | | bool OBChainsParser::Match4Constraints(Template *tmpl, OBAtom *na, OBAtom *nb, OBAtom *nc, OBAtom *nd) |
1347 | 0 | { |
1348 | 0 | if (na == nullptr || nb == nullptr || nc == nullptr || nd == nullptr) |
1349 | 0 | return (false); // don't even try to evaluate it |
1350 | | |
1351 | 0 | if( MatchConstraint(na,tmpl->n4) ) |
1352 | 0 | if( Match3Constraints(tmpl,nb,nc,nd) ) |
1353 | 0 | return( true ); |
1354 | 0 | if( MatchConstraint(nb,tmpl->n4) ) |
1355 | 0 | if( Match3Constraints(tmpl,na,nc,nd) ) |
1356 | 0 | return( true ); |
1357 | 0 | if( MatchConstraint(nc,tmpl->n4) ) |
1358 | 0 | if( Match3Constraints(tmpl,na,nb,nd) ) |
1359 | 0 | return( true ); |
1360 | 0 | if( MatchConstraint(nd,tmpl->n4) ) |
1361 | 0 | if( Match3Constraints(tmpl,na,nb,nc) ) |
1362 | 0 | return( true ); |
1363 | 0 | return( false ); |
1364 | 0 | } |
1365 | | |
1366 | | void OBChainsParser::TracePeptideChain(OBMol &mol, unsigned int i, int r) |
1367 | 0 | { |
1368 | 0 | unsigned int neighbour[4]; |
1369 | 0 | unsigned int na = 0; |
1370 | 0 | unsigned int nb = 0; |
1371 | 0 | unsigned int nc = 0; |
1372 | 0 | OBAtom *atom, *nbr; |
1373 | 0 | int count; |
1374 | 0 | int j,k; |
1375 | 0 | unsigned int idx; |
1376 | |
|
1377 | 0 | j = k = 0; // ignore warning |
1378 | |
|
1379 | 0 | vector<OBBond *>::iterator b; |
1380 | | |
1381 | | /* Determine Neighbours */ |
1382 | |
|
1383 | 0 | atom = mol.GetAtom(i+1); |
1384 | 0 | idx = atom->GetIdx() - 1; |
1385 | 0 | if (visits[i]) |
1386 | 0 | return; |
1387 | 0 | visits[i] = true; |
1388 | |
|
1389 | 0 | count = 0; |
1390 | 0 | for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b)) |
1391 | 0 | if (nbr->GetAtomicNum() != OBElements::Hydrogen) |
1392 | 0 | neighbour[count++] = nbr->GetIdx()-1; |
1393 | |
|
1394 | 0 | resnos[idx] = r; |
1395 | |
|
1396 | 0 | if (count >= 1) |
1397 | 0 | na = neighbour[0]; |
1398 | 0 | if (count >= 2) |
1399 | 0 | nb = neighbour[1]; |
1400 | 0 | if (count >= 3) |
1401 | 0 | nc = neighbour[2]; |
1402 | |
|
1403 | 0 | switch( atomids[i] ) |
1404 | 0 | { |
1405 | 0 | case(AI_N): |
1406 | 0 | for( j=0; j<count; j++ ) |
1407 | 0 | if (bitmasks[neighbour[j]] & BitCAAll) |
1408 | 0 | { |
1409 | 0 | atomids[neighbour[j]] = AI_CA; |
1410 | 0 | if (!visits[neighbour[j]]) |
1411 | 0 | TracePeptideChain(mol,neighbour[j],r); |
1412 | 0 | } |
1413 | 0 | break; |
1414 | | |
1415 | 0 | case(AI_CA): |
1416 | 0 | if( count == 3 ) |
1417 | 0 | { |
1418 | 0 | if ( bitmasks[na] & BitNAll ) |
1419 | 0 | na = nc; |
1420 | 0 | else if ( bitmasks[nb] & BitNAll ) |
1421 | 0 | nb = nc; |
1422 | |
|
1423 | 0 | if ( bitmasks[na] & BitC ) |
1424 | 0 | { |
1425 | 0 | j = na; |
1426 | 0 | k = nb; |
1427 | 0 | } |
1428 | 0 | else if ( bitmasks[nb] & BitC ) |
1429 | 0 | { |
1430 | 0 | j = nb; |
1431 | 0 | k = na; |
1432 | 0 | } |
1433 | 0 | else if( bitmasks[na] & BitCAll ) |
1434 | 0 | { |
1435 | 0 | j = na; |
1436 | 0 | k = nb; |
1437 | 0 | } |
1438 | 0 | else if (bitmasks[nb] & BitCAll ) |
1439 | 0 | { |
1440 | 0 | j = nb; |
1441 | 0 | k = na; |
1442 | 0 | } |
1443 | |
|
1444 | 0 | atomids[j] = AI_C; |
1445 | 0 | bitmasks[k] = 0; |
1446 | |
|
1447 | 0 | if (!visits[j]) |
1448 | 0 | TracePeptideChain(mol,j,r); |
1449 | 0 | } |
1450 | 0 | else if (count == 2) |
1451 | 0 | { |
1452 | 0 | if ( bitmasks[na] & BitCAll ) |
1453 | 0 | { |
1454 | 0 | atomids[na] = AI_C; |
1455 | 0 | if (!visits[na]) |
1456 | 0 | TracePeptideChain(mol,na,r); |
1457 | 0 | } |
1458 | 0 | else if ( bitmasks[nb] & BitCAll ) |
1459 | 0 | { |
1460 | 0 | atomids[nb] = AI_C; |
1461 | 0 | if (!visits[nb]) |
1462 | 0 | TracePeptideChain(mol,nb,r); |
1463 | 0 | } |
1464 | 0 | } |
1465 | 0 | break; |
1466 | | |
1467 | 0 | case(AI_C): |
1468 | 0 | k = AI_O; |
1469 | 0 | for ( j = 0; j < count; j++ ) |
1470 | 0 | { |
1471 | 0 | if ( bitmasks[neighbour[j]] & BitNAll ) |
1472 | 0 | { |
1473 | 0 | atomids[neighbour[j]] = AI_N; |
1474 | 0 | if (!visits[neighbour[j]]) |
1475 | 0 | TracePeptideChain(mol,neighbour[j],r+1); |
1476 | 0 | } |
1477 | 0 | else if( bitmasks[neighbour[j]] & BitOAll ) |
1478 | 0 | { |
1479 | 0 | atomids[neighbour[j]] = k; |
1480 | 0 | resnos[neighbour[j]] = r; |
1481 | 0 | k = AI_OXT; /* OXT */ |
1482 | 0 | } |
1483 | 0 | } |
1484 | 0 | break; |
1485 | 0 | } |
1486 | 0 | } |
1487 | | |
1488 | | ////////////////////////////////////////////////////////////////////////////// |
1489 | | // Peptide Sidechains Perception |
1490 | | ////////////////////////////////////////////////////////////////////////////// |
1491 | | |
1492 | | bool OBChainsParser::DeterminePeptideSidechains(OBMol &mol) |
1493 | 0 | { |
1494 | 0 | int resid; |
1495 | 0 | int max = mol.NumAtoms(); |
1496 | |
|
1497 | 0 | for (int i = 0 ; i < max ; ++i) |
1498 | 0 | if (atomids[i] == AI_CA) |
1499 | 0 | { |
1500 | 0 | resid = IdentifyResidue(PDecisionTree, mol, i, resnos[i]); |
1501 | 0 | AssignResidue(mol,resnos[i],chains[i],resid); |
1502 | 0 | } |
1503 | |
|
1504 | 0 | return true; |
1505 | 0 | } |
1506 | | |
1507 | | void OBChainsParser::AssignResidue(OBMol &mol, int r, int c, int i) |
1508 | 0 | { |
1509 | 0 | int max = mol.NumAtoms(); |
1510 | |
|
1511 | 0 | for (int j = 0 ; j < max ; ++j) |
1512 | 0 | if ((resnos[j] == r) && (chains[j] == c) && !hetflags[j]) |
1513 | 0 | resids[j] = i; |
1514 | 0 | } |
1515 | | |
1516 | | int OBChainsParser::IdentifyResidue(void *tree, OBMol &mol, unsigned int seed, |
1517 | | int resno) |
1518 | 0 | { |
1519 | 0 | ByteCode *ptr; |
1520 | |
|
1521 | 0 | int AtomCount, BondCount; |
1522 | 0 | int curr,prev,bond; |
1523 | 0 | int bcount; |
1524 | 0 | int i,j; |
1525 | |
|
1526 | 0 | ptr = (ByteCode *) tree; |
1527 | 0 | bcount = 0; |
1528 | |
|
1529 | 0 | Stack[0].atom = seed; |
1530 | 0 | Stack[0].prev = seed; |
1531 | 0 | StackPtr = 0; |
1532 | |
|
1533 | 0 | ResMonoAtom[0] = seed; |
1534 | 0 | AtomCount = 1; |
1535 | 0 | BondCount = 0; |
1536 | |
|
1537 | 0 | OBAtom *atom, *nbr; |
1538 | 0 | vector<OBBond *>::iterator b; |
1539 | |
|
1540 | 0 | while( ptr ) { |
1541 | 0 | switch(ptr->type) |
1542 | 0 | { |
1543 | 0 | case(BC_IDENT): curr = Stack[StackPtr-1].atom; |
1544 | 0 | if( atomids[curr] == ptr->ident.value ) |
1545 | 0 | { |
1546 | 0 | bond = Stack[StackPtr-1].bond; |
1547 | 0 | ResMonoBond[BondCount++] = bond; |
1548 | 0 | ptr = ptr->ident.tcond; |
1549 | 0 | StackPtr--; |
1550 | 0 | } |
1551 | 0 | else |
1552 | 0 | ptr = ptr->ident.fcond; |
1553 | 0 | break; |
1554 | | |
1555 | 0 | case(BC_LOCAL): curr = Stack[StackPtr-1].atom; |
1556 | 0 | if( curr == ResMonoAtom[ptr->local.value] ) |
1557 | 0 | { |
1558 | 0 | bond = Stack[StackPtr-1].bond; |
1559 | 0 | ResMonoBond[BondCount++] = bond; |
1560 | 0 | ptr = ptr->local.tcond; |
1561 | 0 | StackPtr--; |
1562 | 0 | } |
1563 | 0 | else |
1564 | 0 | ptr = ptr->local.fcond; |
1565 | 0 | break; |
1566 | | |
1567 | 0 | case(BC_ELEM): curr = Stack[StackPtr-1].atom; |
1568 | 0 | if( mol.GetAtom(curr+1)->GetAtomicNum() == static_cast<unsigned int>(ptr->elem.value) ) |
1569 | 0 | { |
1570 | 0 | bond = Stack[StackPtr-1].bond; |
1571 | 0 | ResMonoAtom[AtomCount++] = curr; |
1572 | 0 | ResMonoBond[BondCount++] = bond; |
1573 | 0 | resnos[curr] = resno; |
1574 | 0 | ptr = ptr->elem.tcond; |
1575 | 0 | StackPtr--; |
1576 | 0 | } |
1577 | 0 | else |
1578 | 0 | ptr = ptr->elem.fcond; |
1579 | 0 | break; |
1580 | | |
1581 | 0 | case(BC_EVAL): bcount = 0; |
1582 | 0 | curr = Stack[StackPtr].atom; |
1583 | 0 | prev = Stack[StackPtr].prev; |
1584 | |
|
1585 | 0 | atom = mol.GetAtom(curr+1); // WARNING, potential atom index issue |
1586 | 0 | for (nbr = atom->BeginNbrAtom(b); nbr; nbr = atom->NextNbrAtom(b)) |
1587 | 0 | { |
1588 | 0 | if (nbr->GetAtomicNum() == OBElements::Hydrogen) |
1589 | 0 | continue; |
1590 | | |
1591 | 0 | j = nbr->GetIdx() - 1; |
1592 | 0 | if (!((curr == prev) && bitmasks[j]) && (j != prev)) |
1593 | 0 | { |
1594 | 0 | Stack[StackPtr].prev = curr; |
1595 | 0 | Stack[StackPtr].atom = j; |
1596 | 0 | Stack[StackPtr].bond = (*b)->GetIdx(); |
1597 | 0 | StackPtr++; |
1598 | 0 | bcount++; |
1599 | 0 | } |
1600 | 0 | } |
1601 | |
|
1602 | 0 | ptr = ptr->eval.next; |
1603 | 0 | break; |
1604 | | |
1605 | 0 | case(BC_COUNT): |
1606 | 0 | if( bcount == ptr->count.value ) |
1607 | 0 | { |
1608 | 0 | ptr = ptr->count.tcond; |
1609 | 0 | } |
1610 | 0 | else |
1611 | 0 | ptr = ptr->count.fcond; |
1612 | 0 | break; |
1613 | | |
1614 | 0 | case(BC_ASSIGN): |
1615 | 0 | for( i=0; i<AtomCount; i++ ) { |
1616 | 0 | if( !bitmasks[ResMonoAtom[i]]) |
1617 | 0 | { |
1618 | 0 | j = ptr->assign.atomid[i]; |
1619 | 0 | atomids[ResMonoAtom[i]] = j; |
1620 | 0 | } |
1621 | 0 | } |
1622 | 0 | for( i=0; i<BondCount; i++ ) |
1623 | 0 | { |
1624 | 0 | j = ptr->assign.bflags[i]; |
1625 | 0 | flags[ResMonoBond[i]] = j; |
1626 | 0 | } |
1627 | 0 | return( ptr->assign.resid ); |
1628 | 0 | break; |
1629 | | |
1630 | 0 | default: /* Illegal Instruction! */ |
1631 | 0 | return( 0 ); |
1632 | 0 | } // (switch) |
1633 | 0 | } // while (loop through atoms) |
1634 | 0 | return 0; |
1635 | 0 | } |
1636 | | |
1637 | | ////////////////////////////////////////////////////////////////////////////// |
1638 | | // Nucleic Backbone Perception |
1639 | | ///////////////////////////////////////////////////////////////////////////// |
1640 | | |
1641 | | bool OBChainsParser::DetermineNucleicBackbone(OBMol &mol) |
1642 | 0 | { |
1643 | 0 | ConstrainBackbone(mol, Nucleotide, MAXNUCLEIC); |
1644 | |
|
1645 | 0 | int i, max = mol.NumAtoms(); |
1646 | | |
1647 | | /* Order Nucleic Backbone */ |
1648 | |
|
1649 | 0 | for( i = 0 ; i < max ; i++ ) |
1650 | 0 | if( atomids[i] == -1 ) |
1651 | 0 | { |
1652 | 0 | if( bitmasks[i] & BitPTer ) |
1653 | 0 | { |
1654 | 0 | atomids[i] = AI_P; |
1655 | 0 | TraceNucleicChain(mol,i,1); |
1656 | 0 | } |
1657 | 0 | else if( bitmasks[i] & BitO5Ter ) |
1658 | 0 | { |
1659 | 0 | atomids[i] = AI_O5; |
1660 | 0 | TraceNucleicChain(mol,i,1); |
1661 | 0 | } |
1662 | 0 | } |
1663 | |
|
1664 | 0 | return true; |
1665 | 0 | } |
1666 | | |
1667 | | void OBChainsParser::TraceNucleicChain(OBMol &mol, unsigned int i, int r) |
1668 | 0 | { |
1669 | 0 | unsigned int neighbour[4]; |
1670 | 0 | int count; |
1671 | 0 | int j,k; |
1672 | |
|
1673 | 0 | OBAtom *atom, *nbr; |
1674 | 0 | vector<OBBond *>::iterator b; |
1675 | |
|
1676 | 0 | if (visits[i]) |
1677 | 0 | return; |
1678 | 0 | visits[i] = true; |
1679 | |
|
1680 | 0 | count = 0; |
1681 | 0 | atom = mol.GetAtom(i + 1); |
1682 | 0 | for (nbr = atom->BeginNbrAtom(b) ; nbr ; nbr = atom->NextNbrAtom(b)) |
1683 | 0 | if (nbr->GetAtomicNum() != OBElements::Hydrogen) |
1684 | 0 | neighbour[count++] = nbr->GetIdx() - 1; |
1685 | |
|
1686 | 0 | resnos[i] = r; |
1687 | |
|
1688 | 0 | switch( atomids[i] ) |
1689 | 0 | { |
1690 | 0 | case(AI_P): |
1691 | 0 | k = AI_O1P; /* O1P */ |
1692 | 0 | for( j=0; j<count; j++ ) |
1693 | 0 | { |
1694 | 0 | if( bitmasks[neighbour[j]] & BitO5 ) |
1695 | 0 | { |
1696 | 0 | atomids[neighbour[j]] = AI_O5; |
1697 | 0 | if (!visits[neighbour[j]]) |
1698 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1699 | 0 | } |
1700 | 0 | else if( bitmasks[neighbour[j]] & BitOP ) |
1701 | 0 | { |
1702 | 0 | atomids[neighbour[j]] = k; |
1703 | 0 | resnos[neighbour[j]] = r; |
1704 | 0 | k = AI_O2P; /* O2P */ |
1705 | 0 | } |
1706 | 0 | } |
1707 | |
|
1708 | 0 | break; |
1709 | | |
1710 | 0 | case(AI_O5): |
1711 | 0 | for( j=0; j<count; j++ ) |
1712 | 0 | if( bitmasks[neighbour[j]] & BitC5 ) |
1713 | 0 | { |
1714 | 0 | atomids[neighbour[j]] = AI_C5; |
1715 | 0 | if (!visits[neighbour[j]]) |
1716 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1717 | 0 | } |
1718 | |
|
1719 | 0 | break; |
1720 | | |
1721 | 0 | case(AI_C5): |
1722 | 0 | for( j=0 ; j<count; j++ ) |
1723 | 0 | if( bitmasks[neighbour[j]] & BitC4 ) |
1724 | 0 | { |
1725 | 0 | atomids[neighbour[j]] = AI_C4; |
1726 | 0 | if (!visits[neighbour[j]]) |
1727 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1728 | 0 | } |
1729 | |
|
1730 | 0 | break; |
1731 | | |
1732 | 0 | case(AI_C4): |
1733 | 0 | for( j=0; j<count; j++ ) |
1734 | 0 | { |
1735 | 0 | if( bitmasks[neighbour[j]] & BitC3 ) |
1736 | 0 | { |
1737 | 0 | atomids[neighbour[j]] = AI_C3; |
1738 | 0 | if (!visits[neighbour[j]]) |
1739 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1740 | 0 | } |
1741 | 0 | else if( bitmasks[neighbour[j]] & BitO4 ) |
1742 | 0 | { |
1743 | 0 | atomids[neighbour[j]] = AI_O4; |
1744 | 0 | resnos[neighbour[j]] = r; |
1745 | 0 | } |
1746 | 0 | } |
1747 | |
|
1748 | 0 | break; |
1749 | | |
1750 | 0 | case(AI_C3): |
1751 | 0 | for( j=0; j<count; j++ ) |
1752 | 0 | { |
1753 | 0 | if( bitmasks[neighbour[j]] & BitO3All ) |
1754 | 0 | { |
1755 | 0 | atomids[neighbour[j]] = AI_O3; |
1756 | 0 | if (!visits[neighbour[j]]) |
1757 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1758 | 0 | } |
1759 | 0 | else if( bitmasks[neighbour[j]] & BitC2All ) |
1760 | 0 | { |
1761 | 0 | atomids[neighbour[j]] = AI_C2; |
1762 | 0 | if (!visits[neighbour[j]]) |
1763 | 0 | TraceNucleicChain(mol,neighbour[j],r); |
1764 | 0 | } |
1765 | 0 | } |
1766 | |
|
1767 | 0 | break; |
1768 | | |
1769 | 0 | case(AI_O3): |
1770 | 0 | for( j=0; j<count; j++ ) |
1771 | 0 | if( bitmasks[neighbour[j]] & BitP ) |
1772 | 0 | { |
1773 | 0 | atomids[neighbour[j]] = AI_P; |
1774 | 0 | if (!visits[neighbour[j]]) |
1775 | 0 | TraceNucleicChain(mol,neighbour[j],r+1); |
1776 | 0 | } |
1777 | |
|
1778 | 0 | break; |
1779 | | |
1780 | 0 | case(AI_C2): |
1781 | 0 | for( j=0; j<count; j++ ) |
1782 | 0 | { |
1783 | 0 | if( bitmasks[neighbour[j]] & BitC1 ) |
1784 | 0 | { |
1785 | 0 | atomids[neighbour[j]] = AI_C1; |
1786 | 0 | resnos[neighbour[j]] = r; |
1787 | 0 | } |
1788 | 0 | else if( bitmasks[neighbour[j]] & BitO2 ) |
1789 | 0 | { |
1790 | 0 | atomids[neighbour[j]] = AI_O2; |
1791 | 0 | resnos[neighbour[j]] = r; |
1792 | 0 | } |
1793 | 0 | } |
1794 | |
|
1795 | 0 | break; |
1796 | 0 | } |
1797 | 0 | } |
1798 | | |
1799 | | ////////////////////////////////////////////////////////////////////////////// |
1800 | | // Nucleic Sidechains Perception |
1801 | | ////////////////////////////////////////////////////////////////////////////// |
1802 | | |
1803 | | bool OBChainsParser::DetermineNucleicSidechains(OBMol &mol) |
1804 | 0 | { |
1805 | 0 | for( unsigned int i = 0 ; i < mol.NumAtoms() ; i++ ) |
1806 | 0 | if( atomids[i] == 49 ) |
1807 | 0 | { |
1808 | 0 | int resid = IdentifyResidue(NDecisionTree,mol,i,resnos[i]); |
1809 | 0 | AssignResidue(mol,resnos[i],chains[i],resid); |
1810 | 0 | } |
1811 | |
|
1812 | 0 | return true; |
1813 | 0 | } |
1814 | | |
1815 | | ////////////////////////////////////////////////////////////////////////////// |
1816 | | // Hydrogens Perception |
1817 | | ////////////////////////////////////////////////////////////////////////////// |
1818 | | |
1819 | | bool OBChainsParser::DetermineHydrogens(OBMol &mol) |
1820 | 0 | { |
1821 | 0 | OBAtom *atom, *nbr; |
1822 | 0 | int idx,sidx; |
1823 | |
|
1824 | 0 | int max = mol.NumAtoms(); |
1825 | 0 | for ( int i = 0 ; i < max ; i++ ) |
1826 | 0 | hcounts[i] = 0; |
1827 | | |
1828 | | /* First Pass */ |
1829 | |
|
1830 | 0 | vector<OBAtom*>::iterator a; |
1831 | 0 | vector<OBBond*>::iterator b; |
1832 | |
|
1833 | 0 | for(atom = mol.BeginAtom(a); atom ; atom = mol.NextAtom(a)) |
1834 | 0 | if(atom->GetAtomicNum() == OBElements::Hydrogen) |
1835 | 0 | { |
1836 | 0 | nbr = atom->BeginNbrAtom(b); |
1837 | 0 | if (nbr != nullptr) |
1838 | 0 | { |
1839 | 0 | idx = atom->GetIdx() - 1; |
1840 | 0 | sidx = nbr->GetIdx() - 1; |
1841 | |
|
1842 | 0 | hcounts[idx] = ++hcounts[sidx]; |
1843 | 0 | hetflags[idx] = hetflags[sidx]; |
1844 | 0 | atomids[idx] = atomids[sidx]; |
1845 | 0 | resids[idx] = resids[sidx]; |
1846 | 0 | resnos[idx] = resnos[sidx]; |
1847 | 0 | chains[idx] = chains[sidx]; |
1848 | 0 | } |
1849 | 0 | } |
1850 | | |
1851 | | /* Second Pass */ |
1852 | |
|
1853 | 0 | for(atom = mol.BeginAtom(a) ; atom ; atom = mol.NextAtom(a)) |
1854 | 0 | if (atom->GetAtomicNum() == OBElements::Hydrogen) |
1855 | 0 | { |
1856 | 0 | nbr = atom->BeginNbrAtom(b); |
1857 | 0 | if (nbr != nullptr && hcounts[nbr->GetIdx()-1] == 1) |
1858 | 0 | hcounts[atom->GetIdx()-1] = 0; |
1859 | 0 | } |
1860 | |
|
1861 | 0 | return true; |
1862 | 0 | } |
1863 | | |
1864 | | ////////////////////////////////////////////////////////////////////////////// |
1865 | | // Utility Functions |
1866 | | ////////////////////////////////////////////////////////////////////////////// |
1867 | | |
1868 | | // validated |
1869 | | void OBChainsParser::DefineMonomer(void **tree, int resid, const char *smiles) |
1870 | 162 | { |
1871 | 162 | int i; |
1872 | | |
1873 | 162 | MonoAtomCount = 0; |
1874 | 162 | MonoBondCount = 0; |
1875 | | |
1876 | 162 | ParseSmiles(smiles,-1); |
1877 | | |
1878 | 1.14k | for( i=0; i<MonoBondCount; i++ ) |
1879 | 978 | MonoBond[i].index = -1; |
1880 | 1.21k | for( i=0; i<MonoAtomCount; i++ ) |
1881 | 1.05k | MonoAtom[i].index = -1; |
1882 | 162 | AtomIndex = BondIndex = 0; |
1883 | | |
1884 | 162 | StackPtr = 0; |
1885 | 162 | GenerateByteCodes((ByteCode**)tree, resid, 0, 0, 0 ); |
1886 | 162 | } |
1887 | | |
1888 | | int OBChainsParser::IdentifyElement(char *ptr) |
1889 | 1.05k | { |
1890 | 1.05k | int ch; |
1891 | | |
1892 | 1.05k | ch = toupper(ptr[1]); |
1893 | 1.05k | switch( toupper(ptr[0]) ) |
1894 | 1.05k | { |
1895 | 1.05k | case(' '): switch( ch ) |
1896 | 1.05k | { |
1897 | 0 | case('B'): return( 5 ); |
1898 | 750 | case('C'): return( 6 ); |
1899 | 0 | case('D'): return( 1 ); |
1900 | 0 | case('F'): return( 9 ); |
1901 | 0 | case('H'): return( 1 ); |
1902 | 0 | case('I'): return( 53 ); |
1903 | 0 | case('K'): return( 19 ); |
1904 | 0 | case('L'): return( 1 ); |
1905 | 192 | case('N'): return( 7 ); |
1906 | 102 | case('O'): return( 8 ); |
1907 | 0 | case('P'): return( 15 ); |
1908 | 12 | case('S'): return( 16 ); |
1909 | 0 | case('U'): return( 92 ); |
1910 | 0 | case('V'): return( 23 ); |
1911 | 0 | case('W'): return( 74 ); |
1912 | 0 | case('Y'): return( 39 ); |
1913 | 1.05k | } |
1914 | 0 | break; |
1915 | | |
1916 | 0 | case('A'): switch( ch ) |
1917 | 0 | { |
1918 | 0 | case('C'): return( 89 ); |
1919 | 0 | case('G'): return( 47 ); |
1920 | 0 | case('L'): return( 13 ); |
1921 | 0 | case('M'): return( 95 ); |
1922 | 0 | case('R'): return( 18 ); |
1923 | 0 | case('S'): return( 33 ); |
1924 | 0 | case('T'): return( 85 ); |
1925 | 0 | case('U'): return( 79 ); |
1926 | 0 | } |
1927 | 0 | break; |
1928 | | |
1929 | 0 | case('B'): switch( ch ) |
1930 | 0 | { |
1931 | 0 | case('A'): return( 56 ); |
1932 | 0 | case('E'): return( 4 ); |
1933 | 0 | case('I'): return( 83 ); |
1934 | 0 | case('K'): return( 97 ); |
1935 | 0 | case('R'): return( 35 ); |
1936 | 0 | case(' '): return( 5 ); |
1937 | 0 | } |
1938 | 0 | break; |
1939 | | |
1940 | 0 | case('C'): switch( ch ) |
1941 | 0 | { |
1942 | 0 | case('A'): return( 20 ); |
1943 | 0 | case('D'): return( 48 ); |
1944 | 0 | case('E'): return( 58 ); |
1945 | 0 | case('F'): return( 98 ); |
1946 | 0 | case('L'): return( 17 ); |
1947 | 0 | case('M'): return( 96 ); |
1948 | 0 | case('O'): return( 27 ); |
1949 | 0 | case('R'): return( 24 ); |
1950 | 0 | case('S'): return( 55 ); |
1951 | 0 | case('U'): return( 29 ); |
1952 | 0 | case(' '): return( 6 ); |
1953 | 0 | } |
1954 | 0 | break; |
1955 | | |
1956 | 0 | case('D'): if( ch=='Y' ) |
1957 | 0 | { |
1958 | 0 | return( 66 ); |
1959 | 0 | } |
1960 | 0 | else if( ch==' ' ) |
1961 | 0 | return( 1 ); |
1962 | 0 | break; |
1963 | | |
1964 | 0 | case('E'): if( ch=='R' ) |
1965 | 0 | { |
1966 | 0 | return( 68 ); |
1967 | 0 | } |
1968 | 0 | else if( ch=='S' ) |
1969 | 0 | { |
1970 | 0 | return( 99 ); |
1971 | 0 | } |
1972 | 0 | else if( ch=='U' ) |
1973 | 0 | return( 63 ); |
1974 | 0 | break; |
1975 | | |
1976 | 0 | case('F'): if( ch=='E' ) |
1977 | 0 | { |
1978 | 0 | return( 26 ); |
1979 | 0 | } |
1980 | 0 | else if( ch=='M' ) |
1981 | 0 | { |
1982 | 0 | return( 100 ); |
1983 | 0 | } |
1984 | 0 | else if( ch=='R' ) |
1985 | 0 | { |
1986 | 0 | return( 87 ); |
1987 | 0 | } |
1988 | 0 | else if( ch=='F' ) |
1989 | 0 | return( 9 ); |
1990 | 0 | break; |
1991 | | |
1992 | 0 | case('G'): if( ch=='A' ) |
1993 | 0 | { |
1994 | 0 | return( 31 ); |
1995 | 0 | } |
1996 | 0 | else if( ch=='D' ) |
1997 | 0 | { |
1998 | 0 | return( 64 ); |
1999 | 0 | } |
2000 | 0 | else if( ch=='E' ) |
2001 | 0 | return( 32 ); |
2002 | 0 | break; |
2003 | | |
2004 | 0 | case('H'): if( ch=='E' ) |
2005 | 0 | { |
2006 | 0 | return( 2 ); |
2007 | 0 | } |
2008 | 0 | else if( ch=='F' ) |
2009 | 0 | { |
2010 | 0 | return( 72 ); |
2011 | 0 | } |
2012 | 0 | else if( ch=='G' ) |
2013 | 0 | { |
2014 | 0 | return( 80 ); |
2015 | 0 | } |
2016 | 0 | else if( ch=='O' ) |
2017 | 0 | { |
2018 | 0 | return( 67 ); |
2019 | 0 | } |
2020 | 0 | else if( ch==' ' ) |
2021 | 0 | return( 1 ); |
2022 | 0 | break; |
2023 | | |
2024 | 0 | case('I'): if( ch=='N' ) |
2025 | 0 | { |
2026 | 0 | return( 49 ); |
2027 | 0 | } |
2028 | 0 | else if( ch=='R' ) |
2029 | 0 | { |
2030 | 0 | return( 77 ); |
2031 | 0 | } |
2032 | 0 | else if( ch==' ' ) |
2033 | 0 | return( 53 ); |
2034 | 0 | break; |
2035 | | |
2036 | 0 | case('K'): if( ch=='R' ) |
2037 | 0 | { |
2038 | 0 | return( 36 ); |
2039 | 0 | } |
2040 | 0 | else if( ch==' ' ) |
2041 | 0 | return( 19 ); |
2042 | 0 | break; |
2043 | | |
2044 | 0 | case('L'): if( ch=='A' ) |
2045 | 0 | { |
2046 | 0 | return( 57 ); |
2047 | 0 | } |
2048 | 0 | else if( ch=='I' ) |
2049 | 0 | { |
2050 | 0 | return( 3 ); |
2051 | 0 | } |
2052 | 0 | else if( (ch=='R') || (ch=='W') ) |
2053 | 0 | { |
2054 | 0 | return( 103 ); |
2055 | 0 | } |
2056 | 0 | else if( ch=='U' ) |
2057 | 0 | { |
2058 | 0 | return( 71 ); |
2059 | 0 | } |
2060 | 0 | else if( ch==' ' ) |
2061 | 0 | return( 1 ); |
2062 | 0 | break; |
2063 | | |
2064 | 0 | case('M'): if( ch=='D' ) |
2065 | 0 | { |
2066 | 0 | return( 101 ); |
2067 | 0 | } |
2068 | 0 | else if( ch=='G' ) |
2069 | 0 | { |
2070 | 0 | return( 12 ); |
2071 | 0 | } |
2072 | 0 | else if( ch=='N' ) |
2073 | 0 | { |
2074 | 0 | return( 25 ); |
2075 | 0 | } |
2076 | 0 | else if( ch=='O' ) |
2077 | 0 | return( 42 ); |
2078 | 0 | break; |
2079 | | |
2080 | 0 | case('N'): switch( ch ) |
2081 | 0 | { |
2082 | 0 | case('A'): return( 11 ); |
2083 | 0 | case('B'): return( 41 ); |
2084 | 0 | case('D'): return( 60 ); |
2085 | 0 | case('E'): return( 10 ); |
2086 | 0 | case('I'): return( 28 ); |
2087 | 0 | case('O'): return( 102 ); |
2088 | 0 | case('P'): return( 93 ); |
2089 | 0 | case(' '): return( 7 ); |
2090 | 0 | } |
2091 | 0 | break; |
2092 | | |
2093 | 0 | case('O'): if( ch=='S' ) |
2094 | 0 | { |
2095 | 0 | return( 76 ); |
2096 | 0 | } |
2097 | 0 | else if( ch==' ' ) |
2098 | 0 | return( 8 ); |
2099 | 0 | break; |
2100 | | |
2101 | 0 | case('P'): switch( ch ) |
2102 | 0 | { |
2103 | 0 | case('A'): return( 91 ); |
2104 | 0 | case('B'): return( 82 ); |
2105 | 0 | case('D'): return( 46 ); |
2106 | 0 | case('M'): return( 61 ); |
2107 | 0 | case('O'): return( 84 ); |
2108 | 0 | case('R'): return( 59 ); |
2109 | 0 | case('T'): return( 78 ); |
2110 | 0 | case('U'): return( 94 ); |
2111 | 0 | case(' '): return( 15 ); |
2112 | 0 | } |
2113 | 0 | break; |
2114 | | |
2115 | 0 | case('R'): switch( ch ) |
2116 | 0 | { |
2117 | 0 | case('A'): return( 88 ); |
2118 | 0 | case('B'): return( 37 ); |
2119 | 0 | case('E'): return( 75 ); |
2120 | 0 | case('H'): return( 45 ); |
2121 | 0 | case('N'): return( 86 ); |
2122 | 0 | case('U'): return( 44 ); |
2123 | 0 | } |
2124 | 0 | break; |
2125 | | |
2126 | 0 | case('S'): switch( ch ) |
2127 | 0 | { |
2128 | 0 | case('B'): return( 51 ); |
2129 | 0 | case('C'): return( 21 ); |
2130 | 0 | case('E'): return( 34 ); |
2131 | 0 | case('I'): return( 14 ); |
2132 | 0 | case('M'): return( 62 ); |
2133 | 0 | case('N'): return( 50 ); |
2134 | 0 | case('R'): return( 38 ); |
2135 | 0 | case(' '): return( 16 ); |
2136 | 0 | } |
2137 | 0 | break; |
2138 | | |
2139 | 0 | case('T'): switch( ch ) |
2140 | 0 | { |
2141 | 0 | case('A'): return( 73 ); |
2142 | 0 | case('B'): return( 65 ); |
2143 | 0 | case('C'): return( 43 ); |
2144 | 0 | case('E'): return( 52 ); |
2145 | 0 | case('H'): return( 90 ); |
2146 | 0 | case('I'): return( 22 ); |
2147 | 0 | case('L'): return( 81 ); |
2148 | 0 | case('M'): return( 69 ); |
2149 | 0 | } |
2150 | 0 | break; |
2151 | | |
2152 | 0 | case('U'): if( ch==' ' ) |
2153 | 0 | return( 92 ); |
2154 | 0 | break; |
2155 | | |
2156 | 0 | case('V'): if( ch==' ' ) |
2157 | 0 | return( 23 ); |
2158 | 0 | break; |
2159 | | |
2160 | 0 | case('W'): if( ch==' ' ) |
2161 | 0 | return( 74 ); |
2162 | 0 | break; |
2163 | | |
2164 | 0 | case('X'): if( ch=='E' ) |
2165 | 0 | return( 54 ); |
2166 | 0 | break; |
2167 | | |
2168 | 0 | case('Y'): if( ch=='B' ) |
2169 | 0 | { |
2170 | 0 | return( 70 ); |
2171 | 0 | } |
2172 | 0 | else if( ch==' ' ) |
2173 | 0 | return( 39 ); |
2174 | 0 | break; |
2175 | | |
2176 | 0 | case('Z'): if( ch=='N' ) |
2177 | 0 | { |
2178 | 0 | return( 30 ); |
2179 | 0 | } |
2180 | 0 | else if( ch=='R' ) |
2181 | 0 | return( 40 ); |
2182 | 0 | break; |
2183 | 1.05k | } |
2184 | | |
2185 | 0 | if( (*ptr>='0') && (*ptr<='9') ) |
2186 | 0 | if( (ch=='H') || (ch=='D') ) |
2187 | 0 | return( 1 ); /* Hydrogen */ |
2188 | | |
2189 | 0 | return( 0 ); |
2190 | 0 | } |
2191 | | |
2192 | | const char *OBChainsParser::ParseSmiles(const char *ptr, int prev) |
2193 | 318 | { |
2194 | 318 | char *name; |
2195 | 318 | int atomid; |
2196 | 318 | int next; |
2197 | 318 | int type; |
2198 | 318 | int ch; |
2199 | | |
2200 | 318 | type = 0; |
2201 | 2.59k | while( (ch = *ptr++) ) |
2202 | 2.43k | { |
2203 | 2.43k | switch( ch ) |
2204 | 2.43k | { |
2205 | 786 | case('-'): type = BF_SINGLE; |
2206 | 786 | break; |
2207 | 30 | case('='): type = BF_DOUBLE; |
2208 | 30 | break; |
2209 | 0 | case('#'): type = BF_TRIPLE; |
2210 | 0 | break; |
2211 | 90 | case('^'): type = BF_SINGLE|BF_AROMATIC; |
2212 | 90 | break; |
2213 | 72 | case('~'): type = BF_DOUBLE|BF_AROMATIC; |
2214 | 72 | break; |
2215 | | |
2216 | 156 | case(')'): return( ptr ); |
2217 | 0 | case('.'): prev = -1; |
2218 | 0 | break; |
2219 | 156 | case('('): ptr = ParseSmiles(ptr,prev); |
2220 | 156 | break; |
2221 | | |
2222 | 1.14k | default: |
2223 | 1.14k | atomid = ch-'0'; |
2224 | 1.88k | while( isdigit(*ptr) ) |
2225 | 744 | atomid = (atomid*10)+(*ptr++)-'0'; |
2226 | | |
2227 | 4.92k | for( next=0; next<MonoAtomCount; next++ ) |
2228 | 3.87k | if( MonoAtom[next].atomid == atomid ) |
2229 | 84 | break; |
2230 | | |
2231 | 1.14k | if( next == MonoAtomCount ) |
2232 | 1.05k | { |
2233 | 1.05k | name = ChainsAtomName[atomid]; |
2234 | 1.05k | MonoAtom[next].elem = IdentifyElement(name); |
2235 | 1.05k | MonoAtom[next].atomid = atomid; |
2236 | 1.05k | MonoAtom[next].bcount = 0; |
2237 | 1.05k | MonoAtomCount++; |
2238 | 1.05k | } |
2239 | | |
2240 | 1.14k | if( prev != -1 ) |
2241 | 978 | { |
2242 | 978 | MonoBond[MonoBondCount].flag = type; |
2243 | 978 | MonoBond[MonoBondCount].src = prev; |
2244 | 978 | MonoBond[MonoBondCount].dst = next; |
2245 | 978 | MonoBondCount++; |
2246 | | |
2247 | 978 | MonoAtom[prev].bcount++; |
2248 | 978 | MonoAtom[next].bcount++; |
2249 | 978 | } |
2250 | 1.14k | prev = next; |
2251 | 2.43k | } |
2252 | 2.43k | } |
2253 | 162 | return( ptr-1 ); |
2254 | 318 | } |
2255 | | |
2256 | | } // end namespace OpenBabel |
2257 | | |
2258 | | //! \file chains.cpp |
2259 | | //! \brief Parse for macromolecule chains and residues. |