/src/INCHI-1-SRC/INCHI_BASE/src/readinch.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * International Chemical Identifier (InChI) |
3 | | * Version 1 |
4 | | * Software version 1.06 |
5 | | * December 15, 2020 |
6 | | * |
7 | | * The InChI library and programs are free software developed under the |
8 | | * auspices of the International Union of Pure and Applied Chemistry (IUPAC). |
9 | | * Originally developed at NIST. |
10 | | * Modifications and additions by IUPAC and the InChI Trust. |
11 | | * Some portions of code were developed/changed by external contributors |
12 | | * (either contractor or volunteer) which are listed in the file |
13 | | * 'External-contributors' included in this distribution. |
14 | | * |
15 | | * IUPAC/InChI-Trust Licence No.1.0 for the |
16 | | * International Chemical Identifier (InChI) |
17 | | * Copyright (C) IUPAC and InChI Trust |
18 | | * |
19 | | * This library is free software; you can redistribute it and/or modify it |
20 | | * under the terms of the IUPAC/InChI Trust InChI Licence No.1.0, |
21 | | * or any later version. |
22 | | * |
23 | | * Please note that this library is distributed WITHOUT ANY WARRANTIES |
24 | | * whatsoever, whether expressed or implied. |
25 | | * See the IUPAC/InChI-Trust InChI Licence No.1.0 for more details. |
26 | | * |
27 | | * You should have received a copy of the IUPAC/InChI Trust InChI |
28 | | * Licence No. 1.0 with this library; if not, please e-mail: |
29 | | * |
30 | | * info@inchi-trust.org |
31 | | * |
32 | | */ |
33 | | |
34 | | |
35 | | #include <stdlib.h> |
36 | | #include <limits.h> |
37 | | #include <math.h> |
38 | | #include <float.h> |
39 | | #include <string.h> |
40 | | #include <ctype.h> |
41 | | |
42 | | #include "mode.h" |
43 | | |
44 | | #include "ichierr.h" |
45 | | #include "extr_ct.h" |
46 | | #include "ichi_io.h" |
47 | | |
48 | | #include "inchi_api.h" |
49 | | #include "readinch.h" |
50 | | |
51 | 0 | #define NO_ATOM (-1) /* non-existent (central) atom */ |
52 | | |
53 | | |
54 | | #ifndef AB_MAX_WELL_DEFINED_PARITY |
55 | | #define AB_MAX_WELL_DEFINED_PARITY inchi_max(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) /* 1, 2 => well defined parities, including 'unknown' */ |
56 | | #endif |
57 | | |
58 | | #ifndef AB_MIN_WELL_DEFINED_PARITY |
59 | | #define AB_MIN_WELL_DEFINED_PARITY inchi_min(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) /* min(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) */ |
60 | | #endif |
61 | | |
62 | | |
63 | | #if ( defined(TARGET_API_LIB) || defined(TARGET_EXE_USING_API) ) |
64 | | |
65 | | #ifndef AB_PARITY_UNKN |
66 | | #define AB_PARITY_UNKN 3 /* 3 => user marked as unknown parity */ |
67 | | #endif |
68 | | #ifndef AB_PARITY_UNDF |
69 | | #define AB_PARITY_UNDF 4 /* 4 => parity cannot be defined because of symmetry or not well defined geometry */ |
70 | | #endif |
71 | | |
72 | 0 | #define ATOM_PARITY_WELL_DEF(X) (AB_MIN_WELL_DEFINED_PARITY <= (X) && (X) <= AB_MAX_WELL_DEFINED_PARITY) |
73 | | |
/*
 * Layout of a packed stereobond parity value as used in this file:
 *   bits 0..2 (SB_PARITY_MASK) hold the first parity,
 *   bits 3..5 (SB_PARITY_FLAG) hold a second parity, obtained by shifting
 *   right by SB_PARITY_SHFT.
 * Original notes: the first field "refers to connected structure"; the
 * second field is set when the "disconnected structure" parity differs.
 * NOTE(review): the exact connected/disconnected semantics are defined
 * outside this file - confirm before relying on them.
 */
#define SB_PARITY_FLAG 0x38 /* mask of the second (shifted) parity field */

#define SB_PARITY_SHFT 3    /* shift that moves the second field to bits 0..2 */

#define SB_PARITY_MASK 0x07 /* mask of the first parity field */

/* The argument is parenthesized so that expressions such as (a | b) are masked as a whole. */
#define SB_PARITY_1(X) ((X) & SB_PARITY_MASK) /* first parity field */

#define SB_PARITY_2(X) (((X) >> SB_PARITY_SHFT) & SB_PARITY_MASK) /* second parity field */
83 | | |
84 | | |
85 | | |
86 | | #endif /* #if ( defined(TARGET_API_LIB) || defined(TARGET_EXE_USING_API) ) */ |
87 | | |
88 | | |
89 | | #if ( defined( TARGET_LIB_FOR_WINCHI ) || defined(TARGET_EXE_STANDALONE) ) |
90 | | int Extract0DParities( inp_ATOM *at, |
91 | | int nNumAtoms, |
92 | | inchi_Stereo0D *stereo0D, |
93 | | int num_stereo0D, |
94 | | char *pStrErr, |
95 | | int *err, |
96 | | int vABParityUnknown ); |
97 | | #endif |
98 | | |
99 | | |
100 | | void find_and_interpret_structure_header( char *szLine, |
101 | | char *pSdfLabel, |
102 | | char *pSdfValue, |
103 | | unsigned long *Id, |
104 | | int hlen, |
105 | | ReadINCHI_CtlData *ir ); |
106 | | |
107 | | |
108 | | |
109 | | /****************************************************************************/ |
110 | | inchi_Stereo0D * CreateInchi_Stereo0D( int num_stereo0D ) |
111 | 0 | { |
112 | 0 | return (inchi_Stereo0D*) inchi_calloc( num_stereo0D, sizeof( inchi_Stereo0D ) ); |
113 | 0 | } |
114 | | |
115 | | |
116 | | /****************************************************************************/ |
117 | | void FreeInchi_Stereo0D( inchi_Stereo0D **stereo0D ) |
118 | 0 | { |
119 | 0 | if (stereo0D && *stereo0D) |
120 | 0 | { |
121 | 0 | inchi_free( *stereo0D ); |
122 | 0 | *stereo0D = NULL; |
123 | 0 | } |
124 | 0 | } |
125 | | |
126 | | |
127 | | /****************************************************************************/ |
/*
 * Extract0DParities
 *
 * Copies the 0D stereo descriptors stereo0D[0..num_stereo0D-1] into the
 * per-atom stereo fields of at[0..nNumAtoms-1]:
 *   - tetrahedral centers: at[a2].p_parity and at[a2].p_orig_at_num[0..3];
 *   - double bonds and allenes/cumulenes: sb_parity[], sb_ord[], sn_ord[]
 *     and sn_orig_at_num[] of both end atoms.
 *
 * Nothing is done when stereo0D is NULL or num_stereo0D <= 0.
 * A descriptor with an unrecognized parity, or one that fails the
 * validation below, is reported through TREAT_ERR( *err, 0, ... ) and
 * skipped; the remaining descriptors are still processed.
 * NOTE(review): TREAT_ERR is defined elsewhere; it is invoked here with a
 * new error code of 0 and refers to pStrErr - confirm its exact effect.
 *
 * After the loop FixUnkn0DStereoBonds( at, nNumAtoms ) is called; when
 * TARGET_API_LIB is defined, ReconcileAllCmlBondParities() is called too and
 * a non-zero result is appended to pStrErr as text.
 *
 * The function always returns 0.
 */
128 | | int Extract0DParities( inp_ATOM *at, |
129 | | int nNumAtoms, |
130 | | inchi_Stereo0D *stereo0D, |
131 | | int num_stereo0D, |
132 | | char *pStrErr, |
133 | | int *err, |
134 | | int vABParityUnknown ) |
135 | 0 | { |
136 | | |
137 | | /* |
138 | | vABParityUnknown holds actual value of an internal constant signifying |
139 | | unknown parity: either the same as for undefined parity (default==standard) |
140 | | or a specific one (non-std; requested by SLUUD switch). |
141 | | */ |
142 | 0 | if (stereo0D && num_stereo0D > 0) |
143 | 0 | { |
144 | 0 | int i0D, a2, k, k_prev, type, j, j1, j2, len, parity, parityNM; |
145 | 0 | int sb_ord_from_i1, sb_ord_from_i2, sn_ord_from_i1, sn_ord_from_i2; |
146 | 0 | AT_NUMB i1n, i2n, i1, i2; |
147 | 

148 | 0 | for (i0D = 0; i0D < num_stereo0D; i0D++) |
149 | 0 | { |
/* The descriptor's parity value packs two fields: the part selected by
   SB_PARITY_MASK (parity) and the part selected by SB_PARITY_FLAG, shifted
   down by SB_PARITY_SHFT (parityNM). */
150 | 0 | parity = ( stereo0D[i0D].parity & SB_PARITY_MASK ); |
151 | 0 | parityNM = ( stereo0D[i0D].parity & SB_PARITY_FLAG ) >> SB_PARITY_SHFT; |
152 | 

/* Only ODD, EVEN, UNKNOWN and UNDEFINED are accepted as the main parity. */
153 | 0 | if (parity == INCHI_PARITY_NONE || |
154 | 0 | parity != INCHI_PARITY_ODD && parity != INCHI_PARITY_EVEN && |
155 | 0 | parity != INCHI_PARITY_UNKNOWN && parity != INCHI_PARITY_UNDEFINED) |
156 | 0 | { |
157 | 0 | char szTemp[16]; |
158 | 0 | sprintf( szTemp, "#%d", i0D + 1 ); |
159 | 0 | TREAT_ERR( *err, 0, "Wrong 0D stereo descriptor(s):" ); |
160 | 0 | TREAT_ERR( *err, 0, szTemp ); |
161 | 0 | continue; /* warning */ |
162 | 0 | } |
163 | | |
164 | 0 | type = stereo0D[i0D].type; |
165 | 0 | a2 = stereo0D[i0D].central_atom; /* central atom or -1 */ |
/* j doubles as the validation state: it stays at -1 if the type/central-atom
   combination is rejected, reaches 4 only when all four neighbors pass the
   tests, and is set to a negative "error flag" by the bond checks below. */
166 | 0 | j = -1; |
167 | 0 | len = 0; |
168 | 0 | sb_ord_from_i1 = sb_ord_from_i2 = sn_ord_from_i1 = sn_ord_from_i2 = -1; |
169 | 0 | i1n = i2n = i1 = i2 = MAX_ATOMS + 1; |
170 | 

/* Tetrahedral and allene descriptors need a valid central atom index;
   double-bond descriptors need central_atom == NO_ATOM. */
171 | 0 | if (( type == INCHI_StereoType_Tetrahedral || |
172 | 0 | type == INCHI_StereoType_Allene ) && |
173 | 0 | 0 <= a2 && a2 < nNumAtoms || |
174 | 0 | type == INCHI_StereoType_DoubleBond && |
175 | 0 | a2 == NO_ATOM) |
176 | 0 | { |
177 | | /* test the quadruplet */ |
/* Each neighbor index must be inside [0, nNumAtoms) and differ from the
   one immediately before it; the loop stops early (j < 4) otherwise. */
178 | 0 | for (j = 0, k_prev = -1; j < 4; j++, k_prev = k) |
179 | 0 | { |
180 | 0 | k = stereo0D[i0D].neighbor[j]; |
181 | 0 | if (k < 0 || k >= nNumAtoms || k_prev == k) |
182 | 0 | break; |
183 | | /* tetrahedral atom connectivity test */ |
184 | 0 | if (type == INCHI_StereoType_Tetrahedral && |
185 | 0 | k != a2 && |
186 | 0 | !is_in_the_list( at[a2].neighbor, (AT_NUMB) k, at[a2].valence )) |
187 | 0 | { |
188 | 0 | break; |
189 | 0 | } |
190 | | /* Double bond, Cumulene and allene are tested in the next if() */ |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | | /* Find in the adjacency lists the double bond neighbor that leads to the opposite atom */ |
195 | 0 | if (j == 4 && ( type == INCHI_StereoType_Allene || |
196 | 0 | type == INCHI_StereoType_DoubleBond )) |
197 | 0 | { |
198 | 0 | AT_NUMB *p1 = NULL, *p2 = NULL, *q1 = NULL, *q2 = NULL; |
/* neighbor[] order is: substituent of end 1, end 1, end 2, substituent of end 2 */
199 | 0 | i1n = (AT_NUMB) stereo0D[i0D].neighbor[0]; |
200 | 0 | i1 = (AT_NUMB) stereo0D[i0D].neighbor[1]; |
201 | 0 | i2 = (AT_NUMB) stereo0D[i0D].neighbor[2]; |
202 | 0 | i2n = (AT_NUMB) stereo0D[i0D].neighbor[3]; |
203 | | |
204 | | /* find q1 and q2 */ |
205 | 0 | if (!( q1 = is_in_the_list( at[i1].neighbor, i1n, at[i1].valence ) ) || |
206 | 0 | !( q2 = is_in_the_list( at[i2].neighbor, i2n, at[i2].valence ) )) |
207 | 0 | { |
208 | 0 | j = -2; /* error flag */ |
209 | 0 | } |
210 | 0 | else |
211 | 0 | { |
212 | | /* allene or cumulene; follow double bonds from i1 to i2 */ |
213 | 0 | if (!( p1 = is_in_the_list( at[i1].neighbor, i2, at[i1].valence ) )) |
214 | 0 | { |
215 | | /* at[i1] and at[i2] are not connected: can be only allene or cumulene */ |
216 | 

217 | 0 | AT_NUMB prev, cur, next; |
218 | 0 | int num_dbond, i, next_ord, half_len; |
219 | 

220 | 0 | cur = next = i1; |
221 | 0 | len = half_len = 0; |
/* Walk from i1 along double bonds, one step per iteration. The walk
   continues only while the current atom has exactly one double bond that
   does not lead back to the previous atom; len counts the steps taken. */
222 | 0 | while (len < 20) |
223 | 0 | { |
224 | | /* arbitrary very high upper limit to prevent infinite loop */ |
225 | 0 | prev = cur; |
226 | 0 | cur = next; |
227 | 

228 | 0 | for (i = 0, num_dbond = 0; i < at[cur].valence; i++) |
229 | 0 | { |
230 | | /* follow double bond path && avoid going back */ |
231 | 0 | if (at[cur].bond_type[i] == BOND_TYPE_DOUBLE && |
232 | 0 | prev != at[cur].neighbor[i]) |
233 | 0 | { |
234 | 0 | next = at[cur].neighbor[i]; |
235 | 0 | next_ord = i; |
236 | 0 | num_dbond++; |
237 | 0 | } |
238 | 0 | } |
239 | 

240 | 0 | if (num_dbond == 1 && next != i1) |
241 | 0 | { |
242 | 0 | len++; |
/* the first step gives the ordinal of the stereo bond as seen from i1 */
243 | 0 | if (len == 1) |
244 | 0 | sb_ord_from_i1 = next_ord; |
245 | 

/* for an allene, remember how far from i1 the central atom a2 is */
246 | 0 | if (type == INCHI_StereoType_Allene && next == (AT_NUMB) a2) |
247 | 0 | half_len = len; |
248 | 0 | } |
249 | 0 | else |
250 | 0 | break; |
251 | 0 | } |
252 | 

/* Accept only if the walk stopped exactly at i2 (no onward double bond),
   took more than one step, and - for an allene - the central atom a2 sits
   at the midpoint of the chain. */
253 | 0 | if (cur == i2 && prev != cur && 0 == num_dbond && len > 1 && |
254 | 0 | ( p2 = is_in_the_list( at[i2].neighbor, prev, at[i2].valence ) ) && |
255 | 0 | ( type != INCHI_StereoType_Allene || len == 2 * half_len )) |
256 | 0 | { |
257 | 0 | sb_ord_from_i2 = p2 - at[i2].neighbor; |
258 | 0 | sn_ord_from_i1 = q1 - at[i1].neighbor; |
259 | 0 | sn_ord_from_i2 = q2 - at[i2].neighbor; |
260 | 0 | } |
261 | 0 | else |
262 | 0 | { |
263 | 0 | j = -5; /* error flag */ |
264 | 0 | } |
265 | 0 | } |
266 | 0 | else |
267 | 0 | { |
268 | | /* allene must have been already processed, otherwise error */ |
269 | 0 | if (type == INCHI_StereoType_Allene) |
270 | 0 | { |
271 | | /* error: atoms #1 and #2 of allene are connected */ |
272 | 0 | j = -3; /* error flag */ |
273 | 0 | } |
274 | 0 | else |
275 | 0 | { |
276 | | /* double bond only; the bond type is not checked because at the end |
277 | | of the normalization it may happen to be alternating */ |
278 | 0 | if (type == INCHI_StereoType_DoubleBond && |
279 | 0 | ( p2 = is_in_the_list( at[i2].neighbor, i1, at[i2].valence ) )) |
280 | 0 | { |
/* ordinals are positions inside each end atom's neighbor[] array */
281 | 0 | sb_ord_from_i1 = p1 - at[i1].neighbor; |
282 | 0 | sb_ord_from_i2 = p2 - at[i2].neighbor; |
283 | 0 | sn_ord_from_i1 = q1 - at[i1].neighbor; |
284 | 0 | sn_ord_from_i2 = q2 - at[i2].neighbor; |
285 | 0 | } |
286 | 0 | else |
287 | 0 | { |
288 | 0 | j = -4; /* error flag */ |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | 0 | } |
294 | 

/* any value other than 4 means the descriptor failed validation */
295 | 0 | if (j != 4) |
296 | 0 | { |
297 | 0 | char szTemp[16]; |
298 | 0 | sprintf( szTemp, "#%d", i0D + 1 ); |
299 | 0 | TREAT_ERR( *err, 0, "Wrong 0D stereo descriptor(s):" ); |
300 | 0 | TREAT_ERR( *err, 0, szTemp ); |
301 | 0 | continue; /* error */ |
302 | 0 | } |
303 | | |
304 | 0 | switch (type) |
305 | 0 | { |
306 | 0 | case INCHI_StereoType_None: |
307 | 0 | continue; |
308 | 0 | case INCHI_StereoType_DoubleBond: |
309 | 0 | case INCHI_StereoType_Allene: |
/* find the first unused (zero) sb_parity slot on each end atom */
310 | 0 | for (j1 = 0; j1 < MAX_NUM_STEREO_BONDS && at[i1].sb_parity[j1]; j1++) |
311 | 0 | { |
312 | 0 | ; |
313 | 0 | } |
314 | 0 | for (j2 = 0; j2 < MAX_NUM_STEREO_BONDS && at[i2].sb_parity[j2]; j2++) |
315 | 0 | { |
316 | 0 | ; |
317 | 0 | } |
/* if either atom has no free slot, or any ordinal was not determined,
   the descriptor is dropped here without an error message */
318 | 0 | if (j1 < MAX_NUM_STEREO_BONDS && j2 < MAX_NUM_STEREO_BONDS && |
319 | 0 | sb_ord_from_i1 >= 0 && sb_ord_from_i2 >= 0 && |
320 | 0 | sn_ord_from_i1 >= 0 && sn_ord_from_i2 >= 0) |
321 | 0 | { |
/* the bond parity is split over the two end atoms:
   ODD -> (ODD, EVEN), EVEN -> (ODD, ODD); the other values are stored
   identically on both ends */
322 | 0 | switch (parity) |
323 | 0 | { |
324 | 0 | case INCHI_PARITY_ODD: |
325 | 0 | at[i1].sb_parity[j1] = AB_PARITY_ODD; |
326 | 0 | at[i2].sb_parity[j2] = AB_PARITY_EVEN; |
327 | 0 | break; |
328 | 0 | case INCHI_PARITY_EVEN: |
329 | 0 | at[i1].sb_parity[j1] = AB_PARITY_ODD; |
330 | 0 | at[i2].sb_parity[j2] = AB_PARITY_ODD; |
331 | 0 | break; |
332 | 0 | case INCHI_PARITY_UNDEFINED: |
333 | 0 | at[i1].sb_parity[j1] = AB_PARITY_UNDF; |
334 | 0 | at[i2].sb_parity[j2] = AB_PARITY_UNDF; |
335 | 0 | break; |
336 | 0 | default: |
337 | 0 | if (parity == INCHI_PARITY_UNKNOWN) |
338 | 0 | { |
339 | 0 | at[i1].sb_parity[j1] = vABParityUnknown; |
340 | 0 | at[i2].sb_parity[j2] = vABParityUnknown; |
341 | 0 | } |
342 | 0 | else |
343 | 0 | { |
344 | 0 | at[i1].sb_parity[j1] = AB_PARITY_NONE; |
345 | 0 | at[i2].sb_parity[j2] = AB_PARITY_NONE; |
346 | 0 | } |
347 | 0 | break; |
348 | 0 | } |
/* the second parity (parityNM) uses the same mapping and is OR-ed into the
   bits above SB_PARITY_SHFT; a value of 0 adds nothing */
349 | 0 | switch (parityNM) |
350 | 0 | { |
351 | 0 | case INCHI_PARITY_ODD: |
352 | 0 | at[i1].sb_parity[j1] |= AB_PARITY_ODD << SB_PARITY_SHFT; |
353 | 0 | at[i2].sb_parity[j2] |= AB_PARITY_EVEN << SB_PARITY_SHFT; |
354 | 0 | break; |
355 | 0 | case INCHI_PARITY_EVEN: |
356 | 0 | at[i1].sb_parity[j1] |= AB_PARITY_ODD << SB_PARITY_SHFT; |
357 | 0 | at[i2].sb_parity[j2] |= AB_PARITY_ODD << SB_PARITY_SHFT; |
358 | 0 | break; |
359 | 0 | case INCHI_PARITY_UNDEFINED: |
360 | 0 | at[i1].sb_parity[j1] |= AB_PARITY_UNDF << SB_PARITY_SHFT; |
361 | 0 | at[i2].sb_parity[j2] |= AB_PARITY_UNDF << SB_PARITY_SHFT; |
362 | 0 | break; |
363 | 0 | default: |
364 | 0 | if (parityNM == INCHI_PARITY_UNKNOWN) |
365 | 0 | { |
366 | 0 | at[i1].sb_parity[j1] |= vABParityUnknown << SB_PARITY_SHFT; |
367 | 0 | at[i2].sb_parity[j2] |= vABParityUnknown << SB_PARITY_SHFT; |
368 | 0 | } |
369 | 0 | break; |
370 | 0 | } |
371 | 0 | at[i1].sb_ord[j1] = sb_ord_from_i1; |
372 | 0 | at[i1].sn_ord[j1] = sn_ord_from_i1; |
373 | 0 | at[i1].sn_orig_at_num[j1] = at[i1n].orig_at_number; |
374 | 

375 | 0 | at[i2].sb_ord[j2] = sb_ord_from_i2; |
376 | 0 | at[i2].sn_ord[j2] = sn_ord_from_i2; |
377 | 0 | at[i2].sn_orig_at_num[j2] = at[i2n].orig_at_number; |
378 | 0 | } |
379 | 0 | break; |
380 | 0 | case INCHI_StereoType_Tetrahedral: |
381 | 0 | switch (parity) |
382 | 0 | { |
383 | 0 | case INCHI_PARITY_ODD: |
384 | 0 | at[a2].p_parity = AB_PARITY_ODD; |
385 | 0 | break; |
386 | 0 | case INCHI_PARITY_EVEN: |
387 | 0 | at[a2].p_parity = AB_PARITY_EVEN; |
388 | 0 | break; |
389 | 0 | case INCHI_PARITY_UNDEFINED: |
390 | 0 | at[a2].p_parity = AB_PARITY_UNDF; |
391 | 0 | break; |
392 | 0 | default: |
393 | 0 | if (parity == INCHI_PARITY_UNKNOWN) |
394 | 0 | { |
395 | 0 | at[a2].p_parity = vABParityUnknown; |
396 | 0 | break; |
397 | 0 | } |
398 | 0 | else |
399 | 0 | { |
/* this continue applies to the enclosing for-loop over descriptors */
400 | 0 | continue; |
401 | 0 | } |
402 | 0 | } |
/* record the original numbers of the four reference neighbors, in the
   order given by the descriptor */
403 | 0 | for (j = 0; j < 4; j++) |
404 | 0 | { |
405 | 0 | k = stereo0D[i0D].neighbor[j]; |
406 | 0 | at[a2].p_orig_at_num[j] = at[k].orig_at_number; |
407 | 0 | } |
408 | 0 | break; |
409 | | |
410 | 0 | default: |
411 | 0 | break; |
412 | 0 | } |
413 | 0 | } |
414 | | /* take care of Unknown stereobonds: */ |
415 | | /* copy their Unknown stereo descriptors to at->bond_stereo (2005-03-01) */ |
416 | | /* Note: to this stage, unk/undef set to what was requested */ |
417 | | /*( through vABParityUnknown ) (2009-12-12) */ |
418 | 0 | FixUnkn0DStereoBonds( at, nNumAtoms ); |
419 | 0 | #ifdef TARGET_API_LIB |
420 | 

/* a non-zero result is reported as text only; the return value below is unaffected */
421 | 0 | if (k = ReconcileAllCmlBondParities( at, nNumAtoms, 0 )) |
422 | 0 | { |
423 | 0 | char szErrCode[16]; |
424 | 0 | sprintf( szErrCode, "%d", k ); |
425 | 0 | AddErrorMessage( pStrErr, "0D Parities Reconciliation failed:" ); |
426 | 0 | AddErrorMessage( pStrErr, szErrCode ); |
427 | 0 | } |
428 | 

429 | 0 | #endif |
430 | 0 | } |
431 | | |
432 | 0 | return 0; |
433 | 0 | } |
434 | | |
435 | | |
436 | | /****************************************************************************/ |
437 | | char* FindToken( INCHI_IOSTREAM *inp_file, |
438 | | int *bTooLongLine, |
439 | | const char *sToken, |
440 | | int lToken, |
441 | | char *szLine, |
442 | | int nLenLine, |
443 | | char *p, |
444 | | int *res ) |
445 | 0 | { |
446 | 0 | char *q; |
447 | 0 | int res2; |
448 | |
|
449 | 0 | while (!( q = strstr( p, sToken ) )) |
450 | 0 | { |
451 | 0 | if (( q = strrchr( p, '/' ) ) && ( q + lToken > szLine + *res )) |
452 | 0 | { |
453 | 0 | *res -= q - szLine; /* res = the length of the szLine to be left in */ |
454 | 0 | memmove( szLine, q, *res + 1 ); |
455 | 0 | } |
456 | 0 | else |
457 | 0 | { |
458 | 0 | *res = 0; |
459 | 0 | } |
460 | |
|
461 | 0 | res2 = inchi_ios_getsTab1( szLine + *res, nLenLine - *res - 1, |
462 | 0 | inp_file, bTooLongLine ); |
463 | |
|
464 | 0 | if (!*bTooLongLine || 0 > res2) |
465 | 0 | { |
466 | | /* the line is over or end of file */ |
467 | 0 | return NULL; |
468 | 0 | } |
469 | 0 | else |
470 | 0 | { |
471 | 0 | *res += res2; |
472 | 0 | p = szLine; |
473 | 0 | } |
474 | 0 | } |
475 | | |
476 | 0 | return q + lToken; |
477 | 0 | } |
478 | | |
479 | | |
480 | | /****************************************************************************/ |
481 | | char *LoadLine( INCHI_IOSTREAM *inp_file, |
482 | | int *bTooLongLine, |
483 | | int *bItemIsOver, |
484 | | char **s, |
485 | | char *szLine, |
486 | | int nLenLine, |
487 | | int nMinLen2Load, |
488 | | char *p, |
489 | | int *res ) |
490 | 0 | { |
491 | |
|
492 | 0 | int pos = p - szLine, res2; |
493 | |
|
494 | 0 | if (!*bItemIsOver && nLenLine - ( *res - pos ) > nMinLen2Load) |
495 | 0 | { |
496 | | /* load the next portion if possible */ |
497 | |
|
498 | 0 | if (pos) |
499 | 0 | { |
500 | 0 | *res -= pos; |
501 | 0 | memmove( szLine, p, *res + 1 ); |
502 | 0 | p = szLine; |
503 | 0 | if (*s) |
504 | 0 | { |
505 | 0 | *s -= pos; |
506 | 0 | } |
507 | |
|
508 | 0 | pos = 0; |
509 | 0 | } |
510 | |
|
511 | 0 | res2 = inchi_ios_getsTab1( szLine + *res, |
512 | 0 | nLenLine - *res - 1, |
513 | 0 | inp_file, bTooLongLine ); |
514 | |
|
515 | 0 | if (res2 > 0) |
516 | 0 | { |
517 | 0 | *bItemIsOver = ( ( *s = strchr( p + *res, '/' ) ) || !*bTooLongLine ); |
518 | 0 | *res += res2; |
519 | 0 | } |
520 | 0 | else |
521 | 0 | { |
522 | 0 | *bItemIsOver = 1; |
523 | 0 | } |
524 | 0 | } |
525 | |
|
526 | 0 | return p; |
527 | 0 | } |
528 | | |
529 | | |
530 | | /*****************************************************************************/ |
/*
 * Field accessors used by the AuxInfo reader that follows.
 * Macro arguments are parenthesized so that pointer or index expressions
 * (e.g. AT_BONDS_VAL( p + 1, 0 )) expand correctly; the accessors that
 * name a field remain usable as assignment targets.
 */
#define AT_BONDS_VAL(AT,I) ((AT)[(I)].chem_bonds_valence)
#define ISOLATED_ATOM 15 /* stored in place of a zero bonds valence read from the input */
#define NUM_ISO_Hk(AT,I,K) ((AT)[(I)].num_iso_H[(K)])
#define inchi_NUMH2(AT,N) NUMH(AT,N)
#define AT_NUM_BONDS(AT) ((AT).valence)
#define IS_METAL_ATOM(AT,I) is_el_a_metal( (AT)[(I)].el_number )
537 | | |
538 | | |
539 | | /****************************************************************************/ |
540 | | int InchiToInpAtom( INCHI_IOSTREAM *inp_file, |
541 | | MOL_COORD **szCoord, |
542 | | int bDoNotAddH, |
543 | | int vABParityUnknown, |
544 | | INPUT_TYPE nInputType, |
545 | | inp_ATOM **at, |
546 | | int max_num_at, |
547 | | int *num_dimensions, |
548 | | int *num_bonds, |
549 | | char *pSdfLabel, |
550 | | char *pSdfValue, |
551 | | unsigned long *Id, |
552 | | INCHI_MODE *pInpAtomFlags, |
553 | | int *err, |
554 | | char *pStrErr ) |
555 | 0 | { |
556 | 0 | int num_atoms = 0, bFindNext = 0, bItemIsOver; |
557 | 0 | int i, k, k2, res, bond_type, bond_stereo1, bond_stereo2, bond_char, neigh, bond_parity, bond_parityNM; |
558 | 0 | int res2, bTooLongLine2, hk; |
559 | 0 | char szLine[INCHI_LINE_LEN], *p, *q, *s, parity; |
560 | 0 | int b2D = 0, b3D = 0, b23D, nNumBonds = 0, bNonZeroXYZ, bNonMetal; |
561 | 0 | int len_stereo0D = 0, max_len_stereo0D = 0; |
562 | 0 | inp_ATOM *atom = NULL; |
563 | 0 | MOL_COORD *pszCoord = NULL; |
564 | 0 | INCHI_MODE InpAtomFlags = 0; /* 0 or FLAG_INP_AT_NONCHIRAL or FLAG_INP_AT_CHIRAL */ |
565 | 0 | inchi_Stereo0D *atom_stereo0D = NULL; |
566 | 0 | static const char szIsoH[] = "hdt"; |
567 | | /* plain tags */ |
568 | 0 | static const char sStructHdrPln[] = "Structure:"; |
569 | 0 | static char sStructHdrPlnAuxStart[64] = ""; /*"$1.1Beta/";*/ |
570 | 0 | static int lenStructHdrPlnAuxStart = 0; |
571 | 0 | static const char sStructHdrPlnRevAt[] = "/rA:"; |
572 | 0 | static const char sStructHdrPlnRevBn[] = "/rB:"; |
573 | 0 | static const char sStructHdrPlnRevXYZ[] = "/rC:"; |
574 | 0 | const char *sToken; |
575 | 0 | int lToken, len, hlen; |
576 | |
|
577 | 0 | ReadINCHI_CtlData ir; |
578 | |
|
579 | 0 | if (!lenStructHdrPlnAuxStart) |
580 | 0 | { |
581 | 0 | lenStructHdrPlnAuxStart = sprintf( sStructHdrPlnAuxStart, "AuxInfo=" ); |
582 | 0 | } |
583 | |
|
584 | 0 | if (at) |
585 | 0 | { |
586 | 0 | if (*at && max_num_at) |
587 | 0 | memset( *at, 0, max_num_at * sizeof( **at ) ); |
588 | 0 | if (szCoord && *szCoord) |
589 | 0 | { |
590 | 0 | inchi_free( *szCoord ); |
591 | 0 | *szCoord = NULL; |
592 | 0 | } |
593 | 0 | } |
594 | 0 | else |
595 | 0 | { |
596 | 0 | bFindNext = 1; |
597 | 0 | } |
598 | |
|
599 | 0 | ir.bHeaderRead = ir.bErrorMsg = ir.bRestoreInfo = 0; |
600 | 0 | *num_dimensions = *num_bonds = 0; |
601 | | |
602 | |
|
603 | 0 | if (nInputType != INPUT_INCHI_PLAIN) |
604 | 0 | return num_atoms; |
605 | | |
606 | | /* |
607 | | Extract reversibility info from plain text INChI format |
608 | | */ |
609 | | |
610 | 0 | ir.bHeaderRead = hk = 0; |
611 | 0 | while (0 < ( res = inchi_ios_getsTab( szLine, sizeof( szLine ) - 1, inp_file, &ir.bTooLongLine ) )) |
612 | 0 | { |
613 | |
|
614 | 0 | if (!ir.bTooLongLine && |
615 | 0 | ( hlen = sizeof( sStructHdrPln ) - 1, !memcmp( szLine, sStructHdrPln, hlen ) )) |
616 | | |
617 | 0 | { |
618 | 0 | num_atoms = 0; |
619 | 0 | find_and_interpret_structure_header( szLine, pSdfLabel, pSdfValue, |
620 | 0 | Id, hlen, &ir ); |
621 | 0 | } |
622 | | |
623 | 0 | else if (!memcmp( szLine, sStructHdrPlnAuxStart, lenStructHdrPlnAuxStart )) |
624 | 0 | { |
625 | | /* Reject to deal with polymers for now */ |
626 | 0 | if (szLine && strstr( szLine, "/Z:" )) |
627 | 0 | { |
628 | 0 | *err = INCHI_INP_ERROR_ERR; |
629 | 0 | num_atoms = INCHI_INP_ERROR_RET; |
630 | 0 | TREAT_ERR( *err, 0, "Reading polymer AuxInfo is not supported yet" ); |
631 | 0 | goto bypass_end_of_INChI_plain; |
632 | 0 | } |
633 | | |
634 | | /* Found the header of the AuxInfo, read AuxInfo head of the line */ |
635 | 0 | if (!ir.bHeaderRead) |
636 | 0 | { |
637 | 0 | ir.ulongID = 0LU; |
638 | 0 | if (Id) |
639 | 0 | { |
640 | 0 | *Id = ir.ulongID; |
641 | 0 | } |
642 | 0 | if (pSdfLabel) |
643 | 0 | { |
644 | 0 | pSdfLabel[0] = '\0'; |
645 | 0 | } |
646 | 0 | if (pSdfValue) |
647 | 0 | { |
648 | 0 | pSdfValue[0] = '\0'; |
649 | 0 | } |
650 | 0 | } |
651 | |
|
652 | 0 | ir.bHeaderRead = 0; |
653 | | |
654 | | /* Check for empty "AuxInfo=ver//" */ |
655 | 0 | p = strchr( szLine + lenStructHdrPlnAuxStart, '/' ); |
656 | |
|
657 | 0 | if (p && p[1] == '/' && ( !p[2] || '\n' == p[2] )) |
658 | 0 | { |
659 | 0 | goto bypass_end_of_INChI_plain; |
660 | 0 | } |
661 | | |
662 | | /* |
663 | | Search for atoms block (plain) |
664 | | */ |
665 | | |
666 | 0 | p = szLine; |
667 | 0 | sToken = sStructHdrPlnRevAt; |
668 | 0 | lToken = sizeof( sStructHdrPlnRevAt ) - 1; |
669 | | |
670 | | /* Search for sToken in the line; load next segments of the line if sToken has not found */ |
671 | |
|
672 | 0 | p = FindToken( inp_file, &ir.bTooLongLine, sToken, lToken, |
673 | 0 | szLine, sizeof( szLine ), p, &res ); |
674 | |
|
675 | 0 | if (!p) |
676 | 0 | { |
677 | 0 | *err = INCHI_INP_ERROR_ERR; |
678 | 0 | num_atoms = INCHI_INP_ERROR_RET; |
679 | 0 | TREAT_ERR( *err, 0, "Missing atom data" ); |
680 | 0 | goto bypass_end_of_INChI_plain; |
681 | 0 | } |
682 | 0 | else |
683 | 0 | { |
684 | | /* atoms block started */ |
685 | 0 | i = 0; |
686 | 0 | res2 = bTooLongLine2 = -1; |
687 | 0 | bItemIsOver = ( s = strchr( p, '/' ) ) || !ir.bTooLongLine; |
688 | 0 | while (1) |
689 | 0 | { |
690 | |
|
691 | 0 | p = LoadLine( inp_file, &ir.bTooLongLine, &bItemIsOver, &s, |
692 | 0 | szLine, sizeof( szLine ), INCHI_LINE_ADD, p, &res ); |
693 | |
|
694 | 0 | if (!i) |
695 | 0 | { |
696 | | /* allocate atom */ |
697 | 0 | num_atoms = strtol( p, &q, 10 ); |
698 | |
|
699 | 0 | if (!num_atoms || !q || !*q) |
700 | 0 | { |
701 | 0 | num_atoms = 0; /* no atom data */ |
702 | 0 | goto bypass_end_of_INChI_plain; |
703 | 0 | } |
704 | 0 | p = q; |
705 | | |
706 | | /* Molfile chirality flag */ |
707 | 0 | switch (*p) |
708 | 0 | { |
709 | 0 | case 'c': |
710 | 0 | InpAtomFlags |= FLAG_INP_AT_CHIRAL; |
711 | 0 | p++; |
712 | 0 | break; |
713 | 0 | case 'n': |
714 | 0 | InpAtomFlags |= FLAG_INP_AT_NONCHIRAL; |
715 | 0 | p++; |
716 | 0 | break; |
717 | 0 | } |
718 | | |
719 | 0 | if (at && *at) |
720 | 0 | { |
721 | 0 | if (num_atoms > max_num_at) |
722 | 0 | { |
723 | 0 | inchi_free( *at ); |
724 | 0 | *at = NULL; |
725 | 0 | } |
726 | 0 | else |
727 | 0 | { |
728 | 0 | memset( *at, 0, max_num_at * sizeof( **at ) ); |
729 | 0 | atom = *at; |
730 | 0 | } |
731 | 0 | } |
732 | |
|
733 | 0 | if (!at || !*at) |
734 | 0 | { |
735 | |
|
736 | 0 | atom = CreateInpAtom( num_atoms + 1 ); |
737 | |
|
738 | 0 | if (!atom) |
739 | 0 | { |
740 | 0 | num_atoms = INCHI_INP_FATAL_RET; /* was -1; error */ |
741 | 0 | *err = INCHI_INP_FATAL_ERR; |
742 | 0 | TREAT_ERR( *err, 0, "Out of RAM" ); |
743 | 0 | goto bypass_end_of_INChI_plain; |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | 0 | { |
748 | 0 | max_len_stereo0D = num_atoms + 1; |
749 | |
|
750 | 0 | atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D ); |
751 | |
|
752 | 0 | if (!atom_stereo0D) |
753 | 0 | { |
754 | 0 | num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ |
755 | 0 | *err = INCHI_INP_FATAL_ERR; |
756 | 0 | TREAT_ERR( *err, 0, "Out of RAM" ); |
757 | 0 | goto bypass_end_of_INChI_plain; |
758 | 0 | } |
759 | 0 | } |
760 | 0 | } |
761 | | |
762 | | /* element, first char */ |
763 | 0 | if (!isalpha( UCINT *p ) || !isupper( UCINT *p ) || i >= num_atoms) |
764 | 0 | { |
765 | 0 | break; /* end of atoms block */ |
766 | 0 | } |
767 | | |
768 | 0 | atom[i].elname[0] = *p++; |
769 | | |
770 | | /* element, second char */ |
771 | 0 | if (isalpha( UCINT *p ) && islower( UCINT *p )) |
772 | 0 | { |
773 | 0 | atom[i].elname[1] = *p++; |
774 | 0 | } |
775 | |
|
776 | 0 | atom[i].el_number = get_periodic_table_number( atom[i].elname ); |
777 | | |
778 | | /* bonds' valence + number of non-isotopic H */ |
779 | 0 | if (isdigit( UCINT *p )) |
780 | 0 | { |
781 | 0 | AT_BONDS_VAL( atom, i ) = (char) strtol( p, &q, 10 ); |
782 | 0 | if (!AT_BONDS_VAL( atom, i )) |
783 | 0 | AT_BONDS_VAL( atom, i ) = ISOLATED_ATOM; /* same convention as in MOLfile, found zero bonds valence */ |
784 | 0 | p = q; |
785 | 0 | } |
786 | | |
787 | | /* charge */ |
788 | 0 | atom[i].charge = ( *p == '+' ) ? 1 : ( *p == '-' ) ? -1 : 0; |
789 | 0 | if (atom[i].charge) |
790 | 0 | { |
791 | 0 | p++; |
792 | 0 | if (isdigit( UCINT *p )) |
793 | 0 | { |
794 | 0 | atom[i].charge *= (S_CHAR) ( strtol( p, &q, 10 ) & CHAR_MASK ); |
795 | 0 | p = q; |
796 | 0 | } |
797 | 0 | } |
798 | | |
799 | | /* radical */ |
800 | 0 | if (*p == '.') |
801 | 0 | { |
802 | 0 | p++; |
803 | 0 | if (isdigit( UCINT *p )) |
804 | 0 | { |
805 | 0 | atom[i].radical = (S_CHAR) strtol( p, &q, 10 ); |
806 | 0 | p = q; |
807 | 0 | } |
808 | 0 | } |
809 | | |
810 | | /* isotopic mass */ |
811 | 0 | if (*p == 'i') |
812 | 0 | { |
813 | 0 | p++; |
814 | 0 | if (isdigit( UCINT *p )) |
815 | 0 | { |
816 | 0 | int mw = strtol( p, &q, 10 ); |
817 | 0 | p = q; |
818 | 0 | mw -= get_atomic_mass_from_elnum( atom[i].el_number ); |
819 | 0 | if (mw >= 0) |
820 | 0 | mw++; |
821 | 0 | atom[i].iso_atw_diff = mw; |
822 | 0 | } |
823 | 0 | } |
824 | | |
825 | | /* parity */ |
826 | 0 | switch (*p) |
827 | 0 | { |
828 | 0 | case 'o': |
829 | 0 | parity = INCHI_PARITY_ODD; |
830 | 0 | p++; |
831 | 0 | break; |
832 | 0 | case 'e': |
833 | 0 | parity = INCHI_PARITY_EVEN; |
834 | 0 | p++; |
835 | 0 | break; |
836 | 0 | case 'u': |
837 | 0 | parity = INCHI_PARITY_UNKNOWN; |
838 | 0 | p++; |
839 | 0 | break; |
840 | 0 | case '?': |
841 | 0 | parity = INCHI_PARITY_UNDEFINED; |
842 | 0 | p++; |
843 | 0 | break; |
844 | 0 | default: |
845 | 0 | parity = 0; |
846 | 0 | break; |
847 | 0 | } |
848 | | |
849 | 0 | if (parity) |
850 | 0 | { |
851 | 0 | atom_stereo0D[len_stereo0D].central_atom = i; |
852 | 0 | atom_stereo0D[len_stereo0D].parity = parity; |
853 | 0 | atom_stereo0D[len_stereo0D].type = INCHI_StereoType_Tetrahedral; |
854 | 0 | len_stereo0D++; |
855 | 0 | } |
856 | | |
857 | | /* isotopic h, d, t */ |
858 | 0 | for (k = 0; k < NUM_H_ISOTOPES; k++) |
859 | 0 | { |
860 | 0 | if (*p == szIsoH[k]) |
861 | 0 | { |
862 | 0 | NUM_ISO_Hk( atom, i, k ) = 1; |
863 | 0 | p++; |
864 | 0 | if (isdigit( UCINT *p )) |
865 | 0 | { |
866 | 0 | NUM_ISO_Hk( atom, i, k ) = (char) strtol( p, &q, 10 ); |
867 | 0 | p = q; |
868 | 0 | } |
869 | 0 | } |
870 | 0 | } |
871 | |
|
872 | 0 | i++; |
873 | 0 | } |
874 | | |
875 | 0 | if (!bItemIsOver || i != num_atoms || s && p != s) |
876 | 0 | { |
877 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
878 | 0 | *err = INCHI_INP_ERROR_ERR; |
879 | 0 | TREAT_ERR( *err, 0, "Wrong number of atoms" ); |
880 | 0 | goto bypass_end_of_INChI_plain; |
881 | 0 | } |
882 | 0 | } |
883 | | |
884 | | /* |
885 | | Search for bonds block (plain) and read it |
886 | | */ |
887 | | |
888 | | /*p = szLine;*/ |
889 | 0 | sToken = sStructHdrPlnRevBn; |
890 | 0 | lToken = sizeof( sStructHdrPlnRevBn ) - 1; |
891 | | |
892 | | /* Search for sToken in the line; load next segments of the line if sToken has not found */ |
893 | 0 | p = FindToken( inp_file, &ir.bTooLongLine, sToken, lToken, szLine, sizeof( szLine ), p, &res ); |
894 | |
|
895 | 0 | if (!p) |
896 | 0 | { |
897 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
898 | 0 | *err = INCHI_INP_ERROR_ERR; |
899 | 0 | TREAT_ERR( *err, 0, "Missing bonds data" ); |
900 | 0 | goto bypass_end_of_INChI_plain; |
901 | 0 | } |
902 | 0 | else |
903 | 0 | { |
904 | | /* bonds block started */ |
905 | |
|
906 | 0 | i = 1; |
907 | |
|
908 | 0 | res2 = bTooLongLine2 = -1; |
909 | |
|
910 | 0 | bItemIsOver = ( s = strchr( p, '/' ) ) || !ir.bTooLongLine; |
911 | |
|
912 | 0 | if (1 == num_atoms) |
913 | 0 | { |
914 | | /* needed because the next '/' may be still out of szLine */ |
915 | |
|
916 | 0 | p = LoadLine( inp_file, &ir.bTooLongLine, &bItemIsOver, &s, |
917 | 0 | szLine, sizeof( szLine ), INCHI_LINE_ADD, p, &res ); |
918 | 0 | } |
919 | |
|
920 | 0 | while (i < num_atoms) |
921 | 0 | { |
922 | |
|
923 | 0 | p = LoadLine( inp_file, &ir.bTooLongLine, &bItemIsOver, &s, |
924 | 0 | szLine, sizeof( szLine ), INCHI_LINE_ADD, p, &res ); |
925 | |
|
926 | 0 | if (i >= num_atoms || s && p >= s) |
927 | 0 | { |
928 | 0 | break; /* end of bonds (plain) */ |
929 | 0 | } |
930 | | |
931 | | /* bond, first char */ |
932 | 0 | if (*p == ';') |
933 | 0 | { |
934 | 0 | p++; |
935 | 0 | i++; |
936 | 0 | continue; |
937 | 0 | } |
938 | | |
939 | 0 | if (!isalpha( UCINT *p )) |
940 | 0 | { |
941 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
942 | 0 | *err = INCHI_INP_ERROR_ERR; |
943 | 0 | TREAT_ERR( *err, 0, "Wrong bonds data" ); |
944 | 0 | goto bypass_end_of_INChI_plain; |
945 | 0 | } |
946 | | |
947 | 0 | bond_char = *p++; |
948 | | |
949 | | /* bond parity */ |
950 | 0 | switch (*p) |
951 | 0 | { |
952 | 0 | case '-': |
953 | 0 | bond_parity = INCHI_PARITY_ODD; |
954 | 0 | p++; |
955 | 0 | break; |
956 | 0 | case '+': |
957 | 0 | bond_parity = INCHI_PARITY_EVEN; |
958 | 0 | p++; |
959 | 0 | break; |
960 | 0 | case 'u': |
961 | 0 | bond_parity = INCHI_PARITY_UNKNOWN; |
962 | 0 | p++; |
963 | 0 | break; |
964 | 0 | case '?': |
965 | 0 | bond_parity = INCHI_PARITY_UNDEFINED; |
966 | 0 | p++; |
967 | 0 | break; |
968 | 0 | default: |
969 | 0 | bond_parity = 0; |
970 | 0 | break; |
971 | 0 | } |
972 | | |
973 | 0 | if (bond_parity) |
974 | 0 | { |
975 | 0 | switch (*p) |
976 | 0 | { |
977 | 0 | case '-': |
978 | 0 | bond_parityNM = INCHI_PARITY_ODD; |
979 | 0 | p++; |
980 | 0 | break; |
981 | 0 | case '+': |
982 | 0 | bond_parityNM = INCHI_PARITY_EVEN; |
983 | 0 | p++; |
984 | 0 | break; |
985 | 0 | case 'u': |
986 | 0 | bond_parityNM = INCHI_PARITY_UNKNOWN; |
987 | 0 | p++; |
988 | 0 | break; |
989 | 0 | case '?': |
990 | 0 | bond_parityNM = INCHI_PARITY_UNDEFINED; |
991 | 0 | p++; |
992 | 0 | break; |
993 | 0 | default: |
994 | 0 | bond_parityNM = 0; |
995 | 0 | break; |
996 | 0 | } |
997 | 0 | } |
998 | 0 | else |
999 | 0 | { |
1000 | 0 | bond_parityNM = 0; |
1001 | 0 | } |
1002 | | |
1003 | | /* neighbor of the current atom */ |
1004 | 0 | if (!isdigit( UCINT *p )) |
1005 | 0 | { |
1006 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1007 | 0 | *err = INCHI_INP_ERROR_ERR; |
1008 | 0 | TREAT_ERR( *err, 0, "Wrong bonds data" ); |
1009 | 0 | goto bypass_end_of_INChI_plain; |
1010 | 0 | } |
1011 | | |
1012 | 0 | neigh = (int) strtol( p, &q, 10 ) - 1; |
1013 | |
|
1014 | 0 | #if ( FIX_CURE53_ISSUE_HEAP_BUFFER_OVERFLOW_INCHITOINPATOM==1 ) |
1015 | 0 | if (i >= num_atoms || neigh >= num_atoms || neigh < 0) |
1016 | 0 | { |
1017 | | #else |
1018 | | if (i >= num_atoms || neigh >= num_atoms) { |
1019 | | #endif |
1020 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1021 | 0 | *err = INCHI_INP_ERROR_ERR; |
1022 | 0 | TREAT_ERR( *err, 0, "Bond to nonexistent atom" ); |
1023 | 0 | goto bypass_end_of_INChI_plain; |
1024 | 0 | } |
1025 | | |
1026 | 0 | p = q; |
1027 | 0 | bond_stereo1 = bond_stereo2 = 0; |
1028 | | |
1029 | | /* bond type & 2D stereo */ |
1030 | 0 | switch (bond_char) |
1031 | 0 | { |
1032 | 0 | case 'v': |
1033 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1034 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER; |
1035 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER; |
1036 | 0 | break; |
1037 | 0 | case 'V': |
1038 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1039 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2EITHER; |
1040 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1EITHER; |
1041 | 0 | break; |
1042 | 0 | case 'w': |
1043 | 0 | bond_type = INCHI_BOND_TYPE_DOUBLE; |
1044 | 0 | bond_stereo1 = |
1045 | 0 | bond_stereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER; |
1046 | 0 | break; |
1047 | 0 | case 's': |
1048 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1049 | 0 | break; |
1050 | 0 | case 'd': |
1051 | 0 | bond_type = INCHI_BOND_TYPE_DOUBLE; |
1052 | 0 | break; |
1053 | 0 | case 't': |
1054 | 0 | bond_type = INCHI_BOND_TYPE_TRIPLE; |
1055 | 0 | break; |
1056 | 0 | case 'a': |
1057 | 0 | bond_type = INCHI_BOND_TYPE_ALTERN; |
1058 | 0 | break; |
1059 | 0 | case 'p': |
1060 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1061 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1UP; |
1062 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2UP; |
1063 | 0 | break; |
1064 | 0 | case 'P': |
1065 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1066 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2UP; |
1067 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1UP; |
1068 | 0 | break; |
1069 | 0 | case 'n': |
1070 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1071 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN; |
1072 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN; |
1073 | 0 | break; |
1074 | 0 | case 'N': |
1075 | 0 | bond_type = INCHI_BOND_TYPE_SINGLE; |
1076 | 0 | bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2DOWN; |
1077 | 0 | bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1DOWN; |
1078 | 0 | break; |
1079 | 0 | default: |
1080 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1081 | 0 | *err = INCHI_INP_ERROR_ERR; |
1082 | 0 | TREAT_ERR( *err, 0, "Wrong bond type" ); |
1083 | 0 | goto bypass_end_of_INChI_plain; |
1084 | 0 | } |
1085 | | |
1086 | 0 | k = AT_NUM_BONDS( atom[i] )++; /* AT_NUM_BONDS(AT) ==> (AT).valence */ |
1087 | |
|
1088 | 0 | atom[i].bond_type[k] = bond_type; |
1089 | 0 | atom[i].bond_stereo[k] = bond_stereo1; |
1090 | 0 | atom[i].neighbor[k] = (AT_NUMB) neigh; |
1091 | |
|
1092 | 0 | k2 = AT_NUM_BONDS( atom[neigh] )++; /* AT_NUM_BONDS(AT) ==> (AT).valence */ |
1093 | 0 | atom[neigh].bond_type[k2] = bond_type; |
1094 | 0 | atom[neigh].bond_stereo[k2] = bond_stereo2; |
1095 | 0 | atom[neigh].neighbor[k2] = (AT_NUMB) i; |
1096 | |
|
1097 | 0 | bond_parity |= ( bond_parityNM << SB_PARITY_SHFT ); |
1098 | |
|
1099 | 0 | if (bond_parity) |
1100 | 0 | { |
1101 | 0 | if (max_len_stereo0D <= len_stereo0D) |
1102 | 0 | { |
1103 | | /* realloc atom_Stereo0D */ |
1104 | |
|
1105 | 0 | inchi_Stereo0D *new_atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D + num_atoms ); |
1106 | |
|
1107 | 0 | if (!new_atom_stereo0D) |
1108 | 0 | { |
1109 | 0 | num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ |
1110 | 0 | *err = INCHI_INP_FATAL_ERR; |
1111 | 0 | TREAT_ERR( *err, 0, "Out of RAM" ); |
1112 | 0 | goto bypass_end_of_INChI_plain; |
1113 | 0 | } |
1114 | | |
1115 | 0 | memcpy( new_atom_stereo0D, atom_stereo0D, len_stereo0D * sizeof( *atom_stereo0D ) ); |
1116 | 0 | FreeInchi_Stereo0D( &atom_stereo0D ); |
1117 | 0 | atom_stereo0D = new_atom_stereo0D; |
1118 | 0 | max_len_stereo0D += num_atoms; |
1119 | 0 | } |
1120 | | |
1121 | | /* (a) i may be allene endpoint and neigh = allene middle point or |
1122 | | (b) i may be allene middle point and neigh = allene endpoint |
1123 | | !!!!! CURRENTLY ONLY (b) IS ALLOWED !!!!! |
1124 | | */ |
1125 | | |
1126 | 0 | atom_stereo0D[len_stereo0D].neighbor[1] = neigh; /* neigh < i */ |
1127 | 0 | atom_stereo0D[len_stereo0D].neighbor[2] = i; |
1128 | 0 | atom_stereo0D[len_stereo0D].parity = bond_parity; |
1129 | 0 | atom_stereo0D[len_stereo0D].type = INCHI_StereoType_DoubleBond; /* incl allenes & cumulenes */ |
1130 | 0 | len_stereo0D++; |
1131 | 0 | } |
1132 | 0 | } |
1133 | | |
1134 | 0 | if (!bItemIsOver || i != num_atoms || s && p != s) |
1135 | 0 | { |
1136 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1137 | 0 | *err = INCHI_INP_ERROR_ERR; |
1138 | 0 | TREAT_ERR( *err, 0, "Wrong number of bonds" ); |
1139 | 0 | goto bypass_end_of_INChI_plain; |
1140 | 0 | } |
1141 | 0 | } |
1142 | | |
1143 | | /* |
1144 | | Search for coordinates block (plain) |
1145 | | */ |
1146 | | |
1147 | | /*p = szLine;*/ |
1148 | 0 | sToken = sStructHdrPlnRevXYZ; |
1149 | 0 | lToken = sizeof( sStructHdrPlnRevXYZ ) - 1; |
1150 | | |
1151 | | /* search for sToken in the line; load next segments of the line if sToken has not found */ |
1152 | 0 | p = FindToken( inp_file, &ir.bTooLongLine, sToken, lToken, szLine, sizeof( szLine ), p, &res ); |
1153 | |
|
1154 | 0 | if (!p) |
1155 | 0 | { |
1156 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1157 | 0 | *err = INCHI_INP_ERROR_ERR; |
1158 | 0 | TREAT_ERR( *err, 0, "Missing atom coordinates data" ); |
1159 | 0 | goto bypass_end_of_INChI_plain; |
1160 | 0 | } |
1161 | 0 | else |
1162 | 0 | { |
1163 | | /* Coordinates block started */ |
1164 | 0 | if (pszCoord = (MOL_COORD*) inchi_malloc( inchi_max( num_atoms, 1 ) * sizeof( MOL_COORD ) )) |
1165 | 0 | { |
1166 | 0 | memset( pszCoord, ' ', inchi_max( num_atoms, 1 ) * sizeof( MOL_COORD ) ); |
1167 | 0 | } |
1168 | 0 | else |
1169 | 0 | { |
1170 | 0 | num_atoms = INCHI_INP_FATAL_RET; /* allocation error */ |
1171 | 0 | *err = INCHI_INP_FATAL_ERR; |
1172 | 0 | TREAT_ERR( *err, 0, "Out of RAM" ); |
1173 | 0 | goto bypass_end_of_INChI_plain; |
1174 | 0 | } |
1175 | | |
1176 | 0 | i = 0; |
1177 | 0 | res2 = bTooLongLine2 = -1; |
1178 | 0 | bItemIsOver = ( s = strchr( p, '/' ) ) || !ir.bTooLongLine; |
1179 | |
|
1180 | 0 | while (i < num_atoms) |
1181 | 0 | { |
1182 | |
|
1183 | 0 | p = LoadLine( inp_file, &ir.bTooLongLine, &bItemIsOver, &s, |
1184 | 0 | szLine, sizeof( szLine ), INCHI_LINE_ADD, p, &res ); |
1185 | |
|
1186 | 0 | if (i >= num_atoms || s && p >= s) |
1187 | 0 | { |
1188 | 0 | break; /* end of bonds (plain) */ |
1189 | 0 | } |
1190 | | |
1191 | | /* coord, first char */ |
1192 | 0 | if (*p == ';') |
1193 | 0 | { |
1194 | 0 | for (k = 0; k < NUM_COORD; k++) |
1195 | 0 | { |
1196 | 0 | pszCoord[i][LEN_COORD*k + 4] = '0'; |
1197 | 0 | } |
1198 | 0 | p++; |
1199 | 0 | i++; |
1200 | 0 | continue; |
1201 | 0 | } |
1202 | | |
1203 | 0 | for (k = 0; k < 3; k++) |
1204 | 0 | { |
1205 | 0 | double xyz; |
1206 | 0 | bNonZeroXYZ = 0; |
1207 | 0 | if (*p == ';') |
1208 | 0 | { |
1209 | 0 | pszCoord[i][LEN_COORD*k + 4] = '0'; |
1210 | 0 | xyz = 0.0; |
1211 | 0 | } |
1212 | 0 | else |
1213 | 0 | { |
1214 | 0 | if (*p == ',') |
1215 | 0 | { |
1216 | | /* empty */ |
1217 | 0 | pszCoord[i][LEN_COORD*k + 4] = '0'; |
1218 | 0 | xyz = 0.0; |
1219 | 0 | p++; |
1220 | 0 | } |
1221 | 0 | else |
1222 | 0 | { |
1223 | 0 | xyz = strtod( p, &q ); |
1224 | 0 | bNonZeroXYZ = fabs( xyz ) > MIN_BOND_LENGTH; |
1225 | 0 | if (q != NULL) |
1226 | 0 | { |
1227 | 0 | memcpy( pszCoord[i] + LEN_COORD*k, p, q - p ); |
1228 | 0 | if (*q == ',') |
1229 | 0 | q++; |
1230 | 0 | p = q; |
1231 | 0 | } |
1232 | 0 | else |
1233 | 0 | pszCoord[i][LEN_COORD*k + 4] = '0'; |
1234 | 0 | } |
1235 | 0 | } |
1236 | |
|
1237 | 0 | switch (k) |
1238 | 0 | { |
1239 | 0 | case 0: |
1240 | 0 | atom[i].x = xyz; |
1241 | 0 | b2D |= bNonZeroXYZ; |
1242 | 0 | break; |
1243 | 0 | case 1: |
1244 | 0 | atom[i].y = xyz; |
1245 | 0 | b2D |= bNonZeroXYZ; |
1246 | 0 | break; |
1247 | 0 | case 2: |
1248 | 0 | b3D |= bNonZeroXYZ; |
1249 | 0 | atom[i].z = xyz; |
1250 | 0 | break; |
1251 | 0 | } |
1252 | 0 | } |
1253 | | |
1254 | 0 | if (*p == ';') |
1255 | 0 | { |
1256 | 0 | p++; /* end of this triple of coordinates */ |
1257 | 0 | i++; |
1258 | 0 | } |
1259 | 0 | else |
1260 | 0 | { |
1261 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error in input data: atoms, bonds & coord must be present together */ |
1262 | 0 | *err = INCHI_INP_ERROR_ERR; |
1263 | 0 | TREAT_ERR( *err, 0, "Wrong atom coordinates data" ); |
1264 | 0 | goto bypass_end_of_INChI_plain; |
1265 | 0 | } |
1266 | 0 | } |
1267 | | |
1268 | 0 | if (!bItemIsOver || s && p != s || i != num_atoms) |
1269 | 0 | { |
1270 | 0 | num_atoms = INCHI_INP_ERROR_RET; /* error */ |
1271 | 0 | *err = INCHI_INP_ERROR_ERR; |
1272 | 0 | TREAT_ERR( *err, 0, "Wrong number of coordinates" ); |
1273 | 0 | goto bypass_end_of_INChI_plain; |
1274 | 0 | } |
1275 | 0 | } /* end of coordinates */ |
1276 | | |
1277 | | /* |
1278 | | Set special valences and implicit H (xml) |
1279 | | */ |
1280 | | |
1281 | 0 | b23D = b2D | b3D; |
1282 | 0 | b2D = b3D = 0; |
1283 | 0 | if (at) |
1284 | 0 | { |
1285 | 0 | if (!*at) |
1286 | 0 | { |
1287 | 0 | int a1, a2, n1, n2, valence; |
1288 | 0 | int chem_bonds_valence; |
1289 | 0 | int nX = 0, nY = 0, nZ = 0, nXYZ; |
1290 | 0 | *at = atom; |
1291 | | |
1292 | | /* special valences */ |
1293 | |
|
1294 | 0 | for (bNonMetal = 0; bNonMetal < 1; bNonMetal++) |
1295 | 0 | { |
1296 | 0 | for (a1 = 0; a1 < num_atoms; a1++) |
1297 | 0 | { |
1298 | 0 | int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1]; |
1299 | 0 | int bHasMetalNeighbor = 0; |
1300 | |
|
1301 | 0 | memset( num_bond_type, 0, sizeof( num_bond_type ) ); |
1302 | |
|
1303 | 0 | valence = AT_BONDS_VAL( atom, a1 ); /* save atom valence if available */ |
1304 | 0 | AT_BONDS_VAL( atom, a1 ) = 0; |
1305 | |
|
1306 | 0 | atom[a1].orig_at_number = a1 + 1; |
1307 | |
|
1308 | 0 | nX = nY = nZ = 0; |
1309 | |
|
1310 | 0 | for (n1 = 0; n1 < AT_NUM_BONDS( atom[a1] ); n1++) /*AT_NUM_BONDS(AT) ==> (AT).valence */ |
1311 | 0 | { |
1312 | 0 | bond_type = atom[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE; |
1313 | 0 | if (bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE) |
1314 | 0 | { |
1315 | 0 | bond_type = 0; |
1316 | 0 | TREAT_ERR( *err, 0, "Unknown bond type in InChI aux assigned as a single bond" ); |
1317 | 0 | } |
1318 | |
|
1319 | 0 | num_bond_type[bond_type] ++; |
1320 | 0 | nNumBonds++; |
1321 | 0 | if (b23D) |
1322 | 0 | { |
1323 | 0 | neigh = atom[a1].neighbor[n1]; |
1324 | 0 | nX |= ( fabs( atom[a1].x - atom[neigh].x ) > MIN_BOND_LENGTH ); |
1325 | 0 | nY |= ( fabs( atom[a1].y - atom[neigh].y ) > MIN_BOND_LENGTH ); |
1326 | 0 | nZ |= ( fabs( atom[a1].z - atom[neigh].z ) > MIN_BOND_LENGTH ); |
1327 | 0 | } |
1328 | 0 | } |
1329 | |
|
1330 | 0 | chem_bonds_valence = 0; |
1331 | 0 | for (n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1++) |
1332 | 0 | { |
1333 | 0 | chem_bonds_valence += ( MIN_INPUT_BOND_TYPE + n1 ) * num_bond_type[n1]; |
1334 | 0 | } |
1335 | |
|
1336 | 0 | if (MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE && |
1337 | 0 | ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN - MIN_INPUT_BOND_TYPE] )) |
1338 | 0 | { |
1339 | | /* accept input aromatic bonds for now */ |
1340 | 0 | switch (n2) |
1341 | 0 | { |
1342 | 0 | case 2: |
1343 | 0 | chem_bonds_valence += 3; /* =A- */ |
1344 | 0 | break; |
1345 | | |
1346 | 0 | case 3: |
1347 | 0 | chem_bonds_valence += 4; /* =A< */ |
1348 | 0 | break; |
1349 | | |
1350 | 0 | default: |
1351 | | /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */ |
1352 | 0 | for (n1 = 0; n1 < AT_NUM_BONDS( atom[a1] ); n1++) /* AT_NUM_BONDS(AT) ==> (AT).valence */ |
1353 | 0 | { |
1354 | 0 | if (atom[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN) |
1355 | 0 | { |
1356 | 0 | AT_NUMB *p1; |
1357 | 0 | a2 = atom[a1].neighbor[n1]; |
1358 | 0 | p1 = is_in_the_list( atom[a2].neighbor, (AT_NUMB) a1, AT_NUM_BONDS( atom[a2] ) ); /*AT_NUM_BONDS(AT) ==> (AT).valence*/ |
1359 | 0 | if (p1) |
1360 | 0 | { |
1361 | 0 | atom[a1].bond_type[n1] = |
1362 | 0 | atom[a2].bond_type[p1 - atom[a2].neighbor] = INCHI_BOND_TYPE_SINGLE; |
1363 | 0 | } |
1364 | 0 | else |
1365 | 0 | { |
1366 | 0 | *err = -2; /* Program error */ |
1367 | 0 | TREAT_ERR( *err, 0, "Program error interpreting InChI aux" ); |
1368 | 0 | num_atoms = 0; |
1369 | 0 | goto bypass_end_of_INChI_plain; /* no structure */ |
1370 | 0 | } |
1371 | 0 | } |
1372 | 0 | } |
1373 | | |
1374 | 0 | chem_bonds_valence += n2; |
1375 | 0 | *err |= 32; /* Unrecognized aromatic bond(s) replaced with single */ |
1376 | 0 | TREAT_ERR( *err, 0, "Atom has 1 or more than 3 aromatic bonds" ); |
1377 | 0 | break; |
1378 | 0 | } |
1379 | 0 | } |
1380 | | |
1381 | | /* added 2006-07-19 to process aromatic bonds same way as from molfile */ |
1382 | 0 | if (n2 && !valence) |
1383 | 0 | { |
1384 | 0 | int num_H = NUMH( atom, a1 ); /* only isotopic */ |
1385 | 0 | int chem_valence = chem_bonds_valence; |
1386 | 0 | int bUnusualValenceArom = |
1387 | 0 | detect_unusual_el_valence( (int) atom[a1].el_number, atom[a1].charge, |
1388 | 0 | atom[a1].radical, chem_valence, |
1389 | 0 | num_H, atom[a1].valence ); |
1390 | 0 | int bUnusualValenceNoArom = |
1391 | 0 | detect_unusual_el_valence( (int) atom[a1].el_number, atom[a1].charge, |
1392 | 0 | atom[a1].radical, chem_valence - 1, |
1393 | 0 | num_H, atom[a1].valence ); |
1394 | |
|
1395 | 0 | #if ( CHECK_AROMBOND2ALT == 1 ) |
1396 | 0 | if (bUnusualValenceArom && !bUnusualValenceNoArom && 0 == nBondsValToMetal( atom, a1 )) |
1397 | | #else |
1398 | | if (bUnusualValenceArom && !bUnusualValenceNoArom) |
1399 | | #endif |
1400 | | |
1401 | 0 | { |
1402 | | /* typically NH in 5-member aromatic ring */ |
1403 | 0 | chem_bonds_valence--; |
1404 | 0 | } |
1405 | 0 | } |
1406 | 0 | else if (n2 && valence) |
1407 | 0 | { |
1408 | | /* atom has aromatic bonds AND the chemical valence is known */ |
1409 | 0 | int num_H = NUMH( atom, a1 ); |
1410 | 0 | int chem_valence = chem_bonds_valence + num_H; |
1411 | 0 | if (valence == chem_valence - 1) |
1412 | 0 | { |
1413 | | /* typically NH in 5-member aromatic ring */ |
1414 | 0 | chem_bonds_valence--; |
1415 | 0 | } |
1416 | 0 | } |
1417 | |
|
1418 | 0 | atom[a1].chem_bonds_valence = chem_bonds_valence; |
1419 | |
|
1420 | 0 | atom[a1].num_H = get_num_H( atom[a1].elname, |
1421 | 0 | atom[a1].num_H, |
1422 | 0 | atom[a1].num_iso_H, |
1423 | 0 | atom[a1].charge, |
1424 | 0 | atom[a1].radical, |
1425 | 0 | atom[a1].chem_bonds_valence, |
1426 | 0 | valence, |
1427 | 0 | 0, |
1428 | 0 | bDoNotAddH, |
1429 | 0 | bHasMetalNeighbor ); |
1430 | 0 | } |
1431 | 0 | } |
1432 | | |
1433 | 0 | nNumBonds /= 2; |
1434 | |
|
1435 | 0 | if (b23D && nNumBonds) |
1436 | 0 | { |
1437 | 0 | nXYZ = nX + nY + nZ; |
1438 | 0 | b2D = ( nXYZ > 0 ); |
1439 | 0 | b3D = ( nXYZ == 3 ); |
1440 | 0 | *num_dimensions = b3D ? 3 : b2D ? 2 : 0; |
1441 | 0 | *num_bonds = nNumBonds; |
1442 | 0 | } |
1443 | | |
1444 | | /*======= 0D parities =================================*/ |
1445 | |
|
1446 | 0 | for (i = 0; i < len_stereo0D; i++) |
1447 | 0 | { |
1448 | 0 | AT_NUMB *p1, *p2; |
1449 | 0 | int sb_ord_from_a1 = -1, sb_ord_from_a2 = -1, bEnd1 = 0, bEnd2 = 0; |
1450 | |
|
1451 | 0 | switch (atom_stereo0D[i].type) |
1452 | 0 | { |
1453 | | |
1454 | 0 | case INCHI_StereoType_Tetrahedral: |
1455 | 0 | a1 = atom_stereo0D[i].central_atom; |
1456 | 0 | if (atom_stereo0D[i].parity && ( AT_NUM_BONDS( atom[a1] ) == 3 || AT_NUM_BONDS( atom[a1] ) == 4 )) |
1457 | 0 | { |
1458 | 0 | int ii, kk = 0; |
1459 | 0 | if (AT_NUM_BONDS( atom[a1] ) == 3) |
1460 | 0 | atom_stereo0D[i].neighbor[kk++] = a1; |
1461 | 0 | for (ii = 0; ii < AT_NUM_BONDS( atom[a1] ); ii++) |
1462 | 0 | atom_stereo0D[i].neighbor[kk++] = atom[a1].neighbor[ii]; |
1463 | 0 | } |
1464 | |
|
1465 | 0 | break; |
1466 | | |
1467 | 0 | case INCHI_StereoType_DoubleBond: |
1468 | 0 | #define MAX_CHAIN_LEN 20 |
1469 | 0 | a1 = atom_stereo0D[i].neighbor[1]; |
1470 | 0 | a2 = atom_stereo0D[i].neighbor[2]; |
1471 | 0 | p1 = is_in_the_list( atom[a1].neighbor, (AT_NUMB) a2, AT_NUM_BONDS( atom[a1] ) ); |
1472 | 0 | p2 = is_in_the_list( atom[a2].neighbor, (AT_NUMB) a1, AT_NUM_BONDS( atom[a2] ) ); |
1473 | 0 | if (!p1 || !p2) |
1474 | 0 | { |
1475 | 0 | atom_stereo0D[i].type = INCHI_StereoType_None; |
1476 | 0 | atom_stereo0D[i].central_atom = NO_ATOM; |
1477 | 0 | atom_stereo0D[i].neighbor[0] = |
1478 | 0 | atom_stereo0D[i].neighbor[3] = -1; |
1479 | 0 | *err |= 64; /* Error in cumulene stereo */ |
1480 | 0 | TREAT_ERR( *err, 0, "0D stereobond not recognized" ); |
1481 | 0 | break; |
1482 | 0 | } |
1483 | | |
1484 | | /* stereobond, allene, or cumulene */ |
1485 | | |
1486 | 0 | sb_ord_from_a1 = p1 - atom[a1].neighbor; |
1487 | 0 | sb_ord_from_a2 = p2 - atom[a2].neighbor; |
1488 | |
|
1489 | 0 | if (AT_NUM_BONDS( atom[a1] ) == 2 && |
1490 | 0 | atom[a1].bond_type[0] + atom[a1].bond_type[1] == 2 * INCHI_BOND_TYPE_DOUBLE && |
1491 | 0 | 0 == inchi_NUMH2( atom, a1 ) && |
1492 | 0 | ( AT_NUM_BONDS( atom[a2] ) != 2 || |
1493 | 0 | atom[a2].bond_type[0] + atom[a2].bond_type[1] != 2 * INCHI_BOND_TYPE_DOUBLE )) |
1494 | 0 | { |
1495 | 0 | bEnd2 = 1; /* a2 is the end-atom, a1 is middle atom */ |
1496 | 0 | } |
1497 | |
|
1498 | 0 | if (AT_NUM_BONDS( atom[a2] ) == 2 && |
1499 | 0 | atom[a2].bond_type[0] + atom[a2].bond_type[1] == 2 * INCHI_BOND_TYPE_DOUBLE && |
1500 | 0 | 0 == inchi_NUMH2( atom, a2 ) && |
1501 | 0 | ( AT_NUM_BONDS( atom[a1] ) != 2 || |
1502 | 0 | atom[a1].bond_type[0] + atom[a1].bond_type[1] != 2 * INCHI_BOND_TYPE_DOUBLE )) |
1503 | 0 | { |
1504 | 0 | bEnd1 = 1; /* a1 is the end-atom, a2 is middle atom */ |
1505 | 0 | } |
1506 | |
|
1507 | 0 | if (bEnd2 + bEnd1 == 1) |
1508 | 0 | { |
1509 | | /* allene or cumulene */ |
1510 | |
|
1511 | 0 | AT_NUMB chain[MAX_CHAIN_LEN + 1], prev, cur, next; |
1512 | |
|
1513 | 0 | if (bEnd2 && !bEnd1) |
1514 | 0 | { |
1515 | 0 | cur = a1; |
1516 | 0 | a1 = a2; |
1517 | 0 | a2 = cur; |
1518 | 0 | sb_ord_from_a1 = sb_ord_from_a2; |
1519 | 0 | } |
1520 | |
|
1521 | 0 | sb_ord_from_a2 = -1; |
1522 | 0 | cur = a1; |
1523 | 0 | next = a2; |
1524 | 0 | len = 0; |
1525 | 0 | chain[len++] = cur; |
1526 | 0 | chain[len++] = next; |
1527 | |
|
1528 | 0 | while (len < MAX_CHAIN_LEN) |
1529 | 0 | { |
1530 | | /* arbitrary very high upper limit to prevent infinite loop */ |
1531 | |
|
1532 | 0 | prev = cur; |
1533 | 0 | cur = next; |
1534 | | /* follow double bond path && avoid going back */ |
1535 | 0 | if (AT_NUM_BONDS( atom[cur] ) == 2 && |
1536 | 0 | atom[cur].bond_type[0] + atom[cur].bond_type[1] == 2 * INCHI_BOND_TYPE_DOUBLE && |
1537 | 0 | 0 == inchi_NUMH2( atom, cur )) |
1538 | 0 | { |
1539 | 0 | next = atom[cur].neighbor[atom[cur].neighbor[0] == prev]; |
1540 | 0 | chain[len++] = next; |
1541 | 0 | } |
1542 | 0 | else |
1543 | 0 | { |
1544 | 0 | break; |
1545 | 0 | } |
1546 | 0 | } |
1547 | 0 | if (len > 2 && |
1548 | 0 | ( p2 = is_in_the_list( atom[cur].neighbor, (AT_NUMB) prev, AT_NUM_BONDS( atom[cur] ) ) )) |
1549 | 0 | { |
1550 | 0 | sb_ord_from_a2 = p2 - atom[cur].neighbor; |
1551 | 0 | a2 = cur; |
1552 | | /* by design we need to pick up the first non-stereo-bond-neighbor as "sn"-atom */ |
1553 | 0 | atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[sb_ord_from_a1 == 0]; |
1554 | 0 | atom_stereo0D[i].neighbor[1] = a1; |
1555 | 0 | atom_stereo0D[i].neighbor[2] = a2; |
1556 | 0 | atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[sb_ord_from_a2 == 0]; |
1557 | |
|
1558 | 0 | if (len % 2) |
1559 | 0 | { |
1560 | 0 | atom_stereo0D[i].central_atom = chain[len / 2]; |
1561 | 0 | atom_stereo0D[i].type = INCHI_StereoType_Allene; |
1562 | 0 | } |
1563 | 0 | else |
1564 | 0 | { |
1565 | 0 | atom_stereo0D[i].central_atom = NO_ATOM; |
1566 | 0 | } |
1567 | 0 | } |
1568 | 0 | else |
1569 | 0 | { |
1570 | | /* error */ |
1571 | 0 | atom_stereo0D[i].type = INCHI_StereoType_None; |
1572 | 0 | atom_stereo0D[i].central_atom = NO_ATOM; |
1573 | 0 | atom_stereo0D[i].neighbor[0] = |
1574 | 0 | atom_stereo0D[i].neighbor[3] = -1; |
1575 | 0 | *err |= 64; /* Error in cumulene stereo */ |
1576 | 0 | TREAT_ERR( *err, 0, "Cumulene stereo not recognized (0D)" ); |
1577 | 0 | } |
1578 | 0 | #undef MAX_CHAIN_LEN |
1579 | 0 | } |
1580 | 0 | else |
1581 | 0 | { |
1582 | | /****** a normal possibly stereogenic bond -- not an allene or cumulene *******/ |
1583 | | /* by design we need to pick up the first non-stereo-bond-neighbor as "sn"-atom */ |
1584 | 0 | sb_ord_from_a1 = p1 - atom[a1].neighbor; |
1585 | 0 | sb_ord_from_a2 = p2 - atom[a2].neighbor; |
1586 | 0 | atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[p1 == atom[a1].neighbor]; |
1587 | 0 | atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[p2 == atom[a2].neighbor]; |
1588 | 0 | atom_stereo0D[i].central_atom = NO_ATOM; |
1589 | 0 | } |
1590 | |
|
1591 | 0 | if (atom_stereo0D[i].type != INCHI_StereoType_None && |
1592 | 0 | sb_ord_from_a1 >= 0 && sb_ord_from_a2 >= 0 && |
1593 | 0 | ATOM_PARITY_WELL_DEF( SB_PARITY_2( atom_stereo0D[i].parity ) )) |
1594 | 0 | { |
1595 | | /* Detected well-defined disconnected stereo |
1596 | | * locate first non-metal neighbors */ |
1597 | |
|
1598 | 0 | int a, n, j, /* k,*/ sb_ord, cur_neigh, min_neigh; |
1599 | |
|
1600 | 0 | for (k = 0; k < 2; k++) |
1601 | 0 | { |
1602 | 0 | a = k ? atom_stereo0D[i].neighbor[2] : atom_stereo0D[i].neighbor[1]; |
1603 | 0 | sb_ord = k ? sb_ord_from_a2 : sb_ord_from_a1; |
1604 | 0 | min_neigh = num_atoms; |
1605 | 0 | for (n = j = 0; j < AT_NUM_BONDS( atom[a] ); j++) |
1606 | 0 | { |
1607 | 0 | cur_neigh = atom[a].neighbor[j]; |
1608 | 0 | if (j != sb_ord && !IS_METAL_ATOM( atom, cur_neigh )) |
1609 | 0 | { |
1610 | 0 | min_neigh = inchi_min( cur_neigh, min_neigh ); |
1611 | 0 | } |
1612 | 0 | } |
1613 | 0 | if (min_neigh < num_atoms) |
1614 | 0 | { |
1615 | 0 | atom_stereo0D[i].neighbor[k ? 3 : 0] = min_neigh; |
1616 | 0 | } |
1617 | 0 | else |
1618 | 0 | { |
1619 | 0 | TREAT_ERR( *err, 0, "Cannot find non-metal stereobond neighor (0D)" ); |
1620 | 0 | } |
1621 | 0 | } |
1622 | 0 | } |
1623 | |
|
1624 | 0 | break; |
1625 | 0 | } |
1626 | 0 | } |
1627 | | /* end of 0D parities extraction */ |
1628 | | /*exit_cycle:;*/ |
1629 | 0 | } |
1630 | | |
1631 | | /* Transfer atom_stereo0D[] to atom[] */ |
1632 | 0 | if (len_stereo0D) |
1633 | 0 | { |
1634 | 0 | Extract0DParities( atom, num_atoms, atom_stereo0D, len_stereo0D, |
1635 | 0 | pStrErr, err, vABParityUnknown ); |
1636 | 0 | } |
1637 | |
|
1638 | 0 | if (pInpAtomFlags) |
1639 | 0 | { |
1640 | | /* save chirality flag */ |
1641 | 0 | *pInpAtomFlags |= InpAtomFlags; |
1642 | 0 | } |
1643 | 0 | } |
1644 | 0 | else if (atom) |
1645 | 0 | { |
1646 | 0 | inchi_free( atom ); |
1647 | 0 | atom = NULL; |
1648 | 0 | } |
1649 | | |
1650 | 0 | #if ( FIX_READ_AUX_MEM_LEAK == 1 ) |
1651 | | /* 2005-08-04 avoid memory leak */ |
1652 | 0 | if (atom_stereo0D) /* && !(stereo0D && *stereo0D == atom_stereo0D) ) */ |
1653 | 0 | { |
1654 | 0 | FreeInchi_Stereo0D( &atom_stereo0D ); |
1655 | 0 | } |
1656 | 0 | #endif |
1657 | |
|
1658 | 0 | if (szCoord) |
1659 | 0 | { |
1660 | 0 | *szCoord = pszCoord; |
1661 | 0 | pszCoord = NULL; |
1662 | 0 | } |
1663 | 0 | else if (pszCoord) |
1664 | 0 | { |
1665 | 0 | inchi_free( pszCoord ); |
1666 | 0 | pszCoord = NULL; |
1667 | 0 | } |
1668 | |
|
1669 | 0 | goto bypass_end_of_INChI_plain; |
1670 | | /*return num_atoms;*/ |
1671 | 0 | } |
1672 | 0 | } /* while ( 0 < (res = inchi_ios_getsTab( szLine, sizeof(szLine)-1, inp_file, &ir.bTooLongLine ) ) ) */ |
1673 | | |
1674 | | /* End of structure reading cycle */ |
1675 | 0 | if (atom_stereo0D) |
1676 | 0 | FreeInchi_Stereo0D( &atom_stereo0D ); |
1677 | 0 | if (res <= 0) |
1678 | 0 | { |
1679 | 0 | if (*err == INCHI_INP_ERROR_ERR) |
1680 | 0 | { |
1681 | 0 | return num_atoms; |
1682 | 0 | } |
1683 | 0 | *err = INCHI_INP_EOF_ERR; |
1684 | |
|
1685 | 0 | return INCHI_INP_EOF_RET; /* no more data */ |
1686 | 0 | } |
1687 | | |
1688 | 0 | bypass_end_of_INChI_plain: |
1689 | | /* Cleanup */ |
1690 | 0 | if (num_atoms == INCHI_INP_ERROR_RET && atom_stereo0D) |
1691 | 0 | { |
1692 | 0 | FreeInchi_Stereo0D( &atom_stereo0D ); |
1693 | 0 | } |
1694 | 0 | while (ir.bTooLongLine && |
1695 | 0 | 0 < inchi_ios_getsTab1( szLine, sizeof( szLine ) - 1, inp_file, &ir.bTooLongLine )) |
1696 | 0 | { |
1697 | 0 | ; |
1698 | 0 | } |
1699 | |
|
1700 | 0 | return num_atoms; |
1701 | 0 | #undef AT_NUM_BONDS |
1702 | 0 | #undef AT_NUMB |
1703 | 0 | #undef is_in_the_list |
1704 | 0 | #undef inchi_NUMH2 |
1705 | |
|
1706 | 0 | #undef MoreParms |
1707 | 0 | #undef INPUT_FILE |
1708 | 0 | #undef CreateInpAtom |
1709 | 0 | #undef AT_BONDS_VAL |
1710 | 0 | #undef ISOLATED_ATOM |
1711 | 0 | #undef NUM_ISO_Hk |
1712 | 0 | #undef IS_METAL_ATOM |
1713 | 0 | } |
1714 | | |
1715 | | |
1716 | | |
1717 | | /****************************************************************************/ |
1718 | | void find_and_interpret_structure_header( char *szLine, |
1719 | | char *pSdfLabel, |
1720 | | char *pSdfValue, |
1721 | | unsigned long *Id, |
1722 | | int hlen, |
1723 | | ReadINCHI_CtlData *ir ) |
1724 | 0 | { |
1725 | 0 | int len; |
1726 | 0 | char *p, *q; |
1727 | 0 | static const char sStructHdrPlnNoLblVal[] = " is missing"; |
1728 | | |
1729 | |
|
1730 | 0 | p = szLine + hlen; |
1731 | 0 | ir->ulongID = 0LU; |
1732 | | |
1733 | | /* structure number */ |
1734 | 0 | ir->ulongID = strtoul( p, &q, 10 ); |
1735 | 0 | if (q && q[0] == '.' && q[1] == ' ') |
1736 | 0 | { |
1737 | 0 | p = q + 2; |
1738 | 0 | } |
1739 | 0 | p = p + strspn( p, " \n\r" ); |
1740 | |
|
1741 | 0 | if (pSdfLabel) |
1742 | 0 | { |
1743 | 0 | pSdfLabel[0] = '\0'; |
1744 | 0 | } |
1745 | 0 | if (pSdfValue) |
1746 | 0 | { |
1747 | 0 | pSdfValue[0] = '\0'; |
1748 | 0 | } |
1749 | |
|
1750 | 0 | if (*p) |
1751 | 0 | { |
1752 | | /* has label name */ |
1753 | | |
1754 | | /*p ++;*/ |
1755 | 0 | if (q = strchr( p, '=' )) |
1756 | 0 | { |
1757 | | |
1758 | | /* '=' separates label name from the value */ |
1759 | 0 | len = inchi_min( q - p + 1, MAX_SDF_HEADER - 1 ); |
1760 | |
|
1761 | 0 | if (pSdfLabel) |
1762 | 0 | { |
1763 | 0 | mystrncpy( pSdfLabel, p, len ); |
1764 | 0 | lrtrim( pSdfLabel, &len ); |
1765 | 0 | } |
1766 | |
|
1767 | 0 | p = q + 1; |
1768 | 0 | q = p + (int) strlen( p ); |
1769 | |
|
1770 | 0 | if (q - p > 0) |
1771 | 0 | { |
1772 | 0 | len = inchi_min( q - p + 1, MAX_SDF_VALUE - 1 ); |
1773 | 0 | if (pSdfValue) |
1774 | 0 | { |
1775 | 0 | mystrncpy( pSdfValue, p, len ); |
1776 | 0 | } |
1777 | 0 | p = q; |
1778 | 0 | } |
1779 | 0 | } |
1780 | 0 | else if (q = strstr( p, sStructHdrPlnNoLblVal )) |
1781 | 0 | { |
1782 | 0 | len = inchi_min( q - p + 1, MAX_SDF_HEADER - 1 ); |
1783 | 0 | if (pSdfLabel) |
1784 | 0 | { |
1785 | 0 | mystrncpy( pSdfLabel, p, len ); |
1786 | 0 | } |
1787 | 0 | p = q + 1; |
1788 | 0 | } |
1789 | 0 | } |
1790 | |
|
1791 | 0 | if (Id) |
1792 | 0 | { |
1793 | 0 | *Id = ir->ulongID; |
1794 | 0 | } |
1795 | |
|
1796 | 0 | ir->bHeaderRead = 1; |
1797 | 0 | ir->bErrorMsg = ir->bRestoreInfo = 0; |
1798 | |
|
1799 | 0 | return; |
1800 | 0 | } |