/src/rdkit/Code/GraphMol/FileParsers/MolSGroupParsing.cpp
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2002-2018 Greg Landrum and T5 Informatics GmbH |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | |
11 | | #include "FileParsers.h" |
12 | | #include "FileParserUtils.h" |
13 | | #include "MolSGroupParsing.h" |
14 | | |
15 | | namespace RDKit { |
16 | | namespace SGroupParsing { |
17 | | |
18 | | /* ------------------ V2000 Utils ------------------ */ |
19 | | |
20 | | unsigned int ParseSGroupIntField(const std::string &text, unsigned int line, |
21 | 385k | unsigned int &pos, bool isFieldCounter) { |
22 | 385k | ++pos; // Account for separation space |
23 | 385k | unsigned int fieldValue; |
24 | 385k | size_t len = 3 - isFieldCounter; // field counters are smaller |
25 | 385k | try { |
26 | 385k | fieldValue = FileParserUtils::toInt(text.substr(pos, len)); |
27 | 385k | } catch (boost::bad_lexical_cast &) { |
28 | 28.0k | std::ostringstream errout; |
29 | 28.0k | errout << "Cannot convert '" << text.substr(pos, len) << "' to int on line " |
30 | 28.0k | << line; |
31 | 28.0k | throw FileParseException(errout.str()); |
32 | 28.0k | } catch (const std::out_of_range &) { |
33 | 8.26k | std::ostringstream errout; |
34 | 8.26k | errout << "SGroup line too short: '" << text << "' on line " << line; |
35 | 8.26k | throw FileParseException(errout.str()); |
36 | 8.26k | } |
37 | 348k | pos += len; |
38 | 348k | return fieldValue; |
39 | 385k | } |
40 | | |
41 | | unsigned int ParseSGroupIntField(bool &ok, bool strictParsing, |
42 | | const std::string &text, unsigned int line, |
43 | 378k | unsigned int &pos, bool isFieldCounter) { |
44 | 378k | ok = true; |
45 | 378k | unsigned int res = 0; |
46 | 378k | try { |
47 | 378k | res = ParseSGroupIntField(text, line, pos, isFieldCounter); |
48 | 378k | } catch (const std::exception &e) { |
49 | 36.2k | if (strictParsing) { |
50 | 22 | throw; |
51 | 36.2k | } else { |
52 | 36.2k | ok = false; |
53 | 36.2k | BOOST_LOG(rdWarningLog) << e.what() << std::endl; |
54 | 36.2k | } |
55 | 36.2k | } |
56 | 378k | return res; |
57 | 378k | } |
58 | | |
59 | | double ParseSGroupDoubleField(const std::string &text, unsigned int line, |
60 | 18.2k | unsigned int &pos) { |
61 | 18.2k | size_t len = 10; |
62 | 18.2k | double fieldValue; |
63 | 18.2k | try { |
64 | 18.2k | fieldValue = FileParserUtils::toDouble(text.substr(pos, len)); |
65 | 18.2k | } catch (boost::bad_lexical_cast &) { |
66 | 2.78k | std::ostringstream errout; |
67 | 2.78k | errout << "Cannot convert '" << text.substr(pos, len) |
68 | 2.78k | << "' to double on line " << line; |
69 | 2.78k | throw FileParseException(errout.str()); |
70 | 2.78k | } catch (const std::out_of_range &) { |
71 | 1.76k | std::ostringstream errout; |
72 | 1.76k | errout << "SGroup line too short: '" << text << "' on line " << line; |
73 | 1.76k | throw FileParseException(errout.str()); |
74 | 1.76k | } |
75 | 13.7k | pos += len; |
76 | 13.7k | return fieldValue; |
77 | 18.2k | } |
78 | | |
79 | | double ParseSGroupDoubleField(bool &ok, bool strictParsing, |
80 | | const std::string &text, unsigned int line, |
81 | 18.2k | unsigned int &pos) { |
82 | 18.2k | ok = true; |
83 | 18.2k | double res = 0.; |
84 | 18.2k | try { |
85 | 18.2k | res = ParseSGroupDoubleField(text, line, pos); |
86 | 18.2k | } catch (const std::exception &e) { |
87 | 4.55k | if (strictParsing) { |
88 | 2 | throw; |
89 | 4.54k | } else { |
90 | 4.54k | ok = false; |
91 | 4.54k | BOOST_LOG(rdWarningLog) << e.what() << std::endl; |
92 | 4.54k | } |
93 | 4.55k | } |
94 | 18.2k | return res; |
95 | 18.2k | } |
96 | | |
97 | | SubstanceGroup *FindSgIdx(IDX_TO_SGROUP_MAP &sGroupMap, int sgIdx, |
98 | 154k | unsigned int line) { |
99 | 154k | auto sgIt = sGroupMap.find(sgIdx); |
100 | 154k | if (sgIt == sGroupMap.end()) { |
101 | 12.4k | BOOST_LOG(rdWarningLog) << "SGroup " << sgIdx << " referenced on line " |
102 | 0 | << line << " not found." << std::endl; |
103 | 12.4k | return nullptr; |
104 | 12.4k | } |
105 | 142k | return &sgIt->second; |
106 | 154k | } |
107 | | |
108 | | void ParseSGroupV2000STYLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
109 | | const std::string &text, unsigned int line, |
110 | 28.6k | bool strictParsing) { |
111 | 28.6k | PRECONDITION(mol, "bad mol"); |
112 | 28.6k | PRECONDITION(text.substr(0, 6) == "M STY", "bad STY line"); |
113 | | |
114 | 28.6k | unsigned int pos = 6; |
115 | 28.6k | bool ok; |
116 | 28.6k | unsigned int nent = |
117 | 28.6k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
118 | 28.6k | if (!ok) { |
119 | 2.12k | return; |
120 | 2.12k | } |
121 | | |
122 | 62.5k | for (unsigned int ie = 0; ie < nent; ++ie) { |
123 | 53.7k | if (text.size() < pos + 8) { |
124 | 11.5k | std::ostringstream errout; |
125 | 11.5k | errout << "SGroup STY line too short: '" << text << "' on line " << line; |
126 | 11.5k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
127 | 11.5k | return; |
128 | 11.5k | } |
129 | | |
130 | 42.2k | unsigned int sequenceId = |
131 | 42.2k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
132 | 42.2k | if (!ok) { |
133 | 6.13k | return; |
134 | 6.13k | } |
135 | | |
136 | 36.0k | std::string typ = text.substr(pos + 1, 3); |
137 | 36.0k | if (SubstanceGroupChecks::isValidType(typ)) { |
138 | 20.1k | auto sgroup = SubstanceGroup(mol, typ); |
139 | 20.1k | sgroup.setProp<unsigned int>("index", sequenceId); |
140 | 20.1k | sGroupMap.emplace(sequenceId, sgroup); |
141 | 20.1k | } else { |
142 | 15.9k | std::ostringstream errout; |
143 | 15.9k | errout << "S group " << typ << " on line " << line; |
144 | 15.9k | SGroupWarnOrThrow<MolFileUnhandledFeatureException>(strictParsing, |
145 | 15.9k | errout.str()); |
146 | 15.9k | } |
147 | 36.0k | pos += 4; |
148 | 36.0k | } |
149 | 26.4k | } |
150 | | |
151 | | void ParseSGroupV2000VectorDataLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
152 | | const std::string &text, unsigned int line, |
153 | 22.3k | bool strictParsing) { |
154 | 22.3k | PRECONDITION(mol, "bad mol"); |
155 | | |
156 | 22.3k | std::string typ = text.substr(3, 3); |
157 | | |
158 | 22.3k | void (SubstanceGroup::*sGroupAddIndexedElement)(const int) = nullptr; |
159 | | |
160 | 22.3k | if (typ == "SAL") { |
161 | 12.3k | sGroupAddIndexedElement = &SubstanceGroup::addAtomWithBookmark; |
162 | 12.3k | } else if (typ == "SBL") { |
163 | 5.43k | sGroupAddIndexedElement = &SubstanceGroup::addBondWithBookmark; |
164 | 5.43k | } else if (typ == "SPA") { |
165 | 4.55k | sGroupAddIndexedElement = &SubstanceGroup::addParentAtomWithBookmark; |
166 | 4.55k | } else { |
167 | 0 | std::ostringstream errout; |
168 | 0 | errout << "Unsupported SGroup line '" << typ |
169 | 0 | << "' passed to Vector Data parser "; |
170 | 0 | throw FileParseException(errout.str()); |
171 | 0 | } |
172 | | |
173 | 22.3k | unsigned int pos = 6; |
174 | 22.3k | bool ok; |
175 | 22.3k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
176 | 22.3k | if (!ok) { |
177 | 1.76k | return; |
178 | 1.76k | } |
179 | 20.5k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
180 | 20.5k | if (!sgroup) { |
181 | 1.97k | return; |
182 | 1.97k | } |
183 | 18.6k | unsigned int nent = |
184 | 18.6k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
185 | 18.6k | if (!ok) { |
186 | 1.16k | sgroup->setIsValid(false); |
187 | 1.16k | return; |
188 | 1.16k | } |
189 | | |
190 | 39.1k | for (unsigned int i = 0; i < nent; ++i) { |
191 | 29.3k | if (text.size() < pos + 4) { |
192 | 1.33k | std::ostringstream errout; |
193 | 1.33k | errout << "SGroup line too short: '" << text << "' on line " << line; |
194 | 1.33k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
195 | 1.33k | sgroup->setIsValid(false); |
196 | 1.33k | return; |
197 | 1.33k | } |
198 | 27.9k | unsigned int nbr = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
199 | 27.9k | if (!ok) { |
200 | 1.79k | sgroup->setIsValid(false); |
201 | 1.79k | return; |
202 | 1.79k | } |
203 | 26.1k | try { |
204 | 26.1k | (sgroup->*sGroupAddIndexedElement)(nbr); |
205 | 26.1k | } catch (const std::exception &e) { |
206 | 4.44k | SGroupWarnOrThrow<>(strictParsing, e.what()); |
207 | 4.44k | sgroup->setIsValid(false); |
208 | 4.44k | return; |
209 | 4.44k | } |
210 | 26.1k | } |
211 | 17.4k | } |
212 | | |
213 | | void ParseSGroupV2000SDILine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
214 | | const std::string &text, unsigned int line, |
215 | 12.5k | bool strictParsing) { |
216 | 12.5k | PRECONDITION(mol, "bad mol"); |
217 | 12.5k | PRECONDITION(text.substr(0, 6) == "M SDI", "bad SDI line"); |
218 | | |
219 | 12.5k | unsigned int pos = 6; |
220 | 12.5k | bool ok; |
221 | 12.5k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
222 | 12.5k | if (!ok) { |
223 | 5.21k | return; |
224 | 5.21k | } |
225 | 7.35k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
226 | 7.35k | if (!sgroup) { |
227 | 386 | return; |
228 | 386 | } |
229 | | |
230 | 6.96k | unsigned int nCoords = |
231 | 6.96k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
232 | 6.96k | if (!ok) { |
233 | 634 | sgroup->setIsValid(false); |
234 | 634 | return; |
235 | 634 | } |
236 | 6.33k | if (nCoords != 4) { |
237 | 454 | std::ostringstream errout; |
238 | 454 | errout << "Unexpected number of coordinates for SDI on line " << line; |
239 | 454 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
240 | 454 | sgroup->setIsValid(false); |
241 | 454 | return; |
242 | 454 | } |
243 | | |
244 | 5.88k | SubstanceGroup::Bracket bracket; |
245 | 11.6k | for (unsigned int i = 0; i < 2; ++i) { |
246 | 9.31k | double x = ParseSGroupDoubleField(ok, strictParsing, text, line, pos); |
247 | 9.31k | if (!ok) { |
248 | 2.60k | sgroup->setIsValid(false); |
249 | 2.60k | return; |
250 | 2.60k | } |
251 | 6.71k | double y = ParseSGroupDoubleField(ok, strictParsing, text, line, pos); |
252 | 6.71k | if (!ok) { |
253 | 922 | sgroup->setIsValid(false); |
254 | 922 | return; |
255 | 922 | } |
256 | 5.79k | double z = 0.; |
257 | 5.79k | bracket[i] = RDGeom::Point3D(x, y, z); |
258 | 5.79k | } |
259 | 2.35k | bracket[2] = RDGeom::Point3D(0., 0., 0.); |
260 | 2.35k | try { |
261 | 2.35k | sgroup->addBracket(bracket); |
262 | 2.35k | } catch (const std::exception &e) { |
263 | 0 | SGroupWarnOrThrow<>(strictParsing, e.what()); |
264 | 0 | sgroup->setIsValid(false); |
265 | 0 | return; |
266 | 0 | } |
267 | 2.35k | } |
268 | | |
269 | | void ParseSGroupV2000SSTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
270 | | const std::string &text, unsigned int &line, |
271 | 3.68k | bool strictParsing) { |
272 | 3.68k | PRECONDITION(mol, "bad mol"); |
273 | 3.68k | PRECONDITION(text.substr(0, 6) == "M SST", "bad SST line"); |
274 | | |
275 | 3.68k | unsigned int pos = 6; |
276 | 3.68k | bool ok; |
277 | 3.68k | unsigned int nent = |
278 | 3.68k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
279 | 3.68k | if (!ok) { |
280 | 513 | return; |
281 | 513 | } |
282 | | |
283 | 5.04k | for (unsigned int ie = 0; ie < nent; ++ie) { |
284 | 4.67k | if (text.size() < pos + 8) { |
285 | 1.40k | std::ostringstream errout; |
286 | 1.40k | errout << "SGroup SST line too short: '" << text << "' on line " << line; |
287 | 1.40k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
288 | 1.40k | return; |
289 | 1.40k | } |
290 | | |
291 | 3.27k | unsigned int sgIdx = |
292 | 3.27k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
293 | 3.27k | if (!ok) { |
294 | 398 | return; |
295 | 398 | } |
296 | 2.87k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
297 | 2.87k | if (!sgroup) { |
298 | 360 | return; |
299 | 2.51k | }; |
300 | | |
301 | 2.51k | std::string subType = text.substr(++pos, 3); |
302 | | |
303 | 2.51k | if (!SubstanceGroupChecks::isValidSubType(subType)) { |
304 | 638 | std::ostringstream errout; |
305 | 638 | errout << "Unsupported SGroup subtype '" << subType << "' on line " |
306 | 638 | << line; |
307 | 638 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
308 | 638 | sgroup->setIsValid(false); |
309 | 638 | return; |
310 | 638 | } |
311 | | |
312 | 1.87k | sgroup->setProp("SUBTYPE", subType); |
313 | 1.87k | pos += 3; |
314 | 1.87k | } |
315 | 3.17k | } |
316 | | |
317 | | void ParseSGroupV2000SMTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
318 | | const std::string &text, unsigned int &line, |
319 | 3.69k | bool strictParsing) { |
320 | 3.69k | PRECONDITION(mol, "bad mol"); |
321 | 3.69k | PRECONDITION(text.substr(0, 6) == "M SMT", "bad SMT line"); |
322 | | |
323 | 3.69k | unsigned int pos = 6; |
324 | 3.69k | bool ok; |
325 | 3.69k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
326 | 3.69k | if (!ok) { |
327 | 718 | return; |
328 | 718 | } |
329 | 2.98k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
330 | 2.98k | if (!sgroup) { |
331 | 464 | return; |
332 | 464 | } |
333 | 2.51k | ++pos; |
334 | | |
335 | 2.51k | if (pos >= text.length()) { |
336 | 927 | std::ostringstream errout; |
337 | 927 | errout << "SGroup line too short: '" << text << "' on line " << line; |
338 | 927 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
339 | 927 | sgroup->setIsValid(false); |
340 | 927 | return; |
341 | 927 | } |
342 | 1.59k | std::string label = text.substr(pos, text.length() - pos); |
343 | | |
344 | 1.59k | if (sgroup->getProp<std::string>("TYPE") == |
345 | 1.59k | "MUL") { // Case of multiple groups |
346 | 416 | sgroup->setProp("MULT", label); |
347 | | |
348 | 1.17k | } else { // Case of abbreviation groups, but we might not have seen a SCL |
349 | | // line yet |
350 | 1.17k | sgroup->setProp("LABEL", label); |
351 | 1.17k | } |
352 | 1.59k | } |
353 | | |
354 | | void ParseSGroupV2000SLBLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
355 | | const std::string &text, unsigned int line, |
356 | 11.0k | bool strictParsing) { |
357 | 11.0k | PRECONDITION(mol, "bad mol"); |
358 | 11.0k | PRECONDITION(text.substr(0, 6) == "M SLB", "bad SLB line"); |
359 | | |
360 | 11.0k | unsigned int pos = 6; |
361 | 11.0k | bool ok; |
362 | 11.0k | unsigned int nent = |
363 | 11.0k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
364 | 11.0k | if (!ok) { |
365 | 508 | return; |
366 | 508 | } |
367 | | |
368 | 26.2k | for (unsigned int ie = 0; ie < nent; ++ie) { |
369 | 25.2k | if (text.size() < pos + 8) { |
370 | 8.45k | std::ostringstream errout; |
371 | 8.45k | errout << "SGroup SLB line too short: '" << text << "' on line " << line; |
372 | 8.45k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
373 | 8.45k | return; |
374 | 8.45k | } |
375 | | |
376 | 16.8k | unsigned int sgIdx = |
377 | 16.8k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
378 | 16.8k | if (!ok) { |
379 | 555 | return; |
380 | 555 | } |
381 | 16.2k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
382 | 16.2k | if (!sgroup) { |
383 | 348 | return; |
384 | 348 | } |
385 | 15.9k | unsigned int id = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
386 | 15.9k | if (!ok) { |
387 | 248 | sgroup->setIsValid(false); |
388 | 248 | return; |
389 | 248 | } |
390 | 15.6k | if (id != 0 && !SubstanceGroupChecks::isSubstanceGroupIdFree(*mol, id)) { |
391 | 0 | std::ostringstream errout; |
392 | 0 | errout << "SGroup ID '" << id |
393 | 0 | << "' is assigned to more than one SGroup, on line " << line; |
394 | 0 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
395 | 0 | sgroup->setIsValid(false); |
396 | 0 | return; |
397 | 0 | } |
398 | | |
399 | 15.6k | sgroup->setProp<unsigned int>("ID", id); |
400 | 15.6k | } |
401 | 10.5k | } |
402 | | |
403 | | void ParseSGroupV2000SCNLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
404 | | const std::string &text, unsigned int line, |
405 | 3.45k | bool strictParsing) { |
406 | 3.45k | PRECONDITION(mol, "bad mol"); |
407 | 3.45k | PRECONDITION(text.substr(0, 6) == "M SCN", "bad SCN line"); |
408 | | |
409 | 3.45k | unsigned int pos = 6; |
410 | 3.45k | bool ok; |
411 | 3.45k | unsigned int nent = |
412 | 3.45k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
413 | 3.45k | if (!ok) { |
414 | 147 | return; |
415 | 147 | } |
416 | | |
417 | 5.80k | for (unsigned int ie = 0; ie < nent; ++ie) { |
418 | 5.41k | if (text.size() < pos + 7) { |
419 | 1.31k | std::ostringstream errout; |
420 | 1.31k | errout << "SGroup SCN line too short: '" << text << "' on line " << line; |
421 | 1.31k | errout << "\n needed: " << pos + 7 << " found: " << text.size(); |
422 | 1.31k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
423 | 1.31k | return; |
424 | 1.31k | } |
425 | | |
426 | 4.10k | unsigned int sgIdx = |
427 | 4.10k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
428 | 4.10k | if (!ok) { |
429 | 541 | return; |
430 | 541 | } |
431 | 3.56k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
432 | 3.56k | if (!sgroup) { |
433 | 355 | return; |
434 | 355 | } |
435 | | |
436 | 3.21k | std::string connect = text.substr(++pos, 2); |
437 | | |
438 | 3.21k | if (!SubstanceGroupChecks::isValidConnectType(connect)) { |
439 | 710 | std::ostringstream errout; |
440 | 710 | errout << "Unsupported SGroup connection type '" << connect |
441 | 710 | << "' on line " << line; |
442 | 710 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
443 | 710 | sgroup->setIsValid(false); |
444 | 710 | return; |
445 | 710 | } |
446 | | |
447 | 2.50k | sgroup->setProp("CONNECT", connect); |
448 | 2.50k | pos += 3; |
449 | 2.50k | } |
450 | 3.30k | } |
451 | | |
452 | | void ParseSGroupV2000SDSLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
453 | | const std::string &text, unsigned int line, |
454 | 3.56k | bool strictParsing) { |
455 | 3.56k | PRECONDITION(mol, "bad mol"); |
456 | 3.56k | PRECONDITION(text.substr(0, 10) == "M SDS EXP", "bad SDS line"); |
457 | | |
458 | 3.53k | unsigned int pos = 10; |
459 | 3.53k | bool ok; |
460 | 3.53k | unsigned int nent = |
461 | 3.53k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
462 | 3.53k | if (!ok) { |
463 | 96 | return; |
464 | 96 | } |
465 | | |
466 | 6.65k | for (unsigned int ie = 0; ie < nent; ++ie) { |
467 | 6.14k | if (text.size() < pos + 4) { |
468 | 1.72k | std::ostringstream errout; |
469 | 1.72k | errout << "SGroup SDS line too short: '" << text << "' on line " << line; |
470 | 1.72k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
471 | 1.72k | return; |
472 | 1.72k | } |
473 | 4.42k | unsigned int sgIdx = |
474 | 4.42k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
475 | 4.42k | if (!ok) { |
476 | 675 | return; |
477 | 675 | } |
478 | 3.75k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
479 | 3.75k | if (!sgroup) { |
480 | 533 | return; |
481 | 533 | } |
482 | | |
483 | 3.21k | sgroup->setProp("ESTATE", "E"); |
484 | 3.21k | } |
485 | 3.43k | } |
486 | | |
487 | | void ParseSGroupV2000SBVLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
488 | | const std::string &text, unsigned int line, |
489 | 7.64k | bool strictParsing) { |
490 | 7.64k | PRECONDITION(mol, "bad mol"); |
491 | 7.64k | PRECONDITION(text.substr(0, 6) == "M SBV", "bad SBV line"); |
492 | | |
493 | 7.64k | unsigned int pos = 6; |
494 | 7.64k | bool ok; |
495 | 7.64k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
496 | 7.64k | if (!ok) { |
497 | 657 | return; |
498 | 657 | } |
499 | 6.98k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
500 | 6.98k | if (!sgroup) { |
501 | 748 | return; |
502 | 748 | } |
503 | | |
504 | 6.23k | unsigned int bondMark = |
505 | 6.23k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
506 | 6.23k | if (!ok) { |
507 | 936 | sgroup->setIsValid(false); |
508 | 936 | return; |
509 | 936 | } |
510 | 5.29k | Bond *bond = mol->getUniqueBondWithBookmark(bondMark); |
511 | | |
512 | 5.29k | RDGeom::Point3D vector; |
513 | 5.29k | if (sgroup->getProp<std::string>("TYPE") == "SUP") { |
514 | 1.39k | vector.x = ParseSGroupDoubleField(ok, strictParsing, text, line, pos); |
515 | 1.39k | if (!ok) { |
516 | 523 | sgroup->setIsValid(false); |
517 | 523 | return; |
518 | 523 | } |
519 | 872 | vector.y = ParseSGroupDoubleField(ok, strictParsing, text, line, pos); |
520 | 872 | if (!ok) { |
521 | 499 | sgroup->setIsValid(false); |
522 | 499 | return; |
523 | 499 | } |
524 | 373 | vector.z = 0.; |
525 | 373 | } |
526 | | |
527 | 4.27k | try { |
528 | 4.27k | sgroup->addCState(bond->getIdx(), vector); |
529 | 4.27k | } catch (const std::exception &e) { |
530 | 2.44k | SGroupWarnOrThrow<>(strictParsing, e.what()); |
531 | 2.44k | sgroup->setIsValid(false); |
532 | 2.44k | return; |
533 | 2.44k | } |
534 | 4.27k | } |
535 | | |
536 | | void ParseSGroupV2000SDTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
537 | | const std::string &text, unsigned int line, |
538 | 18.5k | bool strictParsing) { |
539 | 18.5k | PRECONDITION(mol, "bad mol"); |
540 | 18.5k | PRECONDITION(text.substr(0, 6) == "M SDT", "bad SDT line"); |
541 | | |
542 | 18.5k | unsigned int pos = 6; |
543 | 18.5k | bool ok; |
544 | 18.5k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
545 | 18.5k | if (!ok) { |
546 | 1.64k | return; |
547 | 1.64k | } |
548 | 16.9k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
549 | 16.9k | if (!sgroup) { |
550 | 1.99k | return; |
551 | 1.99k | } |
552 | | |
553 | 14.9k | std::string fieldName; |
554 | 14.9k | std::string fieldType; |
555 | 14.9k | std::string fieldInfo; |
556 | 14.9k | std::string queryType; |
557 | 14.9k | std::string queryOp; |
558 | | |
559 | 14.9k | try { |
560 | 14.9k | fieldName = text.substr(++pos, 30); |
561 | 14.9k | boost::trim_right(fieldName); |
562 | 14.9k | pos += 30; |
563 | 14.9k | fieldType = text.substr(pos, 2); |
564 | 14.9k | boost::trim_right(fieldType); |
565 | 14.9k | pos += 2; |
566 | 14.9k | fieldInfo = text.substr(pos, 20); |
567 | 14.9k | boost::trim_right(fieldInfo); |
568 | 14.9k | pos += 20; |
569 | 14.9k | queryType = text.substr(pos, 2); |
570 | 14.9k | boost::trim_right(queryType); |
571 | 14.9k | pos += 2; |
572 | 14.9k | queryOp = text.substr(pos, text.length() - pos); |
573 | 14.9k | boost::trim_right(queryOp); |
574 | 14.9k | } catch (const std::out_of_range &) { |
575 | | // all kinds of wild things out there... this insulates us from them without |
576 | | // making the code super complicated |
577 | 11.1k | } |
578 | | |
579 | | // only add entries for the remaining properties if they aren't blank |
580 | 14.9k | if (!fieldName.empty()) { |
581 | 13.3k | sgroup->setProp("FIELDNAME", fieldName); |
582 | 13.3k | } |
583 | 14.9k | if (!fieldType.empty()) { |
584 | 6.45k | sgroup->setProp("FIELDTYPE", fieldType); |
585 | 6.45k | } |
586 | 14.9k | if (!fieldInfo.empty()) { |
587 | 6.11k | sgroup->setProp("FIELDINFO", fieldInfo); |
588 | 6.11k | } |
589 | 14.9k | if (!queryType.empty()) { |
590 | 3.71k | sgroup->setProp("QUERYTYPE", queryType); |
591 | 3.71k | } |
592 | 14.9k | if (!queryOp.empty()) { |
593 | 2.94k | sgroup->setProp("QUERYOP", queryOp); |
594 | 2.94k | } |
595 | 14.9k | } |
596 | | |
597 | | void ParseSGroupV2000SDDLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
598 | | const std::string &text, unsigned int line, |
599 | 1.82k | bool strictParsing) { |
600 | 1.82k | PRECONDITION(mol, "bad mol"); |
601 | 1.82k | PRECONDITION(text.substr(0, 6) == "M SDD", "bad SDD line"); |
602 | | |
603 | 1.82k | unsigned int pos = 6; |
604 | 1.82k | bool ok; |
605 | 1.82k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
606 | 1.82k | if (!ok) { |
607 | 344 | return; |
608 | 344 | } |
609 | 1.48k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
610 | 1.48k | if (!sgroup) { |
611 | 300 | return; |
612 | 300 | } |
613 | | |
614 | | // Store the rest of the line as is. |
615 | 1.18k | ++pos; |
616 | 1.18k | if (pos < text.length()) { |
617 | 1.00k | sgroup->setProp("FIELDDISP", text.substr(pos, text.length() - pos)); |
618 | 1.00k | } |
619 | 1.18k | } |
620 | | |
621 | | void ParseSGroupV2000SCDSEDLine(IDX_TO_SGROUP_MAP &sGroupMap, |
622 | | IDX_TO_STR_VECT_MAP &dataFieldsMap, RWMol *mol, |
623 | | const std::string &text, unsigned int line, |
624 | | bool strictParsing, unsigned int &counter, |
625 | | unsigned int &lastDataSGroup, |
626 | 51.0k | std::ostringstream ¤tDataField) { |
627 | 51.0k | PRECONDITION(mol, "bad mol"); |
628 | | |
629 | 51.0k | unsigned int pos = 3; |
630 | 51.0k | std::string type = text.substr(pos, 3); |
631 | 51.0k | pos += 3; |
632 | | |
633 | 51.0k | bool ok; |
634 | 51.0k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
635 | 51.0k | if (!ok) { |
636 | 1.77k | return; |
637 | 1.77k | } |
638 | 49.2k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
639 | 49.2k | if (!sgroup) { |
640 | 2.58k | return; |
641 | 2.58k | } |
642 | | |
643 | 46.7k | if (lastDataSGroup != 0 && lastDataSGroup != sgIdx) { |
644 | 3.79k | std::ostringstream errout; |
645 | 3.79k | errout << "Found a Data Field not matching the SGroup of the last Data " |
646 | 3.79k | "Field at line " |
647 | 3.79k | << line; |
648 | 3.79k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
649 | 3.79k | sgroup->setIsValid(false); |
650 | 3.79k | return; |
651 | 42.9k | } else if (lastDataSGroup == 0 && type == "SCD") { |
652 | 8.77k | lastDataSGroup = sgIdx; |
653 | 34.1k | } else if (type == "SED") { |
654 | 33.5k | lastDataSGroup = 0; |
655 | 33.5k | } |
656 | | |
657 | | // have we already seen an SDT line? |
658 | 42.9k | if (!sgroup->hasProp("FIELDNAME")) { |
659 | | // one can read the docs and draw the conclusion that this is mandatory, |
660 | | // but it's also possible to interpret them the other way, and we know |
661 | | // that there are CTABs out there with empty fieldnames in SDT lines, |
662 | | // so let's just issue a warning and accept it. |
663 | 23.6k | BOOST_LOG(rdWarningLog) |
664 | 0 | << "Found a SCD/SED line with missing/empty SDT specification at line " |
665 | 0 | << line << std::endl; |
666 | 23.6k | } |
667 | | |
668 | 42.9k | if (strictParsing) { |
669 | 422 | if (type == "SCD" && counter > 2) { |
670 | 0 | std::ostringstream errout; |
671 | 0 | errout << "Found too many consecutive SCD lines, (#" << (counter + 1) |
672 | 0 | << " at line " << line << ") for SGroup " << sgIdx; |
673 | 0 | throw FileParseException(errout.str()); |
674 | 0 | } |
675 | 422 | } |
676 | | |
677 | 42.9k | if (pos + 1 < text.length()) { |
678 | 40.8k | currentDataField << text.substr(++pos, 69); |
679 | | |
680 | 40.8k | if (type == "SED") { |
681 | 31.9k | std::string trimmedData = boost::trim_right_copy(currentDataField.str()); |
682 | 31.9k | dataFieldsMap[sgIdx].push_back(trimmedData.substr(0, 200)); |
683 | 31.9k | currentDataField.str(""); |
684 | 31.9k | counter = 0; |
685 | 31.9k | } else { |
686 | 8.89k | ++counter; |
687 | 8.89k | } |
688 | 40.8k | } |
689 | 42.9k | } |
690 | | |
691 | | void ParseSGroupV2000SPLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
692 | | const std::string &text, unsigned int line, |
693 | 5.20k | bool strictParsing) { |
694 | 5.20k | PRECONDITION(mol, "bad mol"); |
695 | 5.20k | PRECONDITION(text.substr(0, 6) == "M SPL", "bad SPL line"); |
696 | | |
697 | 5.20k | unsigned int pos = 6; |
698 | 5.20k | bool ok; |
699 | 5.20k | unsigned int nent = |
700 | 5.20k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
701 | 5.20k | if (!ok) { |
702 | 1.05k | return; |
703 | 1.05k | } |
704 | | |
705 | 10.7k | for (unsigned int ie = 0; ie < nent; ++ie) { |
706 | 9.87k | if (text.size() < pos + 8) { |
707 | 2.14k | std::ostringstream errout; |
708 | 2.14k | errout << "SGroup SPL line too short: '" << text << "' on line " << line; |
709 | 2.14k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
710 | 2.14k | return; |
711 | 2.14k | } |
712 | | |
713 | 7.73k | unsigned int sgIdx = |
714 | 7.73k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
715 | 7.73k | if (!ok) { |
716 | 697 | return; |
717 | 697 | } |
718 | 7.03k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
719 | 7.03k | if (!sgroup) { |
720 | 416 | return; |
721 | 416 | } |
722 | 6.61k | unsigned int parentIdx = ParseSGroupIntField(text, line, pos); |
723 | | |
724 | 6.61k | sgroup->setProp<unsigned int>("PARENT", parentIdx); |
725 | 6.61k | } |
726 | 4.14k | } |
727 | | |
728 | | void ParseSGroupV2000SNCLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
729 | | const std::string &text, unsigned int line, |
730 | 3.80k | bool strictParsing) { |
731 | 3.80k | PRECONDITION(mol, "bad mol"); |
732 | 3.80k | PRECONDITION(text.substr(0, 6) == "M SNC", "bad SNC line"); |
733 | | |
734 | 3.80k | unsigned int pos = 6; |
735 | 3.80k | bool ok; |
736 | 3.80k | unsigned int nent = |
737 | 3.80k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
738 | 3.80k | if (!ok) { |
739 | 466 | return; |
740 | 466 | } |
741 | | |
742 | 4.57k | for (unsigned int ie = 0; ie < nent; ++ie) { |
743 | 3.68k | if (text.size() < pos + 8) { |
744 | 1.15k | std::ostringstream errout; |
745 | 1.15k | errout << "SGroup SNC line too short: '" << text << "' on line " << line; |
746 | 1.15k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
747 | 1.15k | return; |
748 | 1.15k | } |
749 | | |
750 | 2.52k | unsigned int sgIdx = |
751 | 2.52k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
752 | 2.52k | if (!ok) { |
753 | 279 | return; |
754 | 279 | } |
755 | 2.25k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
756 | 2.25k | if (!sgroup) { |
757 | 277 | return; |
758 | 277 | } |
759 | | |
760 | 1.97k | unsigned int compno = |
761 | 1.97k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
762 | 1.97k | if (!ok) { |
763 | 412 | sgroup->setIsValid(false); |
764 | 412 | return; |
765 | 412 | } |
766 | 1.56k | if (compno > 256u) { |
767 | 322 | std::ostringstream errout; |
768 | 322 | errout << "SGroup SNC value over 256: '" << compno << "' on line " |
769 | 322 | << line; |
770 | 322 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
771 | 322 | sgroup->setIsValid(false); |
772 | 322 | return; |
773 | 322 | } |
774 | 1.23k | sgroup->setProp<unsigned int>("COMPNO", compno); |
775 | 1.23k | } |
776 | 3.33k | } |
777 | | |
778 | | void ParseSGroupV2000SAPLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
779 | | const std::string &text, unsigned int line, |
780 | 10.5k | bool strictParsing) { |
781 | 10.5k | PRECONDITION(mol, "bad mol"); |
782 | 10.5k | PRECONDITION(text.substr(0, 6) == "M SAP", "bad SAP line"); |
783 | | |
784 | 10.5k | unsigned int pos = 6; |
785 | 10.5k | bool ok; |
786 | 10.5k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
787 | 10.5k | if (!ok) { |
788 | 1.44k | return; |
789 | 1.44k | } |
790 | 9.08k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
791 | 9.08k | if (!sgroup) { |
792 | 1.14k | return; |
793 | 1.14k | } |
794 | | |
795 | 7.93k | unsigned int nent = |
796 | 7.93k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
797 | 7.93k | if (!ok) { |
798 | 345 | sgroup->setIsValid(false); |
799 | 345 | return; |
800 | 345 | } |
801 | | |
802 | 14.3k | for (unsigned int ie = 0; ie < nent; ++ie) { |
803 | 8.58k | int lvIdx = -1; |
804 | 8.58k | if (text.size() < pos + 11) { |
805 | 3.56k | std::ostringstream errout; |
806 | 3.56k | errout << "SGroup SAP line too short: '" << text << "' on line " << line; |
807 | 3.56k | if (strictParsing) { |
808 | 1 | throw FileParseException(errout.str()); |
809 | 3.56k | } else { |
810 | 3.56k | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
811 | 3.56k | if (text.size() < pos + 4) { |
812 | 440 | sgroup->setIsValid(false); |
813 | 440 | return; |
814 | 440 | } |
815 | 3.12k | lvIdx = mol->getNumAtoms(); |
816 | 3.12k | } |
817 | 3.56k | } |
818 | | |
819 | 8.14k | std::string id = " "; |
820 | 8.14k | unsigned int aIdxMark = |
821 | 8.14k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
822 | 8.14k | if (!ok) { |
823 | 1.04k | sgroup->setIsValid(false); |
824 | 1.04k | return; |
825 | 1.04k | } |
826 | 7.09k | unsigned int aIdx = mol->getAtomWithBookmark(aIdxMark)->getIdx(); |
827 | | |
828 | 7.09k | if (lvIdx == -1) { |
829 | 4.53k | unsigned int lvIdxMark = |
830 | 4.53k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
831 | 4.53k | if (!ok) { |
832 | 275 | sgroup->setIsValid(false); |
833 | 275 | return; |
834 | 275 | } |
835 | 4.25k | if (lvIdxMark != 0) { |
836 | 2.01k | lvIdx = mol->getAtomWithBookmark(lvIdxMark)->getIdx(); |
837 | 2.01k | } |
838 | 4.25k | if (text.size() >= pos + 3) { |
839 | 4.25k | id = text.substr(pos + 1, 2); |
840 | 4.25k | pos += 3; |
841 | 4.25k | } |
842 | 4.25k | } |
843 | | |
844 | 6.82k | try { |
845 | 6.82k | sgroup->addAttachPoint(aIdx, lvIdx, id); |
846 | 6.82k | } catch (const std::exception &e) { |
847 | 0 | SGroupWarnOrThrow<>(strictParsing, e.what()); |
848 | 0 | sgroup->setIsValid(false); |
849 | 0 | return; |
850 | 0 | } |
851 | 6.82k | } |
852 | 7.58k | } |
853 | | |
854 | | void ParseSGroupV2000SCLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
855 | | const std::string &text, unsigned int line, |
856 | 2.34k | bool strictParsing) { |
857 | 2.34k | PRECONDITION(mol, "bad mol"); |
858 | 2.34k | PRECONDITION(text.substr(0, 6) == "M SCL", "bad SCL line"); |
859 | | |
860 | 2.34k | unsigned int pos = 6; |
861 | 2.34k | bool ok; |
862 | 2.34k | unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos); |
863 | 2.34k | if (!ok) { |
864 | 420 | return; |
865 | 420 | } |
866 | 1.92k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
867 | 1.92k | if (!sgroup) { |
868 | 262 | return; |
869 | 262 | } |
870 | 1.66k | if (pos + 1 >= text.length()) { |
871 | 457 | std::ostringstream errout; |
872 | 457 | errout << "SGroup SCL line too short: '" << text << "' on line " << line; |
873 | 457 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
874 | 457 | sgroup->setIsValid(false); |
875 | 457 | return; |
876 | 457 | } |
877 | | |
878 | 1.20k | ++pos; |
879 | 1.20k | sgroup->setProp("CLASS", text.substr(pos, text.length() - pos)); |
880 | 1.20k | } |
881 | | |
882 | | void ParseSGroupV2000SBTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol, |
883 | | const std::string &text, unsigned int line, |
884 | 4.08k | bool strictParsing) { |
885 | 4.08k | PRECONDITION(mol, "bad mol"); |
886 | 4.08k | PRECONDITION(text.substr(0, 6) == "M SBT", "bad SBT line"); |
887 | | |
888 | 4.08k | unsigned int pos = 6; |
889 | 4.08k | bool ok; |
890 | 4.08k | unsigned int nent = |
891 | 4.08k | ParseSGroupIntField(ok, strictParsing, text, line, pos, true); |
892 | 4.08k | if (!ok) { |
893 | 695 | return; |
894 | 695 | } |
895 | | |
896 | 5.21k | for (unsigned int ie = 0; ie < nent; ++ie) { |
897 | 3.96k | if (text.size() < pos + 8) { |
898 | 1.08k | std::ostringstream errout; |
899 | 1.08k | errout << "SGroup SBT line too short: '" << text << "' on line " << line; |
900 | 1.08k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
901 | 1.08k | return; |
902 | 1.08k | } |
903 | | |
904 | 2.87k | unsigned int sgIdx = |
905 | 2.87k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
906 | 2.87k | if (!ok) { |
907 | 183 | return; |
908 | 183 | } |
909 | 2.69k | SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line); |
910 | 2.69k | if (!sgroup) { |
911 | 284 | return; |
912 | 284 | } |
913 | 2.41k | unsigned int bracketType = |
914 | 2.41k | ParseSGroupIntField(ok, strictParsing, text, line, pos); |
915 | 2.41k | if (!ok) { |
916 | 349 | sgroup->setIsValid(false); |
917 | 349 | return; |
918 | 349 | } |
919 | | |
920 | 2.06k | if (bracketType == 0) { |
921 | 1.02k | sgroup->setProp("BRKTYP", "BRACKET"); |
922 | 1.03k | } else if (bracketType == 1) { |
923 | 803 | sgroup->setProp("BRKTYP", "PAREN"); |
924 | 803 | } else { |
925 | 235 | std::ostringstream errout; |
926 | 235 | errout << "Invalid SBT value '" << bracketType << "' on line " << line; |
927 | 235 | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
928 | 235 | sgroup->setIsValid(false); |
929 | 235 | return; |
930 | 235 | } |
931 | 2.06k | } |
932 | 3.38k | } |
933 | | |
934 | | /* ------------------ V3000 Utils ------------------ */ |
935 | | |
936 | | template <class T> |
937 | | std::vector<T> ParseV3000Array(std::stringstream &stream, int maxV, |
938 | 9.47k | bool strictParsing) { |
939 | 9.47k | auto paren = stream.get(); // discard parentheses |
940 | 9.47k | if (paren != '(') { |
941 | 8.95k | BOOST_LOG(rdWarningLog) |
942 | 0 | << "WARNING: first character of V3000 array is not '('" << std::endl; |
943 | 8.95k | } |
944 | | |
945 | 9.47k | unsigned int count = 0; |
946 | 9.47k | stream >> count; |
947 | 9.47k | std::vector<T> values; |
948 | 9.47k | if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) { |
949 | 3.06k | SGroupWarnOrThrow(strictParsing, "invalid count value"); |
950 | 3.06k | return values; |
951 | 3.06k | } |
952 | | |
953 | 6.41k | values.reserve(count); |
954 | 6.41k | T value; |
955 | 13.8k | for (unsigned i = 0; i < count; ++i) { |
956 | 7.43k | stream >> value; |
957 | 7.43k | values.push_back(value); |
958 | 7.43k | } |
959 | 6.41k | paren = stream.get(); // discard parentheses |
960 | 6.41k | if (paren != ')') { |
961 | 6.26k | BOOST_LOG(rdWarningLog) |
962 | 0 | << "WARNING: final character of V3000 array is not ')'" << std::endl; |
963 | 6.26k | } |
964 | 6.41k | return values; |
965 | 9.47k | } std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > RDKit::SGroupParsing::ParseV3000Array<unsigned int>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool) Line | Count | Source | 938 | 8.55k | bool strictParsing) { | 939 | 8.55k | auto paren = stream.get(); // discard parentheses | 940 | 8.55k | if (paren != '(') { | 941 | 8.44k | BOOST_LOG(rdWarningLog) | 942 | 0 | << "WARNING: first character of V3000 array is not '('" << std::endl; | 943 | 8.44k | } | 944 | | | 945 | 8.55k | unsigned int count = 0; | 946 | 8.55k | stream >> count; | 947 | 8.55k | std::vector<T> values; | 948 | 8.55k | if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) { | 949 | 2.73k | SGroupWarnOrThrow(strictParsing, "invalid count value"); | 950 | 2.73k | return values; | 951 | 2.73k | } | 952 | | | 953 | 5.81k | values.reserve(count); | 954 | 5.81k | T value; | 955 | 10.7k | for (unsigned i = 0; i < count; ++i) { | 956 | 4.89k | stream >> value; | 957 | 4.89k | values.push_back(value); | 958 | 4.89k | } | 959 | 5.81k | paren = stream.get(); // discard parentheses | 960 | 5.81k | if (paren != ')') { | 961 | 5.72k | BOOST_LOG(rdWarningLog) | 962 | 0 | << "WARNING: final character of V3000 array is not ')'" << std::endl; | 963 | 5.72k | } | 964 | 5.81k | return values; | 965 | 8.55k | } |
Unexecuted instantiation: std::__1::vector<int, std::__1::allocator<int> > RDKit::SGroupParsing::ParseV3000Array<int>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool) std::__1::vector<double, std::__1::allocator<double> > RDKit::SGroupParsing::ParseV3000Array<double>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool) Line | Count | Source | 938 | 923 | bool strictParsing) { | 939 | 923 | auto paren = stream.get(); // discard parentheses | 940 | 923 | if (paren != '(') { | 941 | 510 | BOOST_LOG(rdWarningLog) | 942 | 0 | << "WARNING: first character of V3000 array is not '('" << std::endl; | 943 | 510 | } | 944 | | | 945 | 923 | unsigned int count = 0; | 946 | 923 | stream >> count; | 947 | 923 | std::vector<T> values; | 948 | 923 | if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) { | 949 | 327 | SGroupWarnOrThrow(strictParsing, "invalid count value"); | 950 | 327 | return values; | 951 | 327 | } | 952 | | | 953 | 596 | values.reserve(count); | 954 | 596 | T value; | 955 | 3.13k | for (unsigned i = 0; i < count; ++i) { | 956 | 2.53k | stream >> value; | 957 | 2.53k | values.push_back(value); | 958 | 2.53k | } | 959 | 596 | paren = stream.get(); // discard parentheses | 960 | 596 | if (paren != ')') { | 961 | 539 | BOOST_LOG(rdWarningLog) | 962 | 0 | << "WARNING: final character of V3000 array is not ')'" << std::endl; | 963 | 539 | } | 964 | 596 | return values; | 965 | 923 | } |
|
966 | | |
967 | | // force instantiation of the versions of this that we use |
968 | | template std::vector<unsigned int> ParseV3000Array(std::stringstream &stream, |
969 | | int, bool); |
970 | | template std::vector<int> ParseV3000Array(std::stringstream &stream, int, bool); |
971 | | |
972 | | void ParseV3000CStateLabel(RWMol *mol, SubstanceGroup &sgroup, |
973 | | std::stringstream &stream, unsigned int line, |
974 | 1.79k | bool strictParsing) { |
975 | 1.79k | stream.get(); // discard parentheses |
976 | | |
977 | 1.79k | unsigned int count; |
978 | 1.79k | unsigned int bondMark; |
979 | 1.79k | stream >> count >> bondMark; |
980 | | |
981 | 1.79k | std::string type = sgroup.getProp<std::string>("TYPE"); |
982 | | |
983 | 1.79k | if ((type != "SUP" && count != 1) || (type == "SUP" && count != 4)) { |
984 | 1.16k | std::ostringstream errout; |
985 | 1.16k | errout << "Unexpected number of fields for CSTATE field on line " << line; |
986 | 1.16k | SGroupWarnOrThrow<>(strictParsing, errout.str()); |
987 | 1.16k | sgroup.setIsValid(false); |
988 | 1.16k | return; |
989 | 1.16k | } |
990 | | |
991 | 628 | Bond *bond = mol->getUniqueBondWithBookmark(bondMark); |
992 | | |
993 | 628 | RDGeom::Point3D vector; |
994 | 628 | if (type == "SUP") { |
995 | 0 | stream >> vector.x >> vector.y >> vector.z; |
996 | 0 | } |
997 | 628 | try { |
998 | 628 | sgroup.addCState(bond->getIdx(), vector); |
999 | 628 | } catch (const std::exception &e) { |
1000 | 0 | SGroupWarnOrThrow<>(strictParsing, e.what()); |
1001 | 0 | sgroup.setIsValid(false); |
1002 | 0 | return; |
1003 | 0 | } |
1004 | | |
1005 | 0 | stream.get(); // discard final parentheses |
1006 | 0 | } |
1007 | | |
1008 | | void ParseV3000SAPLabel(RWMol *mol, SubstanceGroup &sgroup, |
1009 | 3.91k | std::stringstream &stream, bool strictParsing) { |
1010 | 3.91k | stream.get(); // discard parentheses |
1011 | | |
1012 | 3.91k | unsigned int count = 0; |
1013 | 3.91k | unsigned int aIdxMark = 0; |
1014 | 3.91k | std::string lvIdxStr; // In V3000 this may be a string |
1015 | 3.91k | std::string sapIdStr; |
1016 | 3.91k | stream >> count >> aIdxMark >> lvIdxStr >> sapIdStr; |
1017 | | |
1018 | | // remove final parentheses that gets parsed into sapIdStr |
1019 | 3.91k | sapIdStr.pop_back(); |
1020 | | |
1021 | 3.91k | unsigned int aIdx = mol->getAtomWithBookmark(aIdxMark)->getIdx(); |
1022 | 3.91k | int lvIdx = -1; |
1023 | | |
1024 | 3.91k | boost::to_upper(lvIdxStr); |
1025 | 3.91k | if (lvIdxStr == "AIDX") { |
1026 | 254 | lvIdx = aIdx; |
1027 | 3.65k | } else { |
1028 | 3.65k | unsigned int lvIdxTmp = FileParserUtils::toInt(lvIdxStr); |
1029 | 3.65k | if (lvIdxTmp > 0) { |
1030 | 177 | lvIdx = mol->getAtomWithBookmark(lvIdxTmp)->getIdx(); |
1031 | 177 | } |
1032 | 3.65k | } |
1033 | | |
1034 | 3.91k | try { |
1035 | 3.91k | sgroup.addAttachPoint(aIdx, lvIdx, sapIdStr); |
1036 | 3.91k | } catch (const std::exception &e) { |
1037 | 0 | SGroupWarnOrThrow<>(strictParsing, e.what()); |
1038 | 0 | sgroup.setIsValid(false); |
1039 | 0 | return; |
1040 | 0 | } |
1041 | 3.91k | } |
1042 | | |
1043 | 130k | std::string ParseV3000StringPropLabel(std::stringstream &stream) { |
1044 | 130k | std::string strValue; |
1045 | | |
1046 | 130k | auto nextChar = stream.peek(); |
1047 | 130k | if (nextChar == ' ') { |
1048 | | // empty value, we peeked at the next field's separator |
1049 | 26.5k | return strValue; |
1050 | 103k | } else if (nextChar == '"') { |
1051 | | // skip the opening quote: |
1052 | 1.67k | stream.get(); |
1053 | | |
1054 | | // this is a bit gross because it's legal to include a \" in a value, |
1055 | | // but the way that's done is by doubling it. So |
1056 | | // FIELDINFO="""" |
1057 | | // should assign the value \" to FIELDINFO |
1058 | 1.67k | char chr; |
1059 | 232k | while (stream.get(chr)) { |
1060 | 231k | if (chr == '"') { |
1061 | 2.04k | nextChar = stream.peek(); |
1062 | | |
1063 | | // if the next element in the stream is a \" then we have a quoted \". |
1064 | | // Otherwise we're done |
1065 | 2.04k | if (nextChar != '"') { |
1066 | 900 | break; |
1067 | 1.14k | } else { |
1068 | | // skip the second \" |
1069 | 1.14k | stream.get(); |
1070 | 1.14k | } |
1071 | 2.04k | } |
1072 | 230k | strValue += chr; |
1073 | 230k | } |
1074 | 101k | } else if (nextChar == '\'') { |
1075 | 2.65k | std::getline(stream, strValue, '\''); |
1076 | 99.1k | } else { |
1077 | 99.1k | stream >> strValue; |
1078 | 99.1k | } |
1079 | | |
1080 | 103k | boost::trim_right(strValue); |
1081 | 103k | return strValue; |
1082 | 130k | } |
1083 | | |
1084 | | void ParseV3000ParseLabel(const std::string &label, |
1085 | | std::stringstream &lineStream, STR_VECT &dataFields, |
1086 | | unsigned int line, SubstanceGroup &sgroup, size_t, |
1087 | 147k | RWMol *mol, bool strictParsing) { |
1088 | 147k | PRECONDITION(mol, "bad mol"); |
1089 | | // TODO: we could handle these in a more structured way |
1090 | 147k | try { |
1091 | 147k | if (label == "XBHEAD" || label == "XBCORR") { |
1092 | 2.17k | std::vector<unsigned int> bvect = ParseV3000Array<unsigned int>( |
1093 | 2.17k | lineStream, mol->getNumBonds(), strictParsing); |
1094 | 2.17k | std::transform(bvect.begin(), bvect.end(), bvect.begin(), |
1095 | 2.17k | [](unsigned int v) -> unsigned int { return v - 1; }); |
1096 | 2.17k | sgroup.setProp(label, bvect); |
1097 | 145k | } else if (label == "ATOMS") { |
1098 | 4.36k | for (auto atomIdx : ParseV3000Array<unsigned int>( |
1099 | 4.36k | lineStream, mol->getNumAtoms(), strictParsing)) { |
1100 | 3.98k | sgroup.addAtomWithBookmark(atomIdx); |
1101 | 3.98k | } |
1102 | 141k | } else if (label == "PATOMS") { |
1103 | 1.36k | for (auto patomIdx : ParseV3000Array<unsigned int>( |
1104 | 1.36k | lineStream, mol->getNumAtoms(), strictParsing)) { |
1105 | 914 | sgroup.addParentAtomWithBookmark(patomIdx); |
1106 | 914 | } |
1107 | 139k | } else if (label == "CBONDS" || label == "XBONDS") { |
1108 | 646 | for (auto bondIdx : ParseV3000Array<unsigned int>( |
1109 | 646 | lineStream, mol->getNumBonds(), strictParsing)) { |
1110 | 0 | sgroup.addBondWithBookmark(bondIdx); |
1111 | 0 | } |
1112 | 139k | } else if (label == "BRKXYZ") { |
1113 | 923 | auto coords = ParseV3000Array<double>(lineStream, 9, strictParsing); |
1114 | 923 | if (coords.size() != 9) { |
1115 | 764 | std::ostringstream errout; |
1116 | 764 | errout << "Unexpected number of coordinates for BRKXYZ on line " |
1117 | 764 | << line; |
1118 | 764 | throw FileParseException(errout.str()); |
1119 | 764 | } |
1120 | | |
1121 | 159 | SubstanceGroup::Bracket bracket; |
1122 | 636 | for (unsigned int i = 0; i < 3; ++i) { |
1123 | 477 | bracket[i] = RDGeom::Point3D(*(coords.begin() + (3 * i)), |
1124 | 477 | *(coords.begin() + (3 * i) + 1), |
1125 | 477 | *(coords.begin() + (3 * i) + 2)); |
1126 | 477 | } |
1127 | 159 | sgroup.addBracket(bracket); |
1128 | 138k | } else if (label == "CSTATE") { |
1129 | 1.79k | ParseV3000CStateLabel(mol, sgroup, lineStream, line, strictParsing); |
1130 | 136k | } else if (label == "SAP") { |
1131 | 3.91k | ParseV3000SAPLabel(mol, sgroup, lineStream, strictParsing); |
1132 | 132k | } else if (label == "PARENT") { |
1133 | | // Store relationship until all SGroups have been read |
1134 | 1.80k | unsigned int parentIdx; |
1135 | 1.80k | if (lineStream.eof()) { |
1136 | 209 | std::ostringstream errout; |
1137 | 209 | errout << "PARENT label not found on line " << line; |
1138 | 209 | throw FileParseException(errout.str()); |
1139 | 209 | } |
1140 | 1.59k | lineStream >> parentIdx; |
1141 | 1.59k | if (lineStream.fail()) { |
1142 | 244 | std::ostringstream errout; |
1143 | 244 | errout << "Invalid PARENT label found on line " << line; |
1144 | 244 | throw FileParseException(errout.str()); |
1145 | 244 | } |
1146 | 1.35k | sgroup.setProp<unsigned int>("PARENT", parentIdx); |
1147 | 130k | } else if (label == "COMPNO") { |
1148 | 524 | unsigned int compno; |
1149 | 524 | lineStream >> compno; |
1150 | 524 | if (compno > 256u) { |
1151 | 278 | std::ostringstream errout; |
1152 | 278 | errout << "SGroup SNC value over 256: '" << compno << "' on line " |
1153 | 278 | << line; |
1154 | 278 | throw FileParseException(errout.str()); |
1155 | 278 | } |
1156 | 246 | sgroup.setProp<unsigned int>("COMPNO", compno); |
1157 | 130k | } else if (label == "FIELDDATA") { |
1158 | 9.86k | auto strValue = ParseV3000StringPropLabel(lineStream); |
1159 | 9.86k | if (strictParsing) { |
1160 | 2.26k | strValue = strValue.substr(0, 200); |
1161 | 2.26k | } |
1162 | 9.86k | dataFields.push_back(strValue); |
1163 | | |
1164 | 120k | } else { |
1165 | | // Parse string props |
1166 | 120k | auto strValue = ParseV3000StringPropLabel(lineStream); |
1167 | | |
1168 | 120k | if (label == "SUBTYPE" && |
1169 | 1.43k | !SubstanceGroupChecks::isValidSubType(strValue)) { |
1170 | 1.37k | std::ostringstream errout; |
1171 | 1.37k | errout << "Unsupported SGroup subtype '" << strValue << "' on line " |
1172 | 1.37k | << line; |
1173 | 1.37k | throw FileParseException(errout.str()); |
1174 | 118k | } else if (label == "CONNECT" && |
1175 | 513 | !SubstanceGroupChecks::isValidConnectType(strValue)) { |
1176 | 449 | std::ostringstream errout; |
1177 | 449 | errout << "Unsupported SGroup connection type '" << strValue |
1178 | 449 | << "' on line " << line; |
1179 | 449 | throw FileParseException(errout.str()); |
1180 | 118k | } else if (label == "CLASS" && |
1181 | 1.33k | !SubstanceGroupChecks::isValidClass(strValue)) { |
1182 | 925 | std::ostringstream errout; |
1183 | 925 | errout << "Unsupported SGroup template class '" << strValue |
1184 | 925 | << "' on line " << line; |
1185 | 925 | throw FileParseException(errout.str()); |
1186 | 925 | } |
1187 | | // NATREPLACE is not validated nor used |
1188 | | |
1189 | 117k | sgroup.setProp(label, strValue); |
1190 | 117k | } |
1191 | 147k | } catch (const std::exception &e) { |
1192 | 6.80k | SGroupWarnOrThrow<>(strictParsing, e.what()); |
1193 | 6.80k | sgroup.setIsValid(false); |
1194 | 6.80k | return; |
1195 | 6.80k | } |
1196 | 147k | } |
1197 | | |
1198 | | std::string ParseV3000SGroupsBlock(std::istream *inStream, unsigned int &line, |
1199 | | unsigned int nSgroups, RWMol *mol, |
1200 | 1.17k | bool strictParsing) { |
1201 | 1.17k | PRECONDITION(inStream, "no stream"); |
1202 | 1.17k | PRECONDITION(mol, "no molecule"); |
1203 | 1.17k | unsigned int defaultLineNum = 0; |
1204 | 1.17k | std::string defaultString; |
1205 | | |
1206 | | // SGroups may be written in unsorted ID order, according to spec, so we will |
1207 | | // temporarily store them in a map before adding them to the mol |
1208 | 1.17k | IDX_TO_SGROUP_MAP sGroupMap; |
1209 | | |
1210 | 1.17k | std::unordered_map<std::string, std::stringstream> defaultLabels; |
1211 | | |
1212 | 1.17k | auto tempStr = FileParserUtils::getV3000Line(inStream, line); |
1213 | | |
1214 | | // Store defaults |
1215 | 1.17k | if (tempStr.substr(0, 7) == "DEFAULT" && tempStr.length() > 8) { |
1216 | 838 | defaultString = tempStr.substr(7); |
1217 | 838 | defaultLineNum = line; |
1218 | 838 | boost::trim_right(defaultString); |
1219 | 838 | tempStr = FileParserUtils::getV3000Line(inStream, line); |
1220 | 838 | boost::trim_right(tempStr); |
1221 | 838 | } |
1222 | | |
1223 | 22.7k | for (unsigned int si = 0; si < nSgroups; ++si) { |
1224 | 21.6k | unsigned int sequenceId; |
1225 | 21.6k | unsigned int externalId; |
1226 | 21.6k | std::string type; |
1227 | | |
1228 | 21.6k | std::stringstream lineStream(tempStr); |
1229 | 21.6k | lineStream >> sequenceId; |
1230 | 21.6k | lineStream >> type; |
1231 | 21.6k | lineStream >> externalId; |
1232 | | |
1233 | 21.6k | std::set<std::string> parsedLabels; |
1234 | 21.6k | if (strictParsing && !SubstanceGroupChecks::isValidType(type)) { |
1235 | 7 | std::ostringstream errout; |
1236 | 7 | errout << "Unsupported SGroup type '" << type << "' on line " << line; |
1237 | 7 | throw MolFileUnhandledFeatureException(errout.str()); |
1238 | 21.6k | } else if (!strictParsing && |
1239 | 21.4k | nSgroups == std::numeric_limits<unsigned int>::max() && |
1240 | 371 | lineStream.fail()) { |
1241 | | // something went wrong and we didn't know how many SGroups to expect, and |
1242 | | // now we have seen something that doesn't look like an SGroup start. |
1243 | | // So we assume we're done. |
1244 | 44 | nSgroups = 0; |
1245 | 44 | break; |
1246 | 44 | } |
1247 | | |
1248 | 21.6k | SubstanceGroup sgroup(mol, type); |
1249 | 21.6k | STR_VECT dataFields; |
1250 | | |
1251 | 21.6k | sgroup.setProp<unsigned int>("index", sequenceId); |
1252 | 21.6k | if (externalId > 0) { |
1253 | 5.94k | if (!SubstanceGroupChecks::isSubstanceGroupIdFree(*mol, externalId)) { |
1254 | 0 | std::ostringstream errout; |
1255 | 0 | errout << "Existing SGroup ID '" << externalId |
1256 | 0 | << "' assigned to a second SGroup on line " << line; |
1257 | 0 | if (strictParsing) { |
1258 | 0 | throw FileParseException(errout.str()); |
1259 | 0 | } else { |
1260 | 0 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
1261 | 0 | sgroup.setIsValid(false); |
1262 | 0 | } |
1263 | 0 | } |
1264 | | |
1265 | 5.94k | sgroup.setProp<unsigned int>("ID", externalId); |
1266 | 5.94k | } |
1267 | | |
1268 | 56.1k | while (sgroup.getIsValid() && !lineStream.eof() && !lineStream.fail()) { |
1269 | 34.5k | char spacer; |
1270 | 34.5k | std::string label; |
1271 | | |
1272 | 34.5k | lineStream.get(spacer); |
1273 | 34.5k | if (lineStream.gcount() == 0) { |
1274 | 153 | continue; |
1275 | 34.4k | } else if (spacer != ' ') { |
1276 | 1.60k | std::ostringstream errout; |
1277 | 1.60k | errout << "Found character '" << spacer |
1278 | 1.60k | << "' when expecting a separator (space) on line " << line; |
1279 | 1.60k | if (strictParsing) { |
1280 | 2 | throw FileParseException(errout.str()); |
1281 | 1.59k | } else { |
1282 | 1.59k | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
1283 | 1.59k | sgroup.setIsValid(false); |
1284 | 1.59k | continue; |
1285 | 1.59k | } |
1286 | 1.60k | } |
1287 | | |
1288 | 32.8k | std::getline(lineStream, label, '='); |
1289 | 32.8k | if (label.empty()) { |
1290 | 710 | continue; |
1291 | 710 | } |
1292 | 32.1k | ParseV3000ParseLabel(label, lineStream, dataFields, line, sgroup, |
1293 | 32.1k | nSgroups, mol, strictParsing); |
1294 | 32.1k | parsedLabels.insert(label); |
1295 | 32.1k | } |
1296 | | |
1297 | | // Process defaults |
1298 | 21.6k | lineStream.clear(); |
1299 | 21.6k | lineStream.str(defaultString); |
1300 | 150k | while (sgroup.getIsValid() && !lineStream.eof() && !lineStream.fail()) { |
1301 | 128k | char spacer; |
1302 | 128k | std::string label; |
1303 | | |
1304 | 128k | lineStream.get(spacer); |
1305 | 128k | if (lineStream.gcount() == 0) { |
1306 | 1.77k | continue; |
1307 | 127k | } else if (spacer != ' ') { |
1308 | 1.32k | std::ostringstream errout; |
1309 | 1.32k | errout << "Found character '" << spacer |
1310 | 1.32k | << "' when expecting a separator (space) in DEFAULTS on line " |
1311 | 1.32k | << defaultLineNum; |
1312 | 1.32k | if (strictParsing) { |
1313 | 1 | throw FileParseException(errout.str()); |
1314 | 1.32k | } else { |
1315 | 1.32k | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
1316 | 1.32k | sgroup.setIsValid(false); |
1317 | 1.32k | continue; |
1318 | 1.32k | } |
1319 | 1.32k | } |
1320 | | |
1321 | 125k | std::getline(lineStream, label, '='); |
1322 | 125k | if (label.empty()) { |
1323 | 4.37k | continue; |
1324 | 4.37k | } |
1325 | 121k | if (std::find(parsedLabels.begin(), parsedLabels.end(), label) == |
1326 | 121k | parsedLabels.end()) { |
1327 | 115k | ParseV3000ParseLabel(label, lineStream, dataFields, defaultLineNum, |
1328 | 115k | sgroup, nSgroups, mol, strictParsing); |
1329 | 115k | } else { |
1330 | 5.92k | spacer = lineStream.peek(); |
1331 | 5.92k | if (spacer == ' ') { |
1332 | 1.91k | std::ostringstream errout; |
1333 | 1.91k | errout << "Found unexpected whitespace at DEFAULT label " << label; |
1334 | 1.91k | if (strictParsing) { |
1335 | 2 | throw FileParseException(errout.str()); |
1336 | 1.91k | } else { |
1337 | 1.91k | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
1338 | 1.91k | sgroup.setIsValid(false); |
1339 | 1.91k | continue; |
1340 | 1.91k | } |
1341 | 4.00k | } else if (spacer == '(') { |
1342 | 128 | std::getline(lineStream, label, ')'); |
1343 | 128 | lineStream.get(spacer); |
1344 | 3.88k | } else if (spacer == '"') { |
1345 | 109 | lineStream.get(spacer); |
1346 | 109 | std::getline(lineStream, label, '"'); |
1347 | 3.77k | } else { |
1348 | 3.77k | std::getline(lineStream, label, ' '); |
1349 | 3.77k | lineStream.putback(' '); |
1350 | 3.77k | } |
1351 | 5.92k | } |
1352 | 121k | } |
1353 | | |
1354 | 21.6k | sgroup.setProp("DATAFIELDS", dataFields); |
1355 | 21.6k | sGroupMap.emplace(sequenceId, sgroup); |
1356 | | |
1357 | 21.6k | tempStr = FileParserUtils::getV3000Line(inStream, line); |
1358 | 21.6k | boost::trim_right(tempStr); |
1359 | 21.6k | } |
1360 | | |
1361 | 1.15k | if (sGroupMap.size() != nSgroups) { |
1362 | 37 | std::ostringstream errout; |
1363 | 37 | errout << "Found " << sGroupMap.size() << " SGroups when " << nSgroups |
1364 | 37 | << " were expected." << std::endl; |
1365 | 37 | if (strictParsing) { |
1366 | 0 | throw FileParseException(errout.str()); |
1367 | 37 | } else { |
1368 | 37 | BOOST_LOG(rdWarningLog) << errout.str() << std::endl; |
1369 | 37 | } |
1370 | 37 | } |
1371 | | // SGroups successfully parsed, now add them to the molecule |
1372 | 1.15k | for (const auto &sg : sGroupMap) { |
1373 | 201 | if (sg.second.getIsValid()) { |
1374 | 164 | addSubstanceGroup(*mol, sg.second); |
1375 | 164 | } else { |
1376 | 37 | BOOST_LOG(rdWarningLog) << "SGroup " << sg.first |
1377 | 0 | << " is invalid and will be ignored" << std::endl; |
1378 | 37 | } |
1379 | 201 | } |
1380 | 1.15k | return tempStr; |
1381 | 1.15k | } |
1382 | | |
1383 | | } // namespace SGroupParsing |
1384 | | } // namespace RDKit |