Coverage Report

Created: 2026-06-23 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rdkit/Code/GraphMol/FileParsers/MolSGroupParsing.cpp
Line
Count
Source
1
//
2
//  Copyright (C) 2002-2018 Greg Landrum and T5 Informatics GmbH
3
//
4
//   @@ All Rights Reserved @@
5
//  This file is part of the RDKit.
6
//  The contents are covered by the terms of the BSD license
7
//  which is included in the file license.txt, found at the root
8
//  of the RDKit source tree.
9
//
10
11
#include "FileParsers.h"
12
#include "FileParserUtils.h"
13
#include "MolSGroupParsing.h"
14
15
namespace RDKit {
16
namespace SGroupParsing {
17
18
/* ------------------ V2000 Utils  ------------------ */
19
20
unsigned int ParseSGroupIntField(const std::string &text, unsigned int line,
21
385k
                                 unsigned int &pos, bool isFieldCounter) {
22
385k
  ++pos;  // Account for separation space
23
385k
  unsigned int fieldValue;
24
385k
  size_t len = 3 - isFieldCounter;  // field counters are smaller
25
385k
  try {
26
385k
    fieldValue = FileParserUtils::toInt(text.substr(pos, len));
27
385k
  } catch (boost::bad_lexical_cast &) {
28
28.0k
    std::ostringstream errout;
29
28.0k
    errout << "Cannot convert '" << text.substr(pos, len) << "' to int on line "
30
28.0k
           << line;
31
28.0k
    throw FileParseException(errout.str());
32
28.0k
  } catch (const std::out_of_range &) {
33
8.26k
    std::ostringstream errout;
34
8.26k
    errout << "SGroup line too short: '" << text << "' on line " << line;
35
8.26k
    throw FileParseException(errout.str());
36
8.26k
  }
37
348k
  pos += len;
38
348k
  return fieldValue;
39
385k
}
40
41
unsigned int ParseSGroupIntField(bool &ok, bool strictParsing,
42
                                 const std::string &text, unsigned int line,
43
378k
                                 unsigned int &pos, bool isFieldCounter) {
44
378k
  ok = true;
45
378k
  unsigned int res = 0;
46
378k
  try {
47
378k
    res = ParseSGroupIntField(text, line, pos, isFieldCounter);
48
378k
  } catch (const std::exception &e) {
49
36.2k
    if (strictParsing) {
50
22
      throw;
51
36.2k
    } else {
52
36.2k
      ok = false;
53
36.2k
      BOOST_LOG(rdWarningLog) << e.what() << std::endl;
54
36.2k
    }
55
36.2k
  }
56
378k
  return res;
57
378k
}
58
59
double ParseSGroupDoubleField(const std::string &text, unsigned int line,
60
18.2k
                              unsigned int &pos) {
61
18.2k
  size_t len = 10;
62
18.2k
  double fieldValue;
63
18.2k
  try {
64
18.2k
    fieldValue = FileParserUtils::toDouble(text.substr(pos, len));
65
18.2k
  } catch (boost::bad_lexical_cast &) {
66
2.78k
    std::ostringstream errout;
67
2.78k
    errout << "Cannot convert '" << text.substr(pos, len)
68
2.78k
           << "' to double on line " << line;
69
2.78k
    throw FileParseException(errout.str());
70
2.78k
  } catch (const std::out_of_range &) {
71
1.76k
    std::ostringstream errout;
72
1.76k
    errout << "SGroup line too short: '" << text << "' on line " << line;
73
1.76k
    throw FileParseException(errout.str());
74
1.76k
  }
75
13.7k
  pos += len;
76
13.7k
  return fieldValue;
77
18.2k
}
78
79
double ParseSGroupDoubleField(bool &ok, bool strictParsing,
80
                              const std::string &text, unsigned int line,
81
18.2k
                              unsigned int &pos) {
82
18.2k
  ok = true;
83
18.2k
  double res = 0.;
84
18.2k
  try {
85
18.2k
    res = ParseSGroupDoubleField(text, line, pos);
86
18.2k
  } catch (const std::exception &e) {
87
4.55k
    if (strictParsing) {
88
2
      throw;
89
4.54k
    } else {
90
4.54k
      ok = false;
91
4.54k
      BOOST_LOG(rdWarningLog) << e.what() << std::endl;
92
4.54k
    }
93
4.55k
  }
94
18.2k
  return res;
95
18.2k
}
96
97
// Looks up the SGroup registered under `sgIdx` in `sGroupMap`.
// Returns a pointer to the mapped SubstanceGroup, or nullptr (after writing a
// warning that mentions `line`) when no such entry exists.
SubstanceGroup *FindSgIdx(IDX_TO_SGROUP_MAP &sGroupMap, int sgIdx,
                          unsigned int line) {
  const auto match = sGroupMap.find(sgIdx);
  if (match != sGroupMap.end()) {
    return &match->second;
  }

  BOOST_LOG(rdWarningLog) << "SGroup " << sgIdx << " referenced on line "
                          << line << " not found." << std::endl;
  return nullptr;
}
107
108
void ParseSGroupV2000STYLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
109
                             const std::string &text, unsigned int line,
110
28.6k
                             bool strictParsing) {
111
28.6k
  PRECONDITION(mol, "bad mol");
112
28.6k
  PRECONDITION(text.substr(0, 6) == "M  STY", "bad STY line");
113
114
28.6k
  unsigned int pos = 6;
115
28.6k
  bool ok;
116
28.6k
  unsigned int nent =
117
28.6k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
118
28.6k
  if (!ok) {
119
2.12k
    return;
120
2.12k
  }
121
122
62.5k
  for (unsigned int ie = 0; ie < nent; ++ie) {
123
53.7k
    if (text.size() < pos + 8) {
124
11.5k
      std::ostringstream errout;
125
11.5k
      errout << "SGroup STY line too short: '" << text << "' on line " << line;
126
11.5k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
127
11.5k
      return;
128
11.5k
    }
129
130
42.2k
    unsigned int sequenceId =
131
42.2k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
132
42.2k
    if (!ok) {
133
6.13k
      return;
134
6.13k
    }
135
136
36.0k
    std::string typ = text.substr(pos + 1, 3);
137
36.0k
    if (SubstanceGroupChecks::isValidType(typ)) {
138
20.1k
      auto sgroup = SubstanceGroup(mol, typ);
139
20.1k
      sgroup.setProp<unsigned int>("index", sequenceId);
140
20.1k
      sGroupMap.emplace(sequenceId, sgroup);
141
20.1k
    } else {
142
15.9k
      std::ostringstream errout;
143
15.9k
      errout << "S group " << typ << " on line " << line;
144
15.9k
      SGroupWarnOrThrow<MolFileUnhandledFeatureException>(strictParsing,
145
15.9k
                                                          errout.str());
146
15.9k
    }
147
36.0k
    pos += 4;
148
36.0k
  }
149
26.4k
}
150
151
void ParseSGroupV2000VectorDataLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
152
                                    const std::string &text, unsigned int line,
153
22.3k
                                    bool strictParsing) {
154
22.3k
  PRECONDITION(mol, "bad mol");
155
156
22.3k
  std::string typ = text.substr(3, 3);
157
158
22.3k
  void (SubstanceGroup::*sGroupAddIndexedElement)(const int) = nullptr;
159
160
22.3k
  if (typ == "SAL") {
161
12.3k
    sGroupAddIndexedElement = &SubstanceGroup::addAtomWithBookmark;
162
12.3k
  } else if (typ == "SBL") {
163
5.43k
    sGroupAddIndexedElement = &SubstanceGroup::addBondWithBookmark;
164
5.43k
  } else if (typ == "SPA") {
165
4.55k
    sGroupAddIndexedElement = &SubstanceGroup::addParentAtomWithBookmark;
166
4.55k
  } else {
167
0
    std::ostringstream errout;
168
0
    errout << "Unsupported SGroup line '" << typ
169
0
           << "' passed to Vector Data parser ";
170
0
    throw FileParseException(errout.str());
171
0
  }
172
173
22.3k
  unsigned int pos = 6;
174
22.3k
  bool ok;
175
22.3k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
176
22.3k
  if (!ok) {
177
1.76k
    return;
178
1.76k
  }
179
20.5k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
180
20.5k
  if (!sgroup) {
181
1.97k
    return;
182
1.97k
  }
183
18.6k
  unsigned int nent =
184
18.6k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
185
18.6k
  if (!ok) {
186
1.16k
    sgroup->setIsValid(false);
187
1.16k
    return;
188
1.16k
  }
189
190
39.1k
  for (unsigned int i = 0; i < nent; ++i) {
191
29.3k
    if (text.size() < pos + 4) {
192
1.33k
      std::ostringstream errout;
193
1.33k
      errout << "SGroup line too short: '" << text << "' on line " << line;
194
1.33k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
195
1.33k
      sgroup->setIsValid(false);
196
1.33k
      return;
197
1.33k
    }
198
27.9k
    unsigned int nbr = ParseSGroupIntField(ok, strictParsing, text, line, pos);
199
27.9k
    if (!ok) {
200
1.79k
      sgroup->setIsValid(false);
201
1.79k
      return;
202
1.79k
    }
203
26.1k
    try {
204
26.1k
      (sgroup->*sGroupAddIndexedElement)(nbr);
205
26.1k
    } catch (const std::exception &e) {
206
4.44k
      SGroupWarnOrThrow<>(strictParsing, e.what());
207
4.44k
      sgroup->setIsValid(false);
208
4.44k
      return;
209
4.44k
    }
210
26.1k
  }
211
17.4k
}
212
213
void ParseSGroupV2000SDILine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
214
                             const std::string &text, unsigned int line,
215
12.5k
                             bool strictParsing) {
216
12.5k
  PRECONDITION(mol, "bad mol");
217
12.5k
  PRECONDITION(text.substr(0, 6) == "M  SDI", "bad SDI line");
218
219
12.5k
  unsigned int pos = 6;
220
12.5k
  bool ok;
221
12.5k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
222
12.5k
  if (!ok) {
223
5.21k
    return;
224
5.21k
  }
225
7.35k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
226
7.35k
  if (!sgroup) {
227
386
    return;
228
386
  }
229
230
6.96k
  unsigned int nCoords =
231
6.96k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
232
6.96k
  if (!ok) {
233
634
    sgroup->setIsValid(false);
234
634
    return;
235
634
  }
236
6.33k
  if (nCoords != 4) {
237
454
    std::ostringstream errout;
238
454
    errout << "Unexpected number of coordinates for SDI on line " << line;
239
454
    SGroupWarnOrThrow<>(strictParsing, errout.str());
240
454
    sgroup->setIsValid(false);
241
454
    return;
242
454
  }
243
244
5.88k
  SubstanceGroup::Bracket bracket;
245
11.6k
  for (unsigned int i = 0; i < 2; ++i) {
246
9.31k
    double x = ParseSGroupDoubleField(ok, strictParsing, text, line, pos);
247
9.31k
    if (!ok) {
248
2.60k
      sgroup->setIsValid(false);
249
2.60k
      return;
250
2.60k
    }
251
6.71k
    double y = ParseSGroupDoubleField(ok, strictParsing, text, line, pos);
252
6.71k
    if (!ok) {
253
922
      sgroup->setIsValid(false);
254
922
      return;
255
922
    }
256
5.79k
    double z = 0.;
257
5.79k
    bracket[i] = RDGeom::Point3D(x, y, z);
258
5.79k
  }
259
2.35k
  bracket[2] = RDGeom::Point3D(0., 0., 0.);
260
2.35k
  try {
261
2.35k
    sgroup->addBracket(bracket);
262
2.35k
  } catch (const std::exception &e) {
263
0
    SGroupWarnOrThrow<>(strictParsing, e.what());
264
0
    sgroup->setIsValid(false);
265
0
    return;
266
0
  }
267
2.35k
}
268
269
void ParseSGroupV2000SSTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
270
                             const std::string &text, unsigned int &line,
271
3.68k
                             bool strictParsing) {
272
3.68k
  PRECONDITION(mol, "bad mol");
273
3.68k
  PRECONDITION(text.substr(0, 6) == "M  SST", "bad SST line");
274
275
3.68k
  unsigned int pos = 6;
276
3.68k
  bool ok;
277
3.68k
  unsigned int nent =
278
3.68k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
279
3.68k
  if (!ok) {
280
513
    return;
281
513
  }
282
283
5.04k
  for (unsigned int ie = 0; ie < nent; ++ie) {
284
4.67k
    if (text.size() < pos + 8) {
285
1.40k
      std::ostringstream errout;
286
1.40k
      errout << "SGroup SST line too short: '" << text << "' on line " << line;
287
1.40k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
288
1.40k
      return;
289
1.40k
    }
290
291
3.27k
    unsigned int sgIdx =
292
3.27k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
293
3.27k
    if (!ok) {
294
398
      return;
295
398
    }
296
2.87k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
297
2.87k
    if (!sgroup) {
298
360
      return;
299
2.51k
    };
300
301
2.51k
    std::string subType = text.substr(++pos, 3);
302
303
2.51k
    if (!SubstanceGroupChecks::isValidSubType(subType)) {
304
638
      std::ostringstream errout;
305
638
      errout << "Unsupported SGroup subtype '" << subType << "' on line "
306
638
             << line;
307
638
      SGroupWarnOrThrow<>(strictParsing, errout.str());
308
638
      sgroup->setIsValid(false);
309
638
      return;
310
638
    }
311
312
1.87k
    sgroup->setProp("SUBTYPE", subType);
313
1.87k
    pos += 3;
314
1.87k
  }
315
3.17k
}
316
317
void ParseSGroupV2000SMTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
318
                             const std::string &text, unsigned int &line,
319
3.69k
                             bool strictParsing) {
320
3.69k
  PRECONDITION(mol, "bad mol");
321
3.69k
  PRECONDITION(text.substr(0, 6) == "M  SMT", "bad SMT line");
322
323
3.69k
  unsigned int pos = 6;
324
3.69k
  bool ok;
325
3.69k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
326
3.69k
  if (!ok) {
327
718
    return;
328
718
  }
329
2.98k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
330
2.98k
  if (!sgroup) {
331
464
    return;
332
464
  }
333
2.51k
  ++pos;
334
335
2.51k
  if (pos >= text.length()) {
336
927
    std::ostringstream errout;
337
927
    errout << "SGroup line too short: '" << text << "' on line " << line;
338
927
    SGroupWarnOrThrow<>(strictParsing, errout.str());
339
927
    sgroup->setIsValid(false);
340
927
    return;
341
927
  }
342
1.59k
  std::string label = text.substr(pos, text.length() - pos);
343
344
1.59k
  if (sgroup->getProp<std::string>("TYPE") ==
345
1.59k
      "MUL") {  // Case of multiple groups
346
416
    sgroup->setProp("MULT", label);
347
348
1.17k
  } else {  // Case of abbreviation groups, but we might not have seen a SCL
349
            // line yet
350
1.17k
    sgroup->setProp("LABEL", label);
351
1.17k
  }
352
1.59k
}
353
354
void ParseSGroupV2000SLBLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
355
                             const std::string &text, unsigned int line,
356
11.0k
                             bool strictParsing) {
357
11.0k
  PRECONDITION(mol, "bad mol");
358
11.0k
  PRECONDITION(text.substr(0, 6) == "M  SLB", "bad SLB line");
359
360
11.0k
  unsigned int pos = 6;
361
11.0k
  bool ok;
362
11.0k
  unsigned int nent =
363
11.0k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
364
11.0k
  if (!ok) {
365
508
    return;
366
508
  }
367
368
26.2k
  for (unsigned int ie = 0; ie < nent; ++ie) {
369
25.2k
    if (text.size() < pos + 8) {
370
8.45k
      std::ostringstream errout;
371
8.45k
      errout << "SGroup SLB line too short: '" << text << "' on line " << line;
372
8.45k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
373
8.45k
      return;
374
8.45k
    }
375
376
16.8k
    unsigned int sgIdx =
377
16.8k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
378
16.8k
    if (!ok) {
379
555
      return;
380
555
    }
381
16.2k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
382
16.2k
    if (!sgroup) {
383
348
      return;
384
348
    }
385
15.9k
    unsigned int id = ParseSGroupIntField(ok, strictParsing, text, line, pos);
386
15.9k
    if (!ok) {
387
248
      sgroup->setIsValid(false);
388
248
      return;
389
248
    }
390
15.6k
    if (id != 0 && !SubstanceGroupChecks::isSubstanceGroupIdFree(*mol, id)) {
391
0
      std::ostringstream errout;
392
0
      errout << "SGroup ID '" << id
393
0
             << "' is assigned to more than one SGroup, on line " << line;
394
0
      SGroupWarnOrThrow<>(strictParsing, errout.str());
395
0
      sgroup->setIsValid(false);
396
0
      return;
397
0
    }
398
399
15.6k
    sgroup->setProp<unsigned int>("ID", id);
400
15.6k
  }
401
10.5k
}
402
403
void ParseSGroupV2000SCNLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
404
                             const std::string &text, unsigned int line,
405
3.45k
                             bool strictParsing) {
406
3.45k
  PRECONDITION(mol, "bad mol");
407
3.45k
  PRECONDITION(text.substr(0, 6) == "M  SCN", "bad SCN line");
408
409
3.45k
  unsigned int pos = 6;
410
3.45k
  bool ok;
411
3.45k
  unsigned int nent =
412
3.45k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
413
3.45k
  if (!ok) {
414
147
    return;
415
147
  }
416
417
5.80k
  for (unsigned int ie = 0; ie < nent; ++ie) {
418
5.41k
    if (text.size() < pos + 7) {
419
1.31k
      std::ostringstream errout;
420
1.31k
      errout << "SGroup SCN line too short: '" << text << "' on line " << line;
421
1.31k
      errout << "\n needed: " << pos + 7 << " found: " << text.size();
422
1.31k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
423
1.31k
      return;
424
1.31k
    }
425
426
4.10k
    unsigned int sgIdx =
427
4.10k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
428
4.10k
    if (!ok) {
429
541
      return;
430
541
    }
431
3.56k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
432
3.56k
    if (!sgroup) {
433
355
      return;
434
355
    }
435
436
3.21k
    std::string connect = text.substr(++pos, 2);
437
438
3.21k
    if (!SubstanceGroupChecks::isValidConnectType(connect)) {
439
710
      std::ostringstream errout;
440
710
      errout << "Unsupported SGroup connection type '" << connect
441
710
             << "' on line " << line;
442
710
      SGroupWarnOrThrow<>(strictParsing, errout.str());
443
710
      sgroup->setIsValid(false);
444
710
      return;
445
710
    }
446
447
2.50k
    sgroup->setProp("CONNECT", connect);
448
2.50k
    pos += 3;
449
2.50k
  }
450
3.30k
}
451
452
void ParseSGroupV2000SDSLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
453
                             const std::string &text, unsigned int line,
454
3.56k
                             bool strictParsing) {
455
3.56k
  PRECONDITION(mol, "bad mol");
456
3.56k
  PRECONDITION(text.substr(0, 10) == "M  SDS EXP", "bad SDS line");
457
458
3.53k
  unsigned int pos = 10;
459
3.53k
  bool ok;
460
3.53k
  unsigned int nent =
461
3.53k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
462
3.53k
  if (!ok) {
463
96
    return;
464
96
  }
465
466
6.65k
  for (unsigned int ie = 0; ie < nent; ++ie) {
467
6.14k
    if (text.size() < pos + 4) {
468
1.72k
      std::ostringstream errout;
469
1.72k
      errout << "SGroup SDS line too short: '" << text << "' on line " << line;
470
1.72k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
471
1.72k
      return;
472
1.72k
    }
473
4.42k
    unsigned int sgIdx =
474
4.42k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
475
4.42k
    if (!ok) {
476
675
      return;
477
675
    }
478
3.75k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
479
3.75k
    if (!sgroup) {
480
533
      return;
481
533
    }
482
483
3.21k
    sgroup->setProp("ESTATE", "E");
484
3.21k
  }
485
3.43k
}
486
487
void ParseSGroupV2000SBVLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
488
                             const std::string &text, unsigned int line,
489
7.64k
                             bool strictParsing) {
490
7.64k
  PRECONDITION(mol, "bad mol");
491
7.64k
  PRECONDITION(text.substr(0, 6) == "M  SBV", "bad SBV line");
492
493
7.64k
  unsigned int pos = 6;
494
7.64k
  bool ok;
495
7.64k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
496
7.64k
  if (!ok) {
497
657
    return;
498
657
  }
499
6.98k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
500
6.98k
  if (!sgroup) {
501
748
    return;
502
748
  }
503
504
6.23k
  unsigned int bondMark =
505
6.23k
      ParseSGroupIntField(ok, strictParsing, text, line, pos);
506
6.23k
  if (!ok) {
507
936
    sgroup->setIsValid(false);
508
936
    return;
509
936
  }
510
5.29k
  Bond *bond = mol->getUniqueBondWithBookmark(bondMark);
511
512
5.29k
  RDGeom::Point3D vector;
513
5.29k
  if (sgroup->getProp<std::string>("TYPE") == "SUP") {
514
1.39k
    vector.x = ParseSGroupDoubleField(ok, strictParsing, text, line, pos);
515
1.39k
    if (!ok) {
516
523
      sgroup->setIsValid(false);
517
523
      return;
518
523
    }
519
872
    vector.y = ParseSGroupDoubleField(ok, strictParsing, text, line, pos);
520
872
    if (!ok) {
521
499
      sgroup->setIsValid(false);
522
499
      return;
523
499
    }
524
373
    vector.z = 0.;
525
373
  }
526
527
4.27k
  try {
528
4.27k
    sgroup->addCState(bond->getIdx(), vector);
529
4.27k
  } catch (const std::exception &e) {
530
2.44k
    SGroupWarnOrThrow<>(strictParsing, e.what());
531
2.44k
    sgroup->setIsValid(false);
532
2.44k
    return;
533
2.44k
  }
534
4.27k
}
535
536
void ParseSGroupV2000SDTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
537
                             const std::string &text, unsigned int line,
538
18.5k
                             bool strictParsing) {
539
18.5k
  PRECONDITION(mol, "bad mol");
540
18.5k
  PRECONDITION(text.substr(0, 6) == "M  SDT", "bad SDT line");
541
542
18.5k
  unsigned int pos = 6;
543
18.5k
  bool ok;
544
18.5k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
545
18.5k
  if (!ok) {
546
1.64k
    return;
547
1.64k
  }
548
16.9k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
549
16.9k
  if (!sgroup) {
550
1.99k
    return;
551
1.99k
  }
552
553
14.9k
  std::string fieldName;
554
14.9k
  std::string fieldType;
555
14.9k
  std::string fieldInfo;
556
14.9k
  std::string queryType;
557
14.9k
  std::string queryOp;
558
559
14.9k
  try {
560
14.9k
    fieldName = text.substr(++pos, 30);
561
14.9k
    boost::trim_right(fieldName);
562
14.9k
    pos += 30;
563
14.9k
    fieldType = text.substr(pos, 2);
564
14.9k
    boost::trim_right(fieldType);
565
14.9k
    pos += 2;
566
14.9k
    fieldInfo = text.substr(pos, 20);
567
14.9k
    boost::trim_right(fieldInfo);
568
14.9k
    pos += 20;
569
14.9k
    queryType = text.substr(pos, 2);
570
14.9k
    boost::trim_right(queryType);
571
14.9k
    pos += 2;
572
14.9k
    queryOp = text.substr(pos, text.length() - pos);
573
14.9k
    boost::trim_right(queryOp);
574
14.9k
  } catch (const std::out_of_range &) {
575
    // all kinds of wild things out there... this insulates us from them without
576
    // making the code super complicated
577
11.1k
  }
578
579
  // only add entries for the remaining properties if they aren't blank
580
14.9k
  if (!fieldName.empty()) {
581
13.3k
    sgroup->setProp("FIELDNAME", fieldName);
582
13.3k
  }
583
14.9k
  if (!fieldType.empty()) {
584
6.45k
    sgroup->setProp("FIELDTYPE", fieldType);
585
6.45k
  }
586
14.9k
  if (!fieldInfo.empty()) {
587
6.11k
    sgroup->setProp("FIELDINFO", fieldInfo);
588
6.11k
  }
589
14.9k
  if (!queryType.empty()) {
590
3.71k
    sgroup->setProp("QUERYTYPE", queryType);
591
3.71k
  }
592
14.9k
  if (!queryOp.empty()) {
593
2.94k
    sgroup->setProp("QUERYOP", queryOp);
594
2.94k
  }
595
14.9k
}
596
597
void ParseSGroupV2000SDDLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
598
                             const std::string &text, unsigned int line,
599
1.82k
                             bool strictParsing) {
600
1.82k
  PRECONDITION(mol, "bad mol");
601
1.82k
  PRECONDITION(text.substr(0, 6) == "M  SDD", "bad SDD line");
602
603
1.82k
  unsigned int pos = 6;
604
1.82k
  bool ok;
605
1.82k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
606
1.82k
  if (!ok) {
607
344
    return;
608
344
  }
609
1.48k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
610
1.48k
  if (!sgroup) {
611
300
    return;
612
300
  }
613
614
  // Store the rest of the line as is.
615
1.18k
  ++pos;
616
1.18k
  if (pos < text.length()) {
617
1.00k
    sgroup->setProp("FIELDDISP", text.substr(pos, text.length() - pos));
618
1.00k
  }
619
1.18k
}
620
621
void ParseSGroupV2000SCDSEDLine(IDX_TO_SGROUP_MAP &sGroupMap,
622
                                IDX_TO_STR_VECT_MAP &dataFieldsMap, RWMol *mol,
623
                                const std::string &text, unsigned int line,
624
                                bool strictParsing, unsigned int &counter,
625
                                unsigned int &lastDataSGroup,
626
51.0k
                                std::ostringstream &currentDataField) {
627
51.0k
  PRECONDITION(mol, "bad mol");
628
629
51.0k
  unsigned int pos = 3;
630
51.0k
  std::string type = text.substr(pos, 3);
631
51.0k
  pos += 3;
632
633
51.0k
  bool ok;
634
51.0k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
635
51.0k
  if (!ok) {
636
1.77k
    return;
637
1.77k
  }
638
49.2k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
639
49.2k
  if (!sgroup) {
640
2.58k
    return;
641
2.58k
  }
642
643
46.7k
  if (lastDataSGroup != 0 && lastDataSGroup != sgIdx) {
644
3.79k
    std::ostringstream errout;
645
3.79k
    errout << "Found a Data Field not matching the SGroup of the last Data "
646
3.79k
              "Field at line "
647
3.79k
           << line;
648
3.79k
    SGroupWarnOrThrow<>(strictParsing, errout.str());
649
3.79k
    sgroup->setIsValid(false);
650
3.79k
    return;
651
42.9k
  } else if (lastDataSGroup == 0 && type == "SCD") {
652
8.77k
    lastDataSGroup = sgIdx;
653
34.1k
  } else if (type == "SED") {
654
33.5k
    lastDataSGroup = 0;
655
33.5k
  }
656
657
  // have we already seen an SDT line?
658
42.9k
  if (!sgroup->hasProp("FIELDNAME")) {
659
    // one can read the docs and draw the conclusion that this is mandatory,
660
    // but it's also possible to interpret them the other way, and we know
661
    // that there are CTABs out there with empty fieldnames in SDT lines,
662
    // so let's just issue a warning and accept it.
663
23.6k
    BOOST_LOG(rdWarningLog)
664
0
        << "Found a SCD/SED line with missing/empty SDT specification at line "
665
0
        << line << std::endl;
666
23.6k
  }
667
668
42.9k
  if (strictParsing) {
669
422
    if (type == "SCD" && counter > 2) {
670
0
      std::ostringstream errout;
671
0
      errout << "Found too many consecutive SCD lines, (#" << (counter + 1)
672
0
             << " at line " << line << ") for SGroup " << sgIdx;
673
0
      throw FileParseException(errout.str());
674
0
    }
675
422
  }
676
677
42.9k
  if (pos + 1 < text.length()) {
678
40.8k
    currentDataField << text.substr(++pos, 69);
679
680
40.8k
    if (type == "SED") {
681
31.9k
      std::string trimmedData = boost::trim_right_copy(currentDataField.str());
682
31.9k
      dataFieldsMap[sgIdx].push_back(trimmedData.substr(0, 200));
683
31.9k
      currentDataField.str("");
684
31.9k
      counter = 0;
685
31.9k
    } else {
686
8.89k
      ++counter;
687
8.89k
    }
688
40.8k
  }
689
42.9k
}
690
691
void ParseSGroupV2000SPLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
692
                             const std::string &text, unsigned int line,
693
5.20k
                             bool strictParsing) {
694
5.20k
  PRECONDITION(mol, "bad mol");
695
5.20k
  PRECONDITION(text.substr(0, 6) == "M  SPL", "bad SPL line");
696
697
5.20k
  unsigned int pos = 6;
698
5.20k
  bool ok;
699
5.20k
  unsigned int nent =
700
5.20k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
701
5.20k
  if (!ok) {
702
1.05k
    return;
703
1.05k
  }
704
705
10.7k
  for (unsigned int ie = 0; ie < nent; ++ie) {
706
9.87k
    if (text.size() < pos + 8) {
707
2.14k
      std::ostringstream errout;
708
2.14k
      errout << "SGroup SPL line too short: '" << text << "' on line " << line;
709
2.14k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
710
2.14k
      return;
711
2.14k
    }
712
713
7.73k
    unsigned int sgIdx =
714
7.73k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
715
7.73k
    if (!ok) {
716
697
      return;
717
697
    }
718
7.03k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
719
7.03k
    if (!sgroup) {
720
416
      return;
721
416
    }
722
6.61k
    unsigned int parentIdx = ParseSGroupIntField(text, line, pos);
723
724
6.61k
    sgroup->setProp<unsigned int>("PARENT", parentIdx);
725
6.61k
  }
726
4.14k
}
727
728
void ParseSGroupV2000SNCLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
729
                             const std::string &text, unsigned int line,
730
3.80k
                             bool strictParsing) {
731
3.80k
  PRECONDITION(mol, "bad mol");
732
3.80k
  PRECONDITION(text.substr(0, 6) == "M  SNC", "bad SNC line");
733
734
3.80k
  unsigned int pos = 6;
735
3.80k
  bool ok;
736
3.80k
  unsigned int nent =
737
3.80k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
738
3.80k
  if (!ok) {
739
466
    return;
740
466
  }
741
742
4.57k
  for (unsigned int ie = 0; ie < nent; ++ie) {
743
3.68k
    if (text.size() < pos + 8) {
744
1.15k
      std::ostringstream errout;
745
1.15k
      errout << "SGroup SNC line too short: '" << text << "' on line " << line;
746
1.15k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
747
1.15k
      return;
748
1.15k
    }
749
750
2.52k
    unsigned int sgIdx =
751
2.52k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
752
2.52k
    if (!ok) {
753
279
      return;
754
279
    }
755
2.25k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
756
2.25k
    if (!sgroup) {
757
277
      return;
758
277
    }
759
760
1.97k
    unsigned int compno =
761
1.97k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
762
1.97k
    if (!ok) {
763
412
      sgroup->setIsValid(false);
764
412
      return;
765
412
    }
766
1.56k
    if (compno > 256u) {
767
322
      std::ostringstream errout;
768
322
      errout << "SGroup SNC value over 256: '" << compno << "' on line "
769
322
             << line;
770
322
      SGroupWarnOrThrow<>(strictParsing, errout.str());
771
322
      sgroup->setIsValid(false);
772
322
      return;
773
322
    }
774
1.23k
    sgroup->setProp<unsigned int>("COMPNO", compno);
775
1.23k
  }
776
3.33k
}
777
778
void ParseSGroupV2000SAPLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
779
                             const std::string &text, unsigned int line,
780
10.5k
                             bool strictParsing) {
781
10.5k
  PRECONDITION(mol, "bad mol");
782
10.5k
  PRECONDITION(text.substr(0, 6) == "M  SAP", "bad SAP line");
783
784
10.5k
  unsigned int pos = 6;
785
10.5k
  bool ok;
786
10.5k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
787
10.5k
  if (!ok) {
788
1.44k
    return;
789
1.44k
  }
790
9.08k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
791
9.08k
  if (!sgroup) {
792
1.14k
    return;
793
1.14k
  }
794
795
7.93k
  unsigned int nent =
796
7.93k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
797
7.93k
  if (!ok) {
798
345
    sgroup->setIsValid(false);
799
345
    return;
800
345
  }
801
802
14.3k
  for (unsigned int ie = 0; ie < nent; ++ie) {
803
8.58k
    int lvIdx = -1;
804
8.58k
    if (text.size() < pos + 11) {
805
3.56k
      std::ostringstream errout;
806
3.56k
      errout << "SGroup SAP line too short: '" << text << "' on line " << line;
807
3.56k
      if (strictParsing) {
808
1
        throw FileParseException(errout.str());
809
3.56k
      } else {
810
3.56k
        BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
811
3.56k
        if (text.size() < pos + 4) {
812
440
          sgroup->setIsValid(false);
813
440
          return;
814
440
        }
815
3.12k
        lvIdx = mol->getNumAtoms();
816
3.12k
      }
817
3.56k
    }
818
819
8.14k
    std::string id = "  ";
820
8.14k
    unsigned int aIdxMark =
821
8.14k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
822
8.14k
    if (!ok) {
823
1.04k
      sgroup->setIsValid(false);
824
1.04k
      return;
825
1.04k
    }
826
7.09k
    unsigned int aIdx = mol->getAtomWithBookmark(aIdxMark)->getIdx();
827
828
7.09k
    if (lvIdx == -1) {
829
4.53k
      unsigned int lvIdxMark =
830
4.53k
          ParseSGroupIntField(ok, strictParsing, text, line, pos);
831
4.53k
      if (!ok) {
832
275
        sgroup->setIsValid(false);
833
275
        return;
834
275
      }
835
4.25k
      if (lvIdxMark != 0) {
836
2.01k
        lvIdx = mol->getAtomWithBookmark(lvIdxMark)->getIdx();
837
2.01k
      }
838
4.25k
      if (text.size() >= pos + 3) {
839
4.25k
        id = text.substr(pos + 1, 2);
840
4.25k
        pos += 3;
841
4.25k
      }
842
4.25k
    }
843
844
6.82k
    try {
845
6.82k
      sgroup->addAttachPoint(aIdx, lvIdx, id);
846
6.82k
    } catch (const std::exception &e) {
847
0
      SGroupWarnOrThrow<>(strictParsing, e.what());
848
0
      sgroup->setIsValid(false);
849
0
      return;
850
0
    }
851
6.82k
  }
852
7.58k
}
853
854
void ParseSGroupV2000SCLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
855
                             const std::string &text, unsigned int line,
856
2.34k
                             bool strictParsing) {
857
2.34k
  PRECONDITION(mol, "bad mol");
858
2.34k
  PRECONDITION(text.substr(0, 6) == "M  SCL", "bad SCL line");
859
860
2.34k
  unsigned int pos = 6;
861
2.34k
  bool ok;
862
2.34k
  unsigned int sgIdx = ParseSGroupIntField(ok, strictParsing, text, line, pos);
863
2.34k
  if (!ok) {
864
420
    return;
865
420
  }
866
1.92k
  SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
867
1.92k
  if (!sgroup) {
868
262
    return;
869
262
  }
870
1.66k
  if (pos + 1 >= text.length()) {
871
457
    std::ostringstream errout;
872
457
    errout << "SGroup SCL line too short: '" << text << "' on line " << line;
873
457
    SGroupWarnOrThrow<>(strictParsing, errout.str());
874
457
    sgroup->setIsValid(false);
875
457
    return;
876
457
  }
877
878
1.20k
  ++pos;
879
1.20k
  sgroup->setProp("CLASS", text.substr(pos, text.length() - pos));
880
1.20k
}
881
882
void ParseSGroupV2000SBTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
883
                             const std::string &text, unsigned int line,
884
4.08k
                             bool strictParsing) {
885
4.08k
  PRECONDITION(mol, "bad mol");
886
4.08k
  PRECONDITION(text.substr(0, 6) == "M  SBT", "bad SBT line");
887
888
4.08k
  unsigned int pos = 6;
889
4.08k
  bool ok;
890
4.08k
  unsigned int nent =
891
4.08k
      ParseSGroupIntField(ok, strictParsing, text, line, pos, true);
892
4.08k
  if (!ok) {
893
695
    return;
894
695
  }
895
896
5.21k
  for (unsigned int ie = 0; ie < nent; ++ie) {
897
3.96k
    if (text.size() < pos + 8) {
898
1.08k
      std::ostringstream errout;
899
1.08k
      errout << "SGroup SBT line too short: '" << text << "' on line " << line;
900
1.08k
      SGroupWarnOrThrow<>(strictParsing, errout.str());
901
1.08k
      return;
902
1.08k
    }
903
904
2.87k
    unsigned int sgIdx =
905
2.87k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
906
2.87k
    if (!ok) {
907
183
      return;
908
183
    }
909
2.69k
    SubstanceGroup *sgroup = FindSgIdx(sGroupMap, sgIdx, line);
910
2.69k
    if (!sgroup) {
911
284
      return;
912
284
    }
913
2.41k
    unsigned int bracketType =
914
2.41k
        ParseSGroupIntField(ok, strictParsing, text, line, pos);
915
2.41k
    if (!ok) {
916
349
      sgroup->setIsValid(false);
917
349
      return;
918
349
    }
919
920
2.06k
    if (bracketType == 0) {
921
1.02k
      sgroup->setProp("BRKTYP", "BRACKET");
922
1.03k
    } else if (bracketType == 1) {
923
803
      sgroup->setProp("BRKTYP", "PAREN");
924
803
    } else {
925
235
      std::ostringstream errout;
926
235
      errout << "Invalid SBT value '" << bracketType << "' on line " << line;
927
235
      SGroupWarnOrThrow<>(strictParsing, errout.str());
928
235
      sgroup->setIsValid(false);
929
235
      return;
930
235
    }
931
2.06k
  }
932
3.38k
}
933
934
/* ------------------ V3000 Utils  ------------------ */
935
936
template <class T>
937
std::vector<T> ParseV3000Array(std::stringstream &stream, int maxV,
938
9.47k
                               bool strictParsing) {
939
9.47k
  auto paren = stream.get();  // discard parentheses
940
9.47k
  if (paren != '(') {
941
8.95k
    BOOST_LOG(rdWarningLog)
942
0
        << "WARNING: first character of V3000 array is not '('" << std::endl;
943
8.95k
  }
944
945
9.47k
  unsigned int count = 0;
946
9.47k
  stream >> count;
947
9.47k
  std::vector<T> values;
948
9.47k
  if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) {
949
3.06k
    SGroupWarnOrThrow(strictParsing, "invalid count value");
950
3.06k
    return values;
951
3.06k
  }
952
953
6.41k
  values.reserve(count);
954
6.41k
  T value;
955
13.8k
  for (unsigned i = 0; i < count; ++i) {
956
7.43k
    stream >> value;
957
7.43k
    values.push_back(value);
958
7.43k
  }
959
6.41k
  paren = stream.get();  // discard parentheses
960
6.41k
  if (paren != ')') {
961
6.26k
    BOOST_LOG(rdWarningLog)
962
0
        << "WARNING: final character of V3000 array is not ')'" << std::endl;
963
6.26k
  }
964
6.41k
  return values;
965
9.47k
}
std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > RDKit::SGroupParsing::ParseV3000Array<unsigned int>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool)
Line
Count
Source
938
8.55k
                               bool strictParsing) {
939
8.55k
  auto paren = stream.get();  // discard parentheses
940
8.55k
  if (paren != '(') {
941
8.44k
    BOOST_LOG(rdWarningLog)
942
0
        << "WARNING: first character of V3000 array is not '('" << std::endl;
943
8.44k
  }
944
945
8.55k
  unsigned int count = 0;
946
8.55k
  stream >> count;
947
8.55k
  std::vector<T> values;
948
8.55k
  if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) {
949
2.73k
    SGroupWarnOrThrow(strictParsing, "invalid count value");
950
2.73k
    return values;
951
2.73k
  }
952
953
5.81k
  values.reserve(count);
954
5.81k
  T value;
955
10.7k
  for (unsigned i = 0; i < count; ++i) {
956
4.89k
    stream >> value;
957
4.89k
    values.push_back(value);
958
4.89k
  }
959
5.81k
  paren = stream.get();  // discard parentheses
960
5.81k
  if (paren != ')') {
961
5.72k
    BOOST_LOG(rdWarningLog)
962
0
        << "WARNING: final character of V3000 array is not ')'" << std::endl;
963
5.72k
  }
964
5.81k
  return values;
965
8.55k
}
Unexecuted instantiation: std::__1::vector<int, std::__1::allocator<int> > RDKit::SGroupParsing::ParseV3000Array<int>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool)
std::__1::vector<double, std::__1::allocator<double> > RDKit::SGroupParsing::ParseV3000Array<double>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, int, bool)
Line
Count
Source
938
923
                               bool strictParsing) {
939
923
  auto paren = stream.get();  // discard parentheses
940
923
  if (paren != '(') {
941
510
    BOOST_LOG(rdWarningLog)
942
0
        << "WARNING: first character of V3000 array is not '('" << std::endl;
943
510
  }
944
945
923
  unsigned int count = 0;
946
923
  stream >> count;
947
923
  std::vector<T> values;
948
923
  if (maxV >= 0 && count > static_cast<unsigned int>(maxV)) {
949
327
    SGroupWarnOrThrow(strictParsing, "invalid count value");
950
327
    return values;
951
327
  }
952
953
596
  values.reserve(count);
954
596
  T value;
955
3.13k
  for (unsigned i = 0; i < count; ++i) {
956
2.53k
    stream >> value;
957
2.53k
    values.push_back(value);
958
2.53k
  }
959
596
  paren = stream.get();  // discard parentheses
960
596
  if (paren != ')') {
961
539
    BOOST_LOG(rdWarningLog)
962
0
        << "WARNING: final character of V3000 array is not ')'" << std::endl;
963
539
  }
964
596
  return values;
965
923
}
966
967
// Explicitly instantiate the unsigned int and int versions of
// ParseV3000Array so that their definitions are emitted by this file.
968
template std::vector<unsigned int> ParseV3000Array(std::stringstream &stream,
969
                                                   int, bool);
970
template std::vector<int> ParseV3000Array(std::stringstream &stream, int, bool);
971
972
void ParseV3000CStateLabel(RWMol *mol, SubstanceGroup &sgroup,
973
                           std::stringstream &stream, unsigned int line,
974
1.79k
                           bool strictParsing) {
975
1.79k
  stream.get();  // discard parentheses
976
977
1.79k
  unsigned int count;
978
1.79k
  unsigned int bondMark;
979
1.79k
  stream >> count >> bondMark;
980
981
1.79k
  std::string type = sgroup.getProp<std::string>("TYPE");
982
983
1.79k
  if ((type != "SUP" && count != 1) || (type == "SUP" && count != 4)) {
984
1.16k
    std::ostringstream errout;
985
1.16k
    errout << "Unexpected number of fields for CSTATE field on line " << line;
986
1.16k
    SGroupWarnOrThrow<>(strictParsing, errout.str());
987
1.16k
    sgroup.setIsValid(false);
988
1.16k
    return;
989
1.16k
  }
990
991
628
  Bond *bond = mol->getUniqueBondWithBookmark(bondMark);
992
993
628
  RDGeom::Point3D vector;
994
628
  if (type == "SUP") {
995
0
    stream >> vector.x >> vector.y >> vector.z;
996
0
  }
997
628
  try {
998
628
    sgroup.addCState(bond->getIdx(), vector);
999
628
  } catch (const std::exception &e) {
1000
0
    SGroupWarnOrThrow<>(strictParsing, e.what());
1001
0
    sgroup.setIsValid(false);
1002
0
    return;
1003
0
  }
1004
1005
0
  stream.get();  // discard final parentheses
1006
0
}
1007
1008
void ParseV3000SAPLabel(RWMol *mol, SubstanceGroup &sgroup,
1009
3.91k
                        std::stringstream &stream, bool strictParsing) {
1010
3.91k
  stream.get();  // discard parentheses
1011
1012
3.91k
  unsigned int count = 0;
1013
3.91k
  unsigned int aIdxMark = 0;
1014
3.91k
  std::string lvIdxStr;  // In V3000 this may be a string
1015
3.91k
  std::string sapIdStr;
1016
3.91k
  stream >> count >> aIdxMark >> lvIdxStr >> sapIdStr;
1017
1018
  // remove final parentheses that gets parsed into sapIdStr
1019
3.91k
  sapIdStr.pop_back();
1020
1021
3.91k
  unsigned int aIdx = mol->getAtomWithBookmark(aIdxMark)->getIdx();
1022
3.91k
  int lvIdx = -1;
1023
1024
3.91k
  boost::to_upper(lvIdxStr);
1025
3.91k
  if (lvIdxStr == "AIDX") {
1026
254
    lvIdx = aIdx;
1027
3.65k
  } else {
1028
3.65k
    unsigned int lvIdxTmp = FileParserUtils::toInt(lvIdxStr);
1029
3.65k
    if (lvIdxTmp > 0) {
1030
177
      lvIdx = mol->getAtomWithBookmark(lvIdxTmp)->getIdx();
1031
177
    }
1032
3.65k
  }
1033
1034
3.91k
  try {
1035
3.91k
    sgroup.addAttachPoint(aIdx, lvIdx, sapIdStr);
1036
3.91k
  } catch (const std::exception &e) {
1037
0
    SGroupWarnOrThrow<>(strictParsing, e.what());
1038
0
    sgroup.setIsValid(false);
1039
0
    return;
1040
0
  }
1041
3.91k
}
1042
1043
130k
std::string ParseV3000StringPropLabel(std::stringstream &stream) {
1044
130k
  std::string strValue;
1045
1046
130k
  auto nextChar = stream.peek();
1047
130k
  if (nextChar == ' ') {
1048
    // empty value, we peeked at the next field's separator
1049
26.5k
    return strValue;
1050
103k
  } else if (nextChar == '"') {
1051
    // skip the opening quote:
1052
1.67k
    stream.get();
1053
1054
    // this is a bit gross because it's legal to include a \" in a value,
1055
    // but the way that's done is by doubling it. So
1056
    // FIELDINFO=""""
1057
    // should assign the value \" to FIELDINFO
1058
1.67k
    char chr;
1059
232k
    while (stream.get(chr)) {
1060
231k
      if (chr == '"') {
1061
2.04k
        nextChar = stream.peek();
1062
1063
        // if the next element in the stream is a \" then we have a quoted \".
1064
        // Otherwise we're done
1065
2.04k
        if (nextChar != '"') {
1066
900
          break;
1067
1.14k
        } else {
1068
          // skip the second \"
1069
1.14k
          stream.get();
1070
1.14k
        }
1071
2.04k
      }
1072
230k
      strValue += chr;
1073
230k
    }
1074
101k
  } else if (nextChar == '\'') {
1075
2.65k
    std::getline(stream, strValue, '\'');
1076
99.1k
  } else {
1077
99.1k
    stream >> strValue;
1078
99.1k
  }
1079
1080
103k
  boost::trim_right(strValue);
1081
103k
  return strValue;
1082
130k
}
1083
1084
void ParseV3000ParseLabel(const std::string &label,
1085
                          std::stringstream &lineStream, STR_VECT &dataFields,
1086
                          unsigned int line, SubstanceGroup &sgroup, size_t,
1087
147k
                          RWMol *mol, bool strictParsing) {
1088
147k
  PRECONDITION(mol, "bad mol");
1089
  // TODO: we could handle these in a more structured way
1090
147k
  try {
1091
147k
    if (label == "XBHEAD" || label == "XBCORR") {
1092
2.17k
      std::vector<unsigned int> bvect = ParseV3000Array<unsigned int>(
1093
2.17k
          lineStream, mol->getNumBonds(), strictParsing);
1094
2.17k
      std::transform(bvect.begin(), bvect.end(), bvect.begin(),
1095
2.17k
                     [](unsigned int v) -> unsigned int { return v - 1; });
1096
2.17k
      sgroup.setProp(label, bvect);
1097
145k
    } else if (label == "ATOMS") {
1098
4.36k
      for (auto atomIdx : ParseV3000Array<unsigned int>(
1099
4.36k
               lineStream, mol->getNumAtoms(), strictParsing)) {
1100
3.98k
        sgroup.addAtomWithBookmark(atomIdx);
1101
3.98k
      }
1102
141k
    } else if (label == "PATOMS") {
1103
1.36k
      for (auto patomIdx : ParseV3000Array<unsigned int>(
1104
1.36k
               lineStream, mol->getNumAtoms(), strictParsing)) {
1105
914
        sgroup.addParentAtomWithBookmark(patomIdx);
1106
914
      }
1107
139k
    } else if (label == "CBONDS" || label == "XBONDS") {
1108
646
      for (auto bondIdx : ParseV3000Array<unsigned int>(
1109
646
               lineStream, mol->getNumBonds(), strictParsing)) {
1110
0
        sgroup.addBondWithBookmark(bondIdx);
1111
0
      }
1112
139k
    } else if (label == "BRKXYZ") {
1113
923
      auto coords = ParseV3000Array<double>(lineStream, 9, strictParsing);
1114
923
      if (coords.size() != 9) {
1115
764
        std::ostringstream errout;
1116
764
        errout << "Unexpected number of coordinates for BRKXYZ on line "
1117
764
               << line;
1118
764
        throw FileParseException(errout.str());
1119
764
      }
1120
1121
159
      SubstanceGroup::Bracket bracket;
1122
636
      for (unsigned int i = 0; i < 3; ++i) {
1123
477
        bracket[i] = RDGeom::Point3D(*(coords.begin() + (3 * i)),
1124
477
                                     *(coords.begin() + (3 * i) + 1),
1125
477
                                     *(coords.begin() + (3 * i) + 2));
1126
477
      }
1127
159
      sgroup.addBracket(bracket);
1128
138k
    } else if (label == "CSTATE") {
1129
1.79k
      ParseV3000CStateLabel(mol, sgroup, lineStream, line, strictParsing);
1130
136k
    } else if (label == "SAP") {
1131
3.91k
      ParseV3000SAPLabel(mol, sgroup, lineStream, strictParsing);
1132
132k
    } else if (label == "PARENT") {
1133
      // Store relationship until all SGroups have been read
1134
1.80k
      unsigned int parentIdx;
1135
1.80k
      if (lineStream.eof()) {
1136
209
        std::ostringstream errout;
1137
209
        errout << "PARENT label not found on line " << line;
1138
209
        throw FileParseException(errout.str());
1139
209
      }
1140
1.59k
      lineStream >> parentIdx;
1141
1.59k
      if (lineStream.fail()) {
1142
244
        std::ostringstream errout;
1143
244
        errout << "Invalid PARENT label found on line " << line;
1144
244
        throw FileParseException(errout.str());
1145
244
      }
1146
1.35k
      sgroup.setProp<unsigned int>("PARENT", parentIdx);
1147
130k
    } else if (label == "COMPNO") {
1148
524
      unsigned int compno;
1149
524
      lineStream >> compno;
1150
524
      if (compno > 256u) {
1151
278
        std::ostringstream errout;
1152
278
        errout << "SGroup SNC value over 256: '" << compno << "' on line "
1153
278
               << line;
1154
278
        throw FileParseException(errout.str());
1155
278
      }
1156
246
      sgroup.setProp<unsigned int>("COMPNO", compno);
1157
130k
    } else if (label == "FIELDDATA") {
1158
9.86k
      auto strValue = ParseV3000StringPropLabel(lineStream);
1159
9.86k
      if (strictParsing) {
1160
2.26k
        strValue = strValue.substr(0, 200);
1161
2.26k
      }
1162
9.86k
      dataFields.push_back(strValue);
1163
1164
120k
    } else {
1165
      // Parse string props
1166
120k
      auto strValue = ParseV3000StringPropLabel(lineStream);
1167
1168
120k
      if (label == "SUBTYPE" &&
1169
1.43k
          !SubstanceGroupChecks::isValidSubType(strValue)) {
1170
1.37k
        std::ostringstream errout;
1171
1.37k
        errout << "Unsupported SGroup subtype '" << strValue << "' on line "
1172
1.37k
               << line;
1173
1.37k
        throw FileParseException(errout.str());
1174
118k
      } else if (label == "CONNECT" &&
1175
513
                 !SubstanceGroupChecks::isValidConnectType(strValue)) {
1176
449
        std::ostringstream errout;
1177
449
        errout << "Unsupported SGroup connection type '" << strValue
1178
449
               << "' on line " << line;
1179
449
        throw FileParseException(errout.str());
1180
118k
      } else if (label == "CLASS" &&
1181
1.33k
                 !SubstanceGroupChecks::isValidClass(strValue)) {
1182
925
        std::ostringstream errout;
1183
925
        errout << "Unsupported SGroup template class '" << strValue
1184
925
               << "' on line " << line;
1185
925
        throw FileParseException(errout.str());
1186
925
      }
1187
      // NATREPLACE is not validated nor used
1188
1189
117k
      sgroup.setProp(label, strValue);
1190
117k
    }
1191
147k
  } catch (const std::exception &e) {
1192
6.80k
    SGroupWarnOrThrow<>(strictParsing, e.what());
1193
6.80k
    sgroup.setIsValid(false);
1194
6.80k
    return;
1195
6.80k
  }
1196
147k
}
1197
1198
std::string ParseV3000SGroupsBlock(std::istream *inStream, unsigned int &line,
1199
                                   unsigned int nSgroups, RWMol *mol,
1200
1.17k
                                   bool strictParsing) {
1201
1.17k
  PRECONDITION(inStream, "no stream");
1202
1.17k
  PRECONDITION(mol, "no molecule");
1203
1.17k
  unsigned int defaultLineNum = 0;
1204
1.17k
  std::string defaultString;
1205
1206
  // SGroups may be written in unsorted ID order, according to spec, so we will
1207
  // temporarily store them in a map before adding them to the mol
1208
1.17k
  IDX_TO_SGROUP_MAP sGroupMap;
1209
1210
1.17k
  std::unordered_map<std::string, std::stringstream> defaultLabels;
1211
1212
1.17k
  auto tempStr = FileParserUtils::getV3000Line(inStream, line);
1213
1214
  // Store defaults
1215
1.17k
  if (tempStr.substr(0, 7) == "DEFAULT" && tempStr.length() > 8) {
1216
838
    defaultString = tempStr.substr(7);
1217
838
    defaultLineNum = line;
1218
838
    boost::trim_right(defaultString);
1219
838
    tempStr = FileParserUtils::getV3000Line(inStream, line);
1220
838
    boost::trim_right(tempStr);
1221
838
  }
1222
1223
22.7k
  for (unsigned int si = 0; si < nSgroups; ++si) {
1224
21.6k
    unsigned int sequenceId;
1225
21.6k
    unsigned int externalId;
1226
21.6k
    std::string type;
1227
1228
21.6k
    std::stringstream lineStream(tempStr);
1229
21.6k
    lineStream >> sequenceId;
1230
21.6k
    lineStream >> type;
1231
21.6k
    lineStream >> externalId;
1232
1233
21.6k
    std::set<std::string> parsedLabels;
1234
21.6k
    if (strictParsing && !SubstanceGroupChecks::isValidType(type)) {
1235
7
      std::ostringstream errout;
1236
7
      errout << "Unsupported SGroup type '" << type << "' on line " << line;
1237
7
      throw MolFileUnhandledFeatureException(errout.str());
1238
21.6k
    } else if (!strictParsing &&
1239
21.4k
               nSgroups == std::numeric_limits<unsigned int>::max() &&
1240
371
               lineStream.fail()) {
1241
      // something went wrong and we didn't know how many SGroups to expect, and
1242
      // now we have seen something that doesn't look like an SGroup start.
1243
      // So we assume we're done.
1244
44
      nSgroups = 0;
1245
44
      break;
1246
44
    }
1247
1248
21.6k
    SubstanceGroup sgroup(mol, type);
1249
21.6k
    STR_VECT dataFields;
1250
1251
21.6k
    sgroup.setProp<unsigned int>("index", sequenceId);
1252
21.6k
    if (externalId > 0) {
1253
5.94k
      if (!SubstanceGroupChecks::isSubstanceGroupIdFree(*mol, externalId)) {
1254
0
        std::ostringstream errout;
1255
0
        errout << "Existing SGroup ID '" << externalId
1256
0
               << "' assigned to a second SGroup on line " << line;
1257
0
        if (strictParsing) {
1258
0
          throw FileParseException(errout.str());
1259
0
        } else {
1260
0
          BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
1261
0
          sgroup.setIsValid(false);
1262
0
        }
1263
0
      }
1264
1265
5.94k
      sgroup.setProp<unsigned int>("ID", externalId);
1266
5.94k
    }
1267
1268
56.1k
    while (sgroup.getIsValid() && !lineStream.eof() && !lineStream.fail()) {
1269
34.5k
      char spacer;
1270
34.5k
      std::string label;
1271
1272
34.5k
      lineStream.get(spacer);
1273
34.5k
      if (lineStream.gcount() == 0) {
1274
153
        continue;
1275
34.4k
      } else if (spacer != ' ') {
1276
1.60k
        std::ostringstream errout;
1277
1.60k
        errout << "Found character '" << spacer
1278
1.60k
               << "' when expecting a separator (space) on line " << line;
1279
1.60k
        if (strictParsing) {
1280
2
          throw FileParseException(errout.str());
1281
1.59k
        } else {
1282
1.59k
          BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
1283
1.59k
          sgroup.setIsValid(false);
1284
1.59k
          continue;
1285
1.59k
        }
1286
1.60k
      }
1287
1288
32.8k
      std::getline(lineStream, label, '=');
1289
32.8k
      if (label.empty()) {
1290
710
        continue;
1291
710
      }
1292
32.1k
      ParseV3000ParseLabel(label, lineStream, dataFields, line, sgroup,
1293
32.1k
                           nSgroups, mol, strictParsing);
1294
32.1k
      parsedLabels.insert(label);
1295
32.1k
    }
1296
1297
    // Process defaults
1298
21.6k
    lineStream.clear();
1299
21.6k
    lineStream.str(defaultString);
1300
150k
    while (sgroup.getIsValid() && !lineStream.eof() && !lineStream.fail()) {
1301
128k
      char spacer;
1302
128k
      std::string label;
1303
1304
128k
      lineStream.get(spacer);
1305
128k
      if (lineStream.gcount() == 0) {
1306
1.77k
        continue;
1307
127k
      } else if (spacer != ' ') {
1308
1.32k
        std::ostringstream errout;
1309
1.32k
        errout << "Found character '" << spacer
1310
1.32k
               << "' when expecting a separator (space) in DEFAULTS on line "
1311
1.32k
               << defaultLineNum;
1312
1.32k
        if (strictParsing) {
1313
1
          throw FileParseException(errout.str());
1314
1.32k
        } else {
1315
1.32k
          BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
1316
1.32k
          sgroup.setIsValid(false);
1317
1.32k
          continue;
1318
1.32k
        }
1319
1.32k
      }
1320
1321
125k
      std::getline(lineStream, label, '=');
1322
125k
      if (label.empty()) {
1323
4.37k
        continue;
1324
4.37k
      }
1325
121k
      if (std::find(parsedLabels.begin(), parsedLabels.end(), label) ==
1326
121k
          parsedLabels.end()) {
1327
115k
        ParseV3000ParseLabel(label, lineStream, dataFields, defaultLineNum,
1328
115k
                             sgroup, nSgroups, mol, strictParsing);
1329
115k
      } else {
1330
5.92k
        spacer = lineStream.peek();
1331
5.92k
        if (spacer == ' ') {
1332
1.91k
          std::ostringstream errout;
1333
1.91k
          errout << "Found unexpected whitespace at DEFAULT label " << label;
1334
1.91k
          if (strictParsing) {
1335
2
            throw FileParseException(errout.str());
1336
1.91k
          } else {
1337
1.91k
            BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
1338
1.91k
            sgroup.setIsValid(false);
1339
1.91k
            continue;
1340
1.91k
          }
1341
4.00k
        } else if (spacer == '(') {
1342
128
          std::getline(lineStream, label, ')');
1343
128
          lineStream.get(spacer);
1344
3.88k
        } else if (spacer == '"') {
1345
109
          lineStream.get(spacer);
1346
109
          std::getline(lineStream, label, '"');
1347
3.77k
        } else {
1348
3.77k
          std::getline(lineStream, label, ' ');
1349
3.77k
          lineStream.putback(' ');
1350
3.77k
        }
1351
5.92k
      }
1352
121k
    }
1353
1354
21.6k
    sgroup.setProp("DATAFIELDS", dataFields);
1355
21.6k
    sGroupMap.emplace(sequenceId, sgroup);
1356
1357
21.6k
    tempStr = FileParserUtils::getV3000Line(inStream, line);
1358
21.6k
    boost::trim_right(tempStr);
1359
21.6k
  }
1360
1361
1.15k
  if (sGroupMap.size() != nSgroups) {
1362
37
    std::ostringstream errout;
1363
37
    errout << "Found " << sGroupMap.size() << " SGroups when " << nSgroups
1364
37
           << " were expected." << std::endl;
1365
37
    if (strictParsing) {
1366
0
      throw FileParseException(errout.str());
1367
37
    } else {
1368
37
      BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
1369
37
    }
1370
37
  }
1371
  // SGroups successfully parsed, now add them to the molecule
1372
1.15k
  for (const auto &sg : sGroupMap) {
1373
201
    if (sg.second.getIsValid()) {
1374
164
      addSubstanceGroup(*mol, sg.second);
1375
164
    } else {
1376
37
      BOOST_LOG(rdWarningLog) << "SGroup " << sg.first
1377
0
                              << " is invalid and will be ignored" << std::endl;
1378
37
    }
1379
201
  }
1380
1.15k
  return tempStr;
1381
1.15k
}
1382
1383
}  // namespace SGroupParsing
1384
}  // namespace RDKit