Coverage Report

Created: 2026-01-10 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/obmolecformat.cpp
Line
Count
Source
1
/**********************************************************************
2
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4
Copyright (C) 2005 Chris Morley
5
6
This file is part of the Open Babel project.
7
For more information, see <http://openbabel.org/>
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
***********************************************************************/
18
#include <openbabel/babelconfig.h>
19
#include <openbabel/obmolecformat.h>
20
#include <openbabel/mol.h>
21
#include <openbabel/reaction.h>
22
23
#include <algorithm>
24
#include <iterator> // Required for MSVC2015 use of std::back_inserter
25
26
using namespace std;
27
namespace OpenBabel
28
{
29
  bool OBMoleculeFormat::OptionsRegistered=false;
30
  std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
31
  OBMol* OBMoleculeFormat::_jmol;
32
  std::vector<OBMol> OBMoleculeFormat::MolArray;
33
  bool OBMoleculeFormat::StoredMolsReady=false;
34
35
  /*! Reads the next object from the input stream of pConv using pFormat and
    passes the result to pConv->AddChemObject() so that it can be written.

    Three general options alter the normal one-molecule-per-call flow:
    - "C": the work is delegated to DeferMolOutput(), which takes over the
      freshly allocated molecule.
    - "separate": on the first call every molecule in the input is read and
      split with OBMol::Separate(); the fragments are kept in MolArray and one
      fragment is sent for output on this and on each subsequent call.
    - "j" / "join": each molecule is appended to the shared _jmol, which is
      written later by WriteChemObjectImpl().

    \param pConv   conversion providing the input stream and the options
    \param pFormat format whose ReadMolecule() does the parsing
    \return false if the stream is unusable, if reading failed, if there are no
            fragments left ("separate"), or if AddChemObject() returned 0;
            true otherwise.
  */
  bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    std::istream* ifs = pConv->GetInStream();
    if (!ifs || !ifs->good())
      return false;

    OBMol* pmol = new OBMol;

    //DeferMolOutput either stores pmol in IMols or deletes it
    if (pConv->IsOption("C", OBConversion::GENOPTIONS))
      return DeferMolOutput(pmol, pConv, pFormat);

    bool ret = true;
    if (pConv->IsOption("separate", OBConversion::GENOPTIONS))
    {
      //On first call, separate molecule and put fragments in MolArray.
      //On subsequent calls, remove a fragment from MolArray and send it for writing
      //Done this way so that each fragment can be written to its own file (with -m option)
      if (!StoredMolsReady)
      {
        while (ret) //do all the molecules in the file
        {
          ret = pFormat->ReadMolecule(pmol, pConv);

          if (ret && (pmol->NumAtoms() > 0 || (pFormat->Flags() & ZEROATOMSOK)))
          {
            vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule

            //Add an appropriate title to each fragment
            if (SepArray.size() > 1)
            {
              for (unsigned int i = 0; i < SepArray.size(); ++i)
              {
                stringstream ss;
                ss << pmol->GetTitle() << '#' << i+1;
                string title = ss.str();
                SepArray[i].SetTitle(title);
              }
            }
            else if (!SepArray.empty())
            {
              //A molecule accepted only because of ZEROATOMSOK may yield no
              //fragments at all, so element 0 must not be assumed to exist.
              SepArray[0].SetTitle(pmol->GetTitle());
            }

            copy(SepArray.begin(), SepArray.end(), back_inserter(MolArray));
          }
        }
        //Stored back to front so that fragments can be taken with pop_back()
        reverse(MolArray.begin(), MolArray.end());
        StoredMolsReady = true;
        //Clear the flags of the input stream (which may have found eof) to ensure
        //it will try to read another molecule and allow the stored ones to be
        //sent for output.
        pConv->GetInStream()->clear();
      }

      if (MolArray.empty()) //normal end of fragments
        ret = false;
      else
      {
        // Copying is needed because the OBMol passed to AddChemObject will be deleted.
        // The OBMol in the vector is deleted here.
        OBMol* pMolCopy = new OBMol(MolArray.back());
        MolArray.pop_back();
        ret = pConv->AddChemObject(
            pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv)) != 0;
      }
      if (!ret)
        StoredMolsReady = false; //the next conversion starts from scratch

      delete pmol;
      return ret;
    }

    ret = pFormat->ReadMolecule(pmol, pConv);

    OBMol* ptmol = nullptr;
    //Molecule is valid if it has some atoms
    //or it represents a reaction
    //or the format allows zero-atom molecules and it has a title or properties
    if (ret && (pmol->NumAtoms() > 0
      || pmol->IsReaction()
      || (pFormat->Flags() & ZEROATOMSOK && (*pmol->GetTitle() || pmol->HasData(1)))))
    {
      ptmol = static_cast<OBMol*>(
          pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv));

      if (ptmol && (pConv->IsOption("j", OBConversion::GENOPTIONS)
                 || pConv->IsOption("join", OBConversion::GENOPTIONS)))
      {
        //With j option, accumulate all mols in one stored in this class
        if (pConv->IsFirstInput())
          _jmol = new OBMol;
        pConv->AddChemObject(_jmol);
        //will be discarded in WriteChemObjectImpl until the last input mol. This
        //complication is needed to allow joined molecules to be from different
        //files. pOb1 in AddChemObject is zeroed at the end of a file and _jmol
        //is in danger of not being output.
        *_jmol += *ptmol;
        delete ptmol;
        return true;
      }
    }
    else
      delete pmol;

    // Normal operation - send molecule to be written
    ret = ret && (pConv->AddChemObject(ptmol) != 0); //success of both writing and reading
    return ret;
  }
134
135
  /*! Writes the object currently held by pConv using pFormat.

    - With the "C" general option the deferred molecules are written by
      OutputDeferredMols().
    - With "j" / "join" nothing is written until pConv->IsLast() is true; then
      the accumulated _jmol is written and released.
    - Otherwise the object from pConv->GetChemObject() is written: an OBMol via
      DoOutputOptions() followed by pFormat->WriteMolecule(), an OBReaction via
      OutputMolsFromReaction(). The retrieved object is deleted before returning.

    \param pConv   conversion holding the object and the options
    \param pFormat output format
    \return the result of the write; false if the object is neither an OBMol
            nor an OBReaction.
  */
  bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    if (pConv->IsOption("C", OBConversion::GENOPTIONS))
      return OutputDeferredMols(pConv);

    if (pConv->IsOption("j", OBConversion::GENOPTIONS)
        || pConv->IsOption("join", OBConversion::GENOPTIONS))
    {
      //arrives here at the end of a file
      if (!pConv->IsLast())
        return true;

      const bool written = pFormat->WriteMolecule(_jmol, pConv);
      pConv->SetOutputIndex(1);
      delete _jmol;
      _jmol = nullptr; //do not leave the static pointer dangling
      return written;
    }

    //Retrieve the target OBMol
    OBBase* pOb = pConv->GetChemObject();
    bool ret = false;

    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
    if (pmol)
    {
      if (pmol->NumAtoms() == 0)
      {
        //Informational only: empty molecules are still passed to the format
        std::string auditMsg = "OpenBabel::Molecule ";
        auditMsg += pmol->GetTitle();
        auditMsg += " has 0 atoms";
        obErrorLog.ThrowError(__FUNCTION__,
                              auditMsg,
                              obInfo);
      }

      ret = DoOutputOptions(pOb, pConv);
      if (ret)
        ret = pFormat->WriteMolecule(pmol, pConv);
    }

    //If sent a OBReaction* (rather than a OBMol*) output the constituent molecules
    OBReaction* pReact = dynamic_cast<OBReaction*>(pOb);
    if (pReact)
      ret = OutputMolsFromReaction(pReact, pConv, pFormat);

    delete pOb;
    return ret;
  }
183
184
  /*! Applies the general options that act on an object just before it is
    written.

    - "addoutindex": appends a space and the current output index to the title.
    - "writeconformers" (OBMol only): writes every conformer except the last
      with the output format of pConv; the last one is left selected so that
      the caller writes it.

    \param pOb   object about to be written
    \param pConv conversion supplying options, output index and output format
    \return always true, including when writing a conformer failed.
  */
  bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv)
  {
    if (pConv->IsOption("addoutindex", OBConversion::GENOPTIONS))
    {
      stringstream indexedTitle;
      indexedTitle << pOb->GetTitle() << " " << pConv->GetOutputIndex();
      pOb->SetTitle(indexedTitle.str().c_str());
    }

    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
    if (!pmol || !pConv->IsOption("writeconformers", OBConversion::GENOPTIONS))
      return true;

    //The last conformer is written in the calling function
    int conformer = 0;
    while (conformer < pmol->NumConformers()-1)
    {
      pmol->SetConformer(conformer);
      if (!pConv->GetOutFormat()->WriteMolecule(pmol, pConv))
        break; //stop at the first failed write; the failing conformer stays selected
      ++conformer;
    }
    pmol->SetConformer(conformer);

    return true;
  }
207
208
  /*! Instead of sending molecules for output via AddChemObject(), they are
209
    saved in here in OBMoleculeFormat or discarded. By default they are
210
    saved only if they are in the first input file. Parts of subsequent
211
    molecules, such as chemical structure, coordinates and OBGenericData
212
    can replace the parts in molecules with the same title that have already
213
    been stored, subject to a set of rules. After all input files have been
214
    read, the stored molecules (possibly now having augmented properties) are
215
    sent to the output format.
216
217
    Is a static function with *this as parameter so that it can be called from other
218
    format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
219
  */
220
  bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
221
0
  {
222
0
    static bool IsFirstFile;
223
0
    bool OnlyMolsInFirstFile=true;
224
225
0
    if(pConv->IsFirstInput())
226
0
      {
227
0
        IsFirstFile=true;
228
0
        IMols.clear();
229
0
        pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS);
230
0
      }
231
0
    else
232
0
      {
233
0
        if((std::streamoff)pConv->GetInStream()->tellg()<=0)
234
0
          IsFirstFile=false;//File has changed
235
0
      }
236
237
0
    if (!pF->ReadMolecule(pmol,pConv))
238
0
      {
239
0
        delete pmol;
240
0
        return false;
241
0
      }
242
0
    const char* ptitle = pmol->GetTitle();
243
0
    if(*ptitle==0)
244
0
      obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
245
0
    else
246
0
      {
247
0
        string title(ptitle);
248
0
        string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
249
0
        if(pos!=string::npos)
250
0
          title.erase(pos);
251
252
0
        map<std::string, OBMol*>::iterator itr;
253
0
        itr = IMols.find(title);
254
0
        if(itr!=IMols.end())
255
0
          {
256
            //Molecule with the same title has been input previously: update it
257
0
            OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
258
0
            if(pNewMol)
259
0
              {
260
0
                delete itr->second;
261
0
                IMols[title] = pNewMol;
262
0
              }
263
0
            else
264
0
              {
265
                //error: cleanup and return false
266
0
                delete pmol;
267
0
                return DeleteDeferredMols();
268
0
              }
269
0
          }
270
0
        else
271
0
          {
272
            //Molecule not already saved in IMols: save it if in first file
273
0
            if(!OnlyMolsInFirstFile || IsFirstFile)
274
0
              {
275
0
                IMols[title] = pmol;
276
0
                return true; //don't delete pmol
277
0
              }
278
0
          }
279
0
      }
280
0
    delete pmol;
281
0
    return true;
282
0
  }
283
284
  /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
285
    If both have OBGenericData of the same type, or OBPairData with the
286
    same attribute,  the version from pFirst is used.
287
    Returns a pointer to a new OBMol which will need deleting by the calling program
288
    (probably by being sent to an output format).
289
    If the molecules cannot be regarded as being the same structure a NULL
290
    pointer is returned and an error message logged.
291
292
    pFirst and pSecond and the objects they point to are not changed. (const
293
    modifiers difficult because class OBMol not designed appropriately)
294
295
    Combining molecules: rules for each of the three parts
296
    Title:
297
    Use the title of pFirst unless it has none, when use that of pSecond.
298
    Warning if neither molecule has a title.
299
300
    Structure
301
    - a structure with atoms replaces one with no atoms
302
    - a structure with bonds replaces one with no bonds,
303
    provided the formula is the same, else an error.
304
    - structures with atoms and bonds are compared by InChI; error if not the same.
305
    - a structure with 3D coordinates replaces one with 2D coordinates
306
    - a structure with 2D coordinates replace one with 0D coordinates
307
308
    OBGenericData
309
    OBPairData
310
  */
311
  OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
312
0
  {
313
    //Decide on which OBMol provides the new title
314
0
    string title("No title");
315
0
    if(*pFirst->GetTitle()!=0)
316
0
      title = pFirst->GetTitle();
317
0
    else
318
0
      {
319
0
        if(*pSecond->GetTitle()!=0)
320
0
          title = pSecond->GetTitle();
321
0
        else
322
0
          obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
323
0
      }
324
325
    //Decide on which OBMol provides the new structure
326
0
    bool swap=false;
327
0
    if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
328
0
      swap=true;
329
0
    else if(pSecond->NumAtoms()!=0)
330
0
      {
331
0
        if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
332
0
          {
333
0
            obErrorLog.ThrowError(__FUNCTION__,
334
0
                                  "Molecules with name = " + title + " have different formula",obError);
335
0
            return nullptr;
336
0
          }
337
0
        else
338
0
          {
339
0
            if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
340
0
              swap=true;
341
0
            else
342
0
              {
343
                //Compare by inchi; error if different NOT YET IMPLEMENTED
344
                //Use the one with the higher dimension
345
0
                if(pSecond->GetDimension() > pFirst->GetDimension())
346
0
                  swap=true;
347
0
              }
348
0
          }
349
0
      }
350
351
0
    OBMol* pNewMol = new OBMol;
352
0
    pNewMol->SetTitle(title);
353
354
0
    OBMol* pMain = swap ? pSecond : pFirst;
355
0
    OBMol* pOther = swap ? pFirst : pSecond;
356
357
0
    *pNewMol = *pMain; //Now copies all data
358
359
    //Copy some OBGenericData from the OBMol which did not provide the structure
360
0
    vector<OBGenericData*>::iterator igd;
361
0
    for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
362
0
      {
363
        //copy only if not already data of the same type from molecule already copied to pNewMol
364
0
        unsigned datatype = (*igd)->GetDataType();
365
0
        OBGenericData* pData = pNewMol->GetData(datatype);
366
0
        if(datatype==OBGenericDataType::PairData)
367
0
          {
368
0
            if(pData->GetAttribute() == (*igd)->GetAttribute())
369
0
              continue;
370
0
          }
371
0
        else if (pNewMol->GetData(datatype) != nullptr)
372
0
          continue;
373
374
0
        OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
375
0
        pNewMol->SetData(pCopiedData);
376
0
      }
377
0
    return pNewMol;
378
0
  }
379
380
  /*! Writes every molecule stored in IMols with the output format of pConv, in
    map order, then empties IMols.

    Each molecule is first passed through DoTransformations(); molecules for
    which that returns null are skipped. Writing stops at the first failed
    write.

    \param pConv conversion supplying options and the output format
    \return the result of the last attempted write; false if nothing was
            written (including when no molecules are stored).
  */
  bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
  {
    //Nothing stored: there is no last element, and decrementing end() of an
    //empty map would be undefined behaviour.
    if (IMols.empty())
      return false;

    std::map<std::string, OBMol*>::iterator lastitr = IMols.end();
    --lastitr;

    bool ret = false;
    int i = 1;
    pConv->SetOneObjectOnly(false);

    std::map<std::string, OBMol*>::iterator itr;
    for (itr = IMols.begin(); itr != IMols.end(); ++itr, ++i)
    {
      //NOTE(review): a molecule rejected here keeps its pointer in the map and
      //is deleted by DeleteDeferredMols() below - confirm that
      //DoTransformations() has not already deleted it in that case.
      if (!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))
        continue;

      pConv->SetOutputIndex(i);
      if (itr == lastitr)
        pConv->SetOneObjectOnly(); //to set IsLast

      ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);

      delete itr->second; //always delete OBMol object
      itr->second = nullptr; //so that DeleteDeferredMols() does not delete it again
      if (!ret)
        break;
    }

    DeleteDeferredMols(); //cleans up in case there have been errors
    return ret;
  }
405
406
  bool OBMoleculeFormat::DeleteDeferredMols()
407
0
  {
408
    //Empties IMols, deteting the OBMol objects whose pointers are stored there
409
0
    std::map<std::string, OBMol*>::iterator itr;
410
0
    for(itr=IMols.begin();itr!=IMols.end();++itr)
411
0
      {
412
0
        delete itr->second; //usually NULL
413
0
      }
414
0
    IMols.clear();
415
0
    return false;
416
0
  }
417
418
  ///////////////////////////////////////////////////////////////////
419
  bool OBMoleculeFormat::OutputMolsFromReaction
420
    (OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat)
421
12
  {
422
    //Output all the constituent molecules of the reaction
423
424
    //Collect the molecules first, just for convenience
425
12
    vector<std::shared_ptr<OBMol> > mols;
426
21
    for(int i=0;i<pReact->NumReactants();i++)
427
9
      mols.push_back(pReact->GetReactant(i));
428
163
    for(int i=0;i<pReact->NumProducts();i++)
429
151
      mols.push_back(pReact->GetProduct(i));
430
12
    for (int i = 0; i<pReact->NumAgents(); i++)
431
0
      mols.push_back(pReact->GetAgent(i));
432
433
12
    if(pReact->GetTransitionState())
434
0
      mols.push_back(pReact->GetTransitionState());
435
436
12
    pConv->SetOutputIndex(pConv->GetOutputIndex() - 1); // The OBReaction object is not output
437
12
    if((pFormat->Flags() & WRITEONEONLY) && mols.size()>1)
438
4
    {
439
4
      stringstream ss;
440
4
      ss << "There are " << mols.size() << " molecules to be output,"
441
4
         << "but this format is for single molecules only";
442
4
      obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
443
4
      mols.resize(1);
444
4
    }
445
12
    bool ok = true;
446
148
    for(unsigned int i=0;i<mols.size() && ok;++i)
447
136
    {
448
136
      if(mols[i])
449
136
      {
450
        //Have to do set these manually because not using "Convert" interface
451
136
        pConv->SetLast(i==mols.size()-1);
452
136
        pConv->SetOutputIndex(pConv->GetOutputIndex()+1);
453
136
        ok = pFormat->WriteMolecule(
454
136
          mols[i]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),pConv);
455
136
      }
456
136
    }
457
12
    return ok;
458
12
  }
459
460
  //////////////////////////////////////////////////////////////////
461
  /** Attempts to read the index file datafilename.obindx successively
462
      from the following directories:
463
      - the current directory
464
      - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
465
      if the environment variable is not set
466
      - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
467
      An index of type NameIndexType is then constructed. NameIndexType is defined
468
      in obmolecformat.h as std::unordered_map. It is searched by
469
      @code
470
      NameIndexType::iterator itr = index.find(molecule_name);
471
      if(itr!=index.end())
472
      unsigned pos_in_datafile = itr->second;
473
      @endcode
474
      pos_in_datafile is used as a parameter in seekg() to read from the datafile
475
476
      If no index is found, it is constructed from the datafile by reading all of
477
      it using the format pInFormat, and written to the directory containing the datafile.
478
      This means that this function can be used without worrying whether there is an index.
479
      It will be slow to execute the first time, but subsequent uses get the speed benefit
480
      of indexed access to the datafile.
481
482
      The serialization and de-serialization of the NameIndexType is entirely in
483
      this routine and could possibly be improved. Currently re-hashing is done
484
      every time the index is read.
485
  **/
486
487
  bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
488
                                       const string& datafilename, OBFormat* pInFormat)
489
0
  {
490
0
    struct headertype
491
0
    {
492
0
      char filename[256];
493
0
      size_t size;
494
0
    } header;
495
496
0
    NameIndexType::iterator itr;
497
498
0
    ifstream indexstream;
499
0
    OpenDatafile(indexstream, datafilename + ".obindx");
500
0
    if(!indexstream)
501
0
      {
502
        //Need to prepare the index
503
0
        ifstream datastream;
504
0
        string datafilepath = OpenDatafile(datastream, datafilename);
505
0
        if(!datastream)
506
0
          {
507
0
            obErrorLog.ThrowError(__FUNCTION__,
508
0
                                  datafilename + " was not found or could not be opened",  obError);
509
0
            return false;
510
0
          }
511
512
0
        OBConversion Conv(&datastream, nullptr);
513
0
        Conv.SetInFormat(pInFormat);
514
0
        OBMol mol;
515
0
        streampos pos;
516
0
        while(Conv.Read(&mol))
517
0
          {
518
0
            string name = mol.GetTitle();
519
0
            if(!name.empty())
520
0
              index.insert(make_pair(name, pos));
521
0
            mol.Clear();
522
0
            pos = datastream.tellg();
523
0
          }
524
0
        obErrorLog.ThrowError(__FUNCTION__,
525
0
                              "Prepared an index for " + datafilepath, obAuditMsg);
526
        //Save index to file
527
0
        ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
528
0
        if(!dofs) return false;
529
530
0
        strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
531
0
        header.filename[sizeof(header.filename) - 1] = '\0';
532
0
        header.size = index.size();
533
0
        dofs.write((const char*)&header, sizeof(headertype));
534
535
0
        for(itr=index.begin();itr!=index.end();++itr)
536
0
          {
537
            //#chars; chars;  ofset(4bytes).
538
0
            const char n = static_cast<char> (itr->first.size());
539
0
            dofs.put(n);
540
0
            dofs.write(itr->first.c_str(),n);
541
0
            dofs.write((const char*)&itr->second,sizeof(unsigned));
542
0
          }
543
0
      }
544
0
    else
545
0
      {
546
        //Read index data from file and put into hash_map
547
0
        indexstream.read((char*)&header,sizeof(headertype));
548
0
        itr=index.begin(); // for hint
549
0
        for(unsigned int i=0;i<header.size;++i)
550
0
          {
551
0
            char len;
552
0
            indexstream.get(len);
553
0
            string title(len, 0);
554
0
            unsigned pos;
555
0
            indexstream.read(&title[0],len);
556
0
            indexstream.read((char*)&pos,sizeof(unsigned));
557
0
            index.insert(itr, make_pair(title,pos));
558
0
          }
559
0
      }
560
0
    return true;
561
0
  }
562
563
} //namespace OpenBabel
564
565
//! \file obmolecformat.cpp
566
//! \brief Subclass of OBFormat for conversion of OBMol.