/src/openbabel/src/obmolecformat.cpp

Source
/**********************************************************************
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.

Copyright (C) 2005 Chris Morley

This file is part of the Open Babel project.
For more information, see <http://openbabel.org/>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/
#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/mol.h>
#include <openbabel/reaction.h>

#include <algorithm>
#include <iterator> // Required for MSVC2015 use of std::back_inserter

using namespace std;
namespace OpenBabel
{
  // Definitions of the static data members of OBMoleculeFormat.

  // NOTE(review): not referenced in this file; presumably guards one-time
  // registration of the general options - confirm against the header.
  bool OBMoleculeFormat::OptionsRegistered = false;

  // Molecules held back by DeferMolOutput() (option "C"), keyed by title.
  // The map owns the OBMol objects it points to.
  std::map<std::string, OBMol*> OBMoleculeFormat::IMols;

  // Accumulator for the "j"/"join" option: every input molecule is added to it.
  OBMol* OBMoleculeFormat::_jmol = nullptr;

  // Fragments produced for the "separate" option, waiting to be sent for output.
  std::vector<OBMol> OBMoleculeFormat::MolArray;

  // True once MolArray has been filled from the current input.
  bool OBMoleculeFormat::StoredMolsReady = false;

  /*! Reads the next molecule from the input stream of pConv with pFormat and
      passes it on for output.

      Depending on the general options the molecule is handled differently:
      - "C": reading is delegated to DeferMolOutput(), which stores the molecule.
      - "separate": on the first call all molecules are read and split into
        fragments stored in MolArray; each call then sends one fragment for output.
      - "j" / "join": the molecule is added to the accumulator _jmol.
      - otherwise the (transformed) molecule is sent to AddChemObject().

      \param pConv    conversion providing the input stream and the options
      \param pFormat  format whose ReadMolecule() does the actual parsing
      \return false if the stream is unusable, reading failed, or there is
              nothing more to output; true otherwise
  */
  bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    std::istream *ifs = pConv->GetInStream();
    if (!ifs || !ifs->good())
      return false;

    OBMol* pmol = new OBMol;

    //DeferMolOutput disposes of pmol on every path
    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return DeferMolOutput(pmol, pConv, pFormat);

    bool ret=true;
    if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
    {
      //On first call, separate molecule and put fragments in MolArray.
      //On subsequent calls, remove a fragment from MolArray and send it for writing
      //Done this way so that each fragment can be written to its own file (with -m option)
      if(!StoredMolsReady)
      {
        while(ret) //do all the molecules in the file
        {
          ret = pFormat->ReadMolecule(pmol,pConv);

          if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
          {
            vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule
            //Add an appropriate title to each fragment
            if(SepArray.size()>1)
              for (unsigned int i=0; i<SepArray.size(); ++i)
              {
                stringstream ss;
                ss << pmol->GetTitle() << '#' << i+1;
                string title = ss.str();
                SepArray[i].SetTitle(title);
              }
            else if(!SepArray.empty()) //a zero-atom molecule may give no fragments at all
              SepArray[0].SetTitle(pmol->GetTitle());

            copy(SepArray.begin(),SepArray.end(),back_inserter(MolArray));
          }
        }
        //Fragments are taken from the back, so reverse to keep the input order
        reverse(MolArray.begin(),MolArray.end());
        StoredMolsReady = true;
        //Clear the flags of the input stream (which may have found eof) to ensure it will
        //try to read another molecule and allow the stored ones to be sent for output.
        pConv->GetInStream()->clear();
      }

      if(MolArray.empty()) //normal end of fragments
        ret = false;
      else
      {
        // Copying is needed because the OBMol passed to AddChemObject will be deleted.
        // The OBMol in the vector is deleted here.
        OBMol* pMolCopy = new OBMol( MolArray.back());
        MolArray.pop_back();
        ret = pConv->AddChemObject(
            pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))!=0;
      }
      if(!ret)
        StoredMolsReady = false; //so that the next input is read afresh

      delete pmol;
      return ret;
    }

    ret=pFormat->ReadMolecule(pmol,pConv);

    OBMol* ptmol = nullptr;
    //Molecule is valid if it has some atoms
    //or it represents a reaction
    //or the format allows zero-atom molecules and it has a title or properties
    if(ret && (pmol->NumAtoms() > 0
      || pmol->IsReaction()
      || (pFormat->Flags()&ZEROATOMSOK && (*pmol->GetTitle() || pmol->HasData(1)))))
    {
      ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
      if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
                || pConv->IsOption("join",OBConversion::GENOPTIONS)))
      {
        //With j option, accumulate all mols in one stored in this class.
        //Also allocate when no accumulator exists yet (e.g. the first input
        //object was rejected), otherwise the += below dereferences a null pointer.
        if(pConv->IsFirstInput() || !_jmol)
          _jmol = new OBMol;
        pConv->AddChemObject(_jmol);
        //will be discarded in WriteChemObjectImpl until the last input mol. This complication
        //is needed to allow joined molecules to be from different files. pOb1 in AddChemObject
        //is zeroed at the end of a file and _jmol is in danger of not being output.
        *_jmol += *ptmol;
        delete ptmol;
        return true;
      }
    }
    else
      delete pmol;

    // Normal operation - send molecule to be written
    // (ptmol is null here when the molecule was rejected)
    ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading
    return ret;
  }

  /*! Writes the object currently held by pConv using pFormat.

      - With option "C" the deferred molecules are output instead.
      - With option "j"/"join" nothing is written until the last object, when
        the accumulated molecule _jmol is written and released.
      - Otherwise the object obtained from GetChemObject() is written: an OBMol
        directly, an OBReaction as its constituent molecules. The object is
        deleted afterwards.

      \return the success of the write; false if the object is neither an
              OBMol nor an OBReaction
  */
  bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return OutputDeferredMols(pConv);
    if(pConv->IsOption("j",OBConversion::GENOPTIONS)
        || pConv->IsOption("join",OBConversion::GENOPTIONS))
      {
        //arrives here at the end of a file
        if(!pConv->IsLast())
          return true;
        bool ret=pFormat->WriteMolecule(_jmol,pConv);
        pConv->SetOutputIndex(1);
        delete _jmol;
        _jmol = nullptr; //do not leave the static pointer dangling
        return ret;
      }


    //Retrieve the target OBMol
    OBBase* pOb = pConv->GetChemObject();

    OBMol* pmol = dynamic_cast<OBMol*> (pOb);
    bool ret=false;
    if(pmol)
      {
        if(pmol->NumAtoms()==0)
          {
            //Informational only: the molecule is still written
            std::string auditMsg = "OpenBabel::Molecule ";
            auditMsg += pmol->GetTitle();
            auditMsg += " has 0 atoms";
            obErrorLog.ThrowError(__FUNCTION__,
                                  auditMsg,
                                  obInfo);
          }

        ret = DoOutputOptions(pOb, pConv);

        if(ret)
          ret = pFormat->WriteMolecule(pmol,pConv);
      }

    //If sent a OBReaction* (rather than a OBMol*) output the constituent molecules
    OBReaction* pReact = dynamic_cast<OBReaction*> (pOb);
    if(pReact)
      ret = OutputMolsFromReaction(pReact, pConv, pFormat);
    delete pOb;
    return ret;
  }

  /*! Applies the output-side general options to pOb before it is written.

      - "addoutindex": appends a space and the output index to the title.
      - "writeconformers" (OBMol only): writes every conformer except the last
        one with the output format of pConv; the caller writes the last one.

      \return always true
  */
  bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv)
  {
    if(pConv->IsOption("addoutindex", OBConversion::GENOPTIONS))
      {
        stringstream indexedTitle;
        indexedTitle << pOb->GetTitle() << " " << pConv->GetOutputIndex();
        pOb->SetTitle(indexedTitle.str().c_str());
      }

    OBMol* pmol = dynamic_cast<OBMol*> (pOb);
    if(!pmol || !pConv->IsOption("writeconformers", OBConversion::GENOPTIONS))
      return true;

    //Write all but the final conformer here; stop early if a write fails
    int conformer = 0;
    while(conformer < pmol->NumConformers()-1)
      {
        pmol->SetConformer(conformer);
        if(!pConv->GetOutFormat()->WriteMolecule(pmol, pConv))
          break;
        ++conformer;
      }
    //Leave the molecule on the conformer the calling function will write
    pmol->SetConformer(conformer);
    return true;
  }

  /*! Instead of sending molecules for output via AddChemObject(), they are
    saved in here in OBMoleculeFormat or discarded. By default they are
    saved only if they are in the first input file. Parts of subsequent
    molecules, such as chemical structure, coordinates and OBGenericData
    can replace the parts in molecules with the same title that have already
    been stored, subject to a set of rules. After all input files have been
    read, the stored molecules (possibly now having augmented properties) are
    sent to the output format.

    Is a static function with *this as parameter so that it can be called from other
    format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.

    Takes ownership of pmol: it is either stored in IMols or deleted before returning.

    \param pmol  empty molecule to read into
    \param pConv conversion providing the input stream and options
    \param pF    format whose ReadMolecule() is used
    \return false if reading failed or the molecule could not be combined with
            a stored one of the same title (all stored molecules are then
            discarded); true otherwise
  */
  bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
  {
    static bool IsFirstFile;
    bool OnlyMolsInFirstFile=true;

    if(pConv->IsFirstInput())
      {
        IsFirstFile=true;
        //Free (not just forget) any molecules left over from an earlier run
        DeleteDeferredMols();
        pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS);
      }
    else
      {
        if((std::streamoff)pConv->GetInStream()->tellg()<=0)
          IsFirstFile=false;//File has changed
      }

    if (!pF->ReadMolecule(pmol,pConv))
      {
        delete pmol;
        return false;
      }
    const char* ptitle = pmol->GetTitle();
    if(*ptitle==0)
      obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
    else
      {
        string title(ptitle);
        string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
        if(pos!=string::npos)
          title.erase(pos);

        map<std::string, OBMol*>::iterator itr;
        itr = IMols.find(title);
        if(itr!=IMols.end())
          {
            //Molecule with the same title has been input previously: update it
            OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
            if(pNewMol)
              {
                //Replace the stored molecule by the combined one
                delete itr->second;
                itr->second = pNewMol;
              }
            else
              {
                //error: cleanup and return false
                delete pmol;
                return DeleteDeferredMols();
              }
          }
        else
          {
            //Molecule not already saved in IMols: save it if in first file
            if(!OnlyMolsInFirstFile || IsFirstFile)
              {
                IMols[title] = pmol;
                return true; //don't delete pmol
              }
          }
      }
    delete pmol;
    return true;
  }

  /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
    If both have OBGenericData of the same type, or OBPairData with the
    same attribute, the version from the molecule providing the structure is used.
    Returns a pointer to a new OBMol which will need deleting by the calling program
    (probably by being sent to an output format).
    If the molecules cannot be regarded as being the same structure a NULL
    pointer is returned and an error message logged.

    pFirst and pSecond and the objects they point to are not changed. (const
    modifiers difficult because class OBMol not designed appropriately)

    Combining molecules: rules for each of the three parts
    Title:
    Use the title of pFirst unless it has none, when use that of pSecond.
    Warning if neither molecule has a title.

    Structure
    - a structure with atoms replaces one with no atoms
    - a structure with bonds replaces one with no bonds,
    provided the formula is the same, else an error.
    - comparison of structures by InChI is not yet implemented
    - a structure with 3D coordinates replaces one with 2D coordinates
    - a structure with 2D coordinates replace one with 0D coordinates

    OBGenericData
    Data from the molecule which did not provide the structure is copied
    unless the new molecule already has data of that type.
    OBPairData
    As above, but the test is on the attribute rather than just on the type.
  */
  OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
  {
    //Decide on which OBMol provides the new title
    string title("No title"); //placeholder, used only in the error message below
    bool hasTitle = true;
    if(*pFirst->GetTitle()!=0)
      title = pFirst->GetTitle();
    else
      {
        if(*pSecond->GetTitle()!=0)
          title = pSecond->GetTitle();
        else
          {
            hasTitle = false;
            obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
          }
      }

    //Decide on which OBMol provides the new structure
    bool swap=false;
    if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
      swap=true;
    else if(pSecond->NumAtoms()!=0)
      {
        if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  "Molecules with name = " + title + " have different formula",obError);
            return nullptr;
          }
        else
          {
            if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
              swap=true;
            else
              {
                //Compare by inchi; error if different NOT YET IMPLEMENTED
                //Use the one with the higher dimension
                if(pSecond->GetDimension() > pFirst->GetDimension())
                  swap=true;
              }
          }
      }

    OBMol* pMain = swap ? pSecond : pFirst;
    OBMol* pOther = swap ? pFirst : pSecond;

    OBMol* pNewMol = new OBMol;
    *pNewMol = *pMain; //Now copies all data

    //Apply the chosen title after the copy so that the assignment cannot
    //replace it with the title of pMain.
    if(hasTitle)
      pNewMol->SetTitle(title);

    //Copy some OBGenericData from the OBMol which did not provide the structure
    vector<OBGenericData*>::iterator igd;
    for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
      {
        //copy only if not already data of the same type from molecule already copied to pNewMol
        const unsigned datatype = (*igd)->GetDataType();
        bool alreadyPresent = false;
        if(datatype==OBGenericDataType::PairData)
          {
            //There can be several PairData entries: look for one with the same attribute
            vector<OBGenericData*>::iterator jgd;
            for(jgd=pNewMol->BeginData();jgd!=pNewMol->EndData();++jgd)
              {
                if((*jgd)->GetDataType()==OBGenericDataType::PairData
                   && (*jgd)->GetAttribute() == (*igd)->GetAttribute())
                  {
                    alreadyPresent = true;
                    break;
                  }
              }
          }
        else
          alreadyPresent = (pNewMol->GetData(datatype) != nullptr);

        if(alreadyPresent)
          continue;

        OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
        pNewMol->SetData(pCopiedData);
      }
    return pNewMol;
  }

  /*! Sends every molecule stored in IMols to the output format of pConv, in
      the order of the map, then empties IMols.

      Each molecule is transformed with the general options first; if
      DoTransformations() returns a null pointer the molecule is not written.
      Output stops at the first failed write.

      \return the result of the last WriteMolecule() call; false if nothing
              was written
  */
  bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
  {
    std::map<std::string, OBMol*>::iterator itr, lastitr;
    bool ret=false;
    int i=1;
    //Find the final entry; decrementing end() of an empty map is undefined
    lastitr = IMols.end();
    if(!IMols.empty())
      --lastitr;
    pConv->SetOneObjectOnly(false);
    for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
      {
        if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))
          continue;
        pConv->SetOutputIndex(i);
        if(itr==lastitr)
          pConv->SetOneObjectOnly(); //to set IsLast

        ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);

        delete itr->second; //always delete OBMol object
        itr->second = nullptr; // so can be deleted in DeleteDeferredMols()
        if (!ret) break;
      }
    DeleteDeferredMols();//cleans up in case there have been errors
    return ret;
  }

  /*! Empties IMols, deleting the OBMol objects whose pointers are stored there.
      \return always false, so that callers can use it directly as an error return
  */
  bool OBMoleculeFormat::DeleteDeferredMols()
  {
    for(auto& stored : IMols)
      delete stored.second; //usually NULL
    IMols.clear();
    return false;
  }

  ///////////////////////////////////////////////////////////////////
  /*! Writes the constituent molecules of a reaction with pFormat, in the order
      reactants, products, agents, transition state (if any).

      If the format carries the WRITEONEONLY flag and there are several
      molecules, a warning is logged and only the first is written.

      \return false as soon as a write fails, otherwise true
  */
  bool OBMoleculeFormat::OutputMolsFromReaction
    (OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat)
  {
    //Gather all the components in one list, just for convenience
    vector<std::shared_ptr<OBMol> > components;
    for(int r=0; r<pReact->NumReactants(); ++r)
      components.push_back(pReact->GetReactant(r));
    for(int p=0; p<pReact->NumProducts(); ++p)
      components.push_back(pReact->GetProduct(p));
    for(int a=0; a<pReact->NumAgents(); ++a)
      components.push_back(pReact->GetAgent(a));

    if(pReact->GetTransitionState())
      components.push_back(pReact->GetTransitionState());

    // The OBReaction object is not output
    pConv->SetOutputIndex(pConv->GetOutputIndex() - 1);

    const bool singleMoleculeFormat = (pFormat->Flags() & WRITEONEONLY) != 0;
    if(singleMoleculeFormat && components.size()>1)
      {
        stringstream warning;
        warning << "There are " << components.size() << " molecules to be output,"
                << "but this format is for single molecules only";
        obErrorLog.ThrowError(__FUNCTION__, warning.str(), obWarning);
        components.resize(1);
      }

    const std::size_t total = components.size();
    for(std::size_t idx=0; idx<total; ++idx)
      {
        if(!components[idx])
          continue; //empty slot: nothing to write

        //Have to set these manually because not using "Convert" interface
        pConv->SetLast(idx+1 == total);
        pConv->SetOutputIndex(pConv->GetOutputIndex()+1);
        if(!pFormat->WriteMolecule(
             components[idx]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),
             pConv))
          return false;
      }
    return true;
  }

  //////////////////////////////////////////////////////////////////
  /** Attempts to read the index file datafilename.obindx successively
      from the following directories:
      - the current directory
      - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
      if the environment variable is not set
      - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
      An index of type NameIndexType is then constructed. NameIndexType is defined
      in obmolecformat.h as std::unordered_map. It is searched by
      @code
      NameIndexType::iterator itr = index.find(molecule_name);
      if(itr!=index.end())
      unsigned pos_in_datafile = itr->second;
      @endcode
      pos_in_datafile is used as a parameter in seekg() to read from the datafile

      If no index is found, it is constructed from the datafile by reading all of
      it using the format pInFormat, and written to the directory containing the datafile.
      This means that this function can be used without worrying whether there is an index.
      It will be slow to execute the first time, but subsequent uses get the speed benefit
      of indexed access to the datafile.

      The serialization and de-serialization of the NameIndexType is entirely in
      this routine and could possibly be improved. Currently re-hashing is done
      every time the index is read.

      File layout: a header (file name, number of entries) followed, for each
      entry, by one length byte, that many characters of the name, and the
      offset as an unsigned. Because of the single length byte, names longer
      than 255 characters are stored truncated to 255 characters.

      \param index        receives the name -> offset entries
      \param datafilename name of the data file (the index is datafilename.obindx)
      \param pInFormat    format used to read the data file when building the index
      \return false if neither index nor data file could be opened, if a newly
              built index could not be saved, or if the index file is
              truncated or corrupt; true otherwise
  **/

  bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
                                       const string& datafilename, OBFormat* pInFormat)
  {
    struct headertype
    {
      char filename[256];
      size_t size;
    } header = {};

    //Largest name length that fits in the one-byte length field
    const string::size_type maxNameLength = 255;

    NameIndexType::iterator itr;

    ifstream indexstream;
    OpenDatafile(indexstream, datafilename + ".obindx");
    if(!indexstream)
      {
        //Need to prepare the index
        ifstream datastream;
        string datafilepath = OpenDatafile(datastream, datafilename);
        if(!datastream)
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  datafilename + " was not found or could not be opened",  obError);
            return false;
          }

        OBConversion Conv(&datastream, nullptr);
        Conv.SetInFormat(pInFormat);
        OBMol mol;
        streampos pos(0); //offset of the molecule about to be read
        while(Conv.Read(&mol))
          {
            string name = mol.GetTitle();
            if(!name.empty())
              index.insert(make_pair(name, pos));
            mol.Clear();
            pos = datastream.tellg();
          }
        obErrorLog.ThrowError(__FUNCTION__,
                              "Prepared an index for " + datafilepath, obAuditMsg);
        //Save index to file
        ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
        if(!dofs) return false;

        strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
        header.filename[sizeof(header.filename) - 1] = '\0';
        header.size = index.size();
        dofs.write(reinterpret_cast<const char*>(&header), sizeof(headertype));

        for(itr=index.begin();itr!=index.end();++itr)
          {
            //#chars (1 byte, unsigned); chars; offset (sizeof(unsigned) bytes).
            //The length goes through unsigned char so that names longer than
            //127 characters do not turn into a negative write count.
            const string::size_type len = std::min(itr->first.size(), maxNameLength);
            const unsigned offset = itr->second;
            dofs.put(static_cast<char>(static_cast<unsigned char>(len)));
            dofs.write(itr->first.data(), static_cast<streamsize>(len));
            dofs.write(reinterpret_cast<const char*>(&offset), sizeof(unsigned));
          }
      }
    else
      {
        //Read index data from file and put into hash_map
        indexstream.read(reinterpret_cast<char*>(&header), sizeof(headertype));
        if(!indexstream)
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  datafilename + ".obindx is truncated or corrupt", obError);
            return false;
          }
        for(size_t i=0;i<header.size;++i)
          {
            char rawlen = 0;
            indexstream.get(rawlen);
            //Interpret the length byte as unsigned (0..255)
            const string::size_type len = static_cast<unsigned char>(rawlen);
            string title(len, '\0');
            unsigned pos = 0;
            if(len!=0)
              indexstream.read(&title[0], static_cast<streamsize>(len));
            indexstream.read(reinterpret_cast<char*>(&pos), sizeof(unsigned));
            if(!indexstream)
              {
                //Fewer entries than the header promised: do not add garbage
                obErrorLog.ThrowError(__FUNCTION__,
                                      datafilename + ".obindx is truncated or corrupt", obError);
                return false;
              }
            //No insertion hint: a stored iterator may be invalidated by rehashing
            index.insert(make_pair(title,pos));
          }
      }
    return true;
  }

} //namespace OpenBabel

//! \file obmolecformat.cpp
//! \brief Subclass of OBFormat for conversion of OBMol.

Coverage Report

Created: 2026-01-10 06:48

Line	Count	Source
1		/**********************************************************************
2		obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4		Copyright (C) 2005 Chris Morley
5
6		This file is part of the Open Babel project.
7		For more information, see <http://openbabel.org/>
8
9		This program is free software; you can redistribute it and/or modify
10		it under the terms of the GNU General Public License as published by
11		the Free Software Foundation version 2 of the License.
12
13		This program is distributed in the hope that it will be useful,
14		but WITHOUT ANY WARRANTY; without even the implied warranty of
15		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		GNU General Public License for more details.
17		***********************************************************************/
18		#include <openbabel/babelconfig.h>
19		#include <openbabel/obmolecformat.h>
20		#include <openbabel/mol.h>
21		#include <openbabel/reaction.h>
22
23		#include <algorithm>
24		#include <iterator> // Required for MSVC2015 use of std::back_inserter
25
26		using namespace std;
27		namespace OpenBabel
28		{
29		bool OBMoleculeFormat::OptionsRegistered=false;
30		std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
31		OBMol* OBMoleculeFormat::_jmol;
32		std::vector<OBMol> OBMoleculeFormat::MolArray;
33		bool OBMoleculeFormat::StoredMolsReady=false;
34
35		bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
36	18.5k	{
37	18.5k	std::istream *ifs = pConv->GetInStream();
38	18.5k	if (!ifs \|\| !ifs->good())
39	0	return false;
40
41	18.5k	OBMol* pmol = new OBMol;
42
43	18.5k	if(pConv->IsOption("C",OBConversion::GENOPTIONS))
44	0	return DeferMolOutput(pmol, pConv, pFormat);
45
46	18.5k	bool ret=true;
47	18.5k	if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
48	0	{
49		//On first call, separate molecule and put fragments in MolArray.
50		//On subsequent calls, remove a fragment from MolArray and send it for writing
51		//Done this way so that each fragment can be written to its own file (with -m option)
52	0	if(!StoredMolsReady)
53	0	{
54	0	while(ret) //do all the molecules in the file
55	0	{
56	0	ret = pFormat->ReadMolecule(pmol,pConv);
57
58	0	if(ret && (pmol->NumAtoms() > 0 \|\| (pFormat->Flags()&ZEROATOMSOK)))
59	0	{
60	0	vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule
61		//Add an appropriate title to each fragment
62	0	if(SepArray.size()>1)
63	0	for (unsigned int i=0; i<SepArray.size(); ++i)
64	0	{
65	0	stringstream ss;
66	0	ss << pmol->GetTitle() << '#' << i+1;
67	0	string title = ss.str();
68	0	SepArray[i].SetTitle(title);
69	0	}
70	0	else
71	0	SepArray[0].SetTitle(pmol->GetTitle());
72
73	0	copy(SepArray.begin(),SepArray.end(),back_inserter(MolArray));
74	0	}
75	0	}
76	0	reverse(MolArray.begin(),MolArray.end());
77	0	StoredMolsReady = true;
78		//Clear the flags of the input stream(which may have found eof) to ensure will
79		//try to read anothe molecule and allow the stored ones to be sent for output.
80	0	pConv->GetInStream()->clear();
81	0	}
82
83	0	if(MolArray.empty()) //normal end of fragments
84	0	ret =false;
85	0	else
86	0	{
87		// Copying is needed because the OBMol passed to AddChemObject will be deleted.
88		// The OBMol in the vector is deleted here.
89	0	OBMol* pMolCopy = new OBMol( MolArray.back());
90	0	MolArray.pop_back();
91	0	ret = pConv->AddChemObject(
92	0	pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))!=0;
93	0	}
94	0	if(!ret)
95	0	StoredMolsReady = false;
96
97	0	delete pmol;
98	0	return ret;
99	0	}
100
101	18.5k	ret=pFormat->ReadMolecule(pmol,pConv);
102
103	18.5k	OBMol* ptmol = nullptr;
104		//Molecule is valid if it has some atoms
105		//or it represents a reaction
106		//or the format allows zero-atom molecules and it has a title or properties
107	18.5k	if(ret && (pmol->NumAtoms() > 0
108	15.1k	\|\| pmol->IsReaction()
109	15.1k	\|\| (pFormat->Flags()&ZEROATOMSOK && (*pmol->GetTitle() \|\| pmol->HasData(1)))))
110	10.5k	{
111	10.5k	ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
112	10.5k	if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
113	10.5k	\|\| pConv->IsOption("join",OBConversion::GENOPTIONS)))
114	0	{
115		//With j option, accumulate all mols in one stored in this class
116	0	if(pConv->IsFirstInput())
117	0	_jmol = new OBMol;
118	0	pConv->AddChemObject(_jmol);
119		//will be discarded in WriteChemObjectImpl until the last input mol. This complication
120		//is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
121		//is zeroed at the end of a file and _jmol is in danger of not being output.
122	0	*_jmol += *ptmol;
123	0	delete ptmol;
124	0	return true;
125	0	}
126	10.5k	}
127	8.03k	else
128	8.03k	delete pmol;
129
130		// Normal operation - send molecule to be written
131	18.5k	ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading
132	18.5k	return ret;
133	18.5k	}
134
135		bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
136	10.5k	{
137	10.5k	if(pConv->IsOption("C",OBConversion::GENOPTIONS))
138	0	return OutputDeferredMols(pConv);
139	10.5k	if(pConv->IsOption("j",OBConversion::GENOPTIONS)
140	10.5k	\|\| pConv->IsOption("join",OBConversion::GENOPTIONS))
141	0	{
142		//arrives here at the end of a file
143	0	if(!pConv->IsLast())
144	0	return true;
145	0	bool ret=pFormat->WriteMolecule(_jmol,pConv);
146	0	pConv->SetOutputIndex(1);
147	0	delete _jmol;
148	0	return ret;
149	0	}
150
151
152		//Retrieve the target OBMol
153	10.5k	OBBase* pOb = pConv->GetChemObject();
154
155	10.5k	OBMol* pmol = dynamic_cast<OBMol*> (pOb);
156	10.5k	bool ret=false;
157	10.5k	if(pmol)
158	10.5k	{
159	10.5k	if(pmol->NumAtoms()==0)
160	7.32k	{
161	7.32k	std::string auditMsg = "OpenBabel::Molecule ";
162	7.32k	auditMsg += pmol->GetTitle();
163	7.32k	auditMsg += " has 0 atoms";
164	7.32k	obErrorLog.ThrowError(__FUNCTION__,
165	7.32k	auditMsg,
166	7.32k	obInfo);
167	7.32k	}
168	10.5k	ret=true;
169
170	10.5k	ret = DoOutputOptions(pOb, pConv);
171
172	10.5k	if(ret)
173	10.5k	ret = pFormat->WriteMolecule(pmol,pConv);
174	10.5k	}
175
176		//If sent a OBReaction* (rather than a OBMol*) output the consituent molecules
177	10.5k	OBReaction* pReact = dynamic_cast<OBReaction*> (pOb);
178	10.5k	if(pReact)
179	12	ret = OutputMolsFromReaction(pReact, pConv, pFormat);
180	10.5k	delete pOb;
181	10.5k	return ret;
182	10.5k	}
183
184		bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv)
185	10.5k	{
186	10.5k	if(pConv->IsOption("addoutindex", OBConversion::GENOPTIONS)) {
187	0	stringstream ss;
188	0	ss << pOb->GetTitle() << " " << pConv->GetOutputIndex();
189	0	pOb->SetTitle(ss.str().c_str());
190	0	}
191
192	10.5k	OBMol* pmol = dynamic_cast<OBMol*> (pOb);
193	10.5k	if(pmol) {
194	10.5k	if(pConv->IsOption("writeconformers", OBConversion::GENOPTIONS)) {
195		//The last conformer is written in the calling function
196	0	int c = 0;
197	0	for (; c < pmol->NumConformers()-1; ++c) {
198	0	pmol->SetConformer(c);
199	0	if(!pConv->GetOutFormat()->WriteMolecule(pmol, pConv))
200	0	break;
201	0	}
202	0	pmol->SetConformer(c);
203	0	}
204	10.5k	}
205	10.5k	return true;
206	10.5k	}
207
208		/*! Instead of sending molecules for output via AddChemObject(), they are
209		saved in here in OBMoleculeFormat or discarded. By default they are
210		saved only if they are in the first input file. Parts of subsequent
211		molecules, such as chemical structure, coordinates and OBGenericData
212		can replace the parts in molecules with the same title that have already
213		been stored, subject to a set of rules. After all input files have been
214		read, the stored molecules (possibly now having augmented properties) are
215		sent to the output format.
216
217		Is a static function with *this as parameter so that it can be called from other
218		format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
219		*/
220		bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
221	0	{
222	0	static bool IsFirstFile;
223	0	bool OnlyMolsInFirstFile=true;
224
225	0	if(pConv->IsFirstInput())
226	0	{
227	0	IsFirstFile=true;
228	0	IMols.clear();
229	0	pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS);
230	0	}
231	0	else
232	0	{
233	0	if((std::streamoff)pConv->GetInStream()->tellg()<=0)
234	0	IsFirstFile=false;//File has changed
235	0	}
236
237	0	if (!pF->ReadMolecule(pmol,pConv))
238	0	{
239	0	delete pmol;
240	0	return false;
241	0	}
242	0	const char* ptitle = pmol->GetTitle();
243	0	if(*ptitle==0)
244	0	obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
245	0	else
246	0	{
247	0	string title(ptitle);
248	0	string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
249	0	if(pos!=string::npos)
250	0	title.erase(pos);
251
252	0	map<std::string, OBMol*>::iterator itr;
253	0	itr = IMols.find(title);
254	0	if(itr!=IMols.end())
255	0	{
256		//Molecule with the same title has been input previously: update it
257	0	OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
258	0	if(pNewMol)
259	0	{
260	0	delete itr->second;
261	0	IMols[title] = pNewMol;
262	0	}
263	0	else
264	0	{
265		//error: cleanup and return false
266	0	delete pmol;
267	0	return DeleteDeferredMols();
268	0	}
269	0	}
270	0	else
271	0	{
272		//Molecule not already saved in IMols: save it if in first file
273	0	if(!OnlyMolsInFirstFile \|\| IsFirstFile)
274	0	{
275	0	IMols[title] = pmol;
276	0	return true; //don't delete pmol
277	0	}
278	0	}
279	0	}
280	0	delete pmol;
281	0	return true;
282	0	}
283
284		/*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
285		If both have OBGenericData of the same type, or OBPairData with the
286		same attribute, the version from pFirst is used.
287		Returns a pointer to a new OBMol which will need deleting by the calling program
288		(probably by being sent to an output format).
289		If the molecules cannot be regarded as being the same structure a NULL
290		pointer is returned and an error message logged.
291
292		pFirst and pSecond and the objects they point to are not changed. (const
293		modifiers difficult because class OBMol not designed appropriately)
294
295		Combining molecules: rules for each of the three parts
296		Title:
297		Use the title of pFirst unless it has none, when use that of pSecond.
298		Warning if neither molecule has a title.
299
300		Structure
301		- a structure with atoms replaces one with no atoms
302		- a structure with bonds replaces one with no bonds,
303		provided the formula is the same, else an error.
304		- structures with atoms and bonds are compared by InChI; error if not the same.
305		- a structure with 3D coordinates replaces one with 2D coordinates
306		- a structure with 2D coordinates replace one with 0D coordinates
307
308		OBGenericData
309		OBPairData
310		*/
311		OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
312	0	{
313		//Decide on which OBMol provides the new title
314	0	string title("No title");
315	0	if(*pFirst->GetTitle()!=0)
316	0	title = pFirst->GetTitle();
317	0	else
318	0	{
319	0	if(*pSecond->GetTitle()!=0)
320	0	title = pSecond->GetTitle();
321	0	else
322	0	obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
323	0	}
324
325		//Decide on which OBMol provides the new structure
326	0	bool swap=false;
327	0	if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
328	0	swap=true;
329	0	else if(pSecond->NumAtoms()!=0)
330	0	{
331	0	if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
332	0	{
333	0	obErrorLog.ThrowError(__FUNCTION__,
334	0	"Molecules with name = " + title + " have different formula",obError);
335	0	return nullptr;
336	0	}
337	0	else
338	0	{
339	0	if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
340	0	swap=true;
341	0	else
342	0	{
343		//Compare by inchi; error if different NOT YET IMPLEMENTED
344		//Use the one with the higher dimension
345	0	if(pSecond->GetDimension() > pFirst->GetDimension())
346	0	swap=true;
347	0	}
348	0	}
349	0	}
350
351	0	OBMol* pNewMol = new OBMol;
352	0	pNewMol->SetTitle(title);
353
354	0	OBMol* pMain = swap ? pSecond : pFirst;
355	0	OBMol* pOther = swap ? pFirst : pSecond;
356
357	0	pNewMol = pMain; //Now copies all data
358
359		//Copy some OBGenericData from the OBMol which did not provide the structure
360	0	vector<OBGenericData*>::iterator igd;
361	0	for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
362	0	{
363		//copy only if not already data of the same type from molecule already copied to pNewMol
364	0	unsigned datatype = (*igd)->GetDataType();
365	0	OBGenericData* pData = pNewMol->GetData(datatype);
366	0	if(datatype==OBGenericDataType::PairData)
367	0	{
368	0	if(pData->GetAttribute() == (*igd)->GetAttribute())
369	0	continue;
370	0	}
371	0	else if (pNewMol->GetData(datatype) != nullptr)
372	0	continue;
373
374	0	OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
375	0	pNewMol->SetData(pCopiedData);
376	0	}
377	0	return pNewMol;
378	0	}
379
380		bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
381	0	{
382	0	std::map<std::string, OBMol*>::iterator itr, lastitr;
383	0	bool ret=false;
384	0	int i=1;
385	0	lastitr = IMols.end();
386	0	--lastitr;
387	0	pConv->SetOneObjectOnly(false);
388	0	for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
389	0	{
390	0	if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))
391	0	continue;
392	0	pConv->SetOutputIndex(i);
393	0	if(itr==lastitr)
394	0	pConv->SetOneObjectOnly(); //to set IsLast
395
396	0	ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
397
398	0	delete itr->second; //always delete OBMol object
399	0	itr->second = nullptr; // so can be deleted in DeleteDeferredMols()
400	0	if (!ret) break;
401	0	}
402	0	DeleteDeferredMols();//cleans up in case there have been errors
403	0	return ret;
404	0	}
405
406		bool OBMoleculeFormat::DeleteDeferredMols()
407	0	{
408		//Empties IMols, deteting the OBMol objects whose pointers are stored there
409	0	std::map<std::string, OBMol*>::iterator itr;
410	0	for(itr=IMols.begin();itr!=IMols.end();++itr)
411	0	{
412	0	delete itr->second; //usually NULL
413	0	}
414	0	IMols.clear();
415	0	return false;
416	0	}
417
418		///////////////////////////////////////////////////////////////////
419		bool OBMoleculeFormat::OutputMolsFromReaction
420		(OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat)
421	12	{
422		//Output all the constituent molecules of the reaction
423
424		//Collect the molecules first, just for convenience
425	12	vector<std::shared_ptr<OBMol> > mols;
426	21	for(int i=0;i<pReact->NumReactants();i++)
427	9	mols.push_back(pReact->GetReactant(i));
428	163	for(int i=0;i<pReact->NumProducts();i++)
429	151	mols.push_back(pReact->GetProduct(i));
430	12	for (int i = 0; i<pReact->NumAgents(); i++)
431	0	mols.push_back(pReact->GetAgent(i));
432
433	12	if(pReact->GetTransitionState())
434	0	mols.push_back(pReact->GetTransitionState());
435
436	12	pConv->SetOutputIndex(pConv->GetOutputIndex() - 1); // The OBReaction object is not output
437	12	if((pFormat->Flags() & WRITEONEONLY) && mols.size()>1)
438	4	{
439	4	stringstream ss;
440	4	ss << "There are " << mols.size() << " molecules to be output,"
441	4	<< "but this format is for single molecules only";
442	4	obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
443	4	mols.resize(1);
444	4	}
445	12	bool ok = true;
446	148	for(unsigned int i=0;i<mols.size() && ok;++i)
447	136	{
448	136	if(mols[i])
449	136	{
450		//Have to do set these manually because not using "Convert" interface
451	136	pConv->SetLast(i==mols.size()-1);
452	136	pConv->SetOutputIndex(pConv->GetOutputIndex()+1);
453	136	ok = pFormat->WriteMolecule(
454	136	mols[i]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),pConv);
455	136	}
456	136	}
457	12	return ok;
458	12	}
459
460		//////////////////////////////////////////////////////////////////
461		/** Attempts to read the index file datafilename.obindx successively
462		from the following directories:
463		- the current directory
464		- that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
465		if the environment variable is not set
466		- in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
467		An index of type NameIndexType is then constructed. NameIndexType is defined
468		in obmolecformat.h as std::unordered_map. It is searched by
469		@code
470		NameIndexType::iterator itr = index.find(molecule_name);
471		if(itr!=index.end())
472		unsigned pos_in_datafile = itr->second;
473		@endcode
474		pos_in_datafile is used as a parameter in seekg() to read from the datafile
475
476		If no index is found, it is constructed from the datafile by reading all of
477		it using the format pInFormat, and written to the directory containing the datafile.
478		This means that this function can be used without worrying whether there is an index.
479		It will be slow to execute the first time, but subsequent uses get the speed benefit
480		of indexed access to the datafile.
481
482		The serialization and de-serialization of the NameIndexType is entirely in
483		this routine and could possibly be improved. Currently re-hashing is done
484		every time the index is read.
485		**/
486
487		bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
488		const string& datafilename, OBFormat* pInFormat)
489	0	{
490	0	struct headertype
491	0	{
492	0	char filename[256];
493	0	size_t size;
494	0	} header;
495
496	0	NameIndexType::iterator itr;
497
498	0	ifstream indexstream;
499	0	OpenDatafile(indexstream, datafilename + ".obindx");
500	0	if(!indexstream)
501	0	{
502		//Need to prepare the index
503	0	ifstream datastream;
504	0	string datafilepath = OpenDatafile(datastream, datafilename);
505	0	if(!datastream)
506	0	{
507	0	obErrorLog.ThrowError(__FUNCTION__,
508	0	datafilename + " was not found or could not be opened", obError);
509	0	return false;
510	0	}
511
512	0	OBConversion Conv(&datastream, nullptr);
513	0	Conv.SetInFormat(pInFormat);
514	0	OBMol mol;
515	0	streampos pos;
516	0	while(Conv.Read(&mol))
517	0	{
518	0	string name = mol.GetTitle();
519	0	if(!name.empty())
520	0	index.insert(make_pair(name, pos));
521	0	mol.Clear();
522	0	pos = datastream.tellg();
523	0	}
524	0	obErrorLog.ThrowError(__FUNCTION__,
525	0	"Prepared an index for " + datafilepath, obAuditMsg);
526		//Save index to file
527	0	ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out\|ios_base::binary);
528	0	if(!dofs) return false;
529
530	0	strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
531	0	header.filename[sizeof(header.filename) - 1] = '\0';
532	0	header.size = index.size();
533	0	dofs.write((const char*)&header, sizeof(headertype));
534
535	0	for(itr=index.begin();itr!=index.end();++itr)
536	0	{
537		//#chars; chars; ofset(4bytes).
538	0	const char n = static_cast<char> (itr->first.size());
539	0	dofs.put(n);
540	0	dofs.write(itr->first.c_str(),n);
541	0	dofs.write((const char*)&itr->second,sizeof(unsigned));
542	0	}
543	0	}
544	0	else
545	0	{
546		//Read index data from file and put into hash_map
547	0	indexstream.read((char*)&header,sizeof(headertype));
548	0	itr=index.begin(); // for hint
549	0	for(unsigned int i=0;i<header.size;++i)
550	0	{
551	0	char len;
552	0	indexstream.get(len);
553	0	string title(len, 0);
554	0	unsigned pos;
555	0	indexstream.read(&title[0],len);
556	0	indexstream.read((char*)&pos,sizeof(unsigned));
557	0	index.insert(itr, make_pair(title,pos));
558	0	}
559	0	}
560	0	return true;
561	0	}
562
563		} //namespace OpenBabel
564
565		//! \file obmolecformat.cpp
566		//! \brief Subclass of OBFormat for conversion of OBMol.