/src/openbabel/src/obmolecformat.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol. |
3 | | |
4 | | Copyright (C) 2005 Chris Morley |
5 | | |
6 | | This file is part of the Open Babel project. |
7 | | For more information, see <http://openbabel.org/> |
8 | | |
9 | | This program is free software; you can redistribute it and/or modify |
10 | | it under the terms of the GNU General Public License as published by |
11 | | the Free Software Foundation version 2 of the License. |
12 | | |
13 | | This program is distributed in the hope that it will be useful, |
14 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | GNU General Public License for more details. |
17 | | ***********************************************************************/ |
18 | | #include <openbabel/babelconfig.h> |
19 | | #include <openbabel/obmolecformat.h> |
20 | | #include <openbabel/mol.h> |
21 | | #include <openbabel/reaction.h> |
22 | | |
23 | | #include <algorithm> |
24 | | #include <iterator> // Required for MSVC2015 use of std::back_inserter |
25 | | |
26 | | using namespace std; |
27 | | namespace OpenBabel |
28 | | { |
29 | | bool OBMoleculeFormat::OptionsRegistered=false; |
30 | | std::map<std::string, OBMol*> OBMoleculeFormat::IMols; |
31 | | OBMol* OBMoleculeFormat::_jmol; |
32 | | std::vector<OBMol> OBMoleculeFormat::MolArray; |
33 | | bool OBMoleculeFormat::StoredMolsReady=false; |
34 | | |
35 | | bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
36 | 18.5k | { |
37 | 18.5k | std::istream *ifs = pConv->GetInStream(); |
38 | 18.5k | if (!ifs || !ifs->good()) |
39 | 0 | return false; |
40 | | |
41 | 18.5k | OBMol* pmol = new OBMol; |
42 | | |
43 | 18.5k | if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
44 | 0 | return DeferMolOutput(pmol, pConv, pFormat); |
45 | | |
46 | 18.5k | bool ret=true; |
47 | 18.5k | if(pConv->IsOption("separate",OBConversion::GENOPTIONS)) |
48 | 0 | { |
49 | | //On first call, separate molecule and put fragments in MolArray. |
50 | | //On subsequent calls, remove a fragment from MolArray and send it for writing |
51 | | //Done this way so that each fragment can be written to its own file (with -m option) |
52 | 0 | if(!StoredMolsReady) |
53 | 0 | { |
54 | 0 | while(ret) //do all the molecules in the file |
55 | 0 | { |
56 | 0 | ret = pFormat->ReadMolecule(pmol,pConv); |
57 | |
|
58 | 0 | if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK))) |
59 | 0 | { |
60 | 0 | vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule |
61 | | //Add an appropriate title to each fragment |
62 | 0 | if(SepArray.size()>1) |
63 | 0 | for (unsigned int i=0; i<SepArray.size(); ++i) |
64 | 0 | { |
65 | 0 | stringstream ss; |
66 | 0 | ss << pmol->GetTitle() << '#' << i+1; |
67 | 0 | string title = ss.str(); |
68 | 0 | SepArray[i].SetTitle(title); |
69 | 0 | } |
70 | 0 | else |
71 | 0 | SepArray[0].SetTitle(pmol->GetTitle()); |
72 | |
|
73 | 0 | copy(SepArray.begin(),SepArray.end(),back_inserter(MolArray)); |
74 | 0 | } |
75 | 0 | } |
76 | 0 | reverse(MolArray.begin(),MolArray.end()); |
77 | 0 | StoredMolsReady = true; |
78 | | //Clear the flags of the input stream(which may have found eof) to ensure will |
79 | | //try to read anothe molecule and allow the stored ones to be sent for output. |
80 | 0 | pConv->GetInStream()->clear(); |
81 | 0 | } |
82 | |
|
83 | 0 | if(MolArray.empty()) //normal end of fragments |
84 | 0 | ret =false; |
85 | 0 | else |
86 | 0 | { |
87 | | // Copying is needed because the OBMol passed to AddChemObject will be deleted. |
88 | | // The OBMol in the vector is deleted here. |
89 | 0 | OBMol* pMolCopy = new OBMol( MolArray.back()); |
90 | 0 | MolArray.pop_back(); |
91 | 0 | ret = pConv->AddChemObject( |
92 | 0 | pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))!=0; |
93 | 0 | } |
94 | 0 | if(!ret) |
95 | 0 | StoredMolsReady = false; |
96 | |
|
97 | 0 | delete pmol; |
98 | 0 | return ret; |
99 | 0 | } |
100 | | |
101 | 18.5k | ret=pFormat->ReadMolecule(pmol,pConv); |
102 | | |
103 | 18.5k | OBMol* ptmol = nullptr; |
104 | | //Molecule is valid if it has some atoms |
105 | | //or it represents a reaction |
106 | | //or the format allows zero-atom molecules and it has a title or properties |
107 | 18.5k | if(ret && (pmol->NumAtoms() > 0 |
108 | 15.1k | || pmol->IsReaction() |
109 | 15.1k | || (pFormat->Flags()&ZEROATOMSOK && (*pmol->GetTitle() || pmol->HasData(1))))) |
110 | 10.5k | { |
111 | 10.5k | ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv)); |
112 | 10.5k | if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) |
113 | 10.5k | || pConv->IsOption("join",OBConversion::GENOPTIONS))) |
114 | 0 | { |
115 | | //With j option, accumulate all mols in one stored in this class |
116 | 0 | if(pConv->IsFirstInput()) |
117 | 0 | _jmol = new OBMol; |
118 | 0 | pConv->AddChemObject(_jmol); |
119 | | //will be discarded in WriteChemObjectImpl until the last input mol. This complication |
120 | | //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object |
121 | | //is zeroed at the end of a file and _jmol is in danger of not being output. |
122 | 0 | *_jmol += *ptmol; |
123 | 0 | delete ptmol; |
124 | 0 | return true; |
125 | 0 | } |
126 | 10.5k | } |
127 | 8.03k | else |
128 | 8.03k | delete pmol; |
129 | | |
130 | | // Normal operation - send molecule to be written |
131 | 18.5k | ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading |
132 | 18.5k | return ret; |
133 | 18.5k | } |
134 | | |
135 | | bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
136 | 10.5k | { |
137 | 10.5k | if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
138 | 0 | return OutputDeferredMols(pConv); |
139 | 10.5k | if(pConv->IsOption("j",OBConversion::GENOPTIONS) |
140 | 10.5k | || pConv->IsOption("join",OBConversion::GENOPTIONS)) |
141 | 0 | { |
142 | | //arrives here at the end of a file |
143 | 0 | if(!pConv->IsLast()) |
144 | 0 | return true; |
145 | 0 | bool ret=pFormat->WriteMolecule(_jmol,pConv); |
146 | 0 | pConv->SetOutputIndex(1); |
147 | 0 | delete _jmol; |
148 | 0 | return ret; |
149 | 0 | } |
150 | | |
151 | | |
152 | | //Retrieve the target OBMol |
153 | 10.5k | OBBase* pOb = pConv->GetChemObject(); |
154 | | |
155 | 10.5k | OBMol* pmol = dynamic_cast<OBMol*> (pOb); |
156 | 10.5k | bool ret=false; |
157 | 10.5k | if(pmol) |
158 | 10.5k | { |
159 | 10.5k | if(pmol->NumAtoms()==0) |
160 | 7.32k | { |
161 | 7.32k | std::string auditMsg = "OpenBabel::Molecule "; |
162 | 7.32k | auditMsg += pmol->GetTitle(); |
163 | 7.32k | auditMsg += " has 0 atoms"; |
164 | 7.32k | obErrorLog.ThrowError(__FUNCTION__, |
165 | 7.32k | auditMsg, |
166 | 7.32k | obInfo); |
167 | 7.32k | } |
168 | 10.5k | ret=true; |
169 | | |
170 | 10.5k | ret = DoOutputOptions(pOb, pConv); |
171 | | |
172 | 10.5k | if(ret) |
173 | 10.5k | ret = pFormat->WriteMolecule(pmol,pConv); |
174 | 10.5k | } |
175 | | |
176 | | //If sent a OBReaction* (rather than a OBMol*) output the consituent molecules |
177 | 10.5k | OBReaction* pReact = dynamic_cast<OBReaction*> (pOb); |
178 | 10.5k | if(pReact) |
179 | 12 | ret = OutputMolsFromReaction(pReact, pConv, pFormat); |
180 | 10.5k | delete pOb; |
181 | 10.5k | return ret; |
182 | 10.5k | } |
183 | | |
184 | | bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv) |
185 | 10.5k | { |
186 | 10.5k | if(pConv->IsOption("addoutindex", OBConversion::GENOPTIONS)) { |
187 | 0 | stringstream ss; |
188 | 0 | ss << pOb->GetTitle() << " " << pConv->GetOutputIndex(); |
189 | 0 | pOb->SetTitle(ss.str().c_str()); |
190 | 0 | } |
191 | | |
192 | 10.5k | OBMol* pmol = dynamic_cast<OBMol*> (pOb); |
193 | 10.5k | if(pmol) { |
194 | 10.5k | if(pConv->IsOption("writeconformers", OBConversion::GENOPTIONS)) { |
195 | | //The last conformer is written in the calling function |
196 | 0 | int c = 0; |
197 | 0 | for (; c < pmol->NumConformers()-1; ++c) { |
198 | 0 | pmol->SetConformer(c); |
199 | 0 | if(!pConv->GetOutFormat()->WriteMolecule(pmol, pConv)) |
200 | 0 | break; |
201 | 0 | } |
202 | 0 | pmol->SetConformer(c); |
203 | 0 | } |
204 | 10.5k | } |
205 | 10.5k | return true; |
206 | 10.5k | } |
207 | | |
208 | | /*! Instead of sending molecules for output via AddChemObject(), they are |
209 | | saved in here in OBMoleculeFormat or discarded. By default they are |
210 | | saved only if they are in the first input file. Parts of subsequent |
211 | | molecules, such as chemical structure, coordinates and OBGenericData |
212 | | can replace the parts in molecules with the same title that have already |
213 | | been stored, subject to a set of rules. After all input files have been |
214 | | read, the stored molecules (possibly now having augmented properties) are |
215 | | sent to the output format. |
216 | | |
217 | | Is a static function with *this as parameter so that it can be called from other |
218 | | format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat. |
219 | | */ |
220 | | bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF ) |
221 | 0 | { |
222 | 0 | static bool IsFirstFile; |
223 | 0 | bool OnlyMolsInFirstFile=true; |
224 | |
|
225 | 0 | if(pConv->IsFirstInput()) |
226 | 0 | { |
227 | 0 | IsFirstFile=true; |
228 | 0 | IMols.clear(); |
229 | 0 | pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS); |
230 | 0 | } |
231 | 0 | else |
232 | 0 | { |
233 | 0 | if((std::streamoff)pConv->GetInStream()->tellg()<=0) |
234 | 0 | IsFirstFile=false;//File has changed |
235 | 0 | } |
236 | |
|
237 | 0 | if (!pF->ReadMolecule(pmol,pConv)) |
238 | 0 | { |
239 | 0 | delete pmol; |
240 | 0 | return false; |
241 | 0 | } |
242 | 0 | const char* ptitle = pmol->GetTitle(); |
243 | 0 | if(*ptitle==0) |
244 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning); |
245 | 0 | else |
246 | 0 | { |
247 | 0 | string title(ptitle); |
248 | 0 | string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended |
249 | 0 | if(pos!=string::npos) |
250 | 0 | title.erase(pos); |
251 | |
|
252 | 0 | map<std::string, OBMol*>::iterator itr; |
253 | 0 | itr = IMols.find(title); |
254 | 0 | if(itr!=IMols.end()) |
255 | 0 | { |
256 | | //Molecule with the same title has been input previously: update it |
257 | 0 | OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol); |
258 | 0 | if(pNewMol) |
259 | 0 | { |
260 | 0 | delete itr->second; |
261 | 0 | IMols[title] = pNewMol; |
262 | 0 | } |
263 | 0 | else |
264 | 0 | { |
265 | | //error: cleanup and return false |
266 | 0 | delete pmol; |
267 | 0 | return DeleteDeferredMols(); |
268 | 0 | } |
269 | 0 | } |
270 | 0 | else |
271 | 0 | { |
272 | | //Molecule not already saved in IMols: save it if in first file |
273 | 0 | if(!OnlyMolsInFirstFile || IsFirstFile) |
274 | 0 | { |
275 | 0 | IMols[title] = pmol; |
276 | 0 | return true; //don't delete pmol |
277 | 0 | } |
278 | 0 | } |
279 | 0 | } |
280 | 0 | delete pmol; |
281 | 0 | return true; |
282 | 0 | } |
283 | | |
284 | | /*! Makes a new OBMol on the heap by combining two molecules according to the rule below. |
285 | | If both have OBGenericData of the same type, or OBPairData with the |
286 | | same attribute, the version from pFirst is used. |
287 | | Returns a pointer to a new OBMol which will need deleting by the calling program |
288 | | (probably by being sent to an output format). |
289 | | If the molecules cannot be regarded as being the same structure a NULL |
290 | | pointer is returned and an error message logged. |
291 | | |
292 | | pFirst and pSecond and the objects they point to are not changed. (const |
293 | | modifiers difficult because class OBMol not designed appropriately) |
294 | | |
295 | | Combining molecules: rules for each of the three parts |
296 | | Title: |
297 | | Use the title of pFirst unless it has none, when use that of pSecond. |
298 | | Warning if neither molecule has a title. |
299 | | |
300 | | Structure |
301 | | - a structure with atoms replaces one with no atoms |
302 | | - a structure with bonds replaces one with no bonds, |
303 | | provided the formula is the same, else an error. |
304 | | - structures with atoms and bonds are compared by InChI; error if not the same. |
305 | | - a structure with 3D coordinates replaces one with 2D coordinates |
306 | | - a structure with 2D coordinates replace one with 0D coordinates |
307 | | |
308 | | OBGenericData |
309 | | OBPairData |
310 | | */ |
311 | | OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond) |
312 | 0 | { |
313 | | //Decide on which OBMol provides the new title |
314 | 0 | string title("No title"); |
315 | 0 | if(*pFirst->GetTitle()!=0) |
316 | 0 | title = pFirst->GetTitle(); |
317 | 0 | else |
318 | 0 | { |
319 | 0 | if(*pSecond->GetTitle()!=0) |
320 | 0 | title = pSecond->GetTitle(); |
321 | 0 | else |
322 | 0 | obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning); |
323 | 0 | } |
324 | | |
325 | | //Decide on which OBMol provides the new structure |
326 | 0 | bool swap=false; |
327 | 0 | if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0) |
328 | 0 | swap=true; |
329 | 0 | else if(pSecond->NumAtoms()!=0) |
330 | 0 | { |
331 | 0 | if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula()) |
332 | 0 | { |
333 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
334 | 0 | "Molecules with name = " + title + " have different formula",obError); |
335 | 0 | return nullptr; |
336 | 0 | } |
337 | 0 | else |
338 | 0 | { |
339 | 0 | if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0) |
340 | 0 | swap=true; |
341 | 0 | else |
342 | 0 | { |
343 | | //Compare by inchi; error if different NOT YET IMPLEMENTED |
344 | | //Use the one with the higher dimension |
345 | 0 | if(pSecond->GetDimension() > pFirst->GetDimension()) |
346 | 0 | swap=true; |
347 | 0 | } |
348 | 0 | } |
349 | 0 | } |
350 | | |
351 | 0 | OBMol* pNewMol = new OBMol; |
352 | 0 | pNewMol->SetTitle(title); |
353 | |
|
354 | 0 | OBMol* pMain = swap ? pSecond : pFirst; |
355 | 0 | OBMol* pOther = swap ? pFirst : pSecond; |
356 | |
|
357 | 0 | *pNewMol = *pMain; //Now copies all data |
358 | | |
359 | | //Copy some OBGenericData from the OBMol which did not provide the structure |
360 | 0 | vector<OBGenericData*>::iterator igd; |
361 | 0 | for(igd=pOther->BeginData();igd!=pOther->EndData();++igd) |
362 | 0 | { |
363 | | //copy only if not already data of the same type from molecule already copied to pNewMol |
364 | 0 | unsigned datatype = (*igd)->GetDataType(); |
365 | 0 | OBGenericData* pData = pNewMol->GetData(datatype); |
366 | 0 | if(datatype==OBGenericDataType::PairData) |
367 | 0 | { |
368 | 0 | if(pData->GetAttribute() == (*igd)->GetAttribute()) |
369 | 0 | continue; |
370 | 0 | } |
371 | 0 | else if (pNewMol->GetData(datatype) != nullptr) |
372 | 0 | continue; |
373 | | |
374 | 0 | OBGenericData* pCopiedData = (*igd)->Clone(pNewMol); |
375 | 0 | pNewMol->SetData(pCopiedData); |
376 | 0 | } |
377 | 0 | return pNewMol; |
378 | 0 | } |
379 | | |
380 | | bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv) |
381 | 0 | { |
382 | 0 | std::map<std::string, OBMol*>::iterator itr, lastitr; |
383 | 0 | bool ret=false; |
384 | 0 | int i=1; |
385 | 0 | lastitr = IMols.end(); |
386 | 0 | --lastitr; |
387 | 0 | pConv->SetOneObjectOnly(false); |
388 | 0 | for(itr=IMols.begin();itr!=IMols.end();++itr,++i) |
389 | 0 | { |
390 | 0 | if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv)) |
391 | 0 | continue; |
392 | 0 | pConv->SetOutputIndex(i); |
393 | 0 | if(itr==lastitr) |
394 | 0 | pConv->SetOneObjectOnly(); //to set IsLast |
395 | |
|
396 | 0 | ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv); |
397 | |
|
398 | 0 | delete itr->second; //always delete OBMol object |
399 | 0 | itr->second = nullptr; // so can be deleted in DeleteDeferredMols() |
400 | 0 | if (!ret) break; |
401 | 0 | } |
402 | 0 | DeleteDeferredMols();//cleans up in case there have been errors |
403 | 0 | return ret; |
404 | 0 | } |
405 | | |
406 | | bool OBMoleculeFormat::DeleteDeferredMols() |
407 | 0 | { |
408 | | //Empties IMols, deteting the OBMol objects whose pointers are stored there |
409 | 0 | std::map<std::string, OBMol*>::iterator itr; |
410 | 0 | for(itr=IMols.begin();itr!=IMols.end();++itr) |
411 | 0 | { |
412 | 0 | delete itr->second; //usually NULL |
413 | 0 | } |
414 | 0 | IMols.clear(); |
415 | 0 | return false; |
416 | 0 | } |
417 | | |
418 | | /////////////////////////////////////////////////////////////////// |
419 | | bool OBMoleculeFormat::OutputMolsFromReaction |
420 | | (OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat) |
421 | 12 | { |
422 | | //Output all the constituent molecules of the reaction |
423 | | |
424 | | //Collect the molecules first, just for convenience |
425 | 12 | vector<std::shared_ptr<OBMol> > mols; |
426 | 21 | for(int i=0;i<pReact->NumReactants();i++) |
427 | 9 | mols.push_back(pReact->GetReactant(i)); |
428 | 163 | for(int i=0;i<pReact->NumProducts();i++) |
429 | 151 | mols.push_back(pReact->GetProduct(i)); |
430 | 12 | for (int i = 0; i<pReact->NumAgents(); i++) |
431 | 0 | mols.push_back(pReact->GetAgent(i)); |
432 | | |
433 | 12 | if(pReact->GetTransitionState()) |
434 | 0 | mols.push_back(pReact->GetTransitionState()); |
435 | | |
436 | 12 | pConv->SetOutputIndex(pConv->GetOutputIndex() - 1); // The OBReaction object is not output |
437 | 12 | if((pFormat->Flags() & WRITEONEONLY) && mols.size()>1) |
438 | 4 | { |
439 | 4 | stringstream ss; |
440 | 4 | ss << "There are " << mols.size() << " molecules to be output," |
441 | 4 | << "but this format is for single molecules only"; |
442 | 4 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
443 | 4 | mols.resize(1); |
444 | 4 | } |
445 | 12 | bool ok = true; |
446 | 148 | for(unsigned int i=0;i<mols.size() && ok;++i) |
447 | 136 | { |
448 | 136 | if(mols[i]) |
449 | 136 | { |
450 | | //Have to do set these manually because not using "Convert" interface |
451 | 136 | pConv->SetLast(i==mols.size()-1); |
452 | 136 | pConv->SetOutputIndex(pConv->GetOutputIndex()+1); |
453 | 136 | ok = pFormat->WriteMolecule( |
454 | 136 | mols[i]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),pConv); |
455 | 136 | } |
456 | 136 | } |
457 | 12 | return ok; |
458 | 12 | } |
459 | | |
460 | | ////////////////////////////////////////////////////////////////// |
461 | | /** Attempts to read the index file datafilename.obindx successively |
462 | | from the following directories: |
463 | | - the current directory |
464 | | - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR |
465 | | if the environment variable is not set |
466 | | - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name |
467 | | An index of type NameIndexType is then constructed. NameIndexType is defined |
468 | | in obmolecformat.h as std::unordered_map. It is searched by |
469 | | @code |
470 | | NameIndexType::iterator itr = index.find(molecule_name); |
471 | | if(itr!=index.end()) |
472 | | unsigned pos_in_datafile = itr->second; |
473 | | @endcode |
474 | | pos_in_datafile is used as a parameter in seekg() to read from the datafile |
475 | | |
476 | | If no index is found, it is constructed from the datafile by reading all of |
477 | | it using the format pInFormat, and written to the directory containing the datafile. |
478 | | This means that this function can be used without worrying whether there is an index. |
479 | | It will be slow to execute the first time, but subsequent uses get the speed benefit |
480 | | of indexed access to the datafile. |
481 | | |
482 | | The serialization and de-serialization of the NameIndexType is entirely in |
483 | | this routine and could possibly be improved. Currently re-hashing is done |
484 | | every time the index is read. |
485 | | **/ |
486 | | |
487 | | bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index, |
488 | | const string& datafilename, OBFormat* pInFormat) |
489 | 0 | { |
490 | 0 | struct headertype |
491 | 0 | { |
492 | 0 | char filename[256]; |
493 | 0 | size_t size; |
494 | 0 | } header; |
495 | |
|
496 | 0 | NameIndexType::iterator itr; |
497 | |
|
498 | 0 | ifstream indexstream; |
499 | 0 | OpenDatafile(indexstream, datafilename + ".obindx"); |
500 | 0 | if(!indexstream) |
501 | 0 | { |
502 | | //Need to prepare the index |
503 | 0 | ifstream datastream; |
504 | 0 | string datafilepath = OpenDatafile(datastream, datafilename); |
505 | 0 | if(!datastream) |
506 | 0 | { |
507 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
508 | 0 | datafilename + " was not found or could not be opened", obError); |
509 | 0 | return false; |
510 | 0 | } |
511 | | |
512 | 0 | OBConversion Conv(&datastream, nullptr); |
513 | 0 | Conv.SetInFormat(pInFormat); |
514 | 0 | OBMol mol; |
515 | 0 | streampos pos; |
516 | 0 | while(Conv.Read(&mol)) |
517 | 0 | { |
518 | 0 | string name = mol.GetTitle(); |
519 | 0 | if(!name.empty()) |
520 | 0 | index.insert(make_pair(name, pos)); |
521 | 0 | mol.Clear(); |
522 | 0 | pos = datastream.tellg(); |
523 | 0 | } |
524 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
525 | 0 | "Prepared an index for " + datafilepath, obAuditMsg); |
526 | | //Save index to file |
527 | 0 | ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary); |
528 | 0 | if(!dofs) return false; |
529 | | |
530 | 0 | strncpy(header.filename,datafilename.c_str(), sizeof(header.filename)); |
531 | 0 | header.filename[sizeof(header.filename) - 1] = '\0'; |
532 | 0 | header.size = index.size(); |
533 | 0 | dofs.write((const char*)&header, sizeof(headertype)); |
534 | |
|
535 | 0 | for(itr=index.begin();itr!=index.end();++itr) |
536 | 0 | { |
537 | | //#chars; chars; ofset(4bytes). |
538 | 0 | const char n = static_cast<char> (itr->first.size()); |
539 | 0 | dofs.put(n); |
540 | 0 | dofs.write(itr->first.c_str(),n); |
541 | 0 | dofs.write((const char*)&itr->second,sizeof(unsigned)); |
542 | 0 | } |
543 | 0 | } |
544 | 0 | else |
545 | 0 | { |
546 | | //Read index data from file and put into hash_map |
547 | 0 | indexstream.read((char*)&header,sizeof(headertype)); |
548 | 0 | itr=index.begin(); // for hint |
549 | 0 | for(unsigned int i=0;i<header.size;++i) |
550 | 0 | { |
551 | 0 | char len; |
552 | 0 | indexstream.get(len); |
553 | 0 | string title(len, 0); |
554 | 0 | unsigned pos; |
555 | 0 | indexstream.read(&title[0],len); |
556 | 0 | indexstream.read((char*)&pos,sizeof(unsigned)); |
557 | 0 | index.insert(itr, make_pair(title,pos)); |
558 | 0 | } |
559 | 0 | } |
560 | 0 | return true; |
561 | 0 | } |
562 | | |
563 | | } //namespace OpenBabel |
564 | | |
565 | | //! \file obmolecformat.cpp |
566 | | //! \brief Subclass of OBFormat for conversion of OBMol. |