/src/openbabel/src/formats/chemdrawcdx.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | Copyright (C) 2006 by Fredrik Wallner |
3 | | Some portions Copyright (C) 2006-2007 by Geoffrey Hutchsion |
4 | | Some portions Copyright (C) 2011 by Chris Morley |
5 | | |
6 | | This program is free software; you can redistribute it and/or modify |
7 | | it under the terms of the GNU General Public License as published by |
8 | | the Free Software Foundation version 2 of the License. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | ***********************************************************************/ |
15 | | |
16 | | #include <openbabel/babelconfig.h> |
17 | | #include <openbabel/obmolecformat.h> |
18 | | #include <openbabel/mol.h> |
19 | | #include <openbabel/atom.h> |
20 | | #include <openbabel/bond.h> |
21 | | #include <openbabel/elements.h> |
22 | | #include <openbabel/reactionfacade.h> |
23 | | #include <openbabel/stereo/stereo.h> |
24 | | #include <openbabel/obfunctions.h> |
25 | | #include <openbabel/reaction.h> |
26 | | #include <openbabel/tokenst.h> |
27 | | #include <openbabel/alias.h> |
28 | | #include <openbabel/text.h> |
29 | | #include "chemdrawcdx.h" |
30 | | |
31 | | #include <iostream> |
32 | | #include <fstream> |
33 | | #include <sstream> |
34 | | #include <map> |
35 | | #include <list> |
36 | | |
37 | | |
38 | | #if !defined(__CYGWIN__) |
// Exchange the two bytes of a 16-bit quantity (endianness conversion helper).
static inline unsigned short bswap_16(unsigned short x) {
  const unsigned int widened = x;
  const unsigned int highToLow = widened >> 8;
  const unsigned int lowToHigh = widened << 8;
  // Converting back to unsigned short keeps only the bits that fit in it.
  return static_cast<unsigned short>(highToLow | lowToHigh);
}
42 | | |
// Reverse the byte order of a 32-bit quantity (endianness conversion helper).
// All arithmetic is done on unsigned operands: shifting a promoted (signed)
// 16-bit value left by 16 can move a bit into the sign position, which is not
// portable.
static inline unsigned int bswap_32(unsigned int x) {
  return ((x & 0x000000ffu) << 24) |
         ((x & 0x0000ff00u) << 8)  |
         ((x & 0x00ff0000u) >> 8)  |
         ((x & 0xff000000u) >> 24);
}
46 | | |
47 | 0 | static inline unsigned long long bswap_64(unsigned long long x) { |
48 | 0 | return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); |
49 | 0 | } |
50 | | #endif |
51 | | |
52 | | // Macs -- need to use Apple macros to deal with Universal binaries correctly |
// READ_INT16 / READ_INT32 read sizeof(data) raw bytes from `stream` into
// `data`; the big-endian variants then byte-swap the value with
// bswap_16 / bswap_32. Every variant is wrapped in do { } while(0) so that it
// expands to exactly one statement: without the wrapper the byte swap of the
// big-endian variants escapes an unbraced if/else branch and runs
// unconditionally. Callers must terminate the invocation with ';'.
#ifdef __APPLE__
#include <machine/endian.h>
#if BYTE_ORDER == BIG_ENDIAN
# define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_16 (data); } while(0)
# define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_32 (data); } while(0)
#else // BYTE_ORDER == LITTLE_ENDIAN
# define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
# define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
#else

// Non-Apple systems
// defined in babelconfig.h by autoconf (portable to Solaris, BSD, Linux)
#ifdef WORDS_BIGENDIAN
# define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_16 (data); } while(0)
# define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_32 (data); } while(0)
#else
# define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
# define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
// end endian / bigendian issues (on non-Mac systems)
#endif
87 | | // end Apple/non-Apple systems |
88 | | |
89 | | using namespace std; |
90 | | namespace OpenBabel |
91 | | { |
92 | | |
93 | | //Class which traverse the tree in CDX binary files |
94 | | class CDXReader |
95 | | { |
96 | | public: |
97 | | CDXReader(std::istream& is); |
98 | | CDXTag ReadNext(bool objectsOnly=false, int targetDepth=-2); |
99 | 0 | void IgnoreObject() { ReadNext(true, GetDepth()-1); } |
100 | 0 | operator bool ()const { return (bool)ifs; } |
101 | 0 | int GetDepth()const { return depth; } |
102 | 0 | int GetLen()const { return _len;} //length of current property data |
103 | 0 | CDXObjectID CurrentID()const { return ids.back(); } |
104 | | stringstream& data(); //call this only once for each set of property data |
105 | | |
106 | | //Routines to display the structure of a cdx binary file |
107 | | OBText* WriteTree(const std::string& filename, unsigned wtoptions); |
108 | | private: |
109 | | bool ParseEnums(std::map<CDXTag, std::string>& enummap, const std::string& filename); |
110 | | std::string TagName(std::map<CDXTag, std::string>& enummap, CDXTag tag); |
111 | | |
112 | | private: |
113 | | std::istream& ifs; |
114 | | int depth; |
115 | | std::vector<CDXObjectID> ids; |
116 | | CDXObjectID _tempback; |
117 | | std::string _buf; |
118 | | UINT16 _len; |
119 | | std::stringstream _ss; |
120 | | }; |
121 | | |
122 | | //************************************************************** |
123 | | class ChemDrawBinaryXFormat : OBMoleculeFormat |
124 | | { |
125 | | public: |
126 | | //Register this format type ID in the constructor |
127 | | ChemDrawBinaryXFormat() |
128 | 6 | { |
129 | 6 | OBConversion::RegisterFormat("cdx",this); |
130 | 6 | } |
131 | | |
132 | | const char* Description() override // required |
133 | 0 | { |
134 | 0 | return |
135 | 0 | "ChemDraw binary format\n" |
136 | 0 | "Read only\n" |
137 | 0 | "The whole file is read in one call.\n" |
138 | 0 | "Note that a file may contain a mixture of reactions and\n" |
139 | 0 | "molecules.\n" |
140 | |
|
141 | 0 | "With the -ad option, a human-readable representation of the CDX tree\n" |
142 | 0 | "structure is output as an OBText object. Use textformat to view it::\n\n" |
143 | |
|
144 | 0 | " obabel input.cdx -otext -ad\n\n" |
145 | |
|
146 | 0 | "Many reactions in CDX files are not fully specified with reaction data\n" |
147 | 0 | "structures, and may not be completely interpreted by this parser.\n\n" |
148 | |
|
149 | 0 | "Read Options, e.g. -am\n" |
150 | 0 | " m read molecules only; no reactions\n" |
151 | 0 | " d output CDX tree to OBText object\n" |
152 | 0 | " o display only objects in tree output\n"; |
153 | 0 | } |
154 | | |
155 | | const char* SpecificationURL() override |
156 | 0 | {return "http://www.cambridgesoft.com/services/documentation/sdk/chemdraw/cdx/IntroCDX.htm";} |
157 | | |
158 | | const char* GetMIMEType() override |
159 | 0 | { return "chemical/x-cdx"; } |
160 | | |
161 | | unsigned int Flags() override |
162 | 6 | { |
163 | 6 | return READBINARY|NOTWRITABLE; |
164 | 6 | } |
165 | | |
166 | | //////////////////////////////////////////////////// |
167 | | bool ReadMolecule(OBBase* pOb, OBConversion* pConv) override; |
168 | | |
169 | | private: |
170 | | enum graphicType {none, equilArrow}; |
171 | | bool TopLevelParse(CDXReader& cdxr, OBConversion* pConv,CDXObjectID ContainingGroup); |
172 | | bool DoFragment(CDXReader& cdxr, OBMol* pmol); |
173 | | bool DoFragmentImpl(CDXReader& cdxr, OBMol* pmol, |
174 | | map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown); |
175 | | bool DoReaction(CDXReader& cdxr, OBMol* pReact); |
176 | | std::string DoText(CDXReader& cdxr); |
177 | | |
178 | | std::vector<OBMol*> LookupMol(CDXObjectID id); |
179 | | graphicType LookupGraphic(CDXObjectID id); |
180 | | OBMol* LookupInMolMap(CDXObjectID id); |
181 | | |
182 | | private: |
183 | | bool readReactions; |
184 | | static const bool objectsOnly = true; |
185 | | std::map<CDXObjectID, graphicType> _graphicmap; |
186 | | std::map<CDXObjectID, OBMol*> _molmap; |
187 | | std::map<CDXObjectID, std::vector<CDXObjectID> > _groupmap; |
188 | | // In case of chain A -> B -> C, B is both reactant and product |
189 | | CDXObjectID _lastProdId; |
190 | | typedef std::map<CDXObjectID, std::vector<CDXObjectID> >::iterator GroupMapIterator; |
191 | | static const unsigned usedFlag = 1<<30; |
192 | | }; |
193 | | |
194 | | //****************************************************************** |
195 | | //Global instance of the format |
196 | | ChemDrawBinaryXFormat theChemDrawBinaryXFormat; |
197 | | //****************************************************************** |
198 | | |
/*New CDXformat
Each fragment goes into a new OBMol on the heap.
The CDX id and OBMol* are added to _molmap.
When a reaction step is found, a new OBMol flagged as a reaction is created, and the
reactant/product CDX ids are looked up in _molmap (directly, or via _groupmap when
the id is that of a group) and added to it through OBReactionFacade.
Each OBMol that is looked up in this way is marked as Used.
When the reaction is complete it is output via AddChemObject().
At the end, any OBMol in the map not marked as Used is output as an OBMol.
*/
208 | | |
209 | | |
210 | | bool ChemDrawBinaryXFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) |
211 | 0 | { |
212 | 0 | _molmap.clear(); |
213 | 0 | _graphicmap.clear(); |
214 | 0 | _groupmap.clear(); |
215 | 0 | OBMol* pmol=nullptr; |
216 | 0 | bool ok = true; |
217 | |
|
218 | 0 | CDXReader cdxr(*pConv->GetInStream()); |
219 | 0 | readReactions = pConv->IsOption("m", OBConversion::INOPTIONS) == nullptr; |
220 | | |
221 | | // Write CDX tree only if requested |
222 | 0 | if(pConv->IsOption("d",OBConversion::INOPTIONS)) |
223 | 0 | { |
224 | 0 | unsigned wtoptions=0; |
225 | 0 | if(pConv->IsOption("o",OBConversion::INOPTIONS)) |
226 | 0 | wtoptions |= 1; //display objects only |
227 | 0 | OBText* pText = cdxr.WriteTree("chemdrawcdx.h", wtoptions); |
228 | 0 | if(pText) |
229 | 0 | { |
230 | 0 | pConv->AddChemObject(pText); |
231 | 0 | return true; |
232 | 0 | } |
233 | 0 | return false; |
234 | 0 | } |
235 | | |
236 | | // Normal reading of molecules and reactions |
237 | | //Top level parse |
238 | 0 | while(cdxr) |
239 | 0 | { |
240 | 0 | if(!TopLevelParse(cdxr, pConv, 0)) |
241 | 0 | return false; |
242 | 0 | } |
243 | | |
244 | | //At the end, output molecules that have not been used in a reaction |
245 | 0 | map<CDXObjectID, OBMol*>::iterator mapiter = _molmap.begin(); |
246 | 0 | for(; mapiter!=_molmap.end(); ++mapiter) |
247 | 0 | { |
248 | 0 | pmol = mapiter->second; |
249 | 0 | if(!(pmol->GetFlags() & usedFlag) && strcmp(pmol->GetTitle(),"justplus")) |
250 | 0 | { |
251 | 0 | OBMol* ptmol = static_cast<OBMol*>(pmol->DoTransformations( |
252 | 0 | pConv->GetOptions(OBConversion::GENOPTIONS),pConv)); |
253 | 0 | if(!ptmol) |
254 | 0 | delete pmol; |
255 | 0 | else |
256 | 0 | if(!pConv->AddChemObject(ptmol)) |
257 | 0 | return false; //error during writing |
258 | 0 | } |
259 | 0 | } |
260 | | |
261 | 0 | return ok; |
262 | 0 | } |
263 | | /////////////////////////////////////////////////////////////////////// |
264 | | bool ChemDrawBinaryXFormat::TopLevelParse |
265 | | (CDXReader& cdxr, OBConversion* pConv, CDXObjectID ContainingGroup) |
266 | 0 | { |
267 | 0 | bool ok = true; |
268 | 0 | CDXTag tag; |
269 | 0 | while((tag = cdxr.ReadNext(objectsOnly))) |
270 | 0 | { |
271 | 0 | if(tag == kCDXObj_Group) |
272 | 0 | { |
273 | 0 | CDXObjectID cid = cdxr.CurrentID(); |
274 | 0 | vector<CDXObjectID> v; |
275 | 0 | _groupmap.insert(make_pair(cid,v)); //empty vector as yet |
276 | 0 | TopLevelParse(cdxr, pConv, cid ); |
277 | 0 | } |
278 | | |
279 | 0 | else if(tag==kCDXObj_Fragment) |
280 | 0 | { |
281 | 0 | OBMol* pmol = new OBMol; |
282 | | //Save all molecules to the end |
283 | 0 | _molmap[cdxr.CurrentID()] = pmol; |
284 | |
|
285 | 0 | if(ContainingGroup) |
286 | 0 | { |
287 | | // Add the id of this mol to the group's entry in _groupmap |
288 | 0 | GroupMapIterator gmapiter = _groupmap.find(ContainingGroup); |
289 | 0 | if(gmapiter!=_groupmap.end()) |
290 | 0 | gmapiter->second.push_back(cdxr.CurrentID()); |
291 | 0 | } |
292 | 0 | ok = DoFragment(cdxr, pmol); |
293 | 0 | } |
294 | | |
295 | 0 | else if(tag == kCDXObj_ReactionStep && readReactions) |
296 | 0 | { |
297 | 0 | OBMol* pReact = new OBMol; |
298 | 0 | pReact->SetIsReaction(); |
299 | 0 | ok = DoReaction(cdxr, pReact); |
300 | | // Output OBReaction and continue |
301 | 0 | if(pReact) |
302 | 0 | if(!pConv->AddChemObject(pReact)) |
303 | 0 | return false; //error during writing |
304 | 0 | } |
305 | | |
306 | 0 | else if(ok && tag==kCDXObj_Graphic) |
307 | 0 | { |
308 | 0 | while( (tag = cdxr.ReadNext()) ) |
309 | 0 | { |
310 | 0 | stringstream& ss = cdxr.data(); |
311 | 0 | if(tag == kCDXProp_Arrow_Type) |
312 | 0 | { |
313 | 0 | char type1=0; |
314 | 0 | UINT16 type2=0; |
315 | 0 | if(cdxr.GetLen()==1) |
316 | 0 | ss.get(type1); |
317 | 0 | else |
318 | 0 | READ_INT16(ss,type2); |
319 | 0 | if(type1==kCDXArrowType_Equilibrium || type2==kCDXArrowType_Equilibrium) |
320 | 0 | _graphicmap[type1+type2] = equilArrow; //save in graphicmap |
321 | 0 | } |
322 | 0 | } |
323 | 0 | } |
324 | 0 | } |
325 | 0 | return true; |
326 | 0 | } |
327 | | /////////////////////////////////////////////////////////////////////// |
328 | | bool ChemDrawBinaryXFormat::DoReaction(CDXReader& cdxr, OBMol* pReact) |
329 | 0 | { |
330 | 0 | CDXTag tag; |
331 | 0 | CDXObjectID id; |
332 | 0 | OBReactionFacade facade(pReact); |
333 | 0 | while( (tag = cdxr.ReadNext()) ) |
334 | 0 | { |
335 | 0 | if(tag == kCDXProp_ReactionStep_Reactants) |
336 | 0 | { |
337 | 0 | stringstream& ss = cdxr.data(); |
338 | 0 | for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each reactant id |
339 | 0 | { |
340 | 0 | READ_INT32(ss,id); |
341 | 0 | vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols |
342 | 0 | for(unsigned i=0;i<molvec.size();++i) |
343 | 0 | if(strcmp(molvec[i]->GetTitle(),"justplus")) |
344 | 0 | { |
345 | 0 | facade.AddComponent(molvec[i], REACTANT); |
346 | 0 | } |
347 | 0 | } |
348 | 0 | } |
349 | 0 | else if(tag == kCDXProp_ReactionStep_Products) |
350 | 0 | { |
351 | 0 | stringstream& ss = cdxr.data(); |
352 | 0 | for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each product id |
353 | 0 | { |
354 | 0 | READ_INT32(ss,id); |
355 | 0 | vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols |
356 | 0 | for(unsigned i=0;i<molvec.size();++i) |
357 | 0 | if(strcmp(molvec[i]->GetTitle(),"justplus")) |
358 | 0 | { |
359 | 0 | facade.AddComponent(molvec[i], PRODUCT); |
360 | 0 | _lastProdId = id; |
361 | 0 | } |
362 | 0 | } |
363 | 0 | } |
364 | 0 | else if(tag==kCDXProp_ReactionStep_Arrows) |
365 | 0 | { |
366 | 0 | READ_INT32(cdxr.data(),id); |
367 | | //if(LookupGraphic(id)==equilArrow) // TODO? Store reversibility somehow? |
368 | | // pReact->SetReversible(); |
369 | 0 | } |
370 | 0 | } |
371 | 0 | return true; |
372 | 0 | } |
373 | | /////////////////////////////////////////////////////////////////////// |
374 | | vector<OBMol*> ChemDrawBinaryXFormat::LookupMol(CDXObjectID id) |
375 | 0 | { |
376 | 0 | vector<OBMol*> molvec; |
377 | | //Check whether the id is that of a kCDXObj_Group |
378 | 0 | GroupMapIterator gmapiter; |
379 | 0 | gmapiter = _groupmap.find(id); |
380 | 0 | if(gmapiter != _groupmap.end()) |
381 | 0 | { |
382 | 0 | for(unsigned i=0;i<gmapiter->second.size();++i) |
383 | 0 | { |
384 | 0 | OBMol* pmmol = LookupInMolMap(gmapiter->second[i]); |
385 | 0 | if(pmmol) |
386 | 0 | molvec.push_back(pmmol); |
387 | 0 | } |
388 | 0 | } |
389 | 0 | else |
390 | 0 | { |
391 | | //id is not a group; it must be a fragment |
392 | 0 | OBMol* pmmol = LookupInMolMap(id); |
393 | 0 | if(pmmol) |
394 | 0 | molvec.push_back(pmmol); |
395 | 0 | } |
396 | 0 | return molvec; |
397 | 0 | } |
398 | | |
399 | | OBMol* ChemDrawBinaryXFormat::LookupInMolMap(CDXObjectID id) |
400 | 0 | { |
401 | 0 | std::map<CDXObjectID, OBMol*>::iterator mapiter; |
402 | 0 | mapiter = _molmap.find(id); |
403 | 0 | if(mapiter!=_molmap.end()) |
404 | 0 | { |
405 | | //Mark mol as used in a reaction, so that it will not be output independently |
406 | 0 | mapiter->second->SetFlags(mapiter->second->GetFlags() | usedFlag); |
407 | 0 | return mapiter->second; |
408 | 0 | } |
409 | 0 | else |
410 | 0 | { |
411 | 0 | stringstream ss; |
412 | 0 | ss << "Reactant or product mol not found id = " << hex << showbase << id; |
413 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError); |
414 | 0 | return nullptr; |
415 | 0 | } |
416 | 0 | } |
417 | | |
418 | | //////////////////////////////////////////////////////////////////////// |
419 | | ChemDrawBinaryXFormat::graphicType ChemDrawBinaryXFormat::LookupGraphic(CDXObjectID id) |
420 | 0 | { |
421 | 0 | std::map<CDXObjectID, graphicType>::iterator mapiter; |
422 | 0 | mapiter = _graphicmap.find(id); |
423 | 0 | if(mapiter != _graphicmap.end()) |
424 | 0 | return mapiter->second; |
425 | 0 | else |
426 | 0 | return none; |
427 | 0 | } |
428 | | |
429 | | //////////////////////////////////////////////////////////////////////// |
430 | | bool ChemDrawBinaryXFormat::DoFragment(CDXReader& cdxr, OBMol* pmol) |
431 | 0 | { |
432 | 0 | map<OBBond*, OBStereo::BondDirection> updown; |
433 | 0 | pmol->SetDimension(2); |
434 | 0 | pmol->BeginModify(); |
435 | |
|
436 | 0 | map<CDXObjectID, unsigned> atommap; //key = CDX id; value = OB atom idx |
437 | | |
438 | | //The inner workings of DoFragment,since Fragment elements can be nested |
439 | 0 | DoFragmentImpl(cdxr, pmol, atommap, updown); |
440 | | |
441 | | // use 2D coordinates + hash/wedge to determine stereochemistry |
442 | 0 | StereoFrom2D(pmol, &updown); |
443 | |
|
444 | 0 | pmol->EndModify(); |
445 | | |
446 | | //Expand any aliases after molecule constructed |
447 | | //Need to save aliases in list first and expand later |
448 | 0 | vector<OBAtom*> aliasatoms; |
449 | 0 | for(int idx=1; idx<=pmol->NumAtoms();++idx) |
450 | 0 | { |
451 | 0 | OBAtom* pAtom = pmol->GetAtom(idx); |
452 | 0 | AliasData* ad = dynamic_cast<AliasData*>(pAtom->GetData(AliasDataType)); |
453 | 0 | if(ad && !ad->IsExpanded()) |
454 | 0 | aliasatoms.push_back(pAtom); |
455 | 0 | } |
456 | 0 | for(vector<OBAtom*>::iterator vit=aliasatoms.begin(); |
457 | 0 | vit!=aliasatoms.end(); ++vit) |
458 | 0 | { |
459 | 0 | int idx = (*vit)->GetIdx(); |
460 | 0 | AliasData* ad = dynamic_cast<AliasData*>((*vit)->GetData(AliasDataType)); |
461 | 0 | if(ad && !ad->IsExpanded()) |
462 | 0 | ad->Expand(*pmol, idx); //Make chemically meaningful, if possible. |
463 | 0 | } |
464 | 0 | return true; |
465 | 0 | } |
466 | | |
467 | | bool ChemDrawBinaryXFormat::DoFragmentImpl(CDXReader& cdxr, OBMol* pmol, |
468 | | map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown) |
469 | 0 | { |
470 | 0 | CDXTag tag; |
471 | 0 | std::vector<OBAtom*> handleImplicitCarbons; |
472 | 0 | while((tag = cdxr.ReadNext(objectsOnly))) |
473 | 0 | { |
474 | 0 | if(tag==kCDXObj_Node) |
475 | 0 | { |
476 | 0 | unsigned nodeID = cdxr.CurrentID(); |
477 | 0 | bool isAlias=false, hasElement=false; |
478 | 0 | bool hasNumHs = false; |
479 | 0 | UINT16 atnum=-1, spin=0, numHs=0; |
480 | 0 | int x, y, charge=0, iso=0; |
481 | 0 | string aliastext; |
482 | | |
483 | | //Read all node properties |
484 | 0 | while( (tag = cdxr.ReadNext()) ) |
485 | 0 | { |
486 | 0 | switch(tag) |
487 | 0 | { |
488 | 0 | case kCDXProp_Node_Type: |
489 | 0 | UINT16 type; |
490 | 0 | READ_INT16(cdxr.data(), type); |
491 | 0 | if(type==4 || type==5) //Nickname or fragment |
492 | 0 | isAlias = true; |
493 | 0 | break; |
494 | 0 | case kCDXProp_Node_Element: |
495 | 0 | READ_INT16(cdxr.data(), atnum); |
496 | 0 | hasElement = true; |
497 | 0 | break; |
498 | 0 | case kCDXProp_2DPosition: |
499 | 0 | { |
500 | 0 | stringstream& ss = cdxr.data(); |
501 | 0 | READ_INT32(ss, y); //yes, this way round |
502 | 0 | READ_INT32(ss, x); |
503 | 0 | } |
504 | 0 | break; |
505 | 0 | case kCDXProp_Atom_Charge: |
506 | 0 | if(cdxr.GetLen()==1) |
507 | 0 | charge = (int8_t)cdxr.data().get(); |
508 | 0 | else |
509 | 0 | READ_INT32(cdxr.data(), charge); |
510 | 0 | break; |
511 | 0 | case kCDXProp_Atom_Radical: |
512 | 0 | READ_INT16(cdxr.data(),spin); |
513 | 0 | break; |
514 | 0 | case kCDXProp_Atom_Isotope: |
515 | 0 | READ_INT16(cdxr.data(),iso); |
516 | 0 | break; |
517 | 0 | case kCDXProp_Atom_NumHydrogens: |
518 | 0 | READ_INT16(cdxr.data(), numHs); |
519 | 0 | hasNumHs = true; |
520 | 0 | break; |
521 | 0 | case kCDXProp_Atom_CIPStereochemistry: |
522 | 0 | break; |
523 | 0 | case kCDXObj_Text: |
524 | 0 | aliastext = DoText(cdxr); |
525 | 0 | if(aliastext=="+") |
526 | 0 | { |
527 | | //This node is not an atom, but dangerous to delete |
528 | 0 | pmol->SetTitle("justplus"); |
529 | 0 | } |
530 | 0 | break; |
531 | 0 | case kCDXObj_Fragment: |
532 | | /* ignore fragment contained in node |
533 | | if(isAlias) |
534 | | { |
535 | | unsigned Idxbefore = pmol->NumAtoms(); |
536 | | if(DoFragmentImpl(cdxr, pmol, atommap, updown)) |
537 | | return false; |
538 | | } |
539 | | */ |
540 | | //ignore the contents of this node |
541 | 0 | cdxr.IgnoreObject(); |
542 | | //cdxr.ReadNext(objectsOnly, cdxr.GetDepth()-1); |
543 | 0 | break; |
544 | 0 | default: |
545 | 0 | if(tag & kCDXTag_Object) //unhandled object |
546 | 0 | while(cdxr.ReadNext()); |
547 | 0 | } |
548 | 0 | } |
549 | | //All properties of Node have now been read |
550 | 0 | OBAtom* pAtom = pmol->NewAtom(); |
551 | 0 | pAtom->SetVector(x*1.0e-6, -y*1.0e-6, 0); //inv y axis |
552 | 0 | atommap[nodeID] = pmol->NumAtoms(); |
553 | 0 | if(isAlias || (!aliastext.empty() && atnum==0xffff)) |
554 | 0 | { |
555 | | //Treat text as an alias |
556 | 0 | pAtom->SetAtomicNum(0); |
557 | 0 | AliasData* ad = new AliasData(); |
558 | 0 | ad->SetAlias(aliastext); |
559 | 0 | ad->SetOrigin(fileformatInput); |
560 | 0 | pAtom->SetData(ad); |
561 | 0 | } |
562 | 0 | else |
563 | 0 | { |
564 | 0 | if(atnum==0xffff) |
565 | 0 | atnum = 6; //atoms are C by default |
566 | 0 | pAtom->SetAtomicNum(atnum); |
567 | 0 | if (hasNumHs) |
568 | 0 | pAtom->SetImplicitHCount(numHs); |
569 | 0 | else if (atnum==6) |
570 | 0 | handleImplicitCarbons.push_back(pAtom); |
571 | 0 | pAtom->SetFormalCharge(charge); |
572 | 0 | pAtom->SetIsotope(iso); |
573 | 0 | pAtom->SetSpinMultiplicity(spin); |
574 | 0 | } |
575 | 0 | } |
576 | | |
577 | 0 | else if(tag==kCDXObj_Bond) |
578 | 0 | { |
579 | 0 | CDXObjectID bgnID, endID; |
580 | 0 | int order=1, bgnIdx, endIdx ; |
581 | 0 | UINT16 stereo=0; |
582 | |
|
583 | 0 | while( (tag = cdxr.ReadNext()) ) |
584 | 0 | { |
585 | 0 | switch(tag) |
586 | 0 | { |
587 | 0 | case kCDXProp_Bond_Begin: |
588 | 0 | READ_INT32(cdxr.data(), bgnID); |
589 | 0 | bgnIdx = atommap[bgnID]; |
590 | 0 | break; |
591 | 0 | case kCDXProp_Bond_End: |
592 | 0 | READ_INT32(cdxr.data(), endID); |
593 | 0 | endIdx = atommap[endID]; |
594 | 0 | break; |
595 | 0 | case kCDXProp_Bond_Order: |
596 | 0 | READ_INT16(cdxr.data(), order); |
597 | 0 | switch (order) |
598 | 0 | { |
599 | 0 | case 0xFFFF: // undefined, keep 1 for now |
600 | 0 | order = 1; |
601 | 0 | case 0x0001: |
602 | 0 | case 0x0002: |
603 | 0 | break; |
604 | 0 | case 0x0004: |
605 | 0 | order = 3; |
606 | 0 | break; |
607 | 0 | case 0x0080: // aromatic bond |
608 | 0 | order = 5; |
609 | 0 | break; |
610 | 0 | default: // other cases are just not supported, keep 1 |
611 | 0 | order = 1; |
612 | 0 | break; |
613 | 0 | } |
614 | 0 | break; |
615 | 0 | case kCDXProp_Bond_Display: |
616 | 0 | READ_INT16(cdxr.data(), stereo); |
617 | 0 | break; |
618 | 0 | } |
619 | 0 | } |
620 | | |
621 | 0 | if(!order || !bgnIdx || !endIdx) |
622 | 0 | { |
623 | 0 | obErrorLog.ThrowError(__FUNCTION__,"Incorrect bond", obError); |
624 | 0 | return false; |
625 | 0 | } |
626 | 0 | if(stereo==4 || stereo==7 || stereo==10 || stereo==12) |
627 | 0 | swap(bgnIdx, endIdx); |
628 | 0 | pmol->AddBond(bgnIdx, endIdx, order); |
629 | 0 | if(stereo) |
630 | 0 | { |
631 | 0 | OBBond* pBond = pmol->GetBond(pmol->NumBonds()-1); |
632 | 0 | if(stereo==3 || stereo==4) |
633 | 0 | pBond->SetHash(); |
634 | 0 | else if(stereo==6 || stereo==7) |
635 | 0 | pBond->SetWedge(); |
636 | 0 | } |
637 | 0 | } |
638 | 0 | } |
639 | | // Handle 'implicit carbons' by adjusting their valence with |
640 | | // implicit hydrognes |
641 | 0 | for(vector<OBAtom*>::iterator vit=handleImplicitCarbons.begin(); |
642 | 0 | vit!=handleImplicitCarbons.end(); ++vit) |
643 | 0 | OBAtomAssignTypicalImplicitHydrogens(*vit); |
644 | |
|
645 | 0 | return true; |
646 | 0 | } |
647 | | |
648 | | string ChemDrawBinaryXFormat::DoText(CDXReader& cdxr) |
649 | 0 | { |
650 | 0 | CDXTag tag; |
651 | 0 | string text; |
652 | 0 | while( (tag=cdxr.ReadNext()) ) |
653 | 0 | { |
654 | 0 | stringstream& ss = cdxr.data(); |
655 | 0 | switch(tag) |
656 | 0 | { |
657 | 0 | case kCDXProp_Text: |
658 | 0 | UINT16 nStyleRuns; |
659 | 0 | READ_INT16(ss,nStyleRuns); |
660 | 0 | ss.ignore(nStyleRuns*10); |
661 | 0 | ss >> text; |
662 | 0 | default: |
663 | 0 | if(tag & kCDXTag_Object) //unhandled object |
664 | 0 | while(cdxr.ReadNext()); |
665 | 0 | } |
666 | 0 | } |
667 | 0 | return text; |
668 | 0 | } |
669 | | |
670 | | //**************************************************************** |
671 | | CDXTag CDXReader::ReadNext(bool objectsOnly, int targetDepth) |
672 | 0 | { |
673 | | //ostringstream treestream; |
674 | 0 | CDXTag tag; |
675 | 0 | CDXObjectID id; |
676 | |
|
677 | 0 | while(ifs) |
678 | 0 | { |
679 | 0 | READ_INT16(ifs, tag); |
680 | 0 | if(tag==0) |
681 | 0 | { |
682 | 0 | if(depth==0) |
683 | 0 | { |
684 | 0 | ifs.setstate(ios::eofbit); //ignore everything after end of document |
685 | 0 | return 0; //end of document |
686 | 0 | } |
687 | 0 | --depth; |
688 | 0 | _tempback = ids.back(); //needed for WriteTree |
689 | 0 | ids.pop_back(); |
690 | 0 | if(targetDepth<0 || depth == targetDepth) |
691 | 0 | return 0; //end of object |
692 | 0 | } |
693 | 0 | else if(tag & kCDXTag_Object) |
694 | 0 | { |
695 | 0 | READ_INT32(ifs, id); |
696 | 0 | ids.push_back(id); |
697 | 0 | ++depth; |
698 | 0 | if(targetDepth<0 || depth-1 == targetDepth) |
699 | 0 | return tag; //object |
700 | 0 | } |
701 | 0 | else |
702 | 0 | { |
703 | | //property |
704 | 0 | READ_INT16(ifs, _len); |
705 | |
|
706 | 0 | if(objectsOnly) |
707 | 0 | ifs.ignore(_len); |
708 | 0 | else |
709 | 0 | { |
710 | | //copy property data to buffer |
711 | 0 | char* p = new char[_len+1]; |
712 | 0 | ifs.read(p, _len); |
713 | 0 | _buf.assign(p, _len); |
714 | 0 | delete[] p; |
715 | 0 | return tag; //property |
716 | 0 | } |
717 | 0 | } |
718 | 0 | } |
719 | 0 | return 0; |
720 | 0 | } |
721 | | ///////////////////////////////////////////////////////////////////// |
722 | | |
723 | | stringstream& CDXReader::data() |
724 | 0 | { |
725 | 0 | _ss.clear(); |
726 | 0 | _ss.str(_buf); |
727 | 0 | return _ss; |
728 | 0 | } |
729 | | ///////////////////////////////////////////////////////////////////// |
730 | | |
731 | 0 | CDXReader::CDXReader(std::istream& is) : ifs(is), depth(0) |
732 | 0 | { |
733 | | //ReadHeader |
734 | 0 | char buffer[kCDX_HeaderStringLen+1]; |
735 | 0 | ifs.read(buffer,kCDX_HeaderStringLen); |
736 | 0 | buffer[kCDX_HeaderStringLen] = '\0'; |
737 | 0 | if(strncmp(buffer, kCDX_HeaderString, kCDX_HeaderStringLen) == 0) |
738 | 0 | ifs.ignore(kCDX_HeaderLength - kCDX_HeaderStringLen); // Discard rest of header. |
739 | 0 | else |
740 | 0 | { |
741 | 0 | obErrorLog.ThrowError(__FUNCTION__,"Invalid file, no ChemDraw Header",obError); |
742 | 0 | ifs.setstate(ios::eofbit); |
743 | 0 | throw; |
744 | 0 | } |
745 | 0 | } |
746 | | //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
747 | | //Routines to display the structure of a cdx binary file |
748 | | |
749 | | OBText* CDXReader::WriteTree(const string& filename, unsigned wtoptions) |
750 | 0 | { |
751 | 0 | const char indentchar = '\t'; |
752 | 0 | std::map<CDXTag, std::string> enummap; |
753 | 0 | ParseEnums(enummap, filename); |
754 | |
|
755 | 0 | stringstream tss; |
756 | 0 | tss << hex << showbase; |
757 | |
|
758 | 0 | while(*this) |
759 | 0 | { |
760 | 0 | CDXTag tag = ReadNext(); |
761 | 0 | if(ifs.eof()) |
762 | 0 | return new OBText(tss.str()); //normal exit |
763 | 0 | if(tag==0 && !(wtoptions &1)) |
764 | 0 | { |
765 | | //Object end |
766 | 0 | tss << string(depth,indentchar) << "ObjectEnd " << _tempback << endl; |
767 | 0 | } |
768 | 0 | else if(tag & kCDXTag_Object) |
769 | 0 | { |
770 | | //Object |
771 | 0 | tss<<string(depth-1,indentchar) << "Object " << tag |
772 | 0 | << TagName(enummap,tag) << " id=" << ids.back() << endl; |
773 | 0 | } |
774 | 0 | else |
775 | 0 | { |
776 | | //Property |
777 | 0 | if(!(wtoptions &1)) |
778 | 0 | { |
779 | 0 | stringstream ss; |
780 | 0 | ss << _len; |
781 | 0 | tss<<string(depth,indentchar) << "Property "<< tag << TagName(enummap,tag) |
782 | 0 | << " [" << ss.str() << " bytes] "; |
783 | 0 | for(unsigned i=0;i<_len;++i) |
784 | 0 | { |
785 | 0 | ss.str(""); |
786 | 0 | ss.fill('0'); |
787 | 0 | ss.width(8); |
788 | 0 | ss << hex << static_cast<unsigned>(_buf[i]) << dec; |
789 | 0 | tss << ss.str()[6] << ss.str()[7] << ' '; |
790 | 0 | } |
791 | |
|
792 | 0 | if(tag==0x700 || tag==kCDXProp_CreationProgram || tag==kCDXProp_CreationDate |
793 | 0 | || tag==kCDXProp_Name) |
794 | 0 | { |
795 | 0 | stringstream ss(_buf); |
796 | 0 | UINT16 nStyleRuns; |
797 | 0 | READ_INT16(ss, nStyleRuns); |
798 | 0 | tss << '\"'; |
799 | 0 | for(unsigned i=2+nStyleRuns*10; i<_len; ++i) |
800 | 0 | tss << _buf[i]; |
801 | 0 | tss << '\"'; |
802 | 0 | } |
803 | 0 | tss << endl; |
804 | 0 | } |
805 | 0 | } |
806 | 0 | } |
807 | 0 | return nullptr; //error exit |
808 | 0 | } |
809 | | |
810 | | /////////////////////////////////////////////////////////////////////// |
811 | | bool CDXReader::ParseEnums(map<CDXTag, string>& enummap, const string& filename) |
812 | 0 | { |
813 | 0 | ifstream ihs; |
814 | 0 | if(OpenDatafile(ihs, filename).empty()) |
815 | 0 | { |
816 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
817 | 0 | filename + " needs to be in the *data* directory when displaying the tree.\n" , obError); |
818 | 0 | return false; |
819 | 0 | } |
820 | 0 | ignore(ihs, "enum CDXDatumID"); |
821 | 0 | string ln; |
822 | 0 | vector<string> vec; |
823 | 0 | stringstream ss; |
824 | 0 | CDXTag tag; |
825 | 0 | while(ihs) |
826 | 0 | { |
827 | 0 | getline(ihs, ln); |
828 | 0 | tokenize(vec, ln, " \t,{}"); |
829 | 0 | if(vec.size()==0 || vec[0]=="//") |
830 | 0 | continue; //blank and comment lines |
831 | 0 | if(vec[0]==";") //line is }; end of enum |
832 | 0 | return true; |
833 | 0 | if(vec[0][0]!='k') //only collect enums starting with kCDX |
834 | 0 | continue; |
835 | 0 | int tagpos = (vec[1]=="=" && vec.size()>4) ? 4 : 2; |
836 | 0 | ss.str(vec[tagpos]); |
837 | 0 | ss.clear(); |
838 | 0 | ss >> hex >> tag; |
839 | 0 | if(ss) |
840 | 0 | { |
841 | 0 | if(tag==0x0400 && vec[0]=="kCDXUser_TemporaryEnd")//special case |
842 | 0 | continue; |
843 | 0 | enummap[tag] = vec[0]; |
844 | 0 | } |
845 | 0 | } |
846 | 0 | return false; |
847 | 0 | } |
848 | | ///////////////////////////////////////////////////////////////////////// |
849 | | |
850 | | string CDXReader::TagName(map<CDXTag, string>& enummap, CDXTag tag) |
851 | 0 | { |
852 | 0 | string tagname; |
853 | 0 | if(!enummap.empty()) |
854 | 0 | { |
855 | 0 | map<CDXTag, std::string>::iterator iter = enummap.find(tag); |
856 | 0 | if(iter!=enummap.end()) |
857 | 0 | { |
858 | 0 | tagname=iter->second; |
859 | | //Remove prefix, e.g. kCDXProp_ |
860 | 0 | string::size_type pos = tagname.find('_'); |
861 | 0 | if(pos!=string::npos) |
862 | 0 | { |
863 | 0 | tagname.erase(0,pos); |
864 | 0 | tagname[0] = ' '; |
865 | 0 | } |
866 | 0 | } |
867 | 0 | } |
868 | 0 | return tagname; |
869 | 0 | } |
870 | | |
871 | | } //namespace |