Coverage Report

Created: 2026-01-17 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/formats/chemdrawcdx.cpp
Line
Count
Source
1
/**********************************************************************
2
Copyright (C) 2006 by Fredrik Wallner
3
Some portions Copyright (C) 2006-2007 by Geoffrey Hutchison
4
Some portions Copyright (C) 2011 by Chris Morley
5
6
This program is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation version 2 of the License.
9
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
GNU General Public License for more details.
14
***********************************************************************/
15
16
#include <openbabel/babelconfig.h>
17
#include <openbabel/obmolecformat.h>
18
#include <openbabel/mol.h>
19
#include <openbabel/atom.h>
20
#include <openbabel/bond.h>
21
#include <openbabel/elements.h>
22
#include <openbabel/reactionfacade.h>
23
#include <openbabel/stereo/stereo.h>
24
#include <openbabel/obfunctions.h>
25
#include <openbabel/reaction.h>
26
#include <openbabel/tokenst.h>
27
#include <openbabel/alias.h>
28
#include <openbabel/text.h>
29
#include "chemdrawcdx.h"
30
31
#include <iostream>
32
#include <fstream>
33
#include <sstream>
34
#include <map>
35
#include <list>
36
37
38
#if !defined(__CYGWIN__)
39
0
// Byte-order reversal helpers used by the READ_INT* macros on big-endian
// hosts. All arithmetic is done on unsigned operands so that no shift ever
// moves a bit into the sign position of a promoted int.

// Swap the two bytes of a 16-bit value.
static inline unsigned short bswap_16(unsigned short x) {
  const unsigned int v = x;
  return static_cast<unsigned short>(((v & 0x00ffu) << 8) | ((v & 0xff00u) >> 8));
}

// Reverse the four bytes of a 32-bit value.
static inline unsigned int bswap_32(unsigned int x) {
  const unsigned int low  = bswap_16(static_cast<unsigned short>(x & 0xffffu));
  const unsigned int high = bswap_16(static_cast<unsigned short>((x >> 16) & 0xffffu));
  return (low << 16) | high;
}

// Reverse the eight bytes of a 64-bit value.
static inline unsigned long long bswap_64(unsigned long long x) {
  const unsigned long long low  = bswap_32(static_cast<unsigned int>(x & 0xffffffffull));
  const unsigned long long high = bswap_32(static_cast<unsigned int>(x >> 32));
  return (low << 32) | high;
}
50
#endif
51
52
// Macs -- need to use Apple macros to deal with Universal binaries correctly
53
// CDX data is stored little-endian. Work out once whether the host needs a
// byte swap after reading:
//  - on Apple platforms use the system BYTE_ORDER macro (Universal binaries),
//  - elsewhere rely on WORDS_BIGENDIAN from babelconfig.h.
#ifdef __APPLE__
#  include <machine/endian.h>
#  if BYTE_ORDER == BIG_ENDIAN
#    define OB_CDX_SWAP_BYTES 1
#  endif
#elif defined(WORDS_BIGENDIAN)
#  define OB_CDX_SWAP_BYTES 1
#endif

// READ_INT16 / READ_INT32 read sizeof(data) bytes from `stream` into `data`
// and convert from little-endian to host order.
// Each macro expands to exactly one statement (do { } while(0)), so it is
// safe as the body of an unbraced if/else. Invoke it with a trailing ';'.
#ifdef OB_CDX_SWAP_BYTES
#  define READ_INT16(stream,data) \
     do { (stream).read((char*)&(data), sizeof(data)); \
          (data) = bswap_16(data); } while(0)
#  define READ_INT32(stream,data) \
     do { (stream).read((char*)&(data), sizeof(data)); \
          (data) = bswap_32(data); } while(0)
#else
#  define READ_INT16(stream,data) \
     do { (stream).read((char*)&(data), sizeof(data)); } while(0)
#  define READ_INT32(stream,data) \
     do { (stream).read((char*)&(data), sizeof(data)); } while(0)
#endif
// end of endianness handling
87
// end Apple/non-Apple systems
88
89
using namespace std;
90
namespace OpenBabel
91
{
92
93
//Class which traverse the tree in CDX binary files 
94
class CDXReader
95
{
96
public:
97
  CDXReader(std::istream& is);
98
  CDXTag ReadNext(bool objectsOnly=false, int targetDepth=-2);
99
0
  void IgnoreObject()          { ReadNext(true, GetDepth()-1); }
100
0
  operator bool ()const        { return (bool)ifs; }
101
0
  int GetDepth()const          { return depth; }
102
0
  int GetLen()const            { return _len;} //length of current property data
103
0
  CDXObjectID CurrentID()const { return ids.back(); }
104
  stringstream& data(); //call this only once for each set of property data
105
106
  //Routines to display the structure of a cdx binary file
107
  OBText* WriteTree(const std::string& filename, unsigned wtoptions);
108
private:
109
  bool ParseEnums(std::map<CDXTag, std::string>& enummap, const std::string& filename);
110
  std::string TagName(std::map<CDXTag, std::string>& enummap, CDXTag tag);
111
112
private:
113
  std::istream& ifs;
114
  int depth;
115
  std::vector<CDXObjectID> ids;
116
  CDXObjectID _tempback;
117
  std::string _buf;
118
  UINT16 _len;
119
  std::stringstream _ss;
120
};
121
122
//**************************************************************
123
class ChemDrawBinaryXFormat : OBMoleculeFormat
124
{
125
public:
126
  //Register this format type ID in the constructor
127
  ChemDrawBinaryXFormat()
128
6
  {
129
6
    OBConversion::RegisterFormat("cdx",this);
130
6
  }
131
132
  const char* Description() override  // required
133
0
  {
134
0
    return
135
0
      "ChemDraw binary format\n"
136
0
      "Read only\n"
137
0
      "The whole file is read in one call.\n"
138
0
      "Note that a file may contain a mixture of reactions and\n"
139
0
      "molecules.\n"
140
141
0
      "With the -ad option, a human-readable representation of the CDX tree\n"
142
0
      "structure is output as an OBText object. Use textformat to view it::\n\n"
143
144
0
      "    obabel input.cdx -otext -ad\n\n"
145
146
0
      "Many reactions in CDX files are not fully specified with reaction data\n"
147
0
      "structures, and may not be completely interpreted by this parser.\n\n"
148
149
0
      "Read Options, e.g. -am\n"
150
0
      " m read molecules only; no reactions\n"
151
0
      " d output CDX tree to OBText object\n"
152
0
      " o display only objects in tree output\n";
153
0
  }
154
155
  const char* SpecificationURL() override
156
0
  {return "http://www.cambridgesoft.com/services/documentation/sdk/chemdraw/cdx/IntroCDX.htm";}
157
158
  const char* GetMIMEType() override
159
0
  { return "chemical/x-cdx"; }
160
161
  unsigned int Flags() override
162
6
  {
163
6
    return READBINARY|NOTWRITABLE;
164
6
  }
165
166
  ////////////////////////////////////////////////////
167
  bool ReadMolecule(OBBase* pOb, OBConversion* pConv) override;
168
169
private:
170
  enum graphicType {none, equilArrow};
171
  bool        TopLevelParse(CDXReader& cdxr, OBConversion* pConv,CDXObjectID ContainingGroup);
172
  bool        DoFragment(CDXReader& cdxr, OBMol* pmol);
173
  bool        DoFragmentImpl(CDXReader& cdxr, OBMol* pmol,
174
         map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown);
175
  bool        DoReaction(CDXReader& cdxr, OBMol* pReact);
176
  std::string DoText(CDXReader& cdxr);
177
178
  std::vector<OBMol*> LookupMol(CDXObjectID id);
179
  graphicType         LookupGraphic(CDXObjectID id);
180
  OBMol*              LookupInMolMap(CDXObjectID id);
181
182
private:
183
  bool readReactions;
184
  static const bool objectsOnly = true;
185
  std::map<CDXObjectID, graphicType> _graphicmap;
186
  std::map<CDXObjectID, OBMol*> _molmap;
187
  std::map<CDXObjectID, std::vector<CDXObjectID> > _groupmap;
188
  // In case of chain A -> B -> C, B is both reactant and product
189
  CDXObjectID _lastProdId;
190
  typedef std::map<CDXObjectID, std::vector<CDXObjectID> >::iterator GroupMapIterator;
191
  static const unsigned usedFlag = 1<<30;
192
};
193
194
//******************************************************************
195
  //Global instance of the format
196
 ChemDrawBinaryXFormat theChemDrawBinaryXFormat;
197
//******************************************************************
198
199
 /*New CDXformat
200
Each fragment goes into a new OBMol on the heap.
201
The CDX id and OBMol* are added to _molmap.
202
When a reaction is found, the reactant/product/agent CDX ids are looked up in molmap,
203
and added to an OBReaction (made by deleting pOb if it is an OBMol
204
and assigning pOb to a new OBReaction). The OBMol is marked as Used.
205
When the reaction is complete it is output via AddChemObject().
206
At the end, any OBMol in the map not marked as Used is output as an OBMol.
207
*/
208
209
210
// Reads the whole CDX input in one call.
//  pOb   - unused here; every object is handed to pConv->AddChemObject().
//  pConv - supplies the input stream and the read options ("m", "d", "o").
// Returns false when the header is unusable, when the tree dump could not be
// produced, or when an object could not be added to the conversion.
bool ChemDrawBinaryXFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
{
  _molmap.clear();
  _graphicmap.clear();
  _groupmap.clear();

  CDXReader cdxr(*pConv->GetInStream());
  if(!cdxr)
    return false; // stream already failed (e.g. unreadable header): nothing to parse

  readReactions = pConv->IsOption("m", OBConversion::INOPTIONS) == nullptr;

  // Write CDX tree only if requested
  if(pConv->IsOption("d",OBConversion::INOPTIONS))
  {
    unsigned wtoptions = 0;
    if(pConv->IsOption("o",OBConversion::INOPTIONS))
      wtoptions |= 1; //display objects only

    OBText* pText = cdxr.WriteTree("chemdrawcdx.h", wtoptions);
    if(!pText)
      return false;
    pConv->AddChemObject(pText);
    return true;
  }

  // Normal reading of molecules and reactions: keep parsing top-level
  // objects until the reader reports that the stream is exhausted.
  while(cdxr)
  {
    if(!TopLevelParse(cdxr, pConv, 0))
      return false;
  }

  //At the end, output molecules that have not been used in a reaction.
  // NOTE(review): molecules that were used in a reaction or are "justplus"
  // placeholders are neither output nor deleted here - confirm who owns them.
  for(std::map<CDXObjectID, OBMol*>::iterator mapiter = _molmap.begin();
      mapiter != _molmap.end(); ++mapiter)
  {
    OBMol* pmol = mapiter->second;
    const bool usedInReaction = (pmol->GetFlags() & usedFlag) != 0;
    const bool isPlusSign = strcmp(pmol->GetTitle(),"justplus") == 0;
    if(usedInReaction || isPlusSign)
      continue;

    OBMol* ptmol = static_cast<OBMol*>(pmol->DoTransformations(
                  pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
    if(!ptmol)
      delete pmol; // rejected by the general options
    else if(!pConv->AddChemObject(ptmol))
      return false; //error during writing
  }

  return true;
}
263
///////////////////////////////////////////////////////////////////////
264
bool ChemDrawBinaryXFormat::TopLevelParse
265
        (CDXReader& cdxr, OBConversion* pConv, CDXObjectID ContainingGroup)
266
0
{
267
0
  bool ok = true;
268
0
  CDXTag tag;
269
0
  while((tag = cdxr.ReadNext(objectsOnly)))
270
0
  {
271
0
    if(tag == kCDXObj_Group)
272
0
    {
273
0
      CDXObjectID cid = cdxr.CurrentID();
274
0
      vector<CDXObjectID> v;
275
0
      _groupmap.insert(make_pair(cid,v)); //empty vector as yet
276
0
      TopLevelParse(cdxr, pConv, cid );
277
0
    }
278
279
0
    else if(tag==kCDXObj_Fragment)
280
0
    {
281
0
      OBMol* pmol = new OBMol;
282
      //Save all molecules to the end
283
0
      _molmap[cdxr.CurrentID()] = pmol;
284
285
0
      if(ContainingGroup)
286
0
      {
287
        // Add the id of this mol to the group's entry in _groupmap 
288
0
        GroupMapIterator gmapiter = _groupmap.find(ContainingGroup);
289
0
        if(gmapiter!=_groupmap.end())
290
0
          gmapiter->second.push_back(cdxr.CurrentID());
291
0
      }
292
0
      ok = DoFragment(cdxr, pmol);
293
0
    }
294
295
0
    else if(tag == kCDXObj_ReactionStep && readReactions)
296
0
    {
297
0
      OBMol* pReact = new OBMol;
298
0
      pReact->SetIsReaction();
299
0
      ok = DoReaction(cdxr, pReact);
300
      // Output OBReaction and continue 
301
0
      if(pReact)
302
0
        if(!pConv->AddChemObject(pReact))
303
0
          return false; //error during writing
304
0
    }
305
306
0
    else if(ok && tag==kCDXObj_Graphic)
307
0
    {
308
0
      while( (tag = cdxr.ReadNext()) )
309
0
      {
310
0
        stringstream& ss = cdxr.data();
311
0
        if(tag == kCDXProp_Arrow_Type)
312
0
        {
313
0
          char type1=0;
314
0
          UINT16 type2=0;
315
0
          if(cdxr.GetLen()==1)
316
0
            ss.get(type1);
317
0
          else
318
0
            READ_INT16(ss,type2);
319
0
          if(type1==kCDXArrowType_Equilibrium || type2==kCDXArrowType_Equilibrium)
320
0
            _graphicmap[type1+type2] = equilArrow; //save in graphicmap
321
0
        }
322
0
      }
323
0
    }
324
0
  }
325
0
  return true;
326
0
}
327
///////////////////////////////////////////////////////////////////////
328
// Reads the properties of a reaction step and adds the referenced molecules
// to pReact as reactants or products. Molecules titled "justplus" are
// skipped. Always returns true.
bool ChemDrawBinaryXFormat::DoReaction(CDXReader& cdxr, OBMol* pReact)
{
  OBReactionFacade facade(pReact);
  CDXObjectID id;
  CDXTag tag;

  while( (tag = cdxr.ReadNext()) )
  {
    if(tag == kCDXProp_ReactionStep_Reactants)
    {
      stringstream& idStream = cdxr.data();
      const unsigned idCount = cdxr.GetLen()/4; // one 4-byte id per reactant
      for(unsigned n=0; n<idCount; ++n)
      {
        READ_INT32(idStream,id);
        //id could be a group with several mols
        const vector<OBMol*> mols = LookupMol(id);
        for(vector<OBMol*>::const_iterator m=mols.begin(); m!=mols.end(); ++m)
        {
          if(strcmp((*m)->GetTitle(),"justplus") != 0)
            facade.AddComponent(*m, REACTANT);
        }
      }
      continue;
    }

    if(tag == kCDXProp_ReactionStep_Products)
    {
      stringstream& idStream = cdxr.data();
      const unsigned idCount = cdxr.GetLen()/4; // one 4-byte id per product
      for(unsigned n=0; n<idCount; ++n)
      {
        READ_INT32(idStream,id);
        //id could be a group with several mols
        const vector<OBMol*> mols = LookupMol(id);
        for(vector<OBMol*>::const_iterator m=mols.begin(); m!=mols.end(); ++m)
        {
          if(strcmp((*m)->GetTitle(),"justplus") != 0)
          {
            facade.AddComponent(*m, PRODUCT);
            _lastProdId = id;
          }
        }
      }
      continue;
    }

    if(tag == kCDXProp_ReactionStep_Arrows)
    {
      // The arrow id is read but not used yet.
      READ_INT32(cdxr.data(),id);
      //if(LookupGraphic(id)==equilArrow) // TODO? Store reversibility somehow?
      //  pReact->SetReversible();
    }
  }
  return true;
}
373
///////////////////////////////////////////////////////////////////////
374
// Resolves a CDX id to molecules: every member found in _molmap when the id
// names a group, otherwise the single fragment with that id (if known).
vector<OBMol*> ChemDrawBinaryXFormat::LookupMol(CDXObjectID id)
{
  vector<OBMol*> found;

  const GroupMapIterator group = _groupmap.find(id);
  if(group == _groupmap.end())
  {
    //id is not a group; it must be a fragment
    if(OBMol* single = LookupInMolMap(id))
      found.push_back(single);
    return found;
  }

  //id is that of a kCDXObj_Group: collect each member fragment
  const vector<CDXObjectID>& members = group->second;
  for(vector<CDXObjectID>::const_iterator member = members.begin();
      member != members.end(); ++member)
  {
    if(OBMol* mol = LookupInMolMap(*member))
      found.push_back(mol);
  }
  return found;
}
398
399
// Returns the molecule stored for a fragment id and flags it as used, or
// logs an error and returns nullptr when the id is unknown.
OBMol* ChemDrawBinaryXFormat::LookupInMolMap(CDXObjectID id)
{
  const std::map<CDXObjectID, OBMol*>::iterator entry = _molmap.find(id);
  if(entry == _molmap.end())
  {
    stringstream msg;
    msg << "Reactant or product mol not found id = " << hex << showbase << id;
    obErrorLog.ThrowError(__FUNCTION__, msg.str(), obError);
    return nullptr;
  }

  //Mark mol as used in a reaction, so that it will not be output independently
  OBMol* mol = entry->second;
  mol->SetFlags(mol->GetFlags() | usedFlag);
  return mol;
}
417
418
////////////////////////////////////////////////////////////////////////
419
// Returns the graphic type recorded for an id, or `none` when the id has no
// entry in _graphicmap.
ChemDrawBinaryXFormat::graphicType ChemDrawBinaryXFormat::LookupGraphic(CDXObjectID id)
{
  const std::map<CDXObjectID, graphicType>::iterator entry = _graphicmap.find(id);
  return (entry == _graphicmap.end()) ? none : entry->second;
}
428
429
////////////////////////////////////////////////////////////////////////
430
// Builds pmol from the fragment object the reader is positioned on:
// parses atoms and bonds, perceives stereo from the 2D layout and finally
// expands any alias atoms. Always returns true.
bool ChemDrawBinaryXFormat::DoFragment(CDXReader& cdxr, OBMol* pmol)
{
  map<CDXObjectID, unsigned> atommap; //key = CDX id; value = OB atom idx
  map<OBBond*, OBStereo::BondDirection> updown;

  pmol->SetDimension(2);
  pmol->BeginModify();

  //The real work is in DoFragmentImpl, since Fragment elements can be nested
  DoFragmentImpl(cdxr, pmol, atommap, updown);

  // use 2D coordinates + hash/wedge to determine stereochemistry
  StereoFrom2D(pmol, &updown);

  pmol->EndModify();

  //Aliases are expanded only after the molecule has been constructed:
  //first collect the atoms carrying an unexpanded alias...
  vector<OBAtom*> pending;
  for(unsigned atomIdx=1; atomIdx<=pmol->NumAtoms(); ++atomIdx)
  {
    OBAtom* atom = pmol->GetAtom(atomIdx);
    AliasData* alias = dynamic_cast<AliasData*>(atom->GetData(AliasDataType));
    if(alias && !alias->IsExpanded())
      pending.push_back(atom);
  }

  //...then expand each of them.
  for(vector<OBAtom*>::size_type k=0; k<pending.size(); ++k)
  {
    OBAtom* atom = pending[k];
    const int atomIdx = atom->GetIdx();
    AliasData* alias = dynamic_cast<AliasData*>(atom->GetData(AliasDataType));
    if(alias && !alias->IsExpanded())
      alias->Expand(*pmol, atomIdx); //Make chemically meaningful, if possible.
  }
  return true;
}
466
467
bool ChemDrawBinaryXFormat::DoFragmentImpl(CDXReader& cdxr, OBMol* pmol, 
468
       map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown)
469
0
{
470
0
  CDXTag tag;
471
0
  std::vector<OBAtom*> handleImplicitCarbons;
472
0
  while((tag = cdxr.ReadNext(objectsOnly)))
473
0
  {
474
0
    if(tag==kCDXObj_Node)
475
0
    {
476
0
      unsigned nodeID = cdxr.CurrentID();
477
0
      bool isAlias=false, hasElement=false;
478
0
      bool hasNumHs = false;
479
0
      UINT16 atnum=-1, spin=0, numHs=0;
480
0
      int x, y, charge=0, iso=0;
481
0
      string aliastext;
482
483
      //Read all node properties
484
0
      while( (tag = cdxr.ReadNext()) )
485
0
      {
486
0
        switch(tag)
487
0
        {
488
0
        case kCDXProp_Node_Type:
489
0
          UINT16 type;
490
0
          READ_INT16(cdxr.data(), type);
491
0
          if(type==4 || type==5) //Nickname or fragment
492
0
            isAlias = true;
493
0
          break;
494
0
        case kCDXProp_Node_Element:
495
0
          READ_INT16(cdxr.data(), atnum);
496
0
          hasElement = true;
497
0
          break;
498
0
        case kCDXProp_2DPosition:
499
0
          {
500
0
            stringstream& ss = cdxr.data();
501
0
            READ_INT32(ss, y); //yes, this way round
502
0
            READ_INT32(ss, x);
503
0
          }
504
0
            break;
505
0
        case kCDXProp_Atom_Charge:
506
0
          if(cdxr.GetLen()==1)
507
0
            charge = (int8_t)cdxr.data().get();
508
0
          else
509
0
            READ_INT32(cdxr.data(), charge);
510
0
          break;
511
0
        case kCDXProp_Atom_Radical:
512
0
          READ_INT16(cdxr.data(),spin);
513
0
          break;
514
0
        case kCDXProp_Atom_Isotope:
515
0
          READ_INT16(cdxr.data(),iso);
516
0
          break;
517
0
        case kCDXProp_Atom_NumHydrogens:
518
0
          READ_INT16(cdxr.data(), numHs);
519
0
          hasNumHs = true;
520
0
          break;
521
0
        case kCDXProp_Atom_CIPStereochemistry:
522
0
          break;
523
0
        case kCDXObj_Text:
524
0
          aliastext = DoText(cdxr);
525
0
          if(aliastext=="+")
526
0
          {
527
            //This node is not an atom, but dangerous to delete
528
0
            pmol->SetTitle("justplus");
529
0
          }
530
0
          break;
531
0
        case kCDXObj_Fragment:
532
        /* ignore fragment contained in node
533
        if(isAlias)
534
          {
535
            unsigned Idxbefore = pmol->NumAtoms();
536
            if(DoFragmentImpl(cdxr, pmol, atommap, updown))
537
              return false;
538
          }
539
         */
540
          //ignore the contents of this node
541
0
          cdxr.IgnoreObject();
542
          //cdxr.ReadNext(objectsOnly, cdxr.GetDepth()-1);
543
0
          break;
544
0
        default:
545
0
          if(tag & kCDXTag_Object) //unhandled object
546
0
            while(cdxr.ReadNext());
547
0
        }
548
0
      }
549
      //All properties of Node have now been read
550
0
      OBAtom* pAtom = pmol->NewAtom();
551
0
      pAtom->SetVector(x*1.0e-6, -y*1.0e-6, 0); //inv y axis
552
0
      atommap[nodeID] = pmol->NumAtoms();
553
0
      if(isAlias || (!aliastext.empty() && atnum==0xffff))
554
0
      {
555
        //Treat text as an alias 
556
0
        pAtom->SetAtomicNum(0);
557
0
        AliasData* ad = new AliasData();
558
0
        ad->SetAlias(aliastext);
559
0
        ad->SetOrigin(fileformatInput);
560
0
        pAtom->SetData(ad);
561
0
      } 
562
0
      else
563
0
      {
564
0
        if(atnum==0xffff)
565
0
          atnum = 6; //atoms are C by default
566
0
        pAtom->SetAtomicNum(atnum);
567
0
        if (hasNumHs)
568
0
          pAtom->SetImplicitHCount(numHs);
569
0
        else if (atnum==6)
570
0
          handleImplicitCarbons.push_back(pAtom);
571
0
        pAtom->SetFormalCharge(charge);
572
0
        pAtom->SetIsotope(iso);
573
0
        pAtom->SetSpinMultiplicity(spin);
574
0
      }
575
0
    }
576
577
0
    else if(tag==kCDXObj_Bond)
578
0
    {
579
0
      CDXObjectID bgnID, endID;
580
0
      int order=1, bgnIdx, endIdx ;
581
0
      UINT16 stereo=0;
582
583
0
      while( (tag = cdxr.ReadNext()) )
584
0
      {
585
0
        switch(tag)
586
0
        {
587
0
        case kCDXProp_Bond_Begin:
588
0
          READ_INT32(cdxr.data(), bgnID);
589
0
          bgnIdx = atommap[bgnID];
590
0
          break;
591
0
        case kCDXProp_Bond_End:
592
0
          READ_INT32(cdxr.data(), endID);
593
0
          endIdx = atommap[endID];
594
0
          break;
595
0
        case kCDXProp_Bond_Order:
596
0
          READ_INT16(cdxr.data(), order);
597
0
          switch (order)
598
0
          {
599
0
          case 0xFFFF: // undefined, keep 1 for now
600
0
            order = 1;
601
0
          case 0x0001:
602
0
          case 0x0002:
603
0
            break;
604
0
          case 0x0004:
605
0
            order = 3;
606
0
            break;
607
0
          case 0x0080: // aromatic bond
608
0
            order = 5;
609
0
            break;
610
0
          default: // other cases are just not supported, keep 1
611
0
            order = 1;
612
0
            break;
613
0
          }
614
0
          break;
615
0
        case kCDXProp_Bond_Display:
616
0
          READ_INT16(cdxr.data(), stereo);
617
0
        break;
618
0
        }
619
0
      }
620
621
0
      if(!order || !bgnIdx || !endIdx)
622
0
      {
623
0
        obErrorLog.ThrowError(__FUNCTION__,"Incorrect bond", obError);
624
0
        return false;
625
0
      }
626
0
      if(stereo==4 || stereo==7 || stereo==10 || stereo==12)
627
0
        swap(bgnIdx, endIdx);
628
0
      pmol->AddBond(bgnIdx, endIdx, order);
629
0
      if(stereo)
630
0
      {
631
0
        OBBond* pBond = pmol->GetBond(pmol->NumBonds()-1);
632
0
        if(stereo==3 || stereo==4)
633
0
          pBond->SetHash();
634
0
        else if(stereo==6 || stereo==7)
635
0
          pBond->SetWedge();
636
0
      }
637
0
    }
638
0
  }
639
  // Handle 'implicit carbons' by adjusting their valence with
640
  // implicit hydrognes
641
0
  for(vector<OBAtom*>::iterator vit=handleImplicitCarbons.begin();
642
0
      vit!=handleImplicitCarbons.end(); ++vit)
643
0
    OBAtomAssignTypicalImplicitHydrogens(*vit);
644
645
0
  return true;
646
0
}
647
648
// Reads the properties of a Text object and returns the first
// whitespace-delimited word that follows the style-run table of its text
// property. Objects nested inside the text object are skipped.
string ChemDrawBinaryXFormat::DoText(CDXReader& cdxr)
{
  string text;
  CDXTag tag;
  while( (tag=cdxr.ReadNext()) )
  {
    stringstream& propData = cdxr.data();

    if(tag == kCDXProp_Text)
    {
      // Layout: 16-bit count of style runs, 10 bytes per run, then the text.
      UINT16 nStyleRuns;
      READ_INT16(propData,nStyleRuns);
      propData.ignore(nStyleRuns*10);
      propData >> text;
    }

    // The object check is applied to every tag, the text property included.
    if(tag & kCDXTag_Object) //unhandled object
    {
      while(cdxr.ReadNext())
        ;
    }
  }
  return text;
}
669
670
//****************************************************************
671
// Advances to the next item of interest and returns its tag.
//  objectsOnly - when true, property data is skipped and never returned.
//  targetDepth - when >= 0, only the start of an object at that depth and
//                the return to that depth are reported; a negative value
//                reports every object start and end.
// Returns the tag of an object (its id is then CurrentID()) or of a property
// (its bytes are then available through data()); returns 0 at the end of an
// object, at the end of the document, and when the stream cannot be read.
CDXTag CDXReader::ReadNext(bool objectsOnly, int targetDepth)
{
  while(ifs) 
  {
    CDXTag tag = 0;
    READ_INT16(ifs, tag);
    if(!ifs)
      break; // truncated input: never act on a tag that was not read

    if(tag==0)
    {
      if(depth==0)
      {
        ifs.setstate(ios::eofbit); //ignore everything after end of document
        return 0; //end of document
      }
      --depth;
      _tempback = ids.back(); //needed for WriteTree
      ids.pop_back();
      if(targetDepth<0 || depth == targetDepth)
        return 0; //end of object
    }
    else if(tag & kCDXTag_Object)
    {
      CDXObjectID id = 0;
      READ_INT32(ifs, id);
      if(!ifs)
        break; // object header cut short
      ids.push_back(id);
      ++depth;
      if(targetDepth<0 || depth-1 == targetDepth)
        return tag; //object
    }
    else
    {
      //property
      UINT16 len = 0;
      READ_INT16(ifs, len);
      if(!ifs)
        break; // property header cut short
      _len = len;

      if(objectsOnly)
        ifs.ignore(_len);
      else
      {
        //copy property data to buffer; bytes missing from a short read stay zero
        _buf.assign(_len, '\0');
        if(_len)
          ifs.read(&_buf[0], _len);
        return tag; //property
      }
    }
  }
  return 0;
}
721
/////////////////////////////////////////////////////////////////////
722
723
// Loads the bytes of the current property into the shared stringstream,
// resets its state flags and returns it. Every call starts again from the
// first byte of the property.
stringstream& CDXReader::data()
{
  _ss.str(_buf);  // replace the contents with the current property data
  _ss.clear();    // drop any eof/fail state left by the previous property
  return _ss;
}
729
/////////////////////////////////////////////////////////////////////
730
731
0
// Verifies the CDX header string at the start of `is` and skips the rest of
// the fixed-size header. When the header is missing or cannot be read, an
// error is logged and the stream is put into a failed state, so the reader
// converts to false and ReadNext() returns 0. No exception is thrown.
CDXReader::CDXReader(std::istream& is)
  : ifs(is), depth(0), _tempback(CDXObjectID()), _len(0)
{
  //ReadHeader
  char buffer[kCDX_HeaderStringLen+1] = {};
  ifs.read(buffer,kCDX_HeaderStringLen);
  if(ifs && strncmp(buffer, kCDX_HeaderString, kCDX_HeaderStringLen) == 0)
  {
    ifs.ignore(kCDX_HeaderLength - kCDX_HeaderStringLen); // Discard rest of header.
    return;
  }

  obErrorLog.ThrowError(__FUNCTION__,"Invalid file, no ChemDraw Header",obError);
  // failbit is what makes `operator bool` false; eofbit alone would not.
  // A bare `throw;` here would call std::terminate, since no exception is
  // being handled.
  ifs.setstate(ios::eofbit | ios::failbit);
}
746
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
747
//Routines to display the structure of a cdx binary file
748
749
// Produces a text dump of the CDX tree.
//  filename  - data file from which the tag names are parsed (ParseEnums);
//              when it cannot be parsed the tags are shown without names.
//  wtoptions - bit 0 set: list objects only (no properties, no object ends).
// Returns a new OBText holding the dump once the end of the input is
// reached, or nullptr when the stream fails before that.
OBText* CDXReader::WriteTree(const string& filename, unsigned wtoptions)
{  
  const char indentchar = '\t';
  const bool objectsOnlyOutput = (wtoptions & 1) != 0;
  static const char hexdigits[] = "0123456789abcdef";

  std::map<CDXTag, std::string> enummap;
  ParseEnums(enummap, filename); // best effort: names are optional

  stringstream tss;
  tss << hex << showbase;

  while(*this)
  {
    const CDXTag tag = ReadNext();
    if(ifs.eof())
      return new OBText(tss.str()); //normal exit

    if(tag==0 && !objectsOnlyOutput)
    {
      //Object end
      tss << string(depth,indentchar) << "ObjectEnd " << _tempback << '\n';
    } 
    else if(tag & kCDXTag_Object)
    {
      //Object
      tss << string(depth-1,indentchar) << "Object " << tag
          << TagName(enummap,tag) << " id=" << ids.back() << '\n';
    }
    else if(!objectsOnlyOutput)
    {
      //Property: tag, name, length in decimal, then the bytes in hex
      stringstream lenText;
      lenText << _len;
      tss << string(depth,indentchar) << "Property  " << tag << TagName(enummap,tag)
          << " [" << lenText.str() << " bytes] ";
      for(unsigned i=0; i<_len; ++i)
      {
        const unsigned char byte = static_cast<unsigned char>(_buf[i]);
        tss << hexdigits[byte >> 4] << hexdigits[byte & 0x0f] << ' ';
      }

      if(tag==0x700 || tag==kCDXProp_CreationProgram || tag==kCDXProp_CreationDate
        || tag==kCDXProp_Name)
      {
        //Text-like property: show the characters after the style-run table.
        stringstream ss(_buf);
        UINT16 nStyleRuns = 0; // stays 0 if the property is too short to hold the count
        READ_INT16(ss, nStyleRuns);
        tss << '\"';
        for(unsigned i=2+nStyleRuns*10; i<_len; ++i)
          tss << _buf[i];
        tss << '\"';
      }
      tss << '\n';
    }
  }
  return nullptr; //error exit
}
809
810
///////////////////////////////////////////////////////////////////////
811
// Fills enummap (tag value -> enumerator name) from the "enum CDXDatumID"
// declaration found in the data file `filename`.
// Returns true when the end of the enum ("};") was reached; false when the
// file could not be opened (an error is logged) or ended before that.
bool CDXReader::ParseEnums(map<CDXTag, string>& enummap, const string& filename)
{
  ifstream ihs;
  if(OpenDatafile(ihs, filename).empty())
  {
    obErrorLog.ThrowError(__FUNCTION__, 
      filename + " needs to be in the *data* directory when displaying the tree.\n" , obError);
    return false;
  }
  ignore(ihs, "enum CDXDatumID");

  string ln;
  vector<string> vec;
  stringstream ss;
  CDXTag tag;
  while(getline(ihs, ln)) // stops cleanly at EOF without reprocessing the last line
  {
    tokenize(vec, ln, " \t,{}");
    if(vec.empty() || vec[0]=="//")
      continue; //blank and comment lines
    if(vec[0]==";") //line is }; end of enum
      return true;
    if(vec[0].empty() || vec[0][0]!='k') //only collect enums starting with kCDX
      continue;
    if(vec.size() < 3)
      continue; //enumerator without "= value": nothing to parse

    // "name = value" has the value at position 2; with more than four tokens
    // after a "=" the value is taken from position 4.
    const vector<string>::size_type tagpos = (vec.size()>4 && vec[1]=="=") ? 4 : 2;
    ss.str(vec[tagpos]);
    ss.clear();
    ss >> hex >> tag;
    if(ss)
    {
      if(tag==0x0400 && vec[0]=="kCDXUser_TemporaryEnd")//special case
        continue;
      enummap[tag] = vec[0];
    }
  }
  return false;
}
848
/////////////////////////////////////////////////////////////////////////
849
850
// Returns a display name for `tag`: the enumerator name from enummap with
// everything up to and including its first '_' replaced by a single space
// (e.g. the "kCDXProp_" prefix), the unmodified name when it has no '_',
// or an empty string when the tag is not in the map.
string CDXReader::TagName(map<CDXTag, string>& enummap, CDXTag tag)
{
  const map<CDXTag, std::string>::const_iterator hit = enummap.find(tag);
  if(hit == enummap.end())
    return string();

  const string& fullName = hit->second;
  const string::size_type underscore = fullName.find('_');
  if(underscore == string::npos)
    return fullName;

  //Remove prefix, e.g. kCDXProp_, leaving a leading blank as separator
  return ' ' + fullName.substr(underscore + 1);
}
870
871
} //namespace