Coverage Report

Created: 2026-04-09 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/src/formats/cifformat.cpp
Line
Count
Source
1
/**********************************************************************
2
cifformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4
Copyright (C) 2006 Vincent Favre-Nicolin
5
6
This file is part of the Open Babel project.
7
For more information, see <http://openbabel.org/>
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
***********************************************************************/
18
19
#ifndef M_PI
20
#define M_PI 3.14159265358979323846
21
#endif
22
23
#include <openbabel/babelconfig.h>
24
#include <openbabel/obmolecformat.h>
25
#include <openbabel/mol.h>
26
#include <openbabel/atom.h>
27
#include <openbabel/elements.h>
28
#include <openbabel/bond.h>
29
#include <openbabel/obiter.h>
30
#include <openbabel/generic.h>
31
#include <cstdlib>
32
33
#include <openbabel/math/spacegroup.h>
34
35
#include <sstream>
36
#include <vector>
37
#include <list>
38
#include <map>
39
#include <set>
40
41
0
#define NOCHARGE FLT_MAX
42
43
#ifdef _MSC_VER
44
 #pragma warning( disable : 4503 )
45
 // The decorated name was longer than the compiler limit (4096), and was truncated.
46
 // This is due to the use of templates specialized on templates repeatedly.
47
 // The correctness of the program, however, is unaffected by the truncated name,
48
 // but if you get link time errors on a truncated symbol, it will be more difficult
49
 // to determine the type of the symbol in the error. Debugging will also be more difficult;
50
 // the debugger will also have difficultly mapping symbol name to type name.
51
#endif
52
53
using namespace std;
54
55
namespace OpenBabel
56
{
57
  class CIFFormat : public OBMoleculeFormat
58
  {
59
  public:
60
    //Register this format type ID
61
    CIFFormat()
62
6
    {
63
6
      RegisterFormat("cif", "chemical/x-cif");
64
6
    }
65
66
    const char* Description() override  // required
67
0
    {
68
0
      return
69
0
        "Crystallographic Information File\n"
70
0
        "The CIF file format is the standard interchange format for small-molecule crystal structures\n\n"
71
0
        "Fractional coordinates are converted to cartesian ones using the following convention:\n\n"
72
73
0
        "- The x axis is parallel to a\n"
74
0
        "- The y axis is in the (a,b) plane\n"
75
0
        "- The z axis is along c*\n\n"
76
77
0
        "Ref: Int. Tables for Crystallography (2006), vol. B, sec 3.3.1.1.1\n"
78
0
        "  (the matrix used is the 2nd form listed)\n\n"
79
80
0
        "Read Options e.g. -ab:\n"
81
0
        "  s  Output single bonds only\n"
82
0
        "  b  Disable bonding entirely\n"
83
0
        "  B  Use bonds listed in CIF file from _geom_bond_etc records (overrides option b)\n\n"
84
85
0
        "Write Options e.g. -xg:\n"
86
0
        "  w  Wrap atomic coordinates to unit cell (default = off)\n"
87
0
        "  g  Write bonds using _geom_bond_etc fields \n\n";
88
0
    }
89
90
    const char* SpecificationURL() override
91
0
    { return "http://www.iucr.org/iucr-top/cif/spec/"; }  // optional
92
93
    //*** This section identical for most OBMol conversions ***
94
    ////////////////////////////////////////////////////
95
    /// The "API" interface functions
96
    bool ReadMolecule(OBBase* pOb, OBConversion* pConv) override;
97
    bool WriteMolecule(OBBase* pOb, OBConversion* pConv) override;
98
  };
99
  //############################## Case-insensituve string####################################################
100
  // :@todo: This duplicates normal case-insensitive string comparison in OpenBabel
101
  /// Case-insensitive string class
102
  /// From: Guru of the Week #29
103
  /// e.g.: http://gcc.gnu.org/onlinedocs/libstdc++/21_strings/gotw29a.txt
104
  ///
105
  /// Public domain
106
  struct ci_char_traits : public std::char_traits<char>
107
  {
108
    static bool eq( char c1, char c2 );
109
110
    static bool ne( char c1, char c2 );
111
112
    static bool lt( char c1, char c2 );
113
114
    static int compare(const char* s1,const char* s2,size_t n );
115
116
    static const char* find( const char* s, int n, char a );
117
  };
118
119
  typedef std::basic_string<char, ci_char_traits> ci_string;
120
  int strnicmp(const char *s1, const char *s2, int len)
121
0
  {
122
0
    unsigned char c1, c2;
123
0
    while (len)
124
0
      {
125
0
        c1 = *s1; c2 = *s2;
126
0
        s1++; s2++;
127
0
        if (!c1) return c2 ? -1 : 0;
128
0
        if (!c2) return 1;
129
0
        if (c1 != c2)
130
0
          {
131
0
            c1 = tolower(c1);
132
0
            c2 = tolower(c2);
133
0
            if (c1 != c2) return c1 < c2 ? -1 : 1;
134
0
          }
135
0
        len--;
136
0
      }
137
0
    return 0;
138
0
  }
139
140
  bool ci_char_traits::eq( char c1, char c2 )
141
0
  {return tolower(c1) == tolower(c2);}
142
143
  bool ci_char_traits::ne( char c1, char c2 )
144
0
  {return tolower(c1) != tolower(c2);}
145
146
  bool ci_char_traits::lt( char c1, char c2 )
147
0
  {return tolower(c1) < tolower(c2);}
148
149
  int ci_char_traits::compare(const char* s1,const char* s2,size_t n )
150
0
  {return strnicmp( s1, s2, n );}
151
152
  const char* ci_char_traits::find( const char* s, int n, char a )
153
0
  {
154
0
    while( n-- > 0 && tolower(*s) != tolower(a) ) ++s;
155
0
    return s;
156
0
  }
157
  //############################## CIF CLASSES headers####################################################
158
  /** The CIFData class holds all the information from a \e single data_ block from a cif file.
159
   *
160
   * It is a placeholder for all comments, item and loop data, as raw strings copied from
161
   * a cif file.
162
   *
163
   * It is also used to interpret this data to extract parts of the cif data, i.e.
164
   * only part of the core cif dictionnary are recognized. CIF tags currently recognized
165
   * include ("tag1 > tag2" means tag1 is preferred to tag2 when extracting the info, only one is reported):
166
   *  - crystal name: _chemical_name_systematic > _chemical_name_mineral > _chemical_name_structure_type > _chemical_name_common
167
   *  - crystal formula: _chemical_formula_analytical > _chemical_formula_structural > _chemical_formula_iupac > _chemical_formula_moiety
168
   *  - unit cell:  _cell_length_{a,b,c} ; _cell_angle_{alpha,beta,gamma}
169
   *  - spacegroup number: _space_group_IT_number > _symmetry_Int_Tables_number
170
   *  - spacegroup Hall symbol: _space_group_name_Hall > _symmetry_space_group_name_Hall
171
   *  - spacegroup Hermann-Mauguin symbol:_space_group_name_H-M_alt > _symmetry_space_group_name_H-M
172
   *  - atom coordinates: _atom_site_fract_{x} ; _atom_site_Cartn_{x,y,z}
173
   *  - atom occupancy: _atom_site_occupancy
174
   *  - atom label & symbol: _atom_site_type_symbol ; _atom_site_label
175
   *
176
   * Cartesian coordinates are stored in Angstroems, angles in radians.
177
   *
178
   * If another data field is needed, it is possible to directly access the string data
179
   * (CIFData::mvComment , CIFData::mvItem and CIFData::mvLoop) to search for the correct tags.
180
   */
181
  class CIFData
182
  {
183
  public:
184
    CIFData();
185
186
    /// Extract lattice parameters, spacegroup (symbol or number), atomic positions,
187
    /// chemical name and formula if available.
188
    /// All other data is ignored
189
    void ExtractAll();
190
    /// Extract name & formula for the crystal
191
    void ExtractName();
192
    /// Extract unit cell
193
    void ExtractUnitCell();
194
    /// Extract spacegroup number or symbol
195
    void ExtractSpacegroup();
196
    /// Extract all atomic positions. Will generate cartesian from fractional
197
    /// coordinates or vice-versa if only cartesian coordinates are available.
198
    void ExtractAtomicPositions();
199
    /// Extract listed bond distances, from _geom_bond_* loops
200
    void ExtractBonds();
201
    //// Extract Charges information from cif file and assign it to atoms
202
    void ExtractCharges();
203
    /// Generate fractional coordinates from cartesian ones for all atoms
204
    /// CIFData::CalcMatrices() must be called first
205
    void Cartesian2FractionalCoord();
206
    /// Generate cartesian coordinates from fractional ones for all atoms
207
    /// CIFData::CalcMatrices() must be called first
208
    void Fractional2CartesianCoord();
209
    /// Convert from fractional to cartesian coordinates
210
    /// CIFData::CalcMatrices() must be called first
211
    void f2c(float &x,float &y, float &z);
212
    /// Convert from cartesia to fractional coordinates
213
    /// CIFData::CalcMatrices() must be called first
214
    void c2f(float &x,float &y, float &z);
215
    /// Calculate real space transformation matrices
216
    /// requires unit cell parameters
217
    void CalcMatrices();
218
    /// Comments from CIF file, in the order they were read
219
    std::list<std::string> mvComment;
220
    /// Individual CIF items
221
    std::map<ci_string,std::string> mvItem;
222
    /// CIF Loop data
223
    std::map<std::set<ci_string>,std::map<ci_string,std::vector<std::string> > > mvLoop;
224
    /// Lattice parameters, in ansgtroem and degrees - vector size is 0 if no
225
    /// parameters have been obtained yet.
226
    std::vector<float> mvLatticePar;
227
    /// Spacegroup number from International Tables (_space_group_IT_number), or -1.
228
    unsigned int mSpacegroupNumberIT;
229
    /// Spacegroup Hall symbol (or empty string) (_space_group_name_Hall)
230
    std::string mSpacegroupSymbolHall;
231
    /// Spacegroup Hermann-Mauguin symbol (or empty string) (_space_group_name_H-M_alt)
232
    std::string mSpacegroupHermannMauguin;
233
    /// Crystal name. Or empty string if none is available.
234
    std::string mName;
235
    /// Formula. Or empty string if none is available.
236
    std::string mFormula;
237
    /// Atom record
238
    struct CIFAtom
239
    {
240
      CIFAtom();
241
      /// Label of the atom, or empty string (_atom_site_label).
242
      std::string mLabel;
243
      /// Symbol of the atom, or empty string (_atom_type_symbol or _atom_site_type_symbol).
244
      std::string mSymbol;
245
      /// Fractionnal coordinates (_atom_site_fract_{x,y,z}) or empty vector.
246
      std::vector<float> mCoordFrac;
247
      /// Cartesian coordinates in Angstroem (_atom_site_Cartn_{x,y,z}) or empty vector.
248
      /// Transformation to fractionnal coordinates currently assumes
249
      /// "a parallel to x; b in the plane of y and z" (see _atom_sites_Cartn_transform_axes)
250
      std::vector<float> mCoordCart;
251
      /// Site occupancy, or -1
252
      float mOccupancy;
253
      //charge from oxydation
254
      float mCharge;
255
    };
256
    /// Atoms, if any are found
257
    std::vector<CIFAtom> mvAtom;
258
    /// Bond distance record
259
    struct CIFBond
260
    {
261
      /// Label of the two bonded atoms
262
      std::string mLabel1,mLabel2;
263
      /// distance
264
      float mDistance;
265
    };
266
    /// Atoms, if any are found
267
    std::vector<CIFBond> mvBond;
268
    /// Fractionnal2Cartesian matrix
269
    float mOrthMatrix[3][3];
270
    /// Cartesian2Fractionnal matrix
271
    float mOrthMatrixInvert[3][3];
272
    const SpaceGroup *mSpaceGroup;
273
    /// Name for the CIF data block
274
    std::string mDataBlockName;
275
  };
276
  /** Main CIF class - parses the stream and separates data blocks, comments, items, loops.
277
   * All values are stored as string, and Each CIF block is stored in a separate CIFData object.
278
   * No interpretaion is made here - this must be done from all CIFData objects.
279
   */
280
  class CIF
281
  {
282
  public:
283
    /// Creates the CIF object from a stream
284
    ///
285
    /// \param interpret: if true, interpret all data blocks. See CIFData::ExtractAll()
286
    CIF(std::istream &in, const bool interpret=true);
287
    //private:
288
    /// Separate the file in data blocks and parse them to sort tags, loops and comments.
289
    /// All is stored in the original strings.
290
    ///
291
    /// Returns the name of the next data block
292
    void Parse(std::istream &in);
293
    /// The data blocks, after parsing. The key is the name of the data block
294
    std::map<std::string,CIFData> mvData;
295
    /// Global comments, outside and data block
296
    std::list<std::string> mvComment;
297
  };
298
  /// Convert one CIF value to a floating-point value
299
  /// Return 0 if no value can be converted (e.g. if '.' or '?' is encountered)
300
  float CIFNumeric2Float(const std::string &s);
301
  /// Convert one CIF value to a floating-point value
302
  /// Return 0 if no value can be converted (e.g. if '.' or '?' is encountered)
303
  int CIFNumeric2Int(const std::string &s);
304
305
  template <typename T> string to_string(T pNumber)
306
  {
307
    ostringstream oOStrStream;
308
    oOStrStream << pNumber;
309
    return oOStrStream.str();
310
  }
311
312
  bool is_double(const std::string& s, double& r_double);
313
314
  //############################## CIF CLASSES CODE ####################################################
315
  CIFData::CIFAtom::CIFAtom():
316
0
    mLabel(""),mSymbol(""),mOccupancy(1.0f)
317
0
  {}
318
319
  CIFData::CIFData()
320
0
  {}
321
322
  void CIFData::ExtractAll()
323
0
  {
324
0
    {
325
0
      stringstream ss;
326
0
      ss<<"CIF: interpreting data block: "<<mDataBlockName;
327
0
      obErrorLog.ThrowError(__FUNCTION__, ss.str(), obInfo);
328
0
    }
329
0
    if(mDataBlockName=="data_global")
330
0
      { // :KLUDGE: this data block name is used for journal &author information
331
        // for IUCr journals, so do not generate an error if the block contains
332
        // no structural information
333
0
        bool empty_iucrjournal_block=true;
334
0
        if(mvItem.find("_cell_length_a"   )!=mvItem.end()) empty_iucrjournal_block=false;
335
0
        if(mvItem.find("_cell_length_b"   )!=mvItem.end()) empty_iucrjournal_block=false;
336
0
        if(mvItem.find("_cell_length_c"   )!=mvItem.end()) empty_iucrjournal_block=false;
337
0
        for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
338
0
            loop!=mvLoop.end();++loop)
339
0
          {
340
0
            if(loop->second.find("_atom_site_fract_x")!=loop->second.end()) empty_iucrjournal_block=false;
341
0
            if(loop->second.find("_atom_site_fract_y")!=loop->second.end()) empty_iucrjournal_block=false;
342
0
            if(loop->second.find("_atom_site_fract_z")!=loop->second.end()) empty_iucrjournal_block=false;
343
0
            if(loop->second.find("_atom_site_Cartn_x")!=loop->second.end()) empty_iucrjournal_block=false;
344
0
            if(loop->second.find("_atom_site_Cartn_y")!=loop->second.end()) empty_iucrjournal_block=false;
345
0
            if(loop->second.find("_atom_site_Cartn_z")!=loop->second.end()) empty_iucrjournal_block=false;
346
0
          }
347
0
        if(empty_iucrjournal_block)
348
0
          {
349
0
            stringstream ss;
350
0
            ss << "CIF WARNING: found en empty 'data_global' block - SKIPPING\n"
351
0
               << "  (you can safely ignore this if reading a CIF file from an IUCr journal)";
352
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
353
0
            return;
354
0
          }
355
0
    }
356
    // :@todo: Take care of values listed as "." and "?" instead of a real value.
357
0
    this->ExtractName();
358
    // Spacegroup must be extracted before unit cell
359
0
    this->ExtractSpacegroup();
360
0
    this->ExtractUnitCell();
361
0
    this->ExtractAtomicPositions();
362
0
    if(mvAtom.size()==0)
363
0
      {
364
0
        stringstream ss;
365
0
        ss << "CIF Error: no atom found ! (in data block:"<<mDataBlockName<<")";
366
0
        obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
367
0
      }
368
0
    this->ExtractBonds();
369
0
    this->ExtractCharges();
370
0
  }
371
372
  void CIFData::ExtractUnitCell()
373
0
  {
374
    // Use spacegroup to determine missing angle or length
375
0
    const int spgid= mSpaceGroup->GetId();
376
0
    if(  (mvItem.find("_cell_length_a")!=mvItem.end())
377
0
       ||(mvItem.find("_cell_length_b")!=mvItem.end())
378
0
       ||(mvItem.find("_cell_length_c")!=mvItem.end()) )
379
0
      {
380
0
        mvLatticePar.resize(6);
381
0
        for(unsigned int i=0;i<6;i++) mvLatticePar[i]=float(0);
382
0
        map<ci_string,string>::const_iterator positem;
383
0
        positem=mvItem.find("_cell_length_a");
384
0
        if(positem!=mvItem.end())
385
0
          mvLatticePar[0]=CIFNumeric2Float(positem->second);
386
0
        positem=mvItem.find("_cell_length_b");
387
0
        if(positem!=mvItem.end())
388
0
          mvLatticePar[1]=CIFNumeric2Float(positem->second);
389
0
        positem=mvItem.find("_cell_length_c");
390
0
        if(positem!=mvItem.end())
391
0
          mvLatticePar[2]=CIFNumeric2Float(positem->second);
392
0
        positem=mvItem.find("_cell_angle_alpha");
393
0
        if(positem!=mvItem.end())
394
0
          mvLatticePar[3]=CIFNumeric2Float(positem->second);
395
0
        positem=mvItem.find("_cell_angle_beta");
396
0
        if(positem!=mvItem.end())
397
0
          mvLatticePar[4]=CIFNumeric2Float(positem->second);
398
0
        positem=mvItem.find("_cell_angle_gamma");
399
0
        if(positem!=mvItem.end())
400
0
          mvLatticePar[5]=CIFNumeric2Float(positem->second);
401
0
        stringstream ss;
402
0
        ss << "Found Lattice parameters:" << mvLatticePar[0] << " , " << mvLatticePar[1] << " , " << mvLatticePar[2]
403
0
           << " , " << mvLatticePar[3] << " , " << mvLatticePar[4] << " , " << mvLatticePar[5];
404
0
        obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
405
0
        mvLatticePar[3] = static_cast<float> (mvLatticePar[3] * DEG_TO_RAD);// pi/180
406
0
        mvLatticePar[4] = static_cast<float> (mvLatticePar[4] * DEG_TO_RAD);
407
0
        mvLatticePar[5] = static_cast<float> (mvLatticePar[5] * DEG_TO_RAD);
408
409
        // Fill values depending on spacegroup, *only* when missing
410
0
        if((spgid>2)&&(spgid<=15))
411
0
        {// :TODO: monoclinic spg, depending on unique axis....
412
0
        }
413
0
        if((spgid>15)&&(spgid<=142))
414
0
        {// orthorombic & tetragonal
415
0
          if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
416
0
          if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
417
0
          if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2;
418
0
        }
419
0
        if((spgid>74)&&(spgid<=142))
420
0
        {// Tetragonal, make sure a=b if one is missing
421
0
          if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0];
422
0
          if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1];
423
0
        }
424
0
        if((spgid>142)&&(spgid<=194))
425
0
        {// trigonal/ rhomboedric...
426
0
          const string::size_type pos = mSpaceGroup->GetHallName().find('R');
427
0
          if(pos==std::string::npos)
428
0
          {//rhomboedric cell, a=b=c, alpha=beta=gamma
429
0
            float a=0;
430
0
            if(mvLatticePar[0]>a) a=mvLatticePar[0];
431
0
            if(mvLatticePar[1]>a) a=mvLatticePar[1];
432
0
            if(mvLatticePar[2]>a) a=mvLatticePar[2];
433
0
            if(mvLatticePar[0]==0) mvLatticePar[0]=a;
434
0
            if(mvLatticePar[1]==0) mvLatticePar[1]=a;
435
0
            if(mvLatticePar[2]==0) mvLatticePar[2]=a;
436
437
0
            float alpha=0;
438
0
            if(mvLatticePar[3]>alpha) alpha=mvLatticePar[3];
439
0
            if(mvLatticePar[4]>alpha) alpha=mvLatticePar[4];
440
0
            if(mvLatticePar[5]>alpha) alpha=mvLatticePar[5];
441
0
            if(mvLatticePar[3]==0) mvLatticePar[3]=alpha;
442
0
            if(mvLatticePar[4]==0) mvLatticePar[4]=alpha;
443
0
            if(mvLatticePar[5]==0) mvLatticePar[5]=alpha;
444
0
          }
445
0
          else
446
0
          {//hexagonal cell, a=b & alpha=beta=pi/2, gamma= 2*pi/3
447
0
            if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0];
448
0
            if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1];
449
0
            if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
450
0
            if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
451
0
            if(mvLatticePar[5]==0) mvLatticePar[5]=2*M_PI/3;
452
0
          }
453
0
        }
454
0
        if(spgid>194)
455
0
        {
456
0
          if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
457
0
          if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
458
0
          if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2;
459
          // In case some idiot cif only supplies one value, make sure a=b=c
460
0
          float a=0;
461
0
          if(mvLatticePar[0]>a) a=mvLatticePar[0];
462
0
          if(mvLatticePar[1]>a) a=mvLatticePar[1];
463
0
          if(mvLatticePar[2]>a) a=mvLatticePar[2];
464
0
          if(mvLatticePar[0]==0) mvLatticePar[0]=a;
465
0
          if(mvLatticePar[1]==0) mvLatticePar[1]=a;
466
0
          if(mvLatticePar[2]==0) mvLatticePar[2]=a;
467
0
        }
468
        // Handle missing values
469
0
        if(mvLatticePar[3]<1e-6)
470
0
        {
471
0
            stringstream ss;
472
0
            ss << "CIF WARNING: missing alpha value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
473
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
474
0
            mvLatticePar[3]=90*DEG_TO_RAD;
475
0
        }
476
0
        if(mvLatticePar[4]<1e-6)
477
0
        {
478
0
            stringstream ss;
479
0
            ss << "CIF WARNING: missing beta value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
480
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
481
0
            mvLatticePar[4]=90*DEG_TO_RAD;
482
0
        }
483
0
        if(mvLatticePar[5]<1e-6)
484
0
        {
485
0
            stringstream ss;
486
0
            ss << "CIF WARNING: missing gamma value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
487
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
488
0
            mvLatticePar[5]=90*DEG_TO_RAD;
489
0
        }
490
0
        if(mvLatticePar[1]<1e-6)
491
0
        {
492
0
            stringstream ss;
493
0
            ss << "CIF Error: missing b lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
494
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
495
0
        }
496
0
        if(mvLatticePar[2]<1e-6)
497
0
        {
498
0
            stringstream ss;
499
0
            ss << "CIF Error: missing c lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
500
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
501
0
        }
502
503
0
        this->CalcMatrices();
504
0
      }
505
0
      else
506
0
      {
507
0
         stringstream ss;
508
0
         ss << "CIF Error: missing a,b and c value - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
509
0
         obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
510
0
      }
511
0
  }
512
513
  void CIFData::ExtractSpacegroup()
514
0
  {
515
0
    map<ci_string,string>::const_iterator positem;
516
0
    bool found = false;
517
0
    positem=mvItem.find("_space_group_IT_number");
518
0
    if(positem!=mvItem.end())
519
0
      {
520
0
        mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
521
0
        found = true;
522
0
        stringstream ss;
523
0
        ss << "Found spacegroup IT number:" << mSpacegroupNumberIT;
524
0
        obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
525
0
      }
526
0
    else
527
0
      {
528
0
        positem=mvItem.find("_symmetry_Int_Tables_number");
529
0
        if(positem!=mvItem.end())
530
0
          {
531
0
            mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
532
0
            found = true;
533
0
            stringstream ss;
534
0
            ss << "Found spacegroup IT number (with OBSOLETE CIF #1.0 TAG):" << mSpacegroupNumberIT;
535
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
536
0
          }
537
0
        else {
538
0
          positem=mvItem.find("_symmetry_group_IT_number");
539
0
          if(positem!=mvItem.end())
540
0
          {
541
0
            mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
542
0
            found = true;
543
0
            stringstream ss;
544
0
            ss << "Found spacegroup IT number (with NON-STANDARD CIF TAG):" << mSpacegroupNumberIT;
545
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
546
0
          }
547
0
          else
548
0
            mSpacegroupNumberIT=0;
549
0
        }
550
0
      }
551
552
0
    positem=mvItem.find("_space_group_name_Hall");
553
0
    if(positem!=mvItem.end())
554
0
      {
555
0
        mSpacegroupSymbolHall=positem->second;
556
0
        found = true;
557
0
        obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol:"+mSpacegroupSymbolHall, obDebug);
558
0
      }
559
0
    else
560
0
      {
561
0
        positem=mvItem.find("_symmetry_space_group_name_Hall");
562
0
        if(positem!=mvItem.end())
563
0
          {
564
0
            mSpacegroupSymbolHall=positem->second;
565
0
            found = true;
566
0
            obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupSymbolHall, obDebug);
567
0
          }
568
0
      }
569
570
0
    positem=mvItem.find("_space_group_name_H-M_alt");
571
0
    if(positem!=mvItem.end())
572
0
      {
573
0
        mSpacegroupHermannMauguin=positem->second;
574
0
        found = true;
575
0
        obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol:"+mSpacegroupHermannMauguin, obDebug);
576
0
      }
577
0
    else
578
0
      {
579
0
        positem=mvItem.find("_symmetry_space_group_name_H-M");
580
0
        if(positem!=mvItem.end())
581
0
          {
582
0
            mSpacegroupHermannMauguin=positem->second;
583
0
            found = true;
584
0
            obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupHermannMauguin, obDebug);
585
0
          }
586
0
      }
587
    // DDL2 tag is "_space_group.IT_coordinate_system_code", converted by the cif reader to "_space_group_IT_coordinate_system_code"
588
0
    positem=mvItem.find("_space_group_IT_coordinate_system_code");
589
0
    if(positem!=mvItem.end())
590
0
      {
591
0
        obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup IT_coordinate_system_code:"+positem->second, obDebug);
592
0
        if((mSpacegroupHermannMauguin.length()>0) && (positem->second=="1" || positem->second=="2"))
593
0
        {
594
          // this is a HACK which will work as long as the HM symbols in spacegroups.txt have the ":1" or ":2" extension listed, when needed
595
0
          mSpacegroupHermannMauguin=mSpacegroupHermannMauguin+string(":")+positem->second;
596
0
        }
597
0
        else
598
0
        {
599
0
          stringstream ss;
600
0
          ss << "CIF Error: found DDL2 tag _space_group.IT_coordinate_system_code ("<<positem->second<<")"<<endl
601
0
             <<"            but could not interpret it ! Origin choice or axis may be incorrect.";
602
0
          obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
603
0
        }
604
0
      }
605
606
0
    mSpaceGroup = nullptr;
607
    // be forgiving - if spg not found, try again
608
    // Prefer Hall > HM == number, as Hall symbol is truly unique
609
0
    if (mSpacegroupSymbolHall.length() > 0) {
610
      //Make sure there are no leading spaces before Hall symbol (kludge)
611
0
      for(std::string::iterator pos=mSpacegroupSymbolHall.begin();pos!=mSpacegroupSymbolHall.end();)
612
0
      {
613
0
        if((char)(*pos)==' ')  pos=mSpacegroupSymbolHall.erase(pos);
614
0
        else ++pos;
615
0
      }
616
0
      mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupSymbolHall);
617
0
    }
618
0
    if (mSpaceGroup == nullptr && mSpacegroupHermannMauguin.length() > 0) {
619
0
      mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupHermannMauguin);
620
0
    }
621
0
    if (mSpaceGroup == nullptr && mSpacegroupNumberIT != 0) {
622
0
      mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupNumberIT);
623
0
    }
624
0
    if (mSpaceGroup == nullptr) {
625
0
      SpaceGroup *sg = new SpaceGroup();
626
0
      positem=mvItem.find("_space_group_symop_operation_xyz");
627
0
      if(positem==mvItem.end())
628
0
        positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
629
0
      if(positem!=mvItem.end())
630
0
        {
631
0
          sg->AddTransform (positem->second);
632
0
          found = true;
633
0
        }
634
0
      else {
635
0
        for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
636
0
            loop!=mvLoop.end();++loop)
637
0
          {
638
0
            map<ci_string,vector<string> >::const_iterator pos;
639
0
            unsigned i, nb;
640
0
            pos=loop->second.find("_space_group_symop_operation_xyz");
641
0
            if (pos==loop->second.end())
642
0
              pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
643
0
            if (pos!=loop->second.end())
644
0
              {
645
0
                nb=pos->second.size();
646
0
                found = true;
647
0
                for (i = 0; i < nb; i++)
648
0
                  sg->AddTransform(pos->second[i]);
649
0
                break; // found the transforms, so we have done with them
650
0
              }
651
0
          }
652
0
        if (found)
653
0
          mSpaceGroup = SpaceGroup::Find(sg);
654
0
        if (mSpaceGroup == nullptr && sg->IsValid())
655
0
          mSpaceGroup = sg;
656
0
        else
657
0
          delete sg;
658
0
      }
659
0
    }
660
0
    if (mSpaceGroup == nullptr)
661
0
    {
662
0
        stringstream ss;
663
0
        ss << "CIF Error: missing spacegroup description: defaulting to P1... (in data block:"<<mDataBlockName<<")";
664
0
        obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
665
0
        mSpaceGroup = SpaceGroup::GetSpaceGroup(1);
666
0
    }
667
    // set the space group name to Hall symbol
668
0
    mSpacegroupSymbolHall = mSpaceGroup->GetHallName();
669
0
  }
670
671
  void CIFData::ExtractName()
672
0
  {
673
0
    map<ci_string,string>::const_iterator positem;
674
0
    positem=mvItem.find("_chemical_name_systematic");
675
0
    if(positem!=mvItem.end())
676
0
      {
677
0
        mName=positem->second;
678
0
        obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
679
0
      }
680
0
    else
681
0
      {
682
0
        positem=mvItem.find("_chemical_name_mineral");
683
0
        if(positem!=mvItem.end())
684
0
          {
685
0
            mName=positem->second;
686
0
            obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
687
0
          }
688
0
        else
689
0
          {
690
0
            positem=mvItem.find("_chemical_name_structure_type");
691
0
            if(positem!=mvItem.end())
692
0
              {
693
0
                mName=positem->second;
694
0
                obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
695
0
              }
696
0
            else
697
0
              {
698
0
                positem=mvItem.find("_chemical_name_common");
699
0
                if(positem!=mvItem.end())
700
0
                  {
701
0
                    mName=positem->second;
702
0
                    obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
703
0
                  }
704
0
              }
705
0
          }
706
0
      }
707
    /// Crystal formula
708
0
    positem=mvItem.find("_chemical_formula_analytical");
709
0
    if(positem!=mvItem.end())
710
0
      {
711
0
        mFormula=positem->second;
712
0
        obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
713
0
      }
714
0
    else
715
0
      {
716
0
        positem=mvItem.find("_chemical_formula_structural");
717
0
        if(positem!=mvItem.end())
718
0
          {
719
0
            mFormula=positem->second;
720
0
            obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
721
0
          }
722
0
        else
723
0
          {
724
0
            positem=mvItem.find("_chemical_formula_iupac");
725
0
            if(positem!=mvItem.end())
726
0
              {
727
0
                mFormula=positem->second;
728
0
                obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
729
0
              }
730
0
            else
731
0
              {
732
0
                positem=mvItem.find("_chemical_formula_moiety");
733
0
                if(positem!=mvItem.end())
734
0
                  {
735
0
                    mFormula=positem->second;
736
0
                    obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
737
0
                  }
738
0
              }
739
0
          }
740
0
      }
741
0
  }
742
743
  void CIFData::ExtractAtomicPositions()
744
0
  {
745
0
    map<ci_string,string>::const_iterator positem;
746
0
    for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
747
0
        loop!=mvLoop.end();++loop)
748
0
      {
749
0
        if(mvAtom.size()>0) break;// only extract ONE list of atoms, preferably fractional coordinates
750
0
        map<ci_string,vector<string> >::const_iterator posx,posy,posz,poslabel,possymbol,posoccup;
751
0
        posx=loop->second.find("_atom_site_fract_x");
752
0
        posy=loop->second.find("_atom_site_fract_y");
753
0
        posz=loop->second.find("_atom_site_fract_z");
754
0
        unsigned int nb = 0;
755
0
        if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end()))
756
0
          {
757
0
            nb=posx->second.size();
758
0
            mvAtom.resize(nb);
759
0
            for(unsigned int i=0;i<nb;++i)
760
0
              {
761
0
                mvAtom[i].mCoordFrac.resize(3);
762
0
                mvAtom[i].mCoordFrac[0]=CIFNumeric2Float(posx->second[i]);
763
0
                mvAtom[i].mCoordFrac[1]=CIFNumeric2Float(posy->second[i]);
764
0
                mvAtom[i].mCoordFrac[2]=CIFNumeric2Float(posz->second[i]);
765
0
              }
766
0
            this->Fractional2CartesianCoord();
767
0
          }
768
0
        else
769
0
          {
770
0
            posx=loop->second.find("_atom_site_Cartn_x");
771
0
            posy=loop->second.find("_atom_site_Cartn_y");
772
0
            posz=loop->second.find("_atom_site_Cartn_z");
773
0
            if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end()))
774
0
              {
775
0
                nb=posx->second.size();
776
0
                mvAtom.resize(nb);
777
0
                for(unsigned int i=0;i<nb;++i)
778
0
                  {
779
0
                    mvAtom[i].mCoordCart.resize(3);
780
0
                    mvAtom[i].mCoordCart[0]=CIFNumeric2Float(posx->second[i]);
781
0
                    mvAtom[i].mCoordCart[1]=CIFNumeric2Float(posy->second[i]);
782
0
                    mvAtom[i].mCoordCart[2]=CIFNumeric2Float(posz->second[i]);
783
0
                  }
784
0
                this->Cartesian2FractionalCoord();
785
0
              }
786
0
          }
787
0
        if(mvAtom.size()>0)
788
0
          {// Got the atoms, get names and symbols
789
0
            possymbol=loop->second.find("_atom_site_type_symbol");
790
0
            if(possymbol!=loop->second.end())
791
0
              for(unsigned int i=0;i<nb;++i)
792
0
                mvAtom[i].mSymbol=possymbol->second[i];
793
0
            poslabel=loop->second.find("_atom_site_label");
794
0
            if(poslabel!=loop->second.end())
795
0
              for(unsigned int i=0;i<nb;++i)
796
0
                {
797
0
                  mvAtom[i].mLabel=poslabel->second[i];
798
0
                  if(possymbol==loop->second.end())
799
0
                    {// There was no symbol, use the labels to guess it
800
0
                      int nbc=0;
801
0
                      if(mvAtom[i].mLabel.size()==1)
802
0
                        if(isalpha(mvAtom[i].mLabel[0])) nbc=1;
803
0
                      if(mvAtom[i].mLabel.size()>=2)
804
0
                        {
805
0
                          if(isalpha(mvAtom[i].mLabel[0]) && isalpha(mvAtom[i].mLabel[1])) nbc=2;
806
0
                          else if(isalpha(mvAtom[i].mLabel[0])) nbc=1;
807
0
                        }
808
0
                      if(nbc>0) mvAtom[i].mSymbol=mvAtom[i].mLabel.substr(0,nbc);
809
0
                      else mvAtom[i].mSymbol="H";//Something wen wrong, no symbol !
810
0
                    }
811
0
                }
812
            // Occupancy ?
813
0
            posoccup=loop->second.find("_atom_site_occupancy");
814
0
            if(posoccup!=loop->second.end())
815
0
              for(unsigned int i=0;i<nb;++i)
816
0
                {
817
0
                  mvAtom[i].mOccupancy=CIFNumeric2Float(posoccup->second[i]);
818
0
                  if( (mvAtom[i].mOccupancy <= 0.0) || (mvAtom[i].mOccupancy > 1.0) )
819
0
                    mvAtom[i].mOccupancy = 1.0;
820
0
                }
821
            // Now be somewhat verbose
822
0
            stringstream ss;
823
0
            ss << "Found "<<nb<<" atoms."<<endl;
824
0
            for(unsigned int i=0;i<nb;++i)
825
0
              {
826
0
                ss<<mvAtom[i].mLabel<<" "<<mvAtom[i].mSymbol;
827
0
                if(mvAtom[i].mCoordFrac.size()>0)
828
0
                  {
829
0
                    ss<<" , Fractional: ";
830
0
                    for(unsigned int j=0;j<mvAtom[i].mCoordFrac.size();++j)
831
0
                      ss<<mvAtom[i].mCoordFrac[j]<<" ";
832
0
                  }
833
0
                if(mvAtom[i].mCoordCart.size()>0)
834
0
                  {
835
0
                    ss<<" , Cartesian: ";
836
0
                    for(unsigned int j=0;j<mvAtom[i].mCoordCart.size();++j)
837
0
                      ss<<mvAtom[i].mCoordCart[j]<<" ";
838
0
                  }
839
0
                ss<<" , Occupancy= "<<mvAtom[i].mOccupancy<<endl;
840
0
              }
841
0
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
842
0
          }
843
0
      }
844
0
  }
845
846
  void CIFData::ExtractBonds()
847
0
  {
848
0
    map<ci_string,string>::const_iterator positem;
849
0
    for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end();++loop)
850
0
      {
851
        //if(mvBond.size()>0) break;// Only allow one bond list
852
0
        map<ci_string,vector<string> >::const_iterator poslabel1,poslabel2,posdist;
853
0
        poslabel1=loop->second.find("_geom_bond_atom_site_label_1");
854
0
        poslabel2=loop->second.find("_geom_bond_atom_site_label_2");
855
0
        posdist=loop->second.find("_geom_bond_distance");
856
0
        if( (poslabel1!=loop->second.end()) && (poslabel2!=loop->second.end()) && (posdist!=loop->second.end()))
857
0
          {
858
0
            obErrorLog.ThrowError(__FUNCTION__, "Found _geom_bond* record...", obDebug);
859
0
            const unsigned long nb=poslabel1->second.size();
860
0
            mvBond.resize(nb);
861
0
            for(unsigned int i=0;i<nb;++i)
862
0
              {
863
0
                mvBond[i].mLabel1=poslabel1->second[i];
864
0
                mvBond[i].mLabel2=poslabel2->second[i];
865
0
                mvBond[i].mDistance=CIFNumeric2Float(posdist->second[i]);
866
0
                stringstream ss;
867
0
                ss << "  d(" << mvBond[i].mLabel1 << "-" << mvBond[i].mLabel2 << ")=" << mvBond[i].mDistance;
868
0
                obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
869
0
              }
870
0
          }
871
0
      }
872
0
  }
873
874
  void CIFData::ExtractCharges()
875
0
  {
876
0
    map<ci_string,string>::const_iterator positem;
877
878
0
    map<std::string, double> lbl2ox;
879
0
    for(map<set<ci_string>, map<ci_string, vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end(); ++loop)
880
0
    {
881
      //if(mvBond.size()>0) break;// Only allow one bond list
882
0
      map<ci_string,vector<string> >::const_iterator pos_symbol, pos_ox_number, posdist;
883
0
      pos_symbol    =loop->second.find("_atom_type_symbol");
884
0
      pos_ox_number =loop->second.find("_atom_type_oxidation_number");
885
0
      if( (pos_symbol != loop->second.end()) && (pos_ox_number != loop->second.end()) )
886
0
      {
887
0
        obErrorLog.ThrowError(__FUNCTION__, " Found _atom_type* record with oxydation number...", obDebug);
888
0
        const unsigned long nl = pos_symbol->second.size();
889
890
0
        for(unsigned int i = 0; i < nl; i++)
891
0
        {
892
0
          lbl2ox[pos_symbol->second[i]] = CIFNumeric2Float(pos_ox_number->second[i]);
893
0
          obErrorLog.ThrowError(__FUNCTION__, " has oxydation "+pos_ox_number->second[i], obDebug);
894
0
        }
895
0
      }
896
0
    }
897
898
0
    for (std::vector<CIFAtom>::iterator it = mvAtom.begin() ; it != mvAtom.end(); ++it)
899
0
    {
900
0
      string label = (*it).mLabel;
901
902
0
      if( lbl2ox.count(label) > 0 )
903
0
        (*it).mCharge = lbl2ox[label];
904
0
      else
905
0
      {
906
0
        (*it).mCharge = NOCHARGE;
907
0
        obErrorLog.ThrowError(__FUNCTION__, "Charge for label: "+label+" cannot be found.", obDebug);
908
0
      }
909
0
    }
910
0
  }
911
912
  void CIFData::CalcMatrices()
913
0
  {
914
0
    if(mvLatticePar.size()==0) return;//:@todo: throw error
915
0
    float a,b,c,alpha,beta,gamma;//direct space parameters
916
0
    float aa,bb,cc,alphaa,betaa,gammaa;//reciprocal space parameters
917
0
    float v;//volume of the unit cell
918
0
    a=mvLatticePar[0];
919
0
    b=mvLatticePar[1];
920
0
    c=mvLatticePar[2];
921
0
    alpha=mvLatticePar[3];
922
0
    beta=mvLatticePar[4];
923
0
    gamma=mvLatticePar[5];
924
925
0
    v=sqrt(1-cos(alpha)*cos(alpha)-cos(beta)*cos(beta)-cos(gamma)*cos(gamma)
926
0
           +2*cos(alpha)*cos(beta)*cos(gamma));
927
928
0
    aa=sin(alpha)/a/v;
929
0
    bb=sin(beta )/b/v;
930
0
    cc=sin(gamma)/c/v;
931
932
0
    alphaa=acos( (cos(beta )*cos(gamma)-cos(alpha))/sin(beta )/sin(gamma) );
933
0
    betaa =acos( (cos(alpha)*cos(gamma)-cos(beta ))/sin(alpha)/sin(gamma) );
934
0
    gammaa=acos( (cos(alpha)*cos(beta )-cos(gamma))/sin(alpha)/sin(beta ) );
935
936
0
    mOrthMatrix[0][0]=a;
937
0
    mOrthMatrix[0][1]=b*cos(gamma);
938
0
    mOrthMatrix[0][2]=c*cos(beta);
939
940
0
    mOrthMatrix[1][0]=0;
941
0
    mOrthMatrix[1][1]=b*sin(gamma);
942
0
    mOrthMatrix[1][2]=-c*sin(beta)*cos(alphaa);
943
944
0
    mOrthMatrix[2][0]=0;
945
0
    mOrthMatrix[2][1]=0;
946
0
    mOrthMatrix[2][2]=1/cc;
947
948
    // Invert upper triangular matrix
949
0
    float cm[3][3];
950
0
    cm[0][0]=mOrthMatrix[0][0];
951
0
    cm[0][1]=mOrthMatrix[0][1];
952
0
    cm[0][2]=mOrthMatrix[0][2];
953
954
0
    cm[1][0]=mOrthMatrix[1][0];
955
0
    cm[1][1]=mOrthMatrix[1][1];
956
0
    cm[1][2]=mOrthMatrix[1][2];
957
958
0
    cm[2][0]=mOrthMatrix[2][0];
959
0
    cm[2][1]=mOrthMatrix[2][1];
960
0
    cm[2][2]=mOrthMatrix[2][2];
961
0
    for(long i=0;i<3;i++)
962
0
      for(long j=0;j<3;j++)
963
0
        if(i==j) mOrthMatrixInvert[i][j]=1;
964
0
        else mOrthMatrixInvert[i][j]=0;
965
0
    for(long i=0;i<3;i++)
966
0
      {
967
0
        float a;
968
0
        for(long j=i-1;j>=0;j--)
969
0
          {
970
0
            a=cm[j][i]/cm[i][i];
971
0
            for(long k=0;k<3;k++) mOrthMatrixInvert[j][k] -= mOrthMatrixInvert[i][k]*a;
972
0
            for(long k=0;k<3;k++) cm[j][k] -= cm[i][k]*a;
973
0
          }
974
0
        a=cm[i][i];
975
0
        for(long k=0;k<3;k++) mOrthMatrixInvert[i][k] /= a;
976
0
        for(long k=0;k<3;k++) cm[i][k] /= a;
977
0
      }
978
0
      stringstream ss;
979
0
      ss <<"Fractional2Cartesian matrix:"<<endl
980
0
           <<mOrthMatrix[0][0]<<" "<<mOrthMatrix[0][1]<<" "<<mOrthMatrix[0][2]<<endl
981
0
           <<mOrthMatrix[1][0]<<" "<<mOrthMatrix[1][1]<<" "<<mOrthMatrix[1][2]<<endl
982
0
           <<mOrthMatrix[2][0]<<" "<<mOrthMatrix[2][1]<<" "<<mOrthMatrix[2][2]<<endl<<endl;
983
0
      ss <<"Cartesian2Fractional matrix:"<<endl
984
0
           <<mOrthMatrixInvert[0][0]<<" "<<mOrthMatrixInvert[0][1]<<" "<<mOrthMatrixInvert[0][2]<<endl
985
0
           <<mOrthMatrixInvert[1][0]<<" "<<mOrthMatrixInvert[1][1]<<" "<<mOrthMatrixInvert[1][2]<<endl
986
0
           <<mOrthMatrixInvert[2][0]<<" "<<mOrthMatrixInvert[2][1]<<" "<<mOrthMatrixInvert[2][2];
987
0
      obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
988
0
  }
989
990
  void CIFData::f2c(float &x,float &y, float &z)
991
0
  {
992
0
    const float x0=x,y0=y,z0=z;
993
0
    x=mOrthMatrix[0][0]*x0+mOrthMatrix[0][1]*y0+mOrthMatrix[0][2]*z0;
994
0
    y=mOrthMatrix[1][0]*x0+mOrthMatrix[1][1]*y0+mOrthMatrix[1][2]*z0;
995
0
    z=mOrthMatrix[2][0]*x0+mOrthMatrix[2][1]*y0+mOrthMatrix[2][2]*z0;
996
0
  }
997
998
  void CIFData::c2f(float &x,float &y, float &z)
999
0
  {
1000
0
    const float x0=x,y0=y,z0=z;
1001
0
    x=mOrthMatrixInvert[0][0]*x0+mOrthMatrixInvert[0][1]*y0+mOrthMatrixInvert[0][2]*z0;
1002
0
    y=mOrthMatrixInvert[1][0]*x0+mOrthMatrixInvert[1][1]*y0+mOrthMatrixInvert[1][2]*z0;
1003
0
    z=mOrthMatrixInvert[2][0]*x0+mOrthMatrixInvert[2][1]*y0+mOrthMatrixInvert[2][2]*z0;
1004
0
  }
1005
1006
  void CIFData::Cartesian2FractionalCoord()
1007
0
  {
1008
0
    if(mvLatticePar.size()==0) return;//:@todo: report error
1009
0
    for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos)
1010
0
      {
1011
0
        pos->mCoordFrac.resize(3);
1012
0
        pos->mCoordFrac[0]=pos->mCoordCart.at(0);
1013
0
        pos->mCoordFrac[1]=pos->mCoordCart.at(1);
1014
0
        pos->mCoordFrac[2]=pos->mCoordCart.at(2);
1015
0
        c2f(pos->mCoordFrac[0],pos->mCoordFrac[1],pos->mCoordFrac[2]);
1016
0
      }
1017
0
  }
1018
1019
  void CIFData::Fractional2CartesianCoord()
1020
0
  {
1021
0
    if(mvLatticePar.size()==0) return;//:@todo: report error
1022
0
    for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos)
1023
0
      {
1024
0
        pos->mCoordCart.resize(3);
1025
0
        pos->mCoordCart[0]=pos->mCoordFrac.at(0);
1026
0
        pos->mCoordCart[1]=pos->mCoordFrac.at(1);
1027
0
        pos->mCoordCart[2]=pos->mCoordFrac.at(2);
1028
0
        f2c(pos->mCoordCart[0],pos->mCoordCart[1],pos->mCoordCart[2]);
1029
0
      }
1030
0
  }
1031
1032
  /////
1033
1034
1035
  CIF::CIF(istream &is, const bool interpret)
1036
0
  {
1037
0
    bool found_atoms=false;
1038
0
    while(!found_atoms)
1039
0
    {
1040
      // :TODO: we don't need a vector of CIFData, since only one block is read at a time
1041
0
      mvData.clear();
1042
0
      this->Parse(is);
1043
      // Extract structure from 1 block
1044
0
      if(interpret)
1045
0
        for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
1046
0
        {
1047
0
          posd->second.ExtractAll();
1048
0
          if(posd->second.mvAtom.size()>0) found_atoms=true;
1049
0
        }
1050
0
    }
1051
0
  }
1052
1053
0
  bool iseol(const char c) { return ((c=='\n')||(c=='\r'));}
1054
1055
  /// Read one value, whether it is numeric, string or text
1056
  string CIFReadValue(istream &in,char &lastc)
1057
0
  {
1058
0
    bool vv=false;//very verbose ?
1059
0
    string value("");
1060
0
    while(!isgraph(in.peek())) in.get(lastc);
1061
0
    while(in.peek()=='#')
1062
0
      {//discard these comments for now
1063
0
        string tmp;
1064
0
        getline(in,tmp);
1065
0
        lastc='\r';
1066
0
        while(!isgraph(in.peek())) in.get(lastc);
1067
0
      }
1068
0
    if(in.peek()=='_') {
1069
0
      stringstream errorMsg;
1070
0
      errorMsg << "Warning: Trying to read a value but found a new CIF tag !";
1071
0
      obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
1072
0
      return value;
1073
0
    }
1074
0
    if(in.peek()==';')
1075
0
      {//SemiColonTextField
1076
0
        bool warning=!iseol(lastc);
1077
0
        if(warning){
1078
0
          stringstream errorMsg;
1079
0
          errorMsg << "Warning: Trying to read a SemiColonTextField but last char is not an end-of-line char !";
1080
0
          obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
1081
0
        }
1082
0
        value="";
1083
0
        in.get(lastc);
1084
0
        while(in.peek()!=';')
1085
0
          {
1086
0
            if (in.peek() == '_') {
1087
0
              stringstream errorMsg;
1088
0
              errorMsg << "Warning: Trying to read a value but found a new CIF tag !";
1089
0
              obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
1090
0
              warning = true;
1091
0
              break;
1092
0
            }
1093
0
            string tmp;
1094
0
            getline(in,tmp);
1095
0
            value+=tmp+" ";
1096
0
          }
1097
0
        if (!warning)
1098
0
          in.get(lastc);
1099
0
        if(vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug);
1100
0
        if(warning && !vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug);
1101
0
        return value;
1102
0
      }
1103
0
    if((in.peek()=='\'') || (in.peek()=='\"'))
1104
0
      {//QuotedString
1105
0
        char delim;
1106
0
        in.get(delim);
1107
0
        value="";
1108
0
        while(!((lastc==delim)&&(!isgraph(in.peek()))) )
1109
0
          {
1110
0
            in.get(lastc);
1111
0
            value+=lastc;
1112
0
          }
1113
0
        if(vv) obErrorLog.ThrowError(__FUNCTION__, "QuotedString:"+value, obDebug);
1114
0
        return value.substr(0,value.size()-1);
1115
0
      }
1116
    // If we got here, we have an ordinary value, numeric or unquoted string
1117
0
    in>>value;
1118
0
    if(vv) obErrorLog.ThrowError(__FUNCTION__, "NormalValue:"+value, obDebug);
1119
0
    return value;
1120
0
  }
1121
1122
  void CIF::Parse(istream &in)
1123
0
  {
1124
0
    bool vv=false;//very verbose ?
1125
0
    char lastc=' ';
1126
0
    string block="";// Current block data
1127
0
    while(!in.eof())
1128
0
      {
1129
0
        while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
1130
0
        if(in.peek()=='#')
1131
0
          {//Comment
1132
0
            string tmp;
1133
0
            getline(in,tmp);
1134
0
            if(block=="") mvComment.push_back(tmp);
1135
0
            else mvData[block].mvComment.push_back(tmp);
1136
0
            lastc='\r';
1137
0
            continue;
1138
0
          }
1139
0
        if(in.peek()=='_')
1140
0
          {//Tag
1141
0
            string tag,value;
1142
0
            in>>tag;
1143
            // Convert all dots to underscores to cover much of DDL2 with this DDL1 parser.
1144
0
            for (string::size_type pos = tag.find('.'); pos != string::npos; pos = tag.find('.', ++ pos))
1145
0
              tag.replace(pos, 1, 1, '_');
1146
0
            value=CIFReadValue(in,lastc);
1147
0
            mvData[block].mvItem[ci_string(tag.c_str())]=value;
1148
0
            if(vv)
1149
0
            {
1150
0
              stringstream ss;
1151
0
              ss<<"New Tag:"<<tag<<" ("<<value.size()<<"):"<<value;
1152
0
              obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1153
0
            }
1154
0
            continue;
1155
0
          }
1156
0
        if((in.peek()=='d') || (in.peek()=='D'))
1157
0
          {// Data
1158
0
            if(!mvData.empty()) return; // We want just a single data block
1159
1160
0
            string tmp;
1161
0
            in>>tmp;
1162
0
            block=tmp.substr(5);
1163
0
            if(vv)
1164
0
            {
1165
0
              stringstream ss;
1166
0
              ss<<endl<<endl<<"NEW BLOCK DATA: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ->"<<block<<endl<<endl;
1167
0
              obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1168
0
            }
1169
0
            mvData[block]=CIFData();
1170
0
            mvData[block].mDataBlockName=tmp;
1171
0
            continue;
1172
0
          }
1173
0
        if((in.peek()=='l') || (in.peek()=='L'))
1174
0
          {// loop_
1175
0
            vector<ci_string> tit;
1176
0
            string tmp;
1177
0
            in>>tmp; //should be loop_
1178
0
            if(vv) obErrorLog.ThrowError(__FUNCTION__, "LOOP : "+tmp, obDebug);
1179
0
            while(true)
1180
0
              {//read titles
1181
0
                while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
1182
0
                if(in.peek()=='#')
1183
0
                  {
1184
0
                    getline(in,tmp);
1185
0
                    if(block=="") mvComment.push_back(tmp);
1186
0
                    else mvData[block].mvComment.push_back(tmp);
1187
0
                    continue;
1188
0
                  }
1189
0
                if(in.peek()!='_')
1190
0
                  {
1191
0
                    stringstream ss;
1192
0
                    ss << "End of loop titles:"<<(char)in.peek();
1193
0
                    if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1194
0
                    break;
1195
0
                  }
1196
0
                in>>tmp;
1197
                // Convert all dots to underscores to cover much of DDL2 with this DDL1 parser.
1198
0
                for (string::size_type pos = tmp.find('.'); pos != string::npos; pos = tmp.find('.', ++ pos))
1199
0
                  tmp.replace(pos, 1, 1, '_');
1200
0
                tit.push_back(ci_string(tmp.c_str()));
1201
0
                if(vv) obErrorLog.ThrowError(__FUNCTION__, " , "+tmp, obDebug);
1202
0
              }
1203
0
            map<ci_string,vector<string> > lp;
1204
0
            while(true)
1205
0
              {
1206
0
                std::ios::pos_type pos0=in.tellg();
1207
0
                while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
1208
0
                if(in.eof()) break;
1209
0
                if(in.peek()=='_') break;
1210
0
                if(in.peek()=='#')
1211
0
                  {// Comment (in a loop ??)
1212
                    //const std::ios::pos_type pos=in.tellg();
1213
0
                    string tmp;
1214
0
                    getline(in,tmp);
1215
0
                    pos0=in.tellg();
1216
0
                    if(block=="") mvComment.push_back(tmp);
1217
0
                    else mvData[block].mvComment.push_back(tmp);
1218
0
                    lastc='\r';
1219
0
                    if(vv) obErrorLog.ThrowError(__FUNCTION__, "Comment in a loop (?):"+tmp, obDebug);
1220
                    //in.seekg(pos);
1221
0
                    break;
1222
0
                  }
1223
                //in>>tmp;
1224
0
                tmp=CIFReadValue(in,lastc);
1225
0
                if(ci_string(tmp.c_str())=="loop_")
1226
0
                  {//go back and continue
1227
0
                    in.clear();
1228
0
                    in.seekg(pos0,std::ios::beg);
1229
0
                    stringstream ss;
1230
0
                    ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg();
1231
0
                    if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1232
0
                    break;
1233
0
                  }
1234
0
                if(tmp.size()>=5)
1235
0
                  if(ci_string(tmp.substr(0,5).c_str())=="data_")
1236
0
                    {//go back and continue
1237
0
                      in.clear();
1238
0
                      in.seekg(pos0,std::ios::beg);
1239
0
                      stringstream ss;
1240
0
                      ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg();
1241
0
                      if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1242
0
                      break;
1243
0
                    }
1244
0
                for(unsigned int i=0;i<tit.size();++i)
1245
0
                  {//Read all values
1246
0
                    if(i>0) tmp=CIFReadValue(in,lastc);
1247
0
                    lp[tit[i]].push_back(tmp);
1248
0
                    stringstream ss;
1249
0
                    ss <<" LOOP VALUE    #"<<lp[tit[i]].size()<<","<<i<<" :  "<<tmp;
1250
0
                    if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1251
0
                  }
1252
0
              }
1253
            // The key to the mvLoop map is the set of column titles
1254
0
            set<ci_string> stit;
1255
0
            for(unsigned int i=0;i<tit.size();++i) stit.insert(tit[i]);
1256
0
            mvData[block].mvLoop[stit]=lp;
1257
0
            continue;
1258
0
          }
1259
        // If we get here, something went wrong ! Discard till end of line...
1260
        // It is OK if this is just a blank line though
1261
0
        string junk;
1262
0
        getline(in,junk);
1263
1264
0
        if(junk.size()>0)
1265
0
        {
1266
0
          stringstream errorMsg;
1267
0
          errorMsg << "Warning: one line could not be interpreted while reading a CIF file:"<<endl
1268
0
                   << " -> line contents:" << junk;
1269
0
          obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
1270
0
        }
1271
0
      }
1272
0
  }
1273
1274
  float CIFNumeric2Float(const string &s)
1275
0
  {
1276
0
    if((s==".") || (s=="?")) return 0.0;
1277
0
    float v;
1278
0
    const int n=sscanf(s.c_str(),"%f",&v);
1279
0
    if(n!=1) return 0.0;
1280
0
    return v;
1281
0
  }
1282
1283
  int CIFNumeric2Int(const string &s)
1284
0
  {
1285
0
    if((s==".") || (s=="?")) return 0;
1286
0
    int v;
1287
0
    const int n=sscanf(s.c_str(),"%d",&v);
1288
0
    if(n!=1) return 0;
1289
0
    return v;
1290
0
  }
1291
1292
  bool is_double(const std::string& s, double& r_double)
1293
0
  {
1294
0
    std::istringstream i(s);
1295
1296
0
    if (i >> r_double)
1297
0
      return true;
1298
1299
0
    r_double = 0.0;
1300
0
    return false;
1301
0
  }
1302
1303
1304
  //################ END CIF CLASSES######################################
1305
1306
  //Make an instance of the format class
1307
  CIFFormat theCIFFormat;
1308
1309
  // Helper function for CorrectFormatCharges
1310
  // Is this atom an oxygen in a water molecule
1311
  // We know the oxygen is connected to one ion, but check for non-hydrogens
1312
  // Returns: true if the atom is an oxygen and connected to two hydrogens and up to one other atom
1313
  bool CIFisWaterOxygen(OBAtom *atom)
1314
0
  {
1315
0
    if (atom->GetAtomicNum() != OBElements::Oxygen)
1316
0
      return false;
1317
1318
0
    int nonHydrogenCount = 0;
1319
0
    int hydrogenCount = 0;
1320
0
    FOR_NBORS_OF_ATOM(neighbor, *atom) {
1321
0
      if (neighbor->GetAtomicNum() != OBElements::Hydrogen)
1322
0
        nonHydrogenCount++;
1323
0
      else
1324
0
        hydrogenCount++;
1325
0
    }
1326
1327
0
    return (hydrogenCount == 2 && nonHydrogenCount <= 1);
1328
0
  }
1329
1330
  // Look for lone ions, and correct their formal charges
1331
  void CorrectFormalCharges(OBMol *mol)
1332
0
  {
1333
0
    if (!mol)
1334
0
      return;
1335
1336
    // First look for NR4, PR4 ions,
1337
    // or bare halides, alkali and alkaline earth metal ions
1338
0
    FOR_ATOMS_OF_MOL(atom, *mol) {
1339
1340
0
      if ((atom->GetAtomicNum() == 7 || atom->GetAtomicNum() == 15)
1341
0
          && atom->GetExplicitValence() == 4) {
1342
        // check if we should make a positive charge?
1343
        // i.e., 4 non-metal neighbors
1344
0
        bool nonMetalNeighbors = true;
1345
0
        FOR_NBORS_OF_ATOM(neighbor, &*atom) {
1346
0
          switch (neighbor->GetAtomicNum()) {
1347
0
          case 1:
1348
0
          case 5: case 6: case 7: case 8: case 9:
1349
0
          case 14: case 15: case 16: case 17:
1350
0
          case 33: case 34: case 35:
1351
0
          case 53:
1352
0
            continue; // good non-metals
1353
0
          default:
1354
0
            nonMetalNeighbors = false;
1355
0
            break; // stop looking
1356
0
          }
1357
0
        }
1358
0
        if (nonMetalNeighbors) // 4 non-metals, e.g. NH4+
1359
0
          atom->SetFormalCharge(+1);
1360
0
      }
1361
1362
      // Now look for simple atomic ions like Na, Li, F, Cl, Br...
1363
      // If we have an existing formal charge, keep going
1364
0
      if (atom->GetFormalCharge() != 0)
1365
0
        continue;
1366
1367
      // If we're connected to anything besides H2O, keep going
1368
0
      if (atom->GetExplicitDegree() != 0) {
1369
0
        int nonWaterBonds = 0;
1370
0
        FOR_NBORS_OF_ATOM(neighbor, &*atom) {
1371
0
          if (!CIFisWaterOxygen(&*neighbor)) {
1372
0
            nonWaterBonds = 1;
1373
0
            break;
1374
0
          }
1375
0
        }
1376
0
        if (nonWaterBonds)
1377
0
          continue; // look at another atom
1378
0
      }
1379
1380
0
      switch(atom->GetAtomicNum()) {
1381
0
      case 3: case 11: case 19: case 37: case 55: case 87:
1382
        // Alkali ions
1383
0
        atom->SetFormalCharge(+1);
1384
0
        break;
1385
0
      case 4: case 12: case 20: case 38: case 56: case 88:
1386
        // Alkaline earth ions
1387
0
        atom->SetFormalCharge(+2);
1388
0
        break;
1389
0
      case 9: case 17: case 35: case 53: case 85:
1390
        // Halides
1391
0
        atom->SetFormalCharge(-1);
1392
0
        break;
1393
0
      }
1394
0
    }
1395
0
  }
1396
1397
  /////////////////////////////////////////////////////////////////
1398
  bool CIFFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
1399
0
  {
1400
    // If installed, use the mmCIF parser to read CIF
1401
0
    OBFormat *obformat = OBFormat::FindType("mmcif");
1402
0
    if (obformat) { return obformat->ReadMolecule(pOb, pConv); }
1403
0
    obErrorLog.ThrowError(__FUNCTION__, "mmCIF parser not found. Using CIF parser.", obDebug);
1404
1405
0
    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
1406
0
    if (pmol == nullptr)
1407
0
      return false;
1408
1409
0
    CIF cif(*pConv->GetInStream(),true);
1410
    // Loop on all data blocks until we find one structure :@todo: handle multiple structures
1411
0
    for(map<string,CIFData>::iterator pos=cif.mvData.begin();pos!=cif.mvData.end();++pos)
1412
0
      if(pos->second.mvAtom.size()>0)
1413
0
        {
1414
0
          pmol->BeginModify();
1415
0
          if(pos->second.mvLatticePar.size()==6)
1416
0
            {// We have one unit cell
1417
0
              string spg=pos->second.mSpacegroupSymbolHall;
1418
0
              if(spg=="") spg=pos->second.mSpacegroupHermannMauguin;
1419
0
              if(spg=="") spg=pos->second.mSpacegroupNumberIT;
1420
0
              if(spg=="") spg="P1";
1421
0
              OBUnitCell *pCell=new OBUnitCell;
1422
0
              pCell->SetOrigin(fileformatInput);
1423
0
              pCell->SetData(pos->second.mvLatticePar[0],
1424
0
                             pos->second.mvLatticePar[1],
1425
0
                             pos->second.mvLatticePar[2],
1426
0
                             pos->second.mvLatticePar[3]/DEG_TO_RAD,
1427
0
                             pos->second.mvLatticePar[4]/DEG_TO_RAD,
1428
0
                             pos->second.mvLatticePar[5]/DEG_TO_RAD);
1429
0
              pCell->SetSpaceGroup(spg);
1430
0
              pCell->SetSpaceGroup(pos->second.mSpaceGroup);
1431
0
              pmol->SetData(pCell);
1432
0
            }
1433
0
          if(pos->second.mName!="") pmol->SetTitle(pos->second.mName);
1434
0
          else
1435
0
            if(pos->second.mFormula!="") pmol->SetTitle(pos->second.mFormula);
1436
0
            else pmol->SetTitle(pConv->GetTitle());
1437
1438
0
          if(pos->second.mFormula!="") pmol->SetFormula(pos->second.mFormula);
1439
1440
          // Keep a map linking the cif atom label to the obatom*, for bond interpretation later
1441
0
          std::map<std::string,OBAtom *> vLabelOBatom;
1442
1443
0
          const unsigned int nbatoms=pos->second.mvAtom.size();
1444
0
          pmol->ReserveAtoms(nbatoms);
1445
0
          for(vector<CIFData::CIFAtom>::const_iterator posat=pos->second.mvAtom.begin();posat!=pos->second.mvAtom.end();++posat)
1446
0
            {
1447
              // Problem: posat->mSymbol is not guaranteed to actually be a symbol
1448
              // see http://www.iucr.org/iucr-top/cif/cifdic_html/1/cif_core.dic/Iatom_type_symbol.html
1449
              // Try to strip the string to have a better chance to have a valid symbol
1450
              // This is not guaranteed to work still, as the CIF standard allows about any string...
1451
0
              string tmpSymbol=posat->mSymbol;
1452
0
              unsigned int nbc=0;
1453
0
              if((tmpSymbol.size()==1) && isalpha(tmpSymbol[0])) nbc=1;
1454
0
              else if(tmpSymbol.size()>=2)
1455
0
                {
1456
0
                  if(isalpha(tmpSymbol[0]) && isalpha(tmpSymbol[1])) nbc=2;
1457
0
                  else if(isalpha(tmpSymbol[0])) nbc=1;
1458
0
                }
1459
1460
0
              OBAtom *atom  = pmol->NewAtom();
1461
1462
0
              vLabelOBatom.insert(make_pair(posat->mLabel,atom));
1463
1464
0
              if(tmpSymbol.size()>nbc)
1465
0
                {// Try to find a formal charge in the symbol
1466
0
                  int charge=0;
1467
0
                  int sign=0;
1468
0
                  for(unsigned int i=nbc;i<tmpSymbol.size();++i)
1469
0
                    {// Use first number found as formal charge
1470
0
                      if(isdigit(tmpSymbol[i]) && (charge==0)) charge=atoi(tmpSymbol.substr(i,1).c_str());
1471
0
                      if('-'==tmpSymbol[i]) sign-=1;
1472
0
                      if('+'==tmpSymbol[i]) sign+=1;
1473
0
                    }
1474
0
                  if(0!=sign) // no sign, no charge
1475
0
                    {
1476
0
                      if(charge==0) charge=1;
1477
0
                      stringstream ss;
1478
0
                      ss << tmpSymbol<<" / symbol="<<tmpSymbol.substr(0,nbc)<<" charge= "<<sign*charge;
1479
0
                      obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1480
0
                      atom->SetFormalCharge(sign*charge);
1481
0
                    }
1482
0
                }
1483
1484
0
              if(nbc>0) tmpSymbol=tmpSymbol.substr(0,nbc);
1485
0
              else tmpSymbol="C";//Something went wrong, no symbol ! Default to C ??
1486
1487
0
              int atomicNum = OBElements::GetAtomicNum(tmpSymbol.c_str());
1488
              // Test for some oxygens with subscripts
1489
0
              if (atomicNum == 0 && tmpSymbol[0] == 'O') {
1490
0
                atomicNum = 8; // e.g. Ob, OH, etc.
1491
0
              }
1492
1493
0
              atom->SetAtomicNum(atomicNum); //set atomic number, or '0' if the atom type is not recognized
1494
0
              atom->SetType(tmpSymbol); //set atomic number, or '0' if the atom type is not recognized
1495
0
              atom->SetVector(posat->mCoordCart[0],posat->mCoordCart[1],posat->mCoordCart[2]);
1496
0
              if(posat->mLabel.size()>0)
1497
0
              {
1498
0
                OBPairData *label = new OBPairData;
1499
0
                label->SetAttribute("_atom_site_label");
1500
0
                label->SetValue(posat->mLabel);
1501
0
                label->SetOrigin(fileformatInput);
1502
0
                atom->SetData(label);
1503
0
              }
1504
1505
0
              OBPairFloatingPoint *occup_data = new OBPairFloatingPoint;
1506
0
              occup_data->SetAttribute("_atom_site_occupancy");
1507
0
              occup_data->SetValue(posat->mOccupancy);
1508
0
              occup_data->SetOrigin(fileformatInput);
1509
0
              atom->SetData(occup_data);
1510
1511
0
              if( posat->mCharge != NOCHARGE )
1512
0
              {
1513
0
                OBPairFloatingPoint *charge_data = new OBPairFloatingPoint;
1514
0
                charge_data->SetAttribute("input_charge");
1515
0
                charge_data->SetValue(posat->mCharge);
1516
0
                charge_data->SetOrigin(fileformatInput);
1517
0
                atom->SetData(charge_data);
1518
0
              }
1519
0
            }
1520
0
          if (!pConv->IsOption("b",OBConversion::INOPTIONS))
1521
0
            pmol->ConnectTheDots();
1522
0
          if (pConv->IsOption("B",OBConversion::INOPTIONS))
1523
0
            {
1524
0
              for(vector<CIFData::CIFBond>::const_iterator posbond=pos->second.mvBond.begin();posbond!=pos->second.mvBond.end();++posbond)
1525
0
                {// Add bonds present in the cif and not detected by ConnectTheDots()
1526
0
                  std::map<std::string,OBAtom *>::iterator posat1,posat2;
1527
0
                  posat1=vLabelOBatom.find(posbond->mLabel1);
1528
0
                  posat2=vLabelOBatom.find(posbond->mLabel2);
1529
0
                  if(posat1!=vLabelOBatom.end() && posat2!=vLabelOBatom.end())
1530
0
                    {
1531
0
                      stringstream ss;
1532
0
                      ss << "  Adding cif bond ? "<<posat1->first<<"-"<<posat2->first;
1533
0
                      obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
1534
0
                      if (pmol->GetBond(posat1->second, posat2->second) == nullptr)
1535
0
                        {
1536
0
                           obErrorLog.ThrowError(__FUNCTION__, "  :Bond added !", obDebug);
1537
0
                           OBBond * bond=pmol->NewBond();
1538
0
                           bond->SetBegin(posat1->second);
1539
0
                           bond->SetEnd(posat2->second);
1540
0
                           bond->SetBondOrder(1);
1541
0
                           bond->SetLength(double(posbond->mDistance));
1542
0
                        }
1543
0
                       else obErrorLog.ThrowError(__FUNCTION__, "  :Bond already present.. ", obDebug);
1544
0
                    }
1545
0
                }
1546
0
            }
1547
0
          if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
1548
0
            pmol->PerceiveBondOrders();
1549
0
          pmol->EndModify();
1550
0
          pmol->SetAutomaticFormalCharge(false); // we should have set formal charges
1551
0
          CorrectFormalCharges(pmol); // Look for lone Na -> Na+, etc.
1552
0
          return true;
1553
0
        }
1554
1555
    // If we got here, no structure was found
1556
0
    obErrorLog.ThrowError(__FUNCTION__, "Problems reading a CIF file: no structure found !" , obWarning);
1557
0
    return(false);
1558
0
  }
1559
1560
  ////////////////////////////////////////////////////////////////
1561
1562
  bool CIFFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
1563
0
  {
1564
0
    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
1565
0
    if (pmol == nullptr)
1566
0
      return false;
1567
0
    ostream &ofs = *pConv->GetOutStream();
1568
1569
    // default is false - leave coordinates as they are
1570
0
    bool wrapFractional = pConv->IsOption("w", OBConversion::OUTOPTIONS);
1571
1572
0
    char buffer[BUFF_SIZE];
1573
1574
0
    ofs <<"# CIF file generated by openbabel "<<BABEL_VERSION<<", see https://openbabel.org"<<endl;
1575
1576
0
    ofs << "data_I"<<endl;
1577
    // Use pmol->GetTitle() as chemical name, though it will probably be the file name
1578
0
    ofs <<"_chemical_name_common '"<<pmol->GetTitle()<<"'"<<endl;
1579
    // Print Unit cell if we have it
1580
0
    OBUnitCell *pUC = nullptr;
1581
0
    if (pmol->HasData(OBGenericDataType::UnitCell))
1582
0
      {
1583
0
        pUC = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell);
1584
0
        ofs << "_cell_length_a " << pUC->GetA() << endl
1585
0
            << "_cell_length_b " << pUC->GetB() << endl
1586
0
            << "_cell_length_c " << pUC->GetC() << endl
1587
0
            << "_cell_angle_alpha " << pUC->GetAlpha() << endl
1588
0
            << "_cell_angle_beta "  << pUC->GetBeta() << endl
1589
0
            << "_cell_angle_gamma " << pUC->GetGamma() << endl;
1590
        // Save the space group if known
1591
0
        const SpaceGroup* pSG = pUC->GetSpaceGroup();
1592
0
        if (pSG != nullptr)
1593
0
          {
1594
            // Do we have an extended HM symbol, with origin choice as ":1" or ":2" ? If so, remove it.
1595
0
            size_t n=pSG->GetHMName().find(":");
1596
0
            if(n==string::npos)
1597
0
              ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
1598
0
            else
1599
0
              ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName().substr(0,n) << "'" << endl;
1600
0
            ofs << "_space_group_name_Hall '" << pSG->GetHallName() << "'" << endl;
1601
0
            ofs << "loop_" <<endl
1602
0
                << "    _symmetry_equiv_pos_as_xyz" << endl;
1603
0
            transform3dIterator ti;
1604
0
            const transform3d *t = pSG->BeginTransform(ti);
1605
0
            while(t)
1606
0
              {
1607
0
                ofs << "    " << t->DescribeAsString() << endl;
1608
0
                t = pSG->NextTransform(ti);
1609
0
              }
1610
0
          }
1611
0
      }
1612
1613
0
    ofs << "loop_"                      << endl
1614
0
        << "    _atom_site_label"       << endl
1615
0
        << "    _atom_site_type_symbol" << endl
1616
0
        << "    _atom_site_fract_x"     << endl
1617
0
        << "    _atom_site_fract_y"     << endl
1618
0
        << "    _atom_site_fract_z"     << endl
1619
0
        << "    _atom_site_occupancy"   << endl;
1620
0
    std::map<OBAtom*,std::string> label_table;
1621
0
    unsigned int i = 0;
1622
0
    FOR_ATOMS_OF_MOL(atom, *pmol)
1623
0
      {
1624
0
         double X, Y, Z; //atom coordinates
1625
0
         vector3 v = atom->GetVector();
1626
0
         if (pUC != nullptr) {
1627
0
           v = pUC->CartesianToFractional(v);
1628
0
           if (wrapFractional)
1629
0
            v = pUC->WrapFractionalCoordinate(v);
1630
0
         }
1631
0
         X = v.x();
1632
0
         Y = v.y();
1633
0
         Z = v.z();
1634
0
         string label_str;
1635
0
         double occup;
1636
1637
0
         if (atom->HasData("_atom_site_occupancy"))
1638
0
           {
1639
0
             occup = (dynamic_cast<OBPairFloatingPoint *> (atom->GetData("_atom_site_occupancy")))->GetGenericValue();
1640
0
           }
1641
0
         else occup = 1.0;
1642
1643
0
         if (atom->HasData("_atom_site_label"))
1644
0
           {
1645
0
             OBPairData *label = dynamic_cast<OBPairData *> (atom->GetData("_atom_site_label"));
1646
0
             label_str = label->GetValue().c_str();
1647
0
           }
1648
0
         else
1649
0
           {
1650
0
             label_str = OBElements::GetSymbol(atom->GetAtomicNum()) + to_string(i);
1651
0
             i++;
1652
0
           }
1653
         // Save the existing or generated label for optional bonding
1654
0
         label_table[&*atom] = label_str;
1655
1656
0
         snprintf(buffer, BUFF_SIZE, "    %-8s %-5s %10.5f %10.5f %10.5f %8.3f\n",
1657
0
                  label_str.c_str(), OBElements::GetSymbol(atom->GetAtomicNum()),
1658
0
                  X, Y, Z, occup);
1659
1660
0
         ofs << buffer;
1661
0
      }
1662
1663
0
    if (pConv->IsOption("g", OBConversion::OUTOPTIONS))
1664
0
      {
1665
0
        if (pmol->NumBonds() > 0) {
1666
0
            obErrorLog.ThrowError(__FUNCTION__, "Writing bonds to output CIF", obDebug);
1667
0
            ofs << "loop_"                            << endl
1668
0
                << "    _geom_bond_atom_site_label_1" << endl
1669
0
                << "    _geom_bond_atom_site_label_2" << endl
1670
0
                << "    _geom_bond_distance"          << endl
1671
0
                << "    _geom_bond_site_symmetry_2"   << endl
1672
0
                << "    _ccdc_geom_bond_type"         << endl;
1673
0
        } else {
1674
0
            obErrorLog.ThrowError(__FUNCTION__, "No bonds defined in molecule for CIF export", obDebug);
1675
0
        }
1676
1677
0
        FOR_BONDS_OF_MOL(bond, *pmol)
1678
0
        {
1679
0
          std::string label_1 = label_table[bond->GetBeginAtom()];
1680
0
          std::string label_2 = label_table[bond->GetEndAtom()];
1681
1682
0
          std::string sym_key;
1683
0
          int symmetry_num = 555;
1684
0
          if (bond->IsPeriodic()) {
1685
0
              OBUnitCell *box = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell);
1686
0
              vector3 begin, end_orig, end_expected, uc_direction;
1687
              // Use consistent coordinates with the X Y Z written in the _atom_site_* loop earlier
1688
0
              begin = box->CartesianToFractional(bond->GetBeginAtom()->GetVector());
1689
0
              begin = box->WrapFractionalCoordinate(begin);
1690
0
              end_orig = box->CartesianToFractional(bond->GetEndAtom()->GetVector());
1691
0
              end_orig = box->WrapFractionalCoordinate(end_orig);
1692
0
              end_expected = box->UnwrapFractionalNear(end_orig, begin);
1693
1694
              // To get the signs right, consider the example {0, 0.7}.  We want -1 as the periodic direction.
1695
              // TODO: Think about edge cases, particularly atoms on the border of the unit cell.
1696
0
              uc_direction = end_expected - end_orig;
1697
1698
0
              std::vector<int> uc;
1699
0
              for (int i = 0; i < 3; ++i) {
1700
0
                  double raw_cell = uc_direction[i];
1701
0
                  uc.push_back(static_cast<int>(lrint(raw_cell)));
1702
0
              }
1703
0
              symmetry_num += 100*uc[0] + 10*uc[1] + 1*uc[2];  // Unit cell directionality vs. 555, per CIF spec
1704
0
          }
1705
0
          if (symmetry_num == 555)
1706
0
            {
1707
0
              sym_key = ".";
1708
0
            }
1709
0
          else
1710
0
            {
1711
0
              stringstream ss;
1712
0
              ss << "1_" << symmetry_num;
1713
0
              sym_key = ss.str();
1714
0
            }
1715
1716
0
          std::string bond_type;
1717
0
          int bond_order = bond->GetBondOrder();
1718
0
          switch (bond_order)
1719
0
          {
1720
0
            case 1:
1721
0
              bond_type = "S";
1722
0
              break;
1723
0
            case 2:
1724
0
              bond_type = "D";
1725
0
              break;
1726
0
            case 3:
1727
0
              bond_type = "T";
1728
0
              break;
1729
0
            case 5:  // FIXME: this will be different in upstream code
1730
0
              bond_type = "A";  // aromatic, per OBBond::_order
1731
0
              break;
1732
0
            default:
1733
0
              stringstream ss;
1734
0
              ss << "Unexpected bond order " << bond_order
1735
0
                 << " for bond" << label_1 << "-" << label_2 << std::endl
1736
0
                 << "Defaulting to single bond.";
1737
0
              obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
1738
0
              bond_type = "S";
1739
0
          }
1740
1741
1742
          //printf("%p: %s\n", &*atom, label_table[&*atom].c_str());
1743
0
          snprintf(buffer, BUFF_SIZE, "    %-7s%-7s%10.5f%7s%4s\n",
1744
0
                   label_1.c_str(), label_2.c_str(),
1745
0
                   bond->GetLength(), sym_key.c_str(),
1746
0
                   bond_type.c_str());
1747
1748
0
          ofs << buffer;
1749
0
        }
1750
0
      }
1751
0
    return true;
1752
0
  }//WriteMolecule
1753
} //namespace OpenBabel