/src/openbabel/src/formats/cifformat.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | cifformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol. |
3 | | |
4 | | Copyright (C) 2006 Vincent Favre-Nicolin |
5 | | |
6 | | This file is part of the Open Babel project. |
7 | | For more information, see <http://openbabel.org/> |
8 | | |
9 | | This program is free software; you can redistribute it and/or modify |
10 | | it under the terms of the GNU General Public License as published by |
11 | | the Free Software Foundation version 2 of the License. |
12 | | |
13 | | This program is distributed in the hope that it will be useful, |
14 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | GNU General Public License for more details. |
17 | | ***********************************************************************/ |
18 | | |
19 | | #ifndef M_PI |
20 | | #define M_PI 3.14159265358979323846 |
21 | | #endif |
22 | | |
23 | | #include <openbabel/babelconfig.h> |
24 | | #include <openbabel/obmolecformat.h> |
25 | | #include <openbabel/mol.h> |
26 | | #include <openbabel/atom.h> |
27 | | #include <openbabel/elements.h> |
28 | | #include <openbabel/bond.h> |
29 | | #include <openbabel/obiter.h> |
30 | | #include <openbabel/generic.h> |
31 | | #include <cstdlib> |
32 | | |
33 | | #include <openbabel/math/spacegroup.h> |
34 | | |
35 | | #include <sstream> |
36 | | #include <vector> |
37 | | #include <list> |
38 | | #include <map> |
39 | | #include <set> |
40 | | |
41 | 0 | #define NOCHARGE FLT_MAX |
42 | | |
43 | | #ifdef _MSC_VER |
44 | | #pragma warning( disable : 4503 ) |
45 | | // The decorated name was longer than the compiler limit (4096), and was truncated. |
46 | | // This is due to the use of templates specialized on templates repeatedly. |
47 | | // The correctness of the program, however, is unaffected by the truncated name, |
48 | | // but if you get link time errors on a truncated symbol, it will be more difficult |
49 | | // to determine the type of the symbol in the error. Debugging will also be more difficult; |
50 | | // the debugger will also have difficultly mapping symbol name to type name. |
51 | | #endif |
52 | | |
53 | | using namespace std; |
54 | | |
55 | | namespace OpenBabel |
56 | | { |
57 | | class CIFFormat : public OBMoleculeFormat |
58 | | { |
59 | | public: |
60 | | //Register this format type ID |
61 | | CIFFormat() |
62 | 6 | { |
63 | 6 | RegisterFormat("cif", "chemical/x-cif"); |
64 | 6 | } |
65 | | |
66 | | const char* Description() override // required |
67 | 0 | { |
68 | 0 | return |
69 | 0 | "Crystallographic Information File\n" |
70 | 0 | "The CIF file format is the standard interchange format for small-molecule crystal structures\n\n" |
71 | 0 | "Fractional coordinates are converted to cartesian ones using the following convention:\n\n" |
72 | |
|
73 | 0 | "- The x axis is parallel to a\n" |
74 | 0 | "- The y axis is in the (a,b) plane\n" |
75 | 0 | "- The z axis is along c*\n\n" |
76 | |
|
77 | 0 | "Ref: Int. Tables for Crystallography (2006), vol. B, sec 3.3.1.1.1\n" |
78 | 0 | " (the matrix used is the 2nd form listed)\n\n" |
79 | |
|
80 | 0 | "Read Options e.g. -ab:\n" |
81 | 0 | " s Output single bonds only\n" |
82 | 0 | " b Disable bonding entirely\n" |
83 | 0 | " B Use bonds listed in CIF file from _geom_bond_etc records (overrides option b)\n\n" |
84 | |
|
85 | 0 | "Write Options e.g. -xg:\n" |
86 | 0 | " w Wrap atomic coordinates to unit cell (default = off)\n" |
87 | 0 | " g Write bonds using _geom_bond_etc fields \n\n"; |
88 | 0 | } |
89 | | |
90 | | const char* SpecificationURL() override |
91 | 0 | { return "http://www.iucr.org/iucr-top/cif/spec/"; } // optional |
92 | | |
93 | | //*** This section identical for most OBMol conversions *** |
94 | | //////////////////////////////////////////////////// |
95 | | /// The "API" interface functions |
96 | | bool ReadMolecule(OBBase* pOb, OBConversion* pConv) override; |
97 | | bool WriteMolecule(OBBase* pOb, OBConversion* pConv) override; |
98 | | }; |
99 | | //############################## Case-insensituve string#################################################### |
100 | | // :@todo: This duplicates normal case-insensitive string comparison in OpenBabel |
101 | | /// Case-insensitive string class |
102 | | /// From: Guru of the Week #29 |
103 | | /// e.g.: http://gcc.gnu.org/onlinedocs/libstdc++/21_strings/gotw29a.txt |
104 | | /// |
105 | | /// Public domain |
106 | | struct ci_char_traits : public std::char_traits<char> |
107 | | { |
108 | | static bool eq( char c1, char c2 ); |
109 | | |
110 | | static bool ne( char c1, char c2 ); |
111 | | |
112 | | static bool lt( char c1, char c2 ); |
113 | | |
114 | | static int compare(const char* s1,const char* s2,size_t n ); |
115 | | |
116 | | static const char* find( const char* s, int n, char a ); |
117 | | }; |
118 | | |
119 | | typedef std::basic_string<char, ci_char_traits> ci_string; |
120 | | int strnicmp(const char *s1, const char *s2, int len) |
121 | 0 | { |
122 | 0 | unsigned char c1, c2; |
123 | 0 | while (len) |
124 | 0 | { |
125 | 0 | c1 = *s1; c2 = *s2; |
126 | 0 | s1++; s2++; |
127 | 0 | if (!c1) return c2 ? -1 : 0; |
128 | 0 | if (!c2) return 1; |
129 | 0 | if (c1 != c2) |
130 | 0 | { |
131 | 0 | c1 = tolower(c1); |
132 | 0 | c2 = tolower(c2); |
133 | 0 | if (c1 != c2) return c1 < c2 ? -1 : 1; |
134 | 0 | } |
135 | 0 | len--; |
136 | 0 | } |
137 | 0 | return 0; |
138 | 0 | } |
139 | | |
140 | | bool ci_char_traits::eq( char c1, char c2 ) |
141 | 0 | {return tolower(c1) == tolower(c2);} |
142 | | |
143 | | bool ci_char_traits::ne( char c1, char c2 ) |
144 | 0 | {return tolower(c1) != tolower(c2);} |
145 | | |
146 | | bool ci_char_traits::lt( char c1, char c2 ) |
147 | 0 | {return tolower(c1) < tolower(c2);} |
148 | | |
149 | | int ci_char_traits::compare(const char* s1,const char* s2,size_t n ) |
150 | 0 | {return strnicmp( s1, s2, n );} |
151 | | |
152 | | const char* ci_char_traits::find( const char* s, int n, char a ) |
153 | 0 | { |
154 | 0 | while( n-- > 0 && tolower(*s) != tolower(a) ) ++s; |
155 | 0 | return s; |
156 | 0 | } |
157 | | //############################## CIF CLASSES headers#################################################### |
158 | | /** The CIFData class holds all the information from a \e single data_ block from a cif file. |
159 | | * |
160 | | * It is a placeholder for all comments, item and loop data, as raw strings copied from |
161 | | * a cif file. |
162 | | * |
163 | | * It is also used to interpret this data to extract parts of the cif data, i.e. |
164 | | * only part of the core cif dictionnary are recognized. CIF tags currently recognized |
165 | | * include ("tag1 > tag2" means tag1 is preferred to tag2 when extracting the info, only one is reported): |
166 | | * - crystal name: _chemical_name_systematic > _chemical_name_mineral > _chemical_name_structure_type > _chemical_name_common |
167 | | * - crystal formula: _chemical_formula_analytical > _chemical_formula_structural > _chemical_formula_iupac > _chemical_formula_moiety |
168 | | * - unit cell: _cell_length_{a,b,c} ; _cell_angle_{alpha,beta,gamma} |
169 | | * - spacegroup number: _space_group_IT_number > _symmetry_Int_Tables_number |
170 | | * - spacegroup Hall symbol: _space_group_name_Hall > _symmetry_space_group_name_Hall |
171 | | * - spacegroup Hermann-Mauguin symbol:_space_group_name_H-M_alt > _symmetry_space_group_name_H-M |
172 | | * - atom coordinates: _atom_site_fract_{x} ; _atom_site_Cartn_{x,y,z} |
173 | | * - atom occupancy: _atom_site_occupancy |
174 | | * - atom label & symbol: _atom_site_type_symbol ; _atom_site_label |
175 | | * |
176 | | * Cartesian coordinates are stored in Angstroems, angles in radians. |
177 | | * |
178 | | * If another data field is needed, it is possible to directly access the string data |
179 | | * (CIFData::mvComment , CIFData::mvItem and CIFData::mvLoop) to search for the correct tags. |
180 | | */ |
181 | | class CIFData |
182 | | { |
183 | | public: |
184 | | CIFData(); |
185 | | |
186 | | /// Extract lattice parameters, spacegroup (symbol or number), atomic positions, |
187 | | /// chemical name and formula if available. |
188 | | /// All other data is ignored |
189 | | void ExtractAll(); |
190 | | /// Extract name & formula for the crystal |
191 | | void ExtractName(); |
192 | | /// Extract unit cell |
193 | | void ExtractUnitCell(); |
194 | | /// Extract spacegroup number or symbol |
195 | | void ExtractSpacegroup(); |
196 | | /// Extract all atomic positions. Will generate cartesian from fractional |
197 | | /// coordinates or vice-versa if only cartesian coordinates are available. |
198 | | void ExtractAtomicPositions(); |
199 | | /// Extract listed bond distances, from _geom_bond_* loops |
200 | | void ExtractBonds(); |
201 | | //// Extract Charges information from cif file and assign it to atoms |
202 | | void ExtractCharges(); |
203 | | /// Generate fractional coordinates from cartesian ones for all atoms |
204 | | /// CIFData::CalcMatrices() must be called first |
205 | | void Cartesian2FractionalCoord(); |
206 | | /// Generate cartesian coordinates from fractional ones for all atoms |
207 | | /// CIFData::CalcMatrices() must be called first |
208 | | void Fractional2CartesianCoord(); |
209 | | /// Convert from fractional to cartesian coordinates |
210 | | /// CIFData::CalcMatrices() must be called first |
211 | | void f2c(float &x,float &y, float &z); |
212 | | /// Convert from cartesia to fractional coordinates |
213 | | /// CIFData::CalcMatrices() must be called first |
214 | | void c2f(float &x,float &y, float &z); |
215 | | /// Calculate real space transformation matrices |
216 | | /// requires unit cell parameters |
217 | | void CalcMatrices(); |
218 | | /// Comments from CIF file, in the order they were read |
219 | | std::list<std::string> mvComment; |
220 | | /// Individual CIF items |
221 | | std::map<ci_string,std::string> mvItem; |
222 | | /// CIF Loop data |
223 | | std::map<std::set<ci_string>,std::map<ci_string,std::vector<std::string> > > mvLoop; |
224 | | /// Lattice parameters, in ansgtroem and degrees - vector size is 0 if no |
225 | | /// parameters have been obtained yet. |
226 | | std::vector<float> mvLatticePar; |
227 | | /// Spacegroup number from International Tables (_space_group_IT_number), or -1. |
228 | | unsigned int mSpacegroupNumberIT; |
229 | | /// Spacegroup Hall symbol (or empty string) (_space_group_name_Hall) |
230 | | std::string mSpacegroupSymbolHall; |
231 | | /// Spacegroup Hermann-Mauguin symbol (or empty string) (_space_group_name_H-M_alt) |
232 | | std::string mSpacegroupHermannMauguin; |
233 | | /// Crystal name. Or empty string if none is available. |
234 | | std::string mName; |
235 | | /// Formula. Or empty string if none is available. |
236 | | std::string mFormula; |
237 | | /// Atom record |
238 | | struct CIFAtom |
239 | | { |
240 | | CIFAtom(); |
241 | | /// Label of the atom, or empty string (_atom_site_label). |
242 | | std::string mLabel; |
243 | | /// Symbol of the atom, or empty string (_atom_type_symbol or _atom_site_type_symbol). |
244 | | std::string mSymbol; |
245 | | /// Fractionnal coordinates (_atom_site_fract_{x,y,z}) or empty vector. |
246 | | std::vector<float> mCoordFrac; |
247 | | /// Cartesian coordinates in Angstroem (_atom_site_Cartn_{x,y,z}) or empty vector. |
248 | | /// Transformation to fractionnal coordinates currently assumes |
249 | | /// "a parallel to x; b in the plane of y and z" (see _atom_sites_Cartn_transform_axes) |
250 | | std::vector<float> mCoordCart; |
251 | | /// Site occupancy, or -1 |
252 | | float mOccupancy; |
253 | | //charge from oxydation |
254 | | float mCharge; |
255 | | }; |
256 | | /// Atoms, if any are found |
257 | | std::vector<CIFAtom> mvAtom; |
258 | | /// Bond distance record |
259 | | struct CIFBond |
260 | | { |
261 | | /// Label of the two bonded atoms |
262 | | std::string mLabel1,mLabel2; |
263 | | /// distance |
264 | | float mDistance; |
265 | | }; |
266 | | /// Atoms, if any are found |
267 | | std::vector<CIFBond> mvBond; |
268 | | /// Fractionnal2Cartesian matrix |
269 | | float mOrthMatrix[3][3]; |
270 | | /// Cartesian2Fractionnal matrix |
271 | | float mOrthMatrixInvert[3][3]; |
272 | | const SpaceGroup *mSpaceGroup; |
273 | | /// Name for the CIF data block |
274 | | std::string mDataBlockName; |
275 | | }; |
276 | | /** Main CIF class - parses the stream and separates data blocks, comments, items, loops. |
277 | | * All values are stored as string, and Each CIF block is stored in a separate CIFData object. |
278 | | * No interpretaion is made here - this must be done from all CIFData objects. |
279 | | */ |
280 | | class CIF |
281 | | { |
282 | | public: |
283 | | /// Creates the CIF object from a stream |
284 | | /// |
285 | | /// \param interpret: if true, interpret all data blocks. See CIFData::ExtractAll() |
286 | | CIF(std::istream &in, const bool interpret=true); |
287 | | //private: |
288 | | /// Separate the file in data blocks and parse them to sort tags, loops and comments. |
289 | | /// All is stored in the original strings. |
290 | | /// |
291 | | /// Returns the name of the next data block |
292 | | void Parse(std::istream &in); |
293 | | /// The data blocks, after parsing. The key is the name of the data block |
294 | | std::map<std::string,CIFData> mvData; |
295 | | /// Global comments, outside and data block |
296 | | std::list<std::string> mvComment; |
297 | | }; |
298 | | /// Convert one CIF value to a floating-point value |
299 | | /// Return 0 if no value can be converted (e.g. if '.' or '?' is encountered) |
300 | | float CIFNumeric2Float(const std::string &s); |
301 | | /// Convert one CIF value to a floating-point value |
302 | | /// Return 0 if no value can be converted (e.g. if '.' or '?' is encountered) |
303 | | int CIFNumeric2Int(const std::string &s); |
304 | | |
305 | | template <typename T> string to_string(T pNumber) |
306 | | { |
307 | | ostringstream oOStrStream; |
308 | | oOStrStream << pNumber; |
309 | | return oOStrStream.str(); |
310 | | } |
311 | | |
312 | | bool is_double(const std::string& s, double& r_double); |
313 | | |
314 | | //############################## CIF CLASSES CODE #################################################### |
315 | | CIFData::CIFAtom::CIFAtom(): |
316 | 0 | mLabel(""),mSymbol(""),mOccupancy(1.0f) |
317 | 0 | {} |
318 | | |
319 | | CIFData::CIFData() |
320 | 0 | {} |
321 | | |
322 | | void CIFData::ExtractAll() |
323 | 0 | { |
324 | 0 | { |
325 | 0 | stringstream ss; |
326 | 0 | ss<<"CIF: interpreting data block: "<<mDataBlockName; |
327 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obInfo); |
328 | 0 | } |
329 | 0 | if(mDataBlockName=="data_global") |
330 | 0 | { // :KLUDGE: this data block name is used for journal &author information |
331 | | // for IUCr journals, so do not generate an error if the block contains |
332 | | // no structural information |
333 | 0 | bool empty_iucrjournal_block=true; |
334 | 0 | if(mvItem.find("_cell_length_a" )!=mvItem.end()) empty_iucrjournal_block=false; |
335 | 0 | if(mvItem.find("_cell_length_b" )!=mvItem.end()) empty_iucrjournal_block=false; |
336 | 0 | if(mvItem.find("_cell_length_c" )!=mvItem.end()) empty_iucrjournal_block=false; |
337 | 0 | for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); |
338 | 0 | loop!=mvLoop.end();++loop) |
339 | 0 | { |
340 | 0 | if(loop->second.find("_atom_site_fract_x")!=loop->second.end()) empty_iucrjournal_block=false; |
341 | 0 | if(loop->second.find("_atom_site_fract_y")!=loop->second.end()) empty_iucrjournal_block=false; |
342 | 0 | if(loop->second.find("_atom_site_fract_z")!=loop->second.end()) empty_iucrjournal_block=false; |
343 | 0 | if(loop->second.find("_atom_site_Cartn_x")!=loop->second.end()) empty_iucrjournal_block=false; |
344 | 0 | if(loop->second.find("_atom_site_Cartn_y")!=loop->second.end()) empty_iucrjournal_block=false; |
345 | 0 | if(loop->second.find("_atom_site_Cartn_z")!=loop->second.end()) empty_iucrjournal_block=false; |
346 | 0 | } |
347 | 0 | if(empty_iucrjournal_block) |
348 | 0 | { |
349 | 0 | stringstream ss; |
350 | 0 | ss << "CIF WARNING: found en empty 'data_global' block - SKIPPING\n" |
351 | 0 | << " (you can safely ignore this if reading a CIF file from an IUCr journal)"; |
352 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
353 | 0 | return; |
354 | 0 | } |
355 | 0 | } |
356 | | // :@todo: Take care of values listed as "." and "?" instead of a real value. |
357 | 0 | this->ExtractName(); |
358 | | // Spacegroup must be extracted before unit cell |
359 | 0 | this->ExtractSpacegroup(); |
360 | 0 | this->ExtractUnitCell(); |
361 | 0 | this->ExtractAtomicPositions(); |
362 | 0 | if(mvAtom.size()==0) |
363 | 0 | { |
364 | 0 | stringstream ss; |
365 | 0 | ss << "CIF Error: no atom found ! (in data block:"<<mDataBlockName<<")"; |
366 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError); |
367 | 0 | } |
368 | 0 | this->ExtractBonds(); |
369 | 0 | this->ExtractCharges(); |
370 | 0 | } |
371 | | |
372 | | void CIFData::ExtractUnitCell() |
373 | 0 | { |
374 | | // Use spacegroup to determine missing angle or length |
375 | 0 | const int spgid= mSpaceGroup->GetId(); |
376 | 0 | if( (mvItem.find("_cell_length_a")!=mvItem.end()) |
377 | 0 | ||(mvItem.find("_cell_length_b")!=mvItem.end()) |
378 | 0 | ||(mvItem.find("_cell_length_c")!=mvItem.end()) ) |
379 | 0 | { |
380 | 0 | mvLatticePar.resize(6); |
381 | 0 | for(unsigned int i=0;i<6;i++) mvLatticePar[i]=float(0); |
382 | 0 | map<ci_string,string>::const_iterator positem; |
383 | 0 | positem=mvItem.find("_cell_length_a"); |
384 | 0 | if(positem!=mvItem.end()) |
385 | 0 | mvLatticePar[0]=CIFNumeric2Float(positem->second); |
386 | 0 | positem=mvItem.find("_cell_length_b"); |
387 | 0 | if(positem!=mvItem.end()) |
388 | 0 | mvLatticePar[1]=CIFNumeric2Float(positem->second); |
389 | 0 | positem=mvItem.find("_cell_length_c"); |
390 | 0 | if(positem!=mvItem.end()) |
391 | 0 | mvLatticePar[2]=CIFNumeric2Float(positem->second); |
392 | 0 | positem=mvItem.find("_cell_angle_alpha"); |
393 | 0 | if(positem!=mvItem.end()) |
394 | 0 | mvLatticePar[3]=CIFNumeric2Float(positem->second); |
395 | 0 | positem=mvItem.find("_cell_angle_beta"); |
396 | 0 | if(positem!=mvItem.end()) |
397 | 0 | mvLatticePar[4]=CIFNumeric2Float(positem->second); |
398 | 0 | positem=mvItem.find("_cell_angle_gamma"); |
399 | 0 | if(positem!=mvItem.end()) |
400 | 0 | mvLatticePar[5]=CIFNumeric2Float(positem->second); |
401 | 0 | stringstream ss; |
402 | 0 | ss << "Found Lattice parameters:" << mvLatticePar[0] << " , " << mvLatticePar[1] << " , " << mvLatticePar[2] |
403 | 0 | << " , " << mvLatticePar[3] << " , " << mvLatticePar[4] << " , " << mvLatticePar[5]; |
404 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
405 | 0 | mvLatticePar[3] = static_cast<float> (mvLatticePar[3] * DEG_TO_RAD);// pi/180 |
406 | 0 | mvLatticePar[4] = static_cast<float> (mvLatticePar[4] * DEG_TO_RAD); |
407 | 0 | mvLatticePar[5] = static_cast<float> (mvLatticePar[5] * DEG_TO_RAD); |
408 | | |
409 | | // Fill values depending on spacegroup, *only* when missing |
410 | 0 | if((spgid>2)&&(spgid<=15)) |
411 | 0 | {// :TODO: monoclinic spg, depending on unique axis.... |
412 | 0 | } |
413 | 0 | if((spgid>15)&&(spgid<=142)) |
414 | 0 | {// orthorombic & tetragonal |
415 | 0 | if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2; |
416 | 0 | if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2; |
417 | 0 | if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2; |
418 | 0 | } |
419 | 0 | if((spgid>74)&&(spgid<=142)) |
420 | 0 | {// Tetragonal, make sure a=b if one is missing |
421 | 0 | if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0]; |
422 | 0 | if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1]; |
423 | 0 | } |
424 | 0 | if((spgid>142)&&(spgid<=194)) |
425 | 0 | {// trigonal/ rhomboedric... |
426 | 0 | const string::size_type pos = mSpaceGroup->GetHallName().find('R'); |
427 | 0 | if(pos==std::string::npos) |
428 | 0 | {//rhomboedric cell, a=b=c, alpha=beta=gamma |
429 | 0 | float a=0; |
430 | 0 | if(mvLatticePar[0]>a) a=mvLatticePar[0]; |
431 | 0 | if(mvLatticePar[1]>a) a=mvLatticePar[1]; |
432 | 0 | if(mvLatticePar[2]>a) a=mvLatticePar[2]; |
433 | 0 | if(mvLatticePar[0]==0) mvLatticePar[0]=a; |
434 | 0 | if(mvLatticePar[1]==0) mvLatticePar[1]=a; |
435 | 0 | if(mvLatticePar[2]==0) mvLatticePar[2]=a; |
436 | |
|
437 | 0 | float alpha=0; |
438 | 0 | if(mvLatticePar[3]>alpha) alpha=mvLatticePar[3]; |
439 | 0 | if(mvLatticePar[4]>alpha) alpha=mvLatticePar[4]; |
440 | 0 | if(mvLatticePar[5]>alpha) alpha=mvLatticePar[5]; |
441 | 0 | if(mvLatticePar[3]==0) mvLatticePar[3]=alpha; |
442 | 0 | if(mvLatticePar[4]==0) mvLatticePar[4]=alpha; |
443 | 0 | if(mvLatticePar[5]==0) mvLatticePar[5]=alpha; |
444 | 0 | } |
445 | 0 | else |
446 | 0 | {//hexagonal cell, a=b & alpha=beta=pi/2, gamma= 2*pi/3 |
447 | 0 | if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0]; |
448 | 0 | if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1]; |
449 | 0 | if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2; |
450 | 0 | if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2; |
451 | 0 | if(mvLatticePar[5]==0) mvLatticePar[5]=2*M_PI/3; |
452 | 0 | } |
453 | 0 | } |
454 | 0 | if(spgid>194) |
455 | 0 | { |
456 | 0 | if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2; |
457 | 0 | if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2; |
458 | 0 | if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2; |
459 | | // In case some idiot cif only supplies one value, make sure a=b=c |
460 | 0 | float a=0; |
461 | 0 | if(mvLatticePar[0]>a) a=mvLatticePar[0]; |
462 | 0 | if(mvLatticePar[1]>a) a=mvLatticePar[1]; |
463 | 0 | if(mvLatticePar[2]>a) a=mvLatticePar[2]; |
464 | 0 | if(mvLatticePar[0]==0) mvLatticePar[0]=a; |
465 | 0 | if(mvLatticePar[1]==0) mvLatticePar[1]=a; |
466 | 0 | if(mvLatticePar[2]==0) mvLatticePar[2]=a; |
467 | 0 | } |
468 | | // Handle missing values |
469 | 0 | if(mvLatticePar[3]<1e-6) |
470 | 0 | { |
471 | 0 | stringstream ss; |
472 | 0 | ss << "CIF WARNING: missing alpha value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")"; |
473 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
474 | 0 | mvLatticePar[3]=90*DEG_TO_RAD; |
475 | 0 | } |
476 | 0 | if(mvLatticePar[4]<1e-6) |
477 | 0 | { |
478 | 0 | stringstream ss; |
479 | 0 | ss << "CIF WARNING: missing beta value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")"; |
480 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
481 | 0 | mvLatticePar[4]=90*DEG_TO_RAD; |
482 | 0 | } |
483 | 0 | if(mvLatticePar[5]<1e-6) |
484 | 0 | { |
485 | 0 | stringstream ss; |
486 | 0 | ss << "CIF WARNING: missing gamma value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")"; |
487 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
488 | 0 | mvLatticePar[5]=90*DEG_TO_RAD; |
489 | 0 | } |
490 | 0 | if(mvLatticePar[1]<1e-6) |
491 | 0 | { |
492 | 0 | stringstream ss; |
493 | 0 | ss << "CIF Error: missing b lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")"; |
494 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError); |
495 | 0 | } |
496 | 0 | if(mvLatticePar[2]<1e-6) |
497 | 0 | { |
498 | 0 | stringstream ss; |
499 | 0 | ss << "CIF Error: missing c lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")"; |
500 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError); |
501 | 0 | } |
502 | |
|
503 | 0 | this->CalcMatrices(); |
504 | 0 | } |
505 | 0 | else |
506 | 0 | { |
507 | 0 | stringstream ss; |
508 | 0 | ss << "CIF Error: missing a,b and c value - cannot interpret structure ! (in data block:"<<mDataBlockName<<")"; |
509 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError); |
510 | 0 | } |
511 | 0 | } |
512 | | |
513 | | void CIFData::ExtractSpacegroup() |
514 | 0 | { |
515 | 0 | map<ci_string,string>::const_iterator positem; |
516 | 0 | bool found = false; |
517 | 0 | positem=mvItem.find("_space_group_IT_number"); |
518 | 0 | if(positem!=mvItem.end()) |
519 | 0 | { |
520 | 0 | mSpacegroupNumberIT=CIFNumeric2Int(positem->second); |
521 | 0 | found = true; |
522 | 0 | stringstream ss; |
523 | 0 | ss << "Found spacegroup IT number:" << mSpacegroupNumberIT; |
524 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
525 | 0 | } |
526 | 0 | else |
527 | 0 | { |
528 | 0 | positem=mvItem.find("_symmetry_Int_Tables_number"); |
529 | 0 | if(positem!=mvItem.end()) |
530 | 0 | { |
531 | 0 | mSpacegroupNumberIT=CIFNumeric2Int(positem->second); |
532 | 0 | found = true; |
533 | 0 | stringstream ss; |
534 | 0 | ss << "Found spacegroup IT number (with OBSOLETE CIF #1.0 TAG):" << mSpacegroupNumberIT; |
535 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
536 | 0 | } |
537 | 0 | else { |
538 | 0 | positem=mvItem.find("_symmetry_group_IT_number"); |
539 | 0 | if(positem!=mvItem.end()) |
540 | 0 | { |
541 | 0 | mSpacegroupNumberIT=CIFNumeric2Int(positem->second); |
542 | 0 | found = true; |
543 | 0 | stringstream ss; |
544 | 0 | ss << "Found spacegroup IT number (with NON-STANDARD CIF TAG):" << mSpacegroupNumberIT; |
545 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
546 | 0 | } |
547 | 0 | else |
548 | 0 | mSpacegroupNumberIT=0; |
549 | 0 | } |
550 | 0 | } |
551 | |
|
552 | 0 | positem=mvItem.find("_space_group_name_Hall"); |
553 | 0 | if(positem!=mvItem.end()) |
554 | 0 | { |
555 | 0 | mSpacegroupSymbolHall=positem->second; |
556 | 0 | found = true; |
557 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol:"+mSpacegroupSymbolHall, obDebug); |
558 | 0 | } |
559 | 0 | else |
560 | 0 | { |
561 | 0 | positem=mvItem.find("_symmetry_space_group_name_Hall"); |
562 | 0 | if(positem!=mvItem.end()) |
563 | 0 | { |
564 | 0 | mSpacegroupSymbolHall=positem->second; |
565 | 0 | found = true; |
566 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupSymbolHall, obDebug); |
567 | 0 | } |
568 | 0 | } |
569 | |
|
570 | 0 | positem=mvItem.find("_space_group_name_H-M_alt"); |
571 | 0 | if(positem!=mvItem.end()) |
572 | 0 | { |
573 | 0 | mSpacegroupHermannMauguin=positem->second; |
574 | 0 | found = true; |
575 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol:"+mSpacegroupHermannMauguin, obDebug); |
576 | 0 | } |
577 | 0 | else |
578 | 0 | { |
579 | 0 | positem=mvItem.find("_symmetry_space_group_name_H-M"); |
580 | 0 | if(positem!=mvItem.end()) |
581 | 0 | { |
582 | 0 | mSpacegroupHermannMauguin=positem->second; |
583 | 0 | found = true; |
584 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupHermannMauguin, obDebug); |
585 | 0 | } |
586 | 0 | } |
587 | | // DDL2 tag is "_space_group.IT_coordinate_system_code", converted by the cif reader to "_space_group_IT_coordinate_system_code" |
588 | 0 | positem=mvItem.find("_space_group_IT_coordinate_system_code"); |
589 | 0 | if(positem!=mvItem.end()) |
590 | 0 | { |
591 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup IT_coordinate_system_code:"+positem->second, obDebug); |
592 | 0 | if((mSpacegroupHermannMauguin.length()>0) && (positem->second=="1" || positem->second=="2")) |
593 | 0 | { |
594 | | // this is a HACK which will work as long as the HM symbols in spacegroups.txt have the ":1" or ":2" extension listed, when needed |
595 | 0 | mSpacegroupHermannMauguin=mSpacegroupHermannMauguin+string(":")+positem->second; |
596 | 0 | } |
597 | 0 | else |
598 | 0 | { |
599 | 0 | stringstream ss; |
600 | 0 | ss << "CIF Error: found DDL2 tag _space_group.IT_coordinate_system_code ("<<positem->second<<")"<<endl |
601 | 0 | <<" but could not interpret it ! Origin choice or axis may be incorrect."; |
602 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
603 | 0 | } |
604 | 0 | } |
605 | |
|
606 | 0 | mSpaceGroup = nullptr; |
607 | | // be forgiving - if spg not found, try again |
608 | | // Prefer Hall > HM == number, as Hall symbol is truly unique |
609 | 0 | if (mSpacegroupSymbolHall.length() > 0) { |
610 | | //Make sure there are no leading spaces before Hall symbol (kludge) |
611 | 0 | for(std::string::iterator pos=mSpacegroupSymbolHall.begin();pos!=mSpacegroupSymbolHall.end();) |
612 | 0 | { |
613 | 0 | if((char)(*pos)==' ') pos=mSpacegroupSymbolHall.erase(pos); |
614 | 0 | else ++pos; |
615 | 0 | } |
616 | 0 | mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupSymbolHall); |
617 | 0 | } |
618 | 0 | if (mSpaceGroup == nullptr && mSpacegroupHermannMauguin.length() > 0) { |
619 | 0 | mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupHermannMauguin); |
620 | 0 | } |
621 | 0 | if (mSpaceGroup == nullptr && mSpacegroupNumberIT != 0) { |
622 | 0 | mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupNumberIT); |
623 | 0 | } |
624 | 0 | if (mSpaceGroup == nullptr) { |
625 | 0 | SpaceGroup *sg = new SpaceGroup(); |
626 | 0 | positem=mvItem.find("_space_group_symop_operation_xyz"); |
627 | 0 | if(positem==mvItem.end()) |
628 | 0 | positem=mvItem.find("_symmetry_equiv_pos_as_xyz"); |
629 | 0 | if(positem!=mvItem.end()) |
630 | 0 | { |
631 | 0 | sg->AddTransform (positem->second); |
632 | 0 | found = true; |
633 | 0 | } |
634 | 0 | else { |
635 | 0 | for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); |
636 | 0 | loop!=mvLoop.end();++loop) |
637 | 0 | { |
638 | 0 | map<ci_string,vector<string> >::const_iterator pos; |
639 | 0 | unsigned i, nb; |
640 | 0 | pos=loop->second.find("_space_group_symop_operation_xyz"); |
641 | 0 | if (pos==loop->second.end()) |
642 | 0 | pos=loop->second.find("_symmetry_equiv_pos_as_xyz"); |
643 | 0 | if (pos!=loop->second.end()) |
644 | 0 | { |
645 | 0 | nb=pos->second.size(); |
646 | 0 | found = true; |
647 | 0 | for (i = 0; i < nb; i++) |
648 | 0 | sg->AddTransform(pos->second[i]); |
649 | 0 | break; // found the transforms, so we have done with them |
650 | 0 | } |
651 | 0 | } |
652 | 0 | if (found) |
653 | 0 | mSpaceGroup = SpaceGroup::Find(sg); |
654 | 0 | if (mSpaceGroup == nullptr && sg->IsValid()) |
655 | 0 | mSpaceGroup = sg; |
656 | 0 | else |
657 | 0 | delete sg; |
658 | 0 | } |
659 | 0 | } |
660 | 0 | if (mSpaceGroup == nullptr) |
661 | 0 | { |
662 | 0 | stringstream ss; |
663 | 0 | ss << "CIF Error: missing spacegroup description: defaulting to P1... (in data block:"<<mDataBlockName<<")"; |
664 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
665 | 0 | mSpaceGroup = SpaceGroup::GetSpaceGroup(1); |
666 | 0 | } |
667 | | // set the space group name to Hall symbol |
668 | 0 | mSpacegroupSymbolHall = mSpaceGroup->GetHallName(); |
669 | 0 | } |
670 | | |
671 | | void CIFData::ExtractName() |
672 | 0 | { |
673 | 0 | map<ci_string,string>::const_iterator positem; |
674 | 0 | positem=mvItem.find("_chemical_name_systematic"); |
675 | 0 | if(positem!=mvItem.end()) |
676 | 0 | { |
677 | 0 | mName=positem->second; |
678 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug); |
679 | 0 | } |
680 | 0 | else |
681 | 0 | { |
682 | 0 | positem=mvItem.find("_chemical_name_mineral"); |
683 | 0 | if(positem!=mvItem.end()) |
684 | 0 | { |
685 | 0 | mName=positem->second; |
686 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug); |
687 | 0 | } |
688 | 0 | else |
689 | 0 | { |
690 | 0 | positem=mvItem.find("_chemical_name_structure_type"); |
691 | 0 | if(positem!=mvItem.end()) |
692 | 0 | { |
693 | 0 | mName=positem->second; |
694 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug); |
695 | 0 | } |
696 | 0 | else |
697 | 0 | { |
698 | 0 | positem=mvItem.find("_chemical_name_common"); |
699 | 0 | if(positem!=mvItem.end()) |
700 | 0 | { |
701 | 0 | mName=positem->second; |
702 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug); |
703 | 0 | } |
704 | 0 | } |
705 | 0 | } |
706 | 0 | } |
707 | | /// Crystal formula |
708 | 0 | positem=mvItem.find("_chemical_formula_analytical"); |
709 | 0 | if(positem!=mvItem.end()) |
710 | 0 | { |
711 | 0 | mFormula=positem->second; |
712 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug); |
713 | 0 | } |
714 | 0 | else |
715 | 0 | { |
716 | 0 | positem=mvItem.find("_chemical_formula_structural"); |
717 | 0 | if(positem!=mvItem.end()) |
718 | 0 | { |
719 | 0 | mFormula=positem->second; |
720 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug); |
721 | 0 | } |
722 | 0 | else |
723 | 0 | { |
724 | 0 | positem=mvItem.find("_chemical_formula_iupac"); |
725 | 0 | if(positem!=mvItem.end()) |
726 | 0 | { |
727 | 0 | mFormula=positem->second; |
728 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug); |
729 | 0 | } |
730 | 0 | else |
731 | 0 | { |
732 | 0 | positem=mvItem.find("_chemical_formula_moiety"); |
733 | 0 | if(positem!=mvItem.end()) |
734 | 0 | { |
735 | 0 | mFormula=positem->second; |
736 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug); |
737 | 0 | } |
738 | 0 | } |
739 | 0 | } |
740 | 0 | } |
741 | 0 | } |
742 | | |
743 | | void CIFData::ExtractAtomicPositions() |
744 | 0 | { |
745 | 0 | map<ci_string,string>::const_iterator positem; |
746 | 0 | for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); |
747 | 0 | loop!=mvLoop.end();++loop) |
748 | 0 | { |
749 | 0 | if(mvAtom.size()>0) break;// only extract ONE list of atoms, preferably fractional coordinates |
750 | 0 | map<ci_string,vector<string> >::const_iterator posx,posy,posz,poslabel,possymbol,posoccup; |
751 | 0 | posx=loop->second.find("_atom_site_fract_x"); |
752 | 0 | posy=loop->second.find("_atom_site_fract_y"); |
753 | 0 | posz=loop->second.find("_atom_site_fract_z"); |
754 | 0 | unsigned int nb = 0; |
755 | 0 | if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end())) |
756 | 0 | { |
757 | 0 | nb=posx->second.size(); |
758 | 0 | mvAtom.resize(nb); |
759 | 0 | for(unsigned int i=0;i<nb;++i) |
760 | 0 | { |
761 | 0 | mvAtom[i].mCoordFrac.resize(3); |
762 | 0 | mvAtom[i].mCoordFrac[0]=CIFNumeric2Float(posx->second[i]); |
763 | 0 | mvAtom[i].mCoordFrac[1]=CIFNumeric2Float(posy->second[i]); |
764 | 0 | mvAtom[i].mCoordFrac[2]=CIFNumeric2Float(posz->second[i]); |
765 | 0 | } |
766 | 0 | this->Fractional2CartesianCoord(); |
767 | 0 | } |
768 | 0 | else |
769 | 0 | { |
770 | 0 | posx=loop->second.find("_atom_site_Cartn_x"); |
771 | 0 | posy=loop->second.find("_atom_site_Cartn_y"); |
772 | 0 | posz=loop->second.find("_atom_site_Cartn_z"); |
773 | 0 | if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end())) |
774 | 0 | { |
775 | 0 | nb=posx->second.size(); |
776 | 0 | mvAtom.resize(nb); |
777 | 0 | for(unsigned int i=0;i<nb;++i) |
778 | 0 | { |
779 | 0 | mvAtom[i].mCoordCart.resize(3); |
780 | 0 | mvAtom[i].mCoordCart[0]=CIFNumeric2Float(posx->second[i]); |
781 | 0 | mvAtom[i].mCoordCart[1]=CIFNumeric2Float(posy->second[i]); |
782 | 0 | mvAtom[i].mCoordCart[2]=CIFNumeric2Float(posz->second[i]); |
783 | 0 | } |
784 | 0 | this->Cartesian2FractionalCoord(); |
785 | 0 | } |
786 | 0 | } |
787 | 0 | if(mvAtom.size()>0) |
788 | 0 | {// Got the atoms, get names and symbols |
789 | 0 | possymbol=loop->second.find("_atom_site_type_symbol"); |
790 | 0 | if(possymbol!=loop->second.end()) |
791 | 0 | for(unsigned int i=0;i<nb;++i) |
792 | 0 | mvAtom[i].mSymbol=possymbol->second[i]; |
793 | 0 | poslabel=loop->second.find("_atom_site_label"); |
794 | 0 | if(poslabel!=loop->second.end()) |
795 | 0 | for(unsigned int i=0;i<nb;++i) |
796 | 0 | { |
797 | 0 | mvAtom[i].mLabel=poslabel->second[i]; |
798 | 0 | if(possymbol==loop->second.end()) |
799 | 0 | {// There was no symbol, use the labels to guess it |
800 | 0 | int nbc=0; |
801 | 0 | if(mvAtom[i].mLabel.size()==1) |
802 | 0 | if(isalpha(mvAtom[i].mLabel[0])) nbc=1; |
803 | 0 | if(mvAtom[i].mLabel.size()>=2) |
804 | 0 | { |
805 | 0 | if(isalpha(mvAtom[i].mLabel[0]) && isalpha(mvAtom[i].mLabel[1])) nbc=2; |
806 | 0 | else if(isalpha(mvAtom[i].mLabel[0])) nbc=1; |
807 | 0 | } |
808 | 0 | if(nbc>0) mvAtom[i].mSymbol=mvAtom[i].mLabel.substr(0,nbc); |
809 | 0 | else mvAtom[i].mSymbol="H";//Something wen wrong, no symbol ! |
810 | 0 | } |
811 | 0 | } |
812 | | // Occupancy ? |
813 | 0 | posoccup=loop->second.find("_atom_site_occupancy"); |
814 | 0 | if(posoccup!=loop->second.end()) |
815 | 0 | for(unsigned int i=0;i<nb;++i) |
816 | 0 | { |
817 | 0 | mvAtom[i].mOccupancy=CIFNumeric2Float(posoccup->second[i]); |
818 | 0 | if( (mvAtom[i].mOccupancy <= 0.0) || (mvAtom[i].mOccupancy > 1.0) ) |
819 | 0 | mvAtom[i].mOccupancy = 1.0; |
820 | 0 | } |
821 | | // Now be somewhat verbose |
822 | 0 | stringstream ss; |
823 | 0 | ss << "Found "<<nb<<" atoms."<<endl; |
824 | 0 | for(unsigned int i=0;i<nb;++i) |
825 | 0 | { |
826 | 0 | ss<<mvAtom[i].mLabel<<" "<<mvAtom[i].mSymbol; |
827 | 0 | if(mvAtom[i].mCoordFrac.size()>0) |
828 | 0 | { |
829 | 0 | ss<<" , Fractional: "; |
830 | 0 | for(unsigned int j=0;j<mvAtom[i].mCoordFrac.size();++j) |
831 | 0 | ss<<mvAtom[i].mCoordFrac[j]<<" "; |
832 | 0 | } |
833 | 0 | if(mvAtom[i].mCoordCart.size()>0) |
834 | 0 | { |
835 | 0 | ss<<" , Cartesian: "; |
836 | 0 | for(unsigned int j=0;j<mvAtom[i].mCoordCart.size();++j) |
837 | 0 | ss<<mvAtom[i].mCoordCart[j]<<" "; |
838 | 0 | } |
839 | 0 | ss<<" , Occupancy= "<<mvAtom[i].mOccupancy<<endl; |
840 | 0 | } |
841 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
842 | 0 | } |
843 | 0 | } |
844 | 0 | } |
845 | | |
846 | | void CIFData::ExtractBonds() |
847 | 0 | { |
848 | 0 | map<ci_string,string>::const_iterator positem; |
849 | 0 | for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end();++loop) |
850 | 0 | { |
851 | | //if(mvBond.size()>0) break;// Only allow one bond list |
852 | 0 | map<ci_string,vector<string> >::const_iterator poslabel1,poslabel2,posdist; |
853 | 0 | poslabel1=loop->second.find("_geom_bond_atom_site_label_1"); |
854 | 0 | poslabel2=loop->second.find("_geom_bond_atom_site_label_2"); |
855 | 0 | posdist=loop->second.find("_geom_bond_distance"); |
856 | 0 | if( (poslabel1!=loop->second.end()) && (poslabel2!=loop->second.end()) && (posdist!=loop->second.end())) |
857 | 0 | { |
858 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Found _geom_bond* record...", obDebug); |
859 | 0 | const unsigned long nb=poslabel1->second.size(); |
860 | 0 | mvBond.resize(nb); |
861 | 0 | for(unsigned int i=0;i<nb;++i) |
862 | 0 | { |
863 | 0 | mvBond[i].mLabel1=poslabel1->second[i]; |
864 | 0 | mvBond[i].mLabel2=poslabel2->second[i]; |
865 | 0 | mvBond[i].mDistance=CIFNumeric2Float(posdist->second[i]); |
866 | 0 | stringstream ss; |
867 | 0 | ss << " d(" << mvBond[i].mLabel1 << "-" << mvBond[i].mLabel2 << ")=" << mvBond[i].mDistance; |
868 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
869 | 0 | } |
870 | 0 | } |
871 | 0 | } |
872 | 0 | } |
873 | | |
874 | | void CIFData::ExtractCharges() |
875 | 0 | { |
876 | 0 | map<ci_string,string>::const_iterator positem; |
877 | |
|
878 | 0 | map<std::string, double> lbl2ox; |
879 | 0 | for(map<set<ci_string>, map<ci_string, vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end(); ++loop) |
880 | 0 | { |
881 | | //if(mvBond.size()>0) break;// Only allow one bond list |
882 | 0 | map<ci_string,vector<string> >::const_iterator pos_symbol, pos_ox_number, posdist; |
883 | 0 | pos_symbol =loop->second.find("_atom_type_symbol"); |
884 | 0 | pos_ox_number =loop->second.find("_atom_type_oxidation_number"); |
885 | 0 | if( (pos_symbol != loop->second.end()) && (pos_ox_number != loop->second.end()) ) |
886 | 0 | { |
887 | 0 | obErrorLog.ThrowError(__FUNCTION__, " Found _atom_type* record with oxydation number...", obDebug); |
888 | 0 | const unsigned long nl = pos_symbol->second.size(); |
889 | |
|
890 | 0 | for(unsigned int i = 0; i < nl; i++) |
891 | 0 | { |
892 | 0 | lbl2ox[pos_symbol->second[i]] = CIFNumeric2Float(pos_ox_number->second[i]); |
893 | 0 | obErrorLog.ThrowError(__FUNCTION__, " has oxydation "+pos_ox_number->second[i], obDebug); |
894 | 0 | } |
895 | 0 | } |
896 | 0 | } |
897 | |
|
898 | 0 | for (std::vector<CIFAtom>::iterator it = mvAtom.begin() ; it != mvAtom.end(); ++it) |
899 | 0 | { |
900 | 0 | string label = (*it).mLabel; |
901 | |
|
902 | 0 | if( lbl2ox.count(label) > 0 ) |
903 | 0 | (*it).mCharge = lbl2ox[label]; |
904 | 0 | else |
905 | 0 | { |
906 | 0 | (*it).mCharge = NOCHARGE; |
907 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Charge for label: "+label+" cannot be found.", obDebug); |
908 | 0 | } |
909 | 0 | } |
910 | 0 | } |
911 | | |
912 | | void CIFData::CalcMatrices() |
913 | 0 | { |
914 | 0 | if(mvLatticePar.size()==0) return;//:@todo: throw error |
915 | 0 | float a,b,c,alpha,beta,gamma;//direct space parameters |
916 | 0 | float aa,bb,cc,alphaa,betaa,gammaa;//reciprocal space parameters |
917 | 0 | float v;//volume of the unit cell |
918 | 0 | a=mvLatticePar[0]; |
919 | 0 | b=mvLatticePar[1]; |
920 | 0 | c=mvLatticePar[2]; |
921 | 0 | alpha=mvLatticePar[3]; |
922 | 0 | beta=mvLatticePar[4]; |
923 | 0 | gamma=mvLatticePar[5]; |
924 | |
|
925 | 0 | v=sqrt(1-cos(alpha)*cos(alpha)-cos(beta)*cos(beta)-cos(gamma)*cos(gamma) |
926 | 0 | +2*cos(alpha)*cos(beta)*cos(gamma)); |
927 | |
|
928 | 0 | aa=sin(alpha)/a/v; |
929 | 0 | bb=sin(beta )/b/v; |
930 | 0 | cc=sin(gamma)/c/v; |
931 | |
|
932 | 0 | alphaa=acos( (cos(beta )*cos(gamma)-cos(alpha))/sin(beta )/sin(gamma) ); |
933 | 0 | betaa =acos( (cos(alpha)*cos(gamma)-cos(beta ))/sin(alpha)/sin(gamma) ); |
934 | 0 | gammaa=acos( (cos(alpha)*cos(beta )-cos(gamma))/sin(alpha)/sin(beta ) ); |
935 | |
|
936 | 0 | mOrthMatrix[0][0]=a; |
937 | 0 | mOrthMatrix[0][1]=b*cos(gamma); |
938 | 0 | mOrthMatrix[0][2]=c*cos(beta); |
939 | |
|
940 | 0 | mOrthMatrix[1][0]=0; |
941 | 0 | mOrthMatrix[1][1]=b*sin(gamma); |
942 | 0 | mOrthMatrix[1][2]=-c*sin(beta)*cos(alphaa); |
943 | |
|
944 | 0 | mOrthMatrix[2][0]=0; |
945 | 0 | mOrthMatrix[2][1]=0; |
946 | 0 | mOrthMatrix[2][2]=1/cc; |
947 | | |
948 | | // Invert upper triangular matrix |
949 | 0 | float cm[3][3]; |
950 | 0 | cm[0][0]=mOrthMatrix[0][0]; |
951 | 0 | cm[0][1]=mOrthMatrix[0][1]; |
952 | 0 | cm[0][2]=mOrthMatrix[0][2]; |
953 | |
|
954 | 0 | cm[1][0]=mOrthMatrix[1][0]; |
955 | 0 | cm[1][1]=mOrthMatrix[1][1]; |
956 | 0 | cm[1][2]=mOrthMatrix[1][2]; |
957 | |
|
958 | 0 | cm[2][0]=mOrthMatrix[2][0]; |
959 | 0 | cm[2][1]=mOrthMatrix[2][1]; |
960 | 0 | cm[2][2]=mOrthMatrix[2][2]; |
961 | 0 | for(long i=0;i<3;i++) |
962 | 0 | for(long j=0;j<3;j++) |
963 | 0 | if(i==j) mOrthMatrixInvert[i][j]=1; |
964 | 0 | else mOrthMatrixInvert[i][j]=0; |
965 | 0 | for(long i=0;i<3;i++) |
966 | 0 | { |
967 | 0 | float a; |
968 | 0 | for(long j=i-1;j>=0;j--) |
969 | 0 | { |
970 | 0 | a=cm[j][i]/cm[i][i]; |
971 | 0 | for(long k=0;k<3;k++) mOrthMatrixInvert[j][k] -= mOrthMatrixInvert[i][k]*a; |
972 | 0 | for(long k=0;k<3;k++) cm[j][k] -= cm[i][k]*a; |
973 | 0 | } |
974 | 0 | a=cm[i][i]; |
975 | 0 | for(long k=0;k<3;k++) mOrthMatrixInvert[i][k] /= a; |
976 | 0 | for(long k=0;k<3;k++) cm[i][k] /= a; |
977 | 0 | } |
978 | 0 | stringstream ss; |
979 | 0 | ss <<"Fractional2Cartesian matrix:"<<endl |
980 | 0 | <<mOrthMatrix[0][0]<<" "<<mOrthMatrix[0][1]<<" "<<mOrthMatrix[0][2]<<endl |
981 | 0 | <<mOrthMatrix[1][0]<<" "<<mOrthMatrix[1][1]<<" "<<mOrthMatrix[1][2]<<endl |
982 | 0 | <<mOrthMatrix[2][0]<<" "<<mOrthMatrix[2][1]<<" "<<mOrthMatrix[2][2]<<endl<<endl; |
983 | 0 | ss <<"Cartesian2Fractional matrix:"<<endl |
984 | 0 | <<mOrthMatrixInvert[0][0]<<" "<<mOrthMatrixInvert[0][1]<<" "<<mOrthMatrixInvert[0][2]<<endl |
985 | 0 | <<mOrthMatrixInvert[1][0]<<" "<<mOrthMatrixInvert[1][1]<<" "<<mOrthMatrixInvert[1][2]<<endl |
986 | 0 | <<mOrthMatrixInvert[2][0]<<" "<<mOrthMatrixInvert[2][1]<<" "<<mOrthMatrixInvert[2][2]; |
987 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
988 | 0 | } |
989 | | |
990 | | void CIFData::f2c(float &x,float &y, float &z) |
991 | 0 | { |
992 | 0 | const float x0=x,y0=y,z0=z; |
993 | 0 | x=mOrthMatrix[0][0]*x0+mOrthMatrix[0][1]*y0+mOrthMatrix[0][2]*z0; |
994 | 0 | y=mOrthMatrix[1][0]*x0+mOrthMatrix[1][1]*y0+mOrthMatrix[1][2]*z0; |
995 | 0 | z=mOrthMatrix[2][0]*x0+mOrthMatrix[2][1]*y0+mOrthMatrix[2][2]*z0; |
996 | 0 | } |
997 | | |
998 | | void CIFData::c2f(float &x,float &y, float &z) |
999 | 0 | { |
1000 | 0 | const float x0=x,y0=y,z0=z; |
1001 | 0 | x=mOrthMatrixInvert[0][0]*x0+mOrthMatrixInvert[0][1]*y0+mOrthMatrixInvert[0][2]*z0; |
1002 | 0 | y=mOrthMatrixInvert[1][0]*x0+mOrthMatrixInvert[1][1]*y0+mOrthMatrixInvert[1][2]*z0; |
1003 | 0 | z=mOrthMatrixInvert[2][0]*x0+mOrthMatrixInvert[2][1]*y0+mOrthMatrixInvert[2][2]*z0; |
1004 | 0 | } |
1005 | | |
1006 | | void CIFData::Cartesian2FractionalCoord() |
1007 | 0 | { |
1008 | 0 | if(mvLatticePar.size()==0) return;//:@todo: report error |
1009 | 0 | for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos) |
1010 | 0 | { |
1011 | 0 | pos->mCoordFrac.resize(3); |
1012 | 0 | pos->mCoordFrac[0]=pos->mCoordCart.at(0); |
1013 | 0 | pos->mCoordFrac[1]=pos->mCoordCart.at(1); |
1014 | 0 | pos->mCoordFrac[2]=pos->mCoordCart.at(2); |
1015 | 0 | c2f(pos->mCoordFrac[0],pos->mCoordFrac[1],pos->mCoordFrac[2]); |
1016 | 0 | } |
1017 | 0 | } |
1018 | | |
1019 | | void CIFData::Fractional2CartesianCoord() |
1020 | 0 | { |
1021 | 0 | if(mvLatticePar.size()==0) return;//:@todo: report error |
1022 | 0 | for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos) |
1023 | 0 | { |
1024 | 0 | pos->mCoordCart.resize(3); |
1025 | 0 | pos->mCoordCart[0]=pos->mCoordFrac.at(0); |
1026 | 0 | pos->mCoordCart[1]=pos->mCoordFrac.at(1); |
1027 | 0 | pos->mCoordCart[2]=pos->mCoordFrac.at(2); |
1028 | 0 | f2c(pos->mCoordCart[0],pos->mCoordCart[1],pos->mCoordCart[2]); |
1029 | 0 | } |
1030 | 0 | } |
1031 | | |
1032 | | ///// |
1033 | | |
1034 | | |
1035 | | CIF::CIF(istream &is, const bool interpret) |
1036 | 0 | { |
1037 | 0 | bool found_atoms=false; |
1038 | 0 | while(!found_atoms) |
1039 | 0 | { |
1040 | | // :TODO: we don't need a vector of CIFData, since only one block is read at a time |
1041 | 0 | mvData.clear(); |
1042 | 0 | this->Parse(is); |
1043 | | // Extract structure from 1 block |
1044 | 0 | if(interpret) |
1045 | 0 | for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd) |
1046 | 0 | { |
1047 | 0 | posd->second.ExtractAll(); |
1048 | 0 | if(posd->second.mvAtom.size()>0) found_atoms=true; |
1049 | 0 | } |
1050 | 0 | } |
1051 | 0 | } |
1052 | | |
1053 | 0 | bool iseol(const char c) { return ((c=='\n')||(c=='\r'));} |
1054 | | |
1055 | | /// Read one value, whether it is numeric, string or text |
1056 | | string CIFReadValue(istream &in,char &lastc) |
1057 | 0 | { |
1058 | 0 | bool vv=false;//very verbose ? |
1059 | 0 | string value(""); |
1060 | 0 | while(!isgraph(in.peek())) in.get(lastc); |
1061 | 0 | while(in.peek()=='#') |
1062 | 0 | {//discard these comments for now |
1063 | 0 | string tmp; |
1064 | 0 | getline(in,tmp); |
1065 | 0 | lastc='\r'; |
1066 | 0 | while(!isgraph(in.peek())) in.get(lastc); |
1067 | 0 | } |
1068 | 0 | if(in.peek()=='_') { |
1069 | 0 | stringstream errorMsg; |
1070 | 0 | errorMsg << "Warning: Trying to read a value but found a new CIF tag !"; |
1071 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError); |
1072 | 0 | return value; |
1073 | 0 | } |
1074 | 0 | if(in.peek()==';') |
1075 | 0 | {//SemiColonTextField |
1076 | 0 | bool warning=!iseol(lastc); |
1077 | 0 | if(warning){ |
1078 | 0 | stringstream errorMsg; |
1079 | 0 | errorMsg << "Warning: Trying to read a SemiColonTextField but last char is not an end-of-line char !"; |
1080 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError); |
1081 | 0 | } |
1082 | 0 | value=""; |
1083 | 0 | in.get(lastc); |
1084 | 0 | while(in.peek()!=';') |
1085 | 0 | { |
1086 | 0 | if (in.peek() == '_') { |
1087 | 0 | stringstream errorMsg; |
1088 | 0 | errorMsg << "Warning: Trying to read a value but found a new CIF tag !"; |
1089 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError); |
1090 | 0 | warning = true; |
1091 | 0 | break; |
1092 | 0 | } |
1093 | 0 | string tmp; |
1094 | 0 | getline(in,tmp); |
1095 | 0 | value+=tmp+" "; |
1096 | 0 | } |
1097 | 0 | if (!warning) |
1098 | 0 | in.get(lastc); |
1099 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug); |
1100 | 0 | if(warning && !vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug); |
1101 | 0 | return value; |
1102 | 0 | } |
1103 | 0 | if((in.peek()=='\'') || (in.peek()=='\"')) |
1104 | 0 | {//QuotedString |
1105 | 0 | char delim; |
1106 | 0 | in.get(delim); |
1107 | 0 | value=""; |
1108 | 0 | while(!((lastc==delim)&&(!isgraph(in.peek()))) ) |
1109 | 0 | { |
1110 | 0 | in.get(lastc); |
1111 | 0 | value+=lastc; |
1112 | 0 | } |
1113 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, "QuotedString:"+value, obDebug); |
1114 | 0 | return value.substr(0,value.size()-1); |
1115 | 0 | } |
1116 | | // If we got here, we have an ordinary value, numeric or unquoted string |
1117 | 0 | in>>value; |
1118 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, "NormalValue:"+value, obDebug); |
1119 | 0 | return value; |
1120 | 0 | } |
1121 | | |
1122 | | void CIF::Parse(istream &in) |
1123 | 0 | { |
1124 | 0 | bool vv=false;//very verbose ? |
1125 | 0 | char lastc=' '; |
1126 | 0 | string block="";// Current block data |
1127 | 0 | while(!in.eof()) |
1128 | 0 | { |
1129 | 0 | while(!isgraph(in.peek()) && !in.eof()) in.get(lastc); |
1130 | 0 | if(in.peek()=='#') |
1131 | 0 | {//Comment |
1132 | 0 | string tmp; |
1133 | 0 | getline(in,tmp); |
1134 | 0 | if(block=="") mvComment.push_back(tmp); |
1135 | 0 | else mvData[block].mvComment.push_back(tmp); |
1136 | 0 | lastc='\r'; |
1137 | 0 | continue; |
1138 | 0 | } |
1139 | 0 | if(in.peek()=='_') |
1140 | 0 | {//Tag |
1141 | 0 | string tag,value; |
1142 | 0 | in>>tag; |
1143 | | // Convert all dots to underscores to cover much of DDL2 with this DDL1 parser. |
1144 | 0 | for (string::size_type pos = tag.find('.'); pos != string::npos; pos = tag.find('.', ++ pos)) |
1145 | 0 | tag.replace(pos, 1, 1, '_'); |
1146 | 0 | value=CIFReadValue(in,lastc); |
1147 | 0 | mvData[block].mvItem[ci_string(tag.c_str())]=value; |
1148 | 0 | if(vv) |
1149 | 0 | { |
1150 | 0 | stringstream ss; |
1151 | 0 | ss<<"New Tag:"<<tag<<" ("<<value.size()<<"):"<<value; |
1152 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1153 | 0 | } |
1154 | 0 | continue; |
1155 | 0 | } |
1156 | 0 | if((in.peek()=='d') || (in.peek()=='D')) |
1157 | 0 | {// Data |
1158 | 0 | if(!mvData.empty()) return; // We want just a single data block |
1159 | | |
1160 | 0 | string tmp; |
1161 | 0 | in>>tmp; |
1162 | 0 | block=tmp.substr(5); |
1163 | 0 | if(vv) |
1164 | 0 | { |
1165 | 0 | stringstream ss; |
1166 | 0 | ss<<endl<<endl<<"NEW BLOCK DATA: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ->"<<block<<endl<<endl; |
1167 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1168 | 0 | } |
1169 | 0 | mvData[block]=CIFData(); |
1170 | 0 | mvData[block].mDataBlockName=tmp; |
1171 | 0 | continue; |
1172 | 0 | } |
1173 | 0 | if((in.peek()=='l') || (in.peek()=='L')) |
1174 | 0 | {// loop_ |
1175 | 0 | vector<ci_string> tit; |
1176 | 0 | string tmp; |
1177 | 0 | in>>tmp; //should be loop_ |
1178 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, "LOOP : "+tmp, obDebug); |
1179 | 0 | while(true) |
1180 | 0 | {//read titles |
1181 | 0 | while(!isgraph(in.peek()) && !in.eof()) in.get(lastc); |
1182 | 0 | if(in.peek()=='#') |
1183 | 0 | { |
1184 | 0 | getline(in,tmp); |
1185 | 0 | if(block=="") mvComment.push_back(tmp); |
1186 | 0 | else mvData[block].mvComment.push_back(tmp); |
1187 | 0 | continue; |
1188 | 0 | } |
1189 | 0 | if(in.peek()!='_') |
1190 | 0 | { |
1191 | 0 | stringstream ss; |
1192 | 0 | ss << "End of loop titles:"<<(char)in.peek(); |
1193 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1194 | 0 | break; |
1195 | 0 | } |
1196 | 0 | in>>tmp; |
1197 | | // Convert all dots to underscores to cover much of DDL2 with this DDL1 parser. |
1198 | 0 | for (string::size_type pos = tmp.find('.'); pos != string::npos; pos = tmp.find('.', ++ pos)) |
1199 | 0 | tmp.replace(pos, 1, 1, '_'); |
1200 | 0 | tit.push_back(ci_string(tmp.c_str())); |
1201 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, " , "+tmp, obDebug); |
1202 | 0 | } |
1203 | 0 | map<ci_string,vector<string> > lp; |
1204 | 0 | while(true) |
1205 | 0 | { |
1206 | 0 | std::ios::pos_type pos0=in.tellg(); |
1207 | 0 | while(!isgraph(in.peek()) && !in.eof()) in.get(lastc); |
1208 | 0 | if(in.eof()) break; |
1209 | 0 | if(in.peek()=='_') break; |
1210 | 0 | if(in.peek()=='#') |
1211 | 0 | {// Comment (in a loop ??) |
1212 | | //const std::ios::pos_type pos=in.tellg(); |
1213 | 0 | string tmp; |
1214 | 0 | getline(in,tmp); |
1215 | 0 | pos0=in.tellg(); |
1216 | 0 | if(block=="") mvComment.push_back(tmp); |
1217 | 0 | else mvData[block].mvComment.push_back(tmp); |
1218 | 0 | lastc='\r'; |
1219 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, "Comment in a loop (?):"+tmp, obDebug); |
1220 | | //in.seekg(pos); |
1221 | 0 | break; |
1222 | 0 | } |
1223 | | //in>>tmp; |
1224 | 0 | tmp=CIFReadValue(in,lastc); |
1225 | 0 | if(ci_string(tmp.c_str())=="loop_") |
1226 | 0 | {//go back and continue |
1227 | 0 | in.clear(); |
1228 | 0 | in.seekg(pos0,std::ios::beg); |
1229 | 0 | stringstream ss; |
1230 | 0 | ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg(); |
1231 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1232 | 0 | break; |
1233 | 0 | } |
1234 | 0 | if(tmp.size()>=5) |
1235 | 0 | if(ci_string(tmp.substr(0,5).c_str())=="data_") |
1236 | 0 | {//go back and continue |
1237 | 0 | in.clear(); |
1238 | 0 | in.seekg(pos0,std::ios::beg); |
1239 | 0 | stringstream ss; |
1240 | 0 | ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg(); |
1241 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1242 | 0 | break; |
1243 | 0 | } |
1244 | 0 | for(unsigned int i=0;i<tit.size();++i) |
1245 | 0 | {//Read all values |
1246 | 0 | if(i>0) tmp=CIFReadValue(in,lastc); |
1247 | 0 | lp[tit[i]].push_back(tmp); |
1248 | 0 | stringstream ss; |
1249 | 0 | ss <<" LOOP VALUE #"<<lp[tit[i]].size()<<","<<i<<" : "<<tmp; |
1250 | 0 | if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1251 | 0 | } |
1252 | 0 | } |
1253 | | // The key to the mvLoop map is the set of column titles |
1254 | 0 | set<ci_string> stit; |
1255 | 0 | for(unsigned int i=0;i<tit.size();++i) stit.insert(tit[i]); |
1256 | 0 | mvData[block].mvLoop[stit]=lp; |
1257 | 0 | continue; |
1258 | 0 | } |
1259 | | // If we get here, something went wrong ! Discard till end of line... |
1260 | | // It is OK if this is just a blank line though |
1261 | 0 | string junk; |
1262 | 0 | getline(in,junk); |
1263 | |
|
1264 | 0 | if(junk.size()>0) |
1265 | 0 | { |
1266 | 0 | stringstream errorMsg; |
1267 | 0 | errorMsg << "Warning: one line could not be interpreted while reading a CIF file:"<<endl |
1268 | 0 | << " -> line contents:" << junk; |
1269 | 0 | obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); |
1270 | 0 | } |
1271 | 0 | } |
1272 | 0 | } |
1273 | | |
1274 | | float CIFNumeric2Float(const string &s) |
1275 | 0 | { |
1276 | 0 | if((s==".") || (s=="?")) return 0.0; |
1277 | 0 | float v; |
1278 | 0 | const int n=sscanf(s.c_str(),"%f",&v); |
1279 | 0 | if(n!=1) return 0.0; |
1280 | 0 | return v; |
1281 | 0 | } |
1282 | | |
1283 | | int CIFNumeric2Int(const string &s) |
1284 | 0 | { |
1285 | 0 | if((s==".") || (s=="?")) return 0; |
1286 | 0 | int v; |
1287 | 0 | const int n=sscanf(s.c_str(),"%d",&v); |
1288 | 0 | if(n!=1) return 0; |
1289 | 0 | return v; |
1290 | 0 | } |
1291 | | |
1292 | | bool is_double(const std::string& s, double& r_double) |
1293 | 0 | { |
1294 | 0 | std::istringstream i(s); |
1295 | |
|
1296 | 0 | if (i >> r_double) |
1297 | 0 | return true; |
1298 | | |
1299 | 0 | r_double = 0.0; |
1300 | 0 | return false; |
1301 | 0 | } |
1302 | | |
1303 | | |
1304 | | //################ END CIF CLASSES###################################### |
1305 | | |
1306 | | //Make an instance of the format class |
1307 | | CIFFormat theCIFFormat; |
1308 | | |
1309 | | // Helper function for CorrectFormatCharges |
1310 | | // Is this atom an oxygen in a water molecule |
1311 | | // We know the oxygen is connected to one ion, but check for non-hydrogens |
1312 | | // Returns: true if the atom is an oxygen and connected to two hydrogens and up to one other atom |
1313 | | bool CIFisWaterOxygen(OBAtom *atom) |
1314 | 0 | { |
1315 | 0 | if (atom->GetAtomicNum() != OBElements::Oxygen) |
1316 | 0 | return false; |
1317 | | |
1318 | 0 | int nonHydrogenCount = 0; |
1319 | 0 | int hydrogenCount = 0; |
1320 | 0 | FOR_NBORS_OF_ATOM(neighbor, *atom) { |
1321 | 0 | if (neighbor->GetAtomicNum() != OBElements::Hydrogen) |
1322 | 0 | nonHydrogenCount++; |
1323 | 0 | else |
1324 | 0 | hydrogenCount++; |
1325 | 0 | } |
1326 | |
|
1327 | 0 | return (hydrogenCount == 2 && nonHydrogenCount <= 1); |
1328 | 0 | } |
1329 | | |
1330 | | // Look for lone ions, and correct their formal charges |
1331 | | void CorrectFormalCharges(OBMol *mol) |
1332 | 0 | { |
1333 | 0 | if (!mol) |
1334 | 0 | return; |
1335 | | |
1336 | | // First look for NR4, PR4 ions, |
1337 | | // or bare halides, alkali and alkaline earth metal ions |
1338 | 0 | FOR_ATOMS_OF_MOL(atom, *mol) { |
1339 | |
|
1340 | 0 | if ((atom->GetAtomicNum() == 7 || atom->GetAtomicNum() == 15) |
1341 | 0 | && atom->GetExplicitValence() == 4) { |
1342 | | // check if we should make a positive charge? |
1343 | | // i.e., 4 non-metal neighbors |
1344 | 0 | bool nonMetalNeighbors = true; |
1345 | 0 | FOR_NBORS_OF_ATOM(neighbor, &*atom) { |
1346 | 0 | switch (neighbor->GetAtomicNum()) { |
1347 | 0 | case 1: |
1348 | 0 | case 5: case 6: case 7: case 8: case 9: |
1349 | 0 | case 14: case 15: case 16: case 17: |
1350 | 0 | case 33: case 34: case 35: |
1351 | 0 | case 53: |
1352 | 0 | continue; // good non-metals |
1353 | 0 | default: |
1354 | 0 | nonMetalNeighbors = false; |
1355 | 0 | break; // stop looking |
1356 | 0 | } |
1357 | 0 | } |
1358 | 0 | if (nonMetalNeighbors) // 4 non-metals, e.g. NH4+ |
1359 | 0 | atom->SetFormalCharge(+1); |
1360 | 0 | } |
1361 | | |
1362 | | // Now look for simple atomic ions like Na, Li, F, Cl, Br... |
1363 | | // If we have an existing formal charge, keep going |
1364 | 0 | if (atom->GetFormalCharge() != 0) |
1365 | 0 | continue; |
1366 | | |
1367 | | // If we're connected to anything besides H2O, keep going |
1368 | 0 | if (atom->GetExplicitDegree() != 0) { |
1369 | 0 | int nonWaterBonds = 0; |
1370 | 0 | FOR_NBORS_OF_ATOM(neighbor, &*atom) { |
1371 | 0 | if (!CIFisWaterOxygen(&*neighbor)) { |
1372 | 0 | nonWaterBonds = 1; |
1373 | 0 | break; |
1374 | 0 | } |
1375 | 0 | } |
1376 | 0 | if (nonWaterBonds) |
1377 | 0 | continue; // look at another atom |
1378 | 0 | } |
1379 | | |
1380 | 0 | switch(atom->GetAtomicNum()) { |
1381 | 0 | case 3: case 11: case 19: case 37: case 55: case 87: |
1382 | | // Alkali ions |
1383 | 0 | atom->SetFormalCharge(+1); |
1384 | 0 | break; |
1385 | 0 | case 4: case 12: case 20: case 38: case 56: case 88: |
1386 | | // Alkaline earth ions |
1387 | 0 | atom->SetFormalCharge(+2); |
1388 | 0 | break; |
1389 | 0 | case 9: case 17: case 35: case 53: case 85: |
1390 | | // Halides |
1391 | 0 | atom->SetFormalCharge(-1); |
1392 | 0 | break; |
1393 | 0 | } |
1394 | 0 | } |
1395 | 0 | } |
1396 | | |
1397 | | ///////////////////////////////////////////////////////////////// |
1398 | | bool CIFFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) |
1399 | 0 | { |
1400 | | // If installed, use the mmCIF parser to read CIF |
1401 | 0 | OBFormat *obformat = OBFormat::FindType("mmcif"); |
1402 | 0 | if (obformat) { return obformat->ReadMolecule(pOb, pConv); } |
1403 | 0 | obErrorLog.ThrowError(__FUNCTION__, "mmCIF parser not found. Using CIF parser.", obDebug); |
1404 | |
|
1405 | 0 | OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
1406 | 0 | if (pmol == nullptr) |
1407 | 0 | return false; |
1408 | | |
1409 | 0 | CIF cif(*pConv->GetInStream(),true); |
1410 | | // Loop on all data blocks until we find one structure :@todo: handle multiple structures |
1411 | 0 | for(map<string,CIFData>::iterator pos=cif.mvData.begin();pos!=cif.mvData.end();++pos) |
1412 | 0 | if(pos->second.mvAtom.size()>0) |
1413 | 0 | { |
1414 | 0 | pmol->BeginModify(); |
1415 | 0 | if(pos->second.mvLatticePar.size()==6) |
1416 | 0 | {// We have one unit cell |
1417 | 0 | string spg=pos->second.mSpacegroupSymbolHall; |
1418 | 0 | if(spg=="") spg=pos->second.mSpacegroupHermannMauguin; |
1419 | 0 | if(spg=="") spg=pos->second.mSpacegroupNumberIT; |
1420 | 0 | if(spg=="") spg="P1"; |
1421 | 0 | OBUnitCell *pCell=new OBUnitCell; |
1422 | 0 | pCell->SetOrigin(fileformatInput); |
1423 | 0 | pCell->SetData(pos->second.mvLatticePar[0], |
1424 | 0 | pos->second.mvLatticePar[1], |
1425 | 0 | pos->second.mvLatticePar[2], |
1426 | 0 | pos->second.mvLatticePar[3]/DEG_TO_RAD, |
1427 | 0 | pos->second.mvLatticePar[4]/DEG_TO_RAD, |
1428 | 0 | pos->second.mvLatticePar[5]/DEG_TO_RAD); |
1429 | 0 | pCell->SetSpaceGroup(spg); |
1430 | 0 | pCell->SetSpaceGroup(pos->second.mSpaceGroup); |
1431 | 0 | pmol->SetData(pCell); |
1432 | 0 | } |
1433 | 0 | if(pos->second.mName!="") pmol->SetTitle(pos->second.mName); |
1434 | 0 | else |
1435 | 0 | if(pos->second.mFormula!="") pmol->SetTitle(pos->second.mFormula); |
1436 | 0 | else pmol->SetTitle(pConv->GetTitle()); |
1437 | |
|
1438 | 0 | if(pos->second.mFormula!="") pmol->SetFormula(pos->second.mFormula); |
1439 | | |
1440 | | // Keep a map linking the cif atom label to the obatom*, for bond interpretation later |
1441 | 0 | std::map<std::string,OBAtom *> vLabelOBatom; |
1442 | |
|
1443 | 0 | const unsigned int nbatoms=pos->second.mvAtom.size(); |
1444 | 0 | pmol->ReserveAtoms(nbatoms); |
1445 | 0 | for(vector<CIFData::CIFAtom>::const_iterator posat=pos->second.mvAtom.begin();posat!=pos->second.mvAtom.end();++posat) |
1446 | 0 | { |
1447 | | // Problem: posat->mSymbol is not guaranteed to actually be a symbol |
1448 | | // see http://www.iucr.org/iucr-top/cif/cifdic_html/1/cif_core.dic/Iatom_type_symbol.html |
1449 | | // Try to strip the string to have a better chance to have a valid symbol |
1450 | | // This is not guaranteed to work still, as the CIF standard allows about any string... |
1451 | 0 | string tmpSymbol=posat->mSymbol; |
1452 | 0 | unsigned int nbc=0; |
1453 | 0 | if((tmpSymbol.size()==1) && isalpha(tmpSymbol[0])) nbc=1; |
1454 | 0 | else if(tmpSymbol.size()>=2) |
1455 | 0 | { |
1456 | 0 | if(isalpha(tmpSymbol[0]) && isalpha(tmpSymbol[1])) nbc=2; |
1457 | 0 | else if(isalpha(tmpSymbol[0])) nbc=1; |
1458 | 0 | } |
1459 | |
|
1460 | 0 | OBAtom *atom = pmol->NewAtom(); |
1461 | |
|
1462 | 0 | vLabelOBatom.insert(make_pair(posat->mLabel,atom)); |
1463 | |
|
1464 | 0 | if(tmpSymbol.size()>nbc) |
1465 | 0 | {// Try to find a formal charge in the symbol |
1466 | 0 | int charge=0; |
1467 | 0 | int sign=0; |
1468 | 0 | for(unsigned int i=nbc;i<tmpSymbol.size();++i) |
1469 | 0 | {// Use first number found as formal charge |
1470 | 0 | if(isdigit(tmpSymbol[i]) && (charge==0)) charge=atoi(tmpSymbol.substr(i,1).c_str()); |
1471 | 0 | if('-'==tmpSymbol[i]) sign-=1; |
1472 | 0 | if('+'==tmpSymbol[i]) sign+=1; |
1473 | 0 | } |
1474 | 0 | if(0!=sign) // no sign, no charge |
1475 | 0 | { |
1476 | 0 | if(charge==0) charge=1; |
1477 | 0 | stringstream ss; |
1478 | 0 | ss << tmpSymbol<<" / symbol="<<tmpSymbol.substr(0,nbc)<<" charge= "<<sign*charge; |
1479 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1480 | 0 | atom->SetFormalCharge(sign*charge); |
1481 | 0 | } |
1482 | 0 | } |
1483 | |
|
1484 | 0 | if(nbc>0) tmpSymbol=tmpSymbol.substr(0,nbc); |
1485 | 0 | else tmpSymbol="C";//Something went wrong, no symbol ! Default to C ?? |
1486 | |
|
1487 | 0 | int atomicNum = OBElements::GetAtomicNum(tmpSymbol.c_str()); |
1488 | | // Test for some oxygens with subscripts |
1489 | 0 | if (atomicNum == 0 && tmpSymbol[0] == 'O') { |
1490 | 0 | atomicNum = 8; // e.g. Ob, OH, etc. |
1491 | 0 | } |
1492 | |
|
1493 | 0 | atom->SetAtomicNum(atomicNum); //set atomic number, or '0' if the atom type is not recognized |
1494 | 0 | atom->SetType(tmpSymbol); //set atomic number, or '0' if the atom type is not recognized |
1495 | 0 | atom->SetVector(posat->mCoordCart[0],posat->mCoordCart[1],posat->mCoordCart[2]); |
1496 | 0 | if(posat->mLabel.size()>0) |
1497 | 0 | { |
1498 | 0 | OBPairData *label = new OBPairData; |
1499 | 0 | label->SetAttribute("_atom_site_label"); |
1500 | 0 | label->SetValue(posat->mLabel); |
1501 | 0 | label->SetOrigin(fileformatInput); |
1502 | 0 | atom->SetData(label); |
1503 | 0 | } |
1504 | |
|
1505 | 0 | OBPairFloatingPoint *occup_data = new OBPairFloatingPoint; |
1506 | 0 | occup_data->SetAttribute("_atom_site_occupancy"); |
1507 | 0 | occup_data->SetValue(posat->mOccupancy); |
1508 | 0 | occup_data->SetOrigin(fileformatInput); |
1509 | 0 | atom->SetData(occup_data); |
1510 | |
|
1511 | 0 | if( posat->mCharge != NOCHARGE ) |
1512 | 0 | { |
1513 | 0 | OBPairFloatingPoint *charge_data = new OBPairFloatingPoint; |
1514 | 0 | charge_data->SetAttribute("input_charge"); |
1515 | 0 | charge_data->SetValue(posat->mCharge); |
1516 | 0 | charge_data->SetOrigin(fileformatInput); |
1517 | 0 | atom->SetData(charge_data); |
1518 | 0 | } |
1519 | 0 | } |
1520 | 0 | if (!pConv->IsOption("b",OBConversion::INOPTIONS)) |
1521 | 0 | pmol->ConnectTheDots(); |
1522 | 0 | if (pConv->IsOption("B",OBConversion::INOPTIONS)) |
1523 | 0 | { |
1524 | 0 | for(vector<CIFData::CIFBond>::const_iterator posbond=pos->second.mvBond.begin();posbond!=pos->second.mvBond.end();++posbond) |
1525 | 0 | {// Add bonds present in the cif and not detected by ConnectTheDots() |
1526 | 0 | std::map<std::string,OBAtom *>::iterator posat1,posat2; |
1527 | 0 | posat1=vLabelOBatom.find(posbond->mLabel1); |
1528 | 0 | posat2=vLabelOBatom.find(posbond->mLabel2); |
1529 | 0 | if(posat1!=vLabelOBatom.end() && posat2!=vLabelOBatom.end()) |
1530 | 0 | { |
1531 | 0 | stringstream ss; |
1532 | 0 | ss << " Adding cif bond ? "<<posat1->first<<"-"<<posat2->first; |
1533 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug); |
1534 | 0 | if (pmol->GetBond(posat1->second, posat2->second) == nullptr) |
1535 | 0 | { |
1536 | 0 | obErrorLog.ThrowError(__FUNCTION__, " :Bond added !", obDebug); |
1537 | 0 | OBBond * bond=pmol->NewBond(); |
1538 | 0 | bond->SetBegin(posat1->second); |
1539 | 0 | bond->SetEnd(posat2->second); |
1540 | 0 | bond->SetBondOrder(1); |
1541 | 0 | bond->SetLength(double(posbond->mDistance)); |
1542 | 0 | } |
1543 | 0 | else obErrorLog.ThrowError(__FUNCTION__, " :Bond already present.. ", obDebug); |
1544 | 0 | } |
1545 | 0 | } |
1546 | 0 | } |
1547 | 0 | if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) |
1548 | 0 | pmol->PerceiveBondOrders(); |
1549 | 0 | pmol->EndModify(); |
1550 | 0 | pmol->SetAutomaticFormalCharge(false); // we should have set formal charges |
1551 | 0 | CorrectFormalCharges(pmol); // Look for lone Na -> Na+, etc. |
1552 | 0 | return true; |
1553 | 0 | } |
1554 | | |
1555 | | // If we got here, no structure was found |
1556 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Problems reading a CIF file: no structure found !" , obWarning); |
1557 | 0 | return(false); |
1558 | 0 | } |
1559 | | |
1560 | | //////////////////////////////////////////////////////////////// |
1561 | | |
1562 | | bool CIFFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) |
1563 | 0 | { |
1564 | 0 | OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
1565 | 0 | if (pmol == nullptr) |
1566 | 0 | return false; |
1567 | 0 | ostream &ofs = *pConv->GetOutStream(); |
1568 | | |
1569 | | // default is false - leave coordinates as they are |
1570 | 0 | bool wrapFractional = pConv->IsOption("w", OBConversion::OUTOPTIONS); |
1571 | |
|
1572 | 0 | char buffer[BUFF_SIZE]; |
1573 | |
|
1574 | 0 | ofs <<"# CIF file generated by openbabel "<<BABEL_VERSION<<", see https://openbabel.org"<<endl; |
1575 | |
|
1576 | 0 | ofs << "data_I"<<endl; |
1577 | | // Use pmol->GetTitle() as chemical name, though it will probably be the file name |
1578 | 0 | ofs <<"_chemical_name_common '"<<pmol->GetTitle()<<"'"<<endl; |
1579 | | // Print Unit cell if we have it |
1580 | 0 | OBUnitCell *pUC = nullptr; |
1581 | 0 | if (pmol->HasData(OBGenericDataType::UnitCell)) |
1582 | 0 | { |
1583 | 0 | pUC = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell); |
1584 | 0 | ofs << "_cell_length_a " << pUC->GetA() << endl |
1585 | 0 | << "_cell_length_b " << pUC->GetB() << endl |
1586 | 0 | << "_cell_length_c " << pUC->GetC() << endl |
1587 | 0 | << "_cell_angle_alpha " << pUC->GetAlpha() << endl |
1588 | 0 | << "_cell_angle_beta " << pUC->GetBeta() << endl |
1589 | 0 | << "_cell_angle_gamma " << pUC->GetGamma() << endl; |
1590 | | // Save the space group if known |
1591 | 0 | const SpaceGroup* pSG = pUC->GetSpaceGroup(); |
1592 | 0 | if (pSG != nullptr) |
1593 | 0 | { |
1594 | | // Do we have an extended HM symbol, with origin choice as ":1" or ":2" ? If so, remove it. |
1595 | 0 | size_t n=pSG->GetHMName().find(":"); |
1596 | 0 | if(n==string::npos) |
1597 | 0 | ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl; |
1598 | 0 | else |
1599 | 0 | ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName().substr(0,n) << "'" << endl; |
1600 | 0 | ofs << "_space_group_name_Hall '" << pSG->GetHallName() << "'" << endl; |
1601 | 0 | ofs << "loop_" <<endl |
1602 | 0 | << " _symmetry_equiv_pos_as_xyz" << endl; |
1603 | 0 | transform3dIterator ti; |
1604 | 0 | const transform3d *t = pSG->BeginTransform(ti); |
1605 | 0 | while(t) |
1606 | 0 | { |
1607 | 0 | ofs << " " << t->DescribeAsString() << endl; |
1608 | 0 | t = pSG->NextTransform(ti); |
1609 | 0 | } |
1610 | 0 | } |
1611 | 0 | } |
1612 | |
|
1613 | 0 | ofs << "loop_" << endl |
1614 | 0 | << " _atom_site_label" << endl |
1615 | 0 | << " _atom_site_type_symbol" << endl |
1616 | 0 | << " _atom_site_fract_x" << endl |
1617 | 0 | << " _atom_site_fract_y" << endl |
1618 | 0 | << " _atom_site_fract_z" << endl |
1619 | 0 | << " _atom_site_occupancy" << endl; |
1620 | 0 | std::map<OBAtom*,std::string> label_table; |
1621 | 0 | unsigned int i = 0; |
1622 | 0 | FOR_ATOMS_OF_MOL(atom, *pmol) |
1623 | 0 | { |
1624 | 0 | double X, Y, Z; //atom coordinates |
1625 | 0 | vector3 v = atom->GetVector(); |
1626 | 0 | if (pUC != nullptr) { |
1627 | 0 | v = pUC->CartesianToFractional(v); |
1628 | 0 | if (wrapFractional) |
1629 | 0 | v = pUC->WrapFractionalCoordinate(v); |
1630 | 0 | } |
1631 | 0 | X = v.x(); |
1632 | 0 | Y = v.y(); |
1633 | 0 | Z = v.z(); |
1634 | 0 | string label_str; |
1635 | 0 | double occup; |
1636 | |
|
1637 | 0 | if (atom->HasData("_atom_site_occupancy")) |
1638 | 0 | { |
1639 | 0 | occup = (dynamic_cast<OBPairFloatingPoint *> (atom->GetData("_atom_site_occupancy")))->GetGenericValue(); |
1640 | 0 | } |
1641 | 0 | else occup = 1.0; |
1642 | |
|
1643 | 0 | if (atom->HasData("_atom_site_label")) |
1644 | 0 | { |
1645 | 0 | OBPairData *label = dynamic_cast<OBPairData *> (atom->GetData("_atom_site_label")); |
1646 | 0 | label_str = label->GetValue().c_str(); |
1647 | 0 | } |
1648 | 0 | else |
1649 | 0 | { |
1650 | 0 | label_str = OBElements::GetSymbol(atom->GetAtomicNum()) + to_string(i); |
1651 | 0 | i++; |
1652 | 0 | } |
1653 | | // Save the existing or generated label for optional bonding |
1654 | 0 | label_table[&*atom] = label_str; |
1655 | |
|
1656 | 0 | snprintf(buffer, BUFF_SIZE, " %-8s %-5s %10.5f %10.5f %10.5f %8.3f\n", |
1657 | 0 | label_str.c_str(), OBElements::GetSymbol(atom->GetAtomicNum()), |
1658 | 0 | X, Y, Z, occup); |
1659 | |
|
1660 | 0 | ofs << buffer; |
1661 | 0 | } |
1662 | |
|
1663 | 0 | if (pConv->IsOption("g", OBConversion::OUTOPTIONS)) |
1664 | 0 | { |
1665 | 0 | if (pmol->NumBonds() > 0) { |
1666 | 0 | obErrorLog.ThrowError(__FUNCTION__, "Writing bonds to output CIF", obDebug); |
1667 | 0 | ofs << "loop_" << endl |
1668 | 0 | << " _geom_bond_atom_site_label_1" << endl |
1669 | 0 | << " _geom_bond_atom_site_label_2" << endl |
1670 | 0 | << " _geom_bond_distance" << endl |
1671 | 0 | << " _geom_bond_site_symmetry_2" << endl |
1672 | 0 | << " _ccdc_geom_bond_type" << endl; |
1673 | 0 | } else { |
1674 | 0 | obErrorLog.ThrowError(__FUNCTION__, "No bonds defined in molecule for CIF export", obDebug); |
1675 | 0 | } |
1676 | |
|
1677 | 0 | FOR_BONDS_OF_MOL(bond, *pmol) |
1678 | 0 | { |
1679 | 0 | std::string label_1 = label_table[bond->GetBeginAtom()]; |
1680 | 0 | std::string label_2 = label_table[bond->GetEndAtom()]; |
1681 | |
|
1682 | 0 | std::string sym_key; |
1683 | 0 | int symmetry_num = 555; |
1684 | 0 | if (bond->IsPeriodic()) { |
1685 | 0 | OBUnitCell *box = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell); |
1686 | 0 | vector3 begin, end_orig, end_expected, uc_direction; |
1687 | | // Use consistent coordinates with the X Y Z written in the _atom_site_* loop earlier |
1688 | 0 | begin = box->CartesianToFractional(bond->GetBeginAtom()->GetVector()); |
1689 | 0 | begin = box->WrapFractionalCoordinate(begin); |
1690 | 0 | end_orig = box->CartesianToFractional(bond->GetEndAtom()->GetVector()); |
1691 | 0 | end_orig = box->WrapFractionalCoordinate(end_orig); |
1692 | 0 | end_expected = box->UnwrapFractionalNear(end_orig, begin); |
1693 | | |
1694 | | // To get the signs right, consider the example {0, 0.7}. We want -1 as the periodic direction. |
1695 | | // TODO: Think about edge cases, particularly atoms on the border of the unit cell. |
1696 | 0 | uc_direction = end_expected - end_orig; |
1697 | |
|
1698 | 0 | std::vector<int> uc; |
1699 | 0 | for (int i = 0; i < 3; ++i) { |
1700 | 0 | double raw_cell = uc_direction[i]; |
1701 | 0 | uc.push_back(static_cast<int>(lrint(raw_cell))); |
1702 | 0 | } |
1703 | 0 | symmetry_num += 100*uc[0] + 10*uc[1] + 1*uc[2]; // Unit cell directionality vs. 555, per CIF spec |
1704 | 0 | } |
1705 | 0 | if (symmetry_num == 555) |
1706 | 0 | { |
1707 | 0 | sym_key = "."; |
1708 | 0 | } |
1709 | 0 | else |
1710 | 0 | { |
1711 | 0 | stringstream ss; |
1712 | 0 | ss << "1_" << symmetry_num; |
1713 | 0 | sym_key = ss.str(); |
1714 | 0 | } |
1715 | |
|
1716 | 0 | std::string bond_type; |
1717 | 0 | int bond_order = bond->GetBondOrder(); |
1718 | 0 | switch (bond_order) |
1719 | 0 | { |
1720 | 0 | case 1: |
1721 | 0 | bond_type = "S"; |
1722 | 0 | break; |
1723 | 0 | case 2: |
1724 | 0 | bond_type = "D"; |
1725 | 0 | break; |
1726 | 0 | case 3: |
1727 | 0 | bond_type = "T"; |
1728 | 0 | break; |
1729 | 0 | case 5: // FIXME: this will be different in upstream code |
1730 | 0 | bond_type = "A"; // aromatic, per OBBond::_order |
1731 | 0 | break; |
1732 | 0 | default: |
1733 | 0 | stringstream ss; |
1734 | 0 | ss << "Unexpected bond order " << bond_order |
1735 | 0 | << " for bond" << label_1 << "-" << label_2 << std::endl |
1736 | 0 | << "Defaulting to single bond."; |
1737 | 0 | obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); |
1738 | 0 | bond_type = "S"; |
1739 | 0 | } |
1740 | | |
1741 | | |
1742 | | //printf("%p: %s\n", &*atom, label_table[&*atom].c_str()); |
1743 | 0 | snprintf(buffer, BUFF_SIZE, " %-7s%-7s%10.5f%7s%4s\n", |
1744 | 0 | label_1.c_str(), label_2.c_str(), |
1745 | 0 | bond->GetLength(), sym_key.c_str(), |
1746 | 0 | bond_type.c_str()); |
1747 | |
|
1748 | 0 | ofs << buffer; |
1749 | 0 | } |
1750 | 0 | } |
1751 | 0 | return true; |
1752 | 0 | }//WriteMolecule |
1753 | | } //namespace OpenBabel |