/src/openbabel/include/openbabel/obconversion.h
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | obconversion.h - Handle file conversions. Declaration of OBConversion |
3 | | |
4 | | Copyright (C) 2004-2009 by Chris Morley |
5 | | |
6 | | This file is part of the Open Babel project. |
7 | | For more information, see <http://openbabel.org/> |
8 | | |
9 | | This program is free software; you can redistribute it and/or modify |
10 | | it under the terms of the GNU General Public License as published by |
11 | | the Free Software Foundation version 2 of the License. |
12 | | |
13 | | This program is distributed in the hope that it will be useful, |
14 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | GNU General Public License for more details. |
17 | | ***********************************************************************/ |
18 | | |
19 | | #ifndef OB_CONV_H |
20 | | #define OB_CONV_H |
21 | | |
22 | | #include <openbabel/babelconfig.h> |
23 | | |
24 | | #include <iostream> |
25 | | #include <fstream> |
26 | | #include <sstream> |
27 | | #include <cassert> |
28 | | |
29 | | #include <string> |
30 | | #include <vector> |
31 | | #include <map> |
32 | | #ifdef HAVE_STRINGS_H |
33 | | #include <strings.h> |
34 | | #endif |
35 | | |
36 | | #include <openbabel/oberror.h> |
37 | | #include <openbabel/format.h> |
38 | | #include <openbabel/lineend.h> |
39 | | |
40 | | // OBCONV and OBDLL are used in DLL builds (OBCONV decorates the OBConversion class below). |
41 | | // If they have not been set in babelconfig.h, define them as nothing. |
42 | | #ifndef OBCONV |
43 | | #define OBCONV |
44 | | #endif |
45 | | #ifndef OBDLL |
46 | | #define OBDLL |
47 | | #endif |
48 | | |
49 | | namespace OpenBabel { |
50 | | |
51 | | // Needed to preserve deprecated API: Formatpos is the iterator type taken by GetNextFormat() |
52 | | typedef OBPlugin::PluginIterator Formatpos; |
53 | | |
54 | | OBERROR extern OBMessageHandler obErrorLog; |
55 | | |
56 | | //************************************************* |
57 | | /// @brief Class to convert from one format to another. |
58 | | // Class introduction in obconversion.cpp |
59 | | class OBCONV OBConversion |
60 | | { |
61 | | /// @nosubgrouping |
62 | | public: |
63 | | /// @name Construction |
64 | | //@{ |
65 | | OBConversion(std::istream* is=nullptr, std::ostream* os=nullptr); |
66 | | OBConversion(std::string inFilename, std::string outFilename=""); |
67 | | /// @brief Copy constructor. Stream *ownership* is not copied. Source remains responsible for the memory. |
68 | | OBConversion(const OBConversion& o); |
69 | | /// @brief Assignment. Stream *ownership* is not copied. Source remains responsible for the memory. |
70 | | OBConversion& operator=(const OBConversion& rhs); |
71 | | |
72 | | virtual ~OBConversion(); |
73 | | //@} |
74 | | /// @name Collection of formats |
75 | | //@{ |
76 | | /// @brief Called once by each format class |
77 | | static int RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME = nullptr); |
78 | | /// @brief Searches registered formats |
79 | | static OBFormat* FindFormat(const char* ID); |
80 | | /// @brief Searches registered formats |
81 | | /// \since version 2.3 |
82 | | static OBFormat* FindFormat(const std::string ID); |
83 | | /// @brief Searches registered formats for an ID the same as the file extension |
84 | | static OBFormat* FormatFromExt(const char* filename); |
85 | | static OBFormat* FormatFromExt(const char* filename, bool& isgzip); |
86 | | /// @brief Searches registered formats for an ID the same as the file extension |
87 | | /// \since version 2.3 |
88 | | static OBFormat* FormatFromExt(const std::string filename); |
89 | | static OBFormat* FormatFromExt(const std::string filename, bool& isgzip); |
90 | | /// @brief Searches registered formats for a MIME the same as the chemical MIME type passed |
91 | | static OBFormat* FormatFromMIME(const char* MIME); |
92 | | |
93 | | ///Deprecated! Repeatedly called to recover available formats (hidden from SWIG by the guard below) |
94 | | #ifndef SWIG |
95 | | static bool GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat); |
96 | | #endif |
97 | | //@} |
98 | | |
99 | | /// @name Information |
100 | | //@{ |
101 | | static const char* Description(); //generic conversion options |
102 | | //@} |
103 | | |
104 | | /// These return a filtered stream for reading/writing (possible filters include compression, decompression, and newline transformation) |
105 | | /// @name Parameter get and set |
106 | | //@{ |
107 | 9.30k | std::istream* GetInStream() const {return pInput;}; |
108 | 0 | std::ostream* GetOutStream() const {return pOutput;}; |
109 | | |
110 | | /// @brief Set input stream. If takeOwnership is true, will deallocate when done. |
111 | | /// If isGzipped is true, will treat as a gzipped stream regardless of option settings, |
112 | | /// if false, then will be treated as gzipped stream only if z/zin is set. NOTE(review): this signature has no isGzipped parameter; the description looks stale - confirm. |
113 | | void SetInStream(std::istream* pIn, bool takeOwnership=false); |
114 | | void SetOutStream(std::ostream* pOut, bool takeOwnership=false); |
115 | | |
116 | | /// Sets the formats from their ids, e.g. CML |
117 | | bool SetInAndOutFormats(const char* inID, const char* outID, bool ingzip=false, bool outgzip=false); |
118 | | bool SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut, bool ingzip=false, bool outgzip=false); |
119 | | /// Sets the input format from an id e.g. CML |
120 | | bool SetInFormat(const char* inID, bool isgzip=false); |
121 | | bool SetInFormat(OBFormat* pIn, bool isgzip=false); |
122 | | /// Sets the output format from an id e.g. CML |
123 | | bool SetOutFormat(const char* outID, bool isgzip=false); |
124 | | bool SetOutFormat(OBFormat* pOut, bool isgzip=false); |
125 | | |
126 | 0 | OBFormat* GetInFormat() const{return pInFormat;}; |
127 | 0 | OBFormat* GetOutFormat() const{return pOutFormat;}; |
128 | 0 | bool GetInGzipped() const{return inFormatGzip;}; |
129 | 0 | bool GetOutGzipped() const{return outFormatGzip;}; |
130 | 0 | std::string GetInFilename() const{return InFilename;}; |
131 | 0 | std::string GetOutFilename() const{return OutFilename;}; |
132 | | |
133 | | ///Get the position in the input stream of the object being read. NOTE(review): returns wInpos, whose member comment says "being written" - confirm which wording is right. |
134 | 0 | std::streampos GetInPos()const{return wInpos;}; |
135 | | |
136 | | ///Get the length in the input stream of the object being read. NOTE(review): returns wInlen, whose member comment says "being written" - confirm which wording is right. |
137 | 0 | size_t GetInLen()const{return wInlen;}; |
138 | | |
139 | | /// \return a default title which is the filename |
140 | | const char* GetTitle() const; |
141 | | |
142 | | ///@brief Extension method: deleted in ~OBConversion() |
143 | 0 | OBConversion* GetAuxConv() const {return pAuxConv;}; |
144 | 0 | void SetAuxConv(OBConversion* pConv) {pAuxConv=pConv;}; |
145 | | //@} |
146 | | /** @name Option handling |
147 | | Three types of Option provide information and control instructions to the |
148 | | conversion process, INOPTIONS, OUTOPTIONS, GENOPTIONS, and are stored in each |
149 | | OBConversion object in separate maps. Each option has an id and an optional |
150 | | text string. They are set individually by AddOption() or (rarely) collectively |
151 | | in SetOptions(). Options cannot be altered but can be replaced with AddOption() |
152 | | and deleted with RemoveOption(), which, however, should be used in an op derived |
153 | | from OBOp (because of iterator invalidation). |
154 | | |
155 | | If the "Convert" interface is used, the GENOPTIONS are acted upon in the |
156 | | OBBase::DoTransformations() functions (currently only OBMol has one). This |
157 | | happens after the object has been input but before it has been output. |
158 | | All the options are available to input and output formats, etc. via the IsOption() |
159 | | function, and the interpretation of any text string needs to be done subsequently. |
160 | | |
161 | | In the commandline interface, options with single character ids are indicated |
162 | | like -s, and those with multiple character ids like --gen3D. An option may have |
163 | | one or more parameters which appear, space separated, in the option's text string. |
164 | | With babel, unless the option is at the end of the command, it is necessary for |
165 | | the number of its parameters to be exactly that specified in RegisterOptionParam(). |
166 | | The default is 0, but if it is more, and babel is likely to be used, this function |
167 | | should be called in the constructor of a format or op. |
168 | | With obabel (or the GUI), it is not necessary to call RegisterOptionParam(). |
169 | | |
170 | | New GENOPTIONS can be defined (as plugins) using the class OBOp. |
171 | | |
172 | | It is customary for a format or op to document any INOPTIONS or OUTOPTIONS it |
173 | | uses in its Description() function. As well as providing documentation during |
174 | | use, this is also parsed by the GUI to construct its checkboxes,etc., so it is |
175 | | advisable to give new Descriptions the same form as existing ones. |
176 | | |
177 | | Some conversion options, such as -f, -l, -m, are unlikely to be used in |
178 | | programming, but are listed in OBConversion::Description(). The built-in |
179 | | GENOPTIONS for OBMol objects are listed in OBMol::ClassDescription() which |
180 | | is in transform.cpp and also in this documentation under AddOption(). |
181 | | */ |
182 | | //@{ |
183 | | ///@brief Three types of options set on the command line by -a? , -x? , or -? |
184 | | enum Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS, ALL }; |
185 | | |
186 | | ///@brief Determine whether an option is set. \return NULL if the option is not set, and a pointer to the associated text if it is |
187 | | const char* IsOption(const char* opt,Option_type opttyp=OUTOPTIONS); |
188 | | |
189 | | ///@brief Access the map with option name as key and any associated text as value. opttyp indexes the 3-element OptionsArray directly, so it must be INOPTIONS, OUTOPTIONS or GENOPTIONS; ALL is out of range here. |
190 | | const std::map<std::string,std::string>* GetOptions(Option_type opttyp) |
191 | 0 | { return &OptionsArray[opttyp];}; |
192 | | |
193 | | ///@brief Set an option of specified type, with optional text |
194 | | void AddOption(const char* opt, Option_type opttyp=OUTOPTIONS, const char* txt=nullptr); |
195 | | |
196 | | bool RemoveOption(const char* opt, Option_type optype); |
197 | | |
198 | | ///@brief Set several single character options of specified type from string like ab"btext"c"ctext" |
199 | | void SetOptions(const char* options, Option_type opttyp); |
200 | | |
201 | | ///@brief For example -h takes 0 parameters; -f takes 1. Call in a format constructor. |
202 | | static void RegisterOptionParam(std::string name, OBFormat* pFormat, |
203 | | int numberParams=0, Option_type typ=OUTOPTIONS); |
204 | | |
205 | | /// \return the number of parameters registered for the option, or 0 if not found |
206 | | static int GetOptionParams(std::string name, Option_type typ); |
207 | | //@} |
208 | | |
209 | | ///@brief Copies the options (by default of all types) from one OBConversion Object to another. |
210 | | void CopyOptions(OBConversion* pSourceConv, Option_type typ=ALL); |
211 | | |
212 | | /// @name Supported file format |
213 | | //@{ |
214 | | /// @brief Set and return the list of supported input formats |
215 | | std::vector<std::string> GetSupportedInputFormat(); |
216 | | /// @brief Set and return the list of supported output formats |
217 | | std::vector<std::string> GetSupportedOutputFormat(); |
218 | | //@} |
219 | | |
220 | | /// @name Conversion |
221 | | //@{ |
222 | | /// @brief Conversion for single input and output stream |
223 | | int Convert(std::istream* is, std::ostream* os); |
224 | | |
225 | | /// @brief Conversion with existing streams |
226 | | int Convert(); |
227 | | |
228 | | /// @brief Conversion with multiple input/output files: |
229 | | /// makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion. |
230 | | int FullConvert(std::vector<std::string>& FileList, |
231 | | std::string& OutputFileName, std::vector<std::string>& OutputFileList); |
232 | | //@} |
233 | | |
234 | | /// @name Conversion loop control |
235 | | //@{ |
236 | | int AddChemObject(OBBase* pOb);///< @brief Adds to internal array during input |
237 | | OBBase* GetChemObject(); ///< @brief Retrieve from internal array during output |
238 | | bool IsLast();///< @brief True if no more objects to be output |
239 | | bool IsFirstInput();///< @brief True if the first input object is being processed |
240 | | void SetFirstInput(bool b=true);///< @brief Set whether or not this is the first input |
241 | | int GetOutputIndex() const ;///< @brief Retrieves number of ChemObjects that have been actually output |
242 | | void SetOutputIndex(int indx);///< @brief Sets output index (maybe to control whether seen as first object) |
243 | | void SetMoreFilesToCome();///<@brief Used with multiple input files. Off by default. |
244 | | void SetOneObjectOnly(bool b=true);///< @brief Used with multiple input files. Off by default. |
245 | 0 | void SetLast(bool b){SetOneObjectOnly(b);}///< @brief Synonym for SetOneObjectOnly() |
246 | 0 | bool IsLastFile(){ return !MoreFilesToCome;}///< @brief True if no more files to be read |
247 | | /// @brief Number of objects read and processed |
248 | | /// Incremented after options are processed, so 0 for first object. Returns -1 if Convert interface not used. |
249 | 0 | int GetCount()const { return Count; } |
250 | | //@} |
251 | | /// @name Convenience functions |
252 | | //@{ |
253 | | ///The default format is set in a single OBFormat class (generally it is OBMol) |
254 | 0 | static OBFormat* GetDefaultFormat(){return OBFormat::FindType(nullptr);}; |
255 | | |
256 | | /// @brief Outputs an object of a class derived from OBBase. |
257 | | |
258 | | /// Part of "API" interface. |
259 | | /// The output stream can be specified and the change is retained in the OBConversion instance |
260 | | bool Write(OBBase* pOb, std::ostream* pout=nullptr); |
261 | | |
262 | | /// @brief Outputs an object of a class derived from OBBase as a string |
263 | | |
264 | | /// Part of "API" interface. |
265 | | /// The output stream is temporarily changed to the string and then restored |
266 | | /// This method is primarily intended for scripting languages without "stream" classes |
267 | | /// The optional "trimWhitespace" parameter allows trailing whitespace to be removed |
268 | | /// (e.g., in a SMILES string or InChI, etc.) |
269 | | std::string WriteString(OBBase* pOb, bool trimWhitespace = false); |
270 | | |
271 | | /// @brief Outputs an object of a class derived from OBBase as a file (with the supplied path) |
272 | | |
273 | | /// Part of "API" interface. |
274 | | /// The output stream is changed to the supplied file and the change is retained in the |
275 | | /// OBConversion instance. |
276 | | /// This method is primarily intended for scripting languages without "stream" classes |
277 | | bool WriteFile(OBBase* pOb, std::string filePath); |
278 | | |
279 | | /// @brief Manually closes and deletes the output stream |
280 | | /// The file is closed anyway when in the OBConversion destructor or when WriteFile |
281 | | /// is called again. |
282 | | /// \since version 2.1 |
283 | | void CloseOutFile(); |
284 | | |
285 | | /// @brief Reads an object of a class derived from OBBase into pOb. |
286 | | |
287 | | /// Part of "API" interface. |
288 | | /// The input stream can be specified and the change is retained in the OBConversion instance |
289 | | /// \return false and pOb=NULL on error |
290 | | bool Read(OBBase* pOb, std::istream* pin=nullptr); |
291 | | |
292 | | /// Part of "API" interface. |
293 | | /// The input stream can be specified and the change is retained in the OBConversion instance |
294 | | /// \return NULL on error |
295 | | // OBBase* ReadObject(std::istream* pin=NULL); |
296 | | |
297 | | /// @brief Reads an object of a class derived from OBBase into pOb from the supplied string |
298 | | |
299 | | /// Part of "API" interface. |
300 | | /// \return false and pOb=NULL on error |
301 | | /// This method is primarily intended for scripting languages without "stream" classes |
302 | | /// Any existing input stream will be replaced by stringstream. |
303 | | bool ReadString(OBBase* pOb, std::string input); |
304 | | |
305 | | /// @brief Reads an object of a class derived from OBBase into pOb from the file specified |
306 | | |
307 | | /// Part of "API" interface. |
308 | | /// The input stream is changed to the supplied file and the change is retained in the |
309 | | /// OBConversion instance. For multi-molecule files, the remaining molecules |
310 | | /// can be read by repeatedly calling the Read() method. |
311 | | /// \return false and pOb=NULL on error |
312 | | /// This method is primarily intended for scripting languages without "stream" classes |
313 | | bool ReadFile(OBBase* pOb, std::string filePath); |
314 | | |
315 | | /// Part of the "Convert" interface. |
316 | | /// Open the files and update the streams in the OBConversion object. |
317 | | /// This method is primarily intended for scripting languages without "stream" classes |
318 | | /// and will usually be followed by a call to Convert(). |
319 | | /// Will set format from file extension if format has not already been set. |
320 | | /// Files will be opened even if format cannot be determined, but not if file path is empty. |
321 | | /// \return false if unsuccessful. |
322 | | bool OpenInAndOutFiles(std::string infilepath, std::string outfilepath); |
323 | | |
324 | | /// @brief Sends a message like "2 molecules converted" to clog |
325 | | /// The type of object is taken from the TargetClassDescription |
326 | | /// of the specified class (or the output format if not specified) and |
327 | | /// is appropriately singular or plural. |
328 | | void ReportNumberConverted(int count, OBFormat* pFormat=nullptr); |
329 | | |
330 | | /// \return the number of objects in the inputstream, |
331 | | /// or -1 if error or if SkipObjects for the input format is not implemented |
332 | | /// Adjusts for the value of -f and -l options (first and last objects). |
333 | | int NumInputObjects(); |
334 | | |
335 | | |
336 | | protected: |
337 | | ///Replaces * in BaseName by InFile without extension and path |
338 | | static std::string BatchFileName(std::string& BaseName, std::string& InFile); |
339 | | ///Replaces * in BaseName by Count |
340 | | static std::string IncrementedFileName(std::string& BaseName, const int Count); |
341 | | ///Checks for misunderstandings when using the -m option |
342 | | static bool CheckForUnintendedBatch(const std::string& infile, const std::string& outfile); |
343 | | |
344 | | void ClearInStreams(); |
345 | | //@} |
346 | | |
347 | | protected: |
348 | | |
349 | | //helper class for saving stream state |
350 | | struct StreamState |
351 | | { |
352 | | std::ios *pStream; //active stream |
353 | | std::vector<std::ios *> ownedStreams; //streams we own the memory to |
354 | | |
355 | 0 | StreamState(): pStream(nullptr) {} |
356 | | ~StreamState() |
357 | 0 | { |
358 | 0 | assert(ownedStreams.size() == 0); //should be popped |
359 | 0 | } |
360 | | |
361 | | void pushInput(OBConversion& conv); |
362 | | void popInput(OBConversion& conv); |
363 | | |
364 | | void pushOutput(OBConversion& conv); |
365 | | void popOutput(OBConversion& conv); |
366 | | |
367 | 0 | bool isSet() const { return pStream != nullptr; } |
368 | | }; |
369 | | |
370 | | bool SetStartAndEnd(); |
371 | | // static FMapType& FormatsMap();///<contains ID and pointer to all OBFormat classes |
372 | | // static FMapType& FormatsMIMEMap();///<contains MIME and pointer to all OBFormat classes |
373 | | typedef std::map<std::string,int> OPAMapType; |
374 | | static OPAMapType& OptionParamArray(Option_type typ); |
375 | | bool OpenAndSetFormat(bool SetFormat, std::ifstream* is, std::stringstream* ss=nullptr); |
376 | | |
377 | | std::string InFilename, OutFilename; //OutFileName added v2.4.0 |
378 | | |
379 | | typedef FilteringInputStream< LineEndingExtractor > LEInStream; |
380 | | |
381 | | std::istream *pInput; //input stream, may be filtered |
382 | | std::vector<std::istream *> ownedInStreams; //streams we own the memory to |
383 | | |
384 | | std::ostream *pOutput; //output stream, may have filters applied |
385 | | std::vector<std::ostream *> ownedOutStreams; //streams we own the memory to |
386 | | |
387 | | |
388 | | static OBFormat* pDefaultFormat; |
389 | | OBFormat* pInFormat; |
390 | | OBFormat* pOutFormat; |
391 | | |
392 | | std::map<std::string,std::string> OptionsArray[3]; |
393 | | |
394 | | int Index; |
395 | | unsigned int StartNumber; |
396 | | unsigned int EndNumber; |
397 | | int Count; |
398 | | bool m_IsFirstInput; |
399 | | bool m_IsLast; |
400 | | bool MoreFilesToCome; |
401 | | bool OneObjectOnly; |
402 | | bool ReadyToInput; |
403 | | bool SkippedMolecules; ///< skip molecules using -f and -l |
404 | | |
405 | | //unlike the z and zin options, these are not sticky - setting formats will reset them |
406 | | bool inFormatGzip; |
407 | | bool outFormatGzip; |
408 | | |
409 | | OBBase* pOb1; |
410 | | std::streampos wInpos; ///<position in the input stream of the object being written |
411 | | std::streampos rInpos; ///<position in the input stream of the object being read |
412 | | size_t wInlen; ///<length in the input stream of the object being written |
413 | | size_t rInlen; ///<length in the input stream of the object being read |
414 | | |
415 | | OBConversion* pAuxConv;///<Way to extend OBConversion |
416 | | |
417 | | std::vector<std::string> SupportedInputFormat; ///< list of supported input format |
418 | | std::vector<std::string> SupportedOutputFormat; ///< list of supported output format |
419 | | |
420 | | }; |
421 | | |
422 | | } //namespace OpenBabel |
423 | | #endif //OB_CONV_H |
424 | | |
425 | | //! \file |
426 | | //! \brief Handle file conversions. Declaration of OBConversion. |
427 | | |
428 | | |