Coverage Report

Created: 2025-07-13 06:40

/src/openbabel/include/openbabel/obconversion.h
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
obconversion.h - Handle file conversions. Declaration of OBFormat, OBConversion
3
4
Copyright (C) 2004-2009 by Chris Morley
5
6
This file is part of the Open Babel project.
7
For more information, see <http://openbabel.org/>
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
***********************************************************************/
18
19
#ifndef OB_CONV_H
20
#define OB_CONV_H
21
22
#include <openbabel/babelconfig.h>
23
24
#include <iostream>
25
#include <fstream>
26
#include <sstream>
27
#include <cassert>
28
29
#include <string>
30
#include <vector>
31
#include <map>
32
#ifdef HAVE_STRINGS_H
33
#include <strings.h>
34
#endif
35
36
#include <openbabel/oberror.h>
37
#include <openbabel/format.h>
38
#include <openbabel/lineend.h>
39
40
// These macros are used in DLL builds. If they have not
41
// been set in babelconfig.h, define them as nothing.
42
#ifndef OBCONV
43
  #define OBCONV
44
#endif
45
#ifndef OBDLL
46
  #define OBDLL
47
#endif
48
49
namespace OpenBabel {
50
51
  // Needed to preserve deprecated API
52
  typedef OBPlugin::PluginIterator Formatpos;
53
54
  OBERROR extern  OBMessageHandler obErrorLog;
55
56
  //*************************************************
57
  /// @brief Class to convert from one format to another.
58
  // Class introduction in obconversion.cpp
59
  class OBCONV OBConversion
60
    {
61
      /// @nosubgrouping
62
    public:
63
      /// @name Construction
64
      //@{
65
      OBConversion(std::istream* is=nullptr, std::ostream* os=nullptr);
66
      OBConversion(std::string inFilename, std::string outFilename="");
67
      /// @brief Copy constructor.  Stream *ownership* is not copied. Source remains responsible for the memory.
68
      OBConversion(const OBConversion& o);
69
      /// @brief Assignment.  Stream *ownership* is not copied.  Source remains responsible for the memory.
70
      OBConversion& operator=(const OBConversion& rhs);
71
72
      virtual     ~OBConversion();
73
      //@}
74
      /// @name Collection of formats
75
      //@{
76
      /// @brief Called once by each format class
77
      static int        RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME = nullptr);
78
      /// @brief Searches registered formats
79
      static OBFormat*  FindFormat(const char* ID);
80
      /// @brief Searches registered formats
81
      /// \since version 2.3
82
      static OBFormat*  FindFormat(const std::string ID);
83
      /// @brief Searches registered formats for an ID the same as the file extension
84
      static OBFormat*  FormatFromExt(const char* filename);
85
      static OBFormat*  FormatFromExt(const char* filename, bool& isgzip);
86
      /// @brief Searches registered formats for an ID the same as the file extension
87
      /// \since version 2.3
88
      static OBFormat*  FormatFromExt(const std::string filename);
89
      static OBFormat*  FormatFromExt(const std::string filename, bool& isgzip);
90
      /// @brief Searches registered formats for a MIME the same as the chemical MIME type passed
91
      static OBFormat*        FormatFromMIME(const char* MIME);
92
93
      /// Deprecated! Repeatedly called to recover available formats.
94
#ifndef SWIG
95
      static bool         GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat);
96
#endif
97
      //@}
98
99
      /// @name Information
100
      //@{
101
      static const char* Description(); //generic conversion options
102
      //@}
103
104
      /// These return a filtered stream for reading/writing (possible filters include compression, decompression, and newline transformation)
105
      /// @name Parameter get and set
106
      //@{
107
9.30k
      /// \return the current input stream pointer (pInput); this stream may be a filtered one.
      std::istream* GetInStream() const
      {
        return pInput;
      }
108
0
      /// \return the current output stream pointer (pOutput); this stream may have filters applied.
      std::ostream* GetOutStream() const
      {
        return pOutput;
      }
109
110
      /// @brief Set input stream.  If takeOwnership is true, will deallocate when done.
111
      /// If isGzipped is true, will treat as a gzipped stream regardless of option settings,
112
      ///  if false, then will be treated as gzipped stream only if z/zin is set.
113
      void          SetInStream(std::istream* pIn, bool takeOwnership=false);
114
      void          SetOutStream(std::ostream* pOut, bool takeOwnership=false);
115
116
      /// Sets the formats from their ids, e g CML
117
      bool        SetInAndOutFormats(const char* inID, const char* outID, bool ingzip=false, bool outgzip=false);
118
      bool        SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut, bool ingzip=false, bool outgzip=false);
119
      /// Sets the input format from an id e.g. CML
120
      bool        SetInFormat(const char* inID, bool isgzip=false);
121
      bool        SetInFormat(OBFormat* pIn, bool isgzip=false);
122
      /// Sets the output format from an id e.g. CML
123
      bool        SetOutFormat(const char* outID, bool isgzip=false);
124
      bool        SetOutFormat(OBFormat* pOut, bool isgzip=false);
125
126
0
      /// \return the input format currently stored in this object (pInFormat)
      OBFormat*   GetInFormat() const
      {
        return pInFormat;
      }
127
0
      /// \return the output format currently stored in this object (pOutFormat)
      OBFormat*   GetOutFormat() const
      {
        return pOutFormat;
      }
128
0
      /// \return the value of the inFormatGzip flag
      bool GetInGzipped() const
      {
        return inFormatGzip;
      }
129
0
      /// \return the value of the outFormatGzip flag
      bool GetOutGzipped() const
      {
        return outFormatGzip;
      }
130
0
      /// \return a copy of the stored input file name (InFilename)
      std::string GetInFilename() const
      {
        return InFilename;
      }
131
0
      /// \return a copy of the stored output file name (OutFilename)
      std::string GetOutFilename() const
      {
        return OutFilename;
      }
132
133
      ///Get the position in the input stream of the object being read
134
0
      std::streampos GetInPos()const{return wInpos;};
135
136
      ///Get the length in the input stream of the object being read
137
0
      size_t GetInLen()const{return wInlen;};
138
139
      /// \return a default title which is the filename
140
      const char* GetTitle() const;
141
142
      ///@brief Extension method: deleted in ~OBConversion()
143
0
      OBConversion* GetAuxConv() const {return pAuxConv;};
144
0
      /// Stores pConv in pAuxConv. The previously stored pointer is simply
      /// overwritten; this setter does not delete it.
      void          SetAuxConv(OBConversion* pConv)
      {
        pAuxConv = pConv;
      }
145
      //@}
146
      /** @name Option handling
147
       Three types of Option provide information and control instructions to the
148
       conversion process, INOPTIONS, OUTOPTIONS, GENOPTIONS, and are stored in each
149
       OBConversion object in separate maps. Each option has an id and an optional
150
       text string. They are set individually by AddOption() or (rarely) collectively
151
       in SetOptions(). Options cannot be altered but can be replaced with AddOption()
152
       and deleted with RemoveOption(), which, however, should be used in an op derived
153
       from OBOp (because of iterator invalidation).
154
155
       If the "Convert" interface is used, the GENOPTIONS are acted upon in the
156
       OBBase::DoTransformations() functions (currently only OBMol has one). This
157
       happens after the object has been input but before it has been output.
158
       All the options are available to input and output formats, etc. via the IsOption()
159
       function, and the interpretation of any text string needs to be done subsequently.
160
161
       In the commandline interface, options with single character ids are indicated
162
       like -s, and those with multiple character ids like --gen3D. An option may have
163
       one or more parameters which appear, space separated, in the option's text string.
164
       With babel, unless the option is at the end of the command, it is necessary for
165
       the number of its parameters to be exactly that specified in RegisterOptionParam().
166
       The default is 0, but if it is more, and babel is likely to be used, this function
167
       should be called in the constructor of a format or op.
168
       With obabel (or the GUI), it is not necessary to call RegisterOptionParam().
169
170
       New GENOPTIONS can be defined (as plugins) using the class OBOp.
171
172
       It is customary for a format or op to document any INOPTIONS or OUTOPTIONS it
173
       uses in its Description() function. As well as providing documentation during
174
       use, this is also parsed by the GUI to construct its checkboxes,etc., so it is
175
       advisable to give new Descriptions the same form as existing ones.
176
177
       Some conversion options, such as -f, -l, -m, are unlikely to be used in
178
       programming, but are listed in OBConversion::Description().  The built-in
179
       GENOPTIONS for OBMol objects are listed in OBMol::ClassDescription() which
180
       is in transform.cpp and also in this documentation under AddOption().
181
       */
182
      //@{
183
      ///@brief Three types of options set on the command line by -a? , -x? , or -?
184
      enum Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS, ALL };
185
186
      ///@brief Determine whether an option is set. \return NULL if the option is not set, and a pointer to the associated text if it is
187
      const char* IsOption(const char* opt,Option_type opttyp=OUTOPTIONS);
188
189
      ///@brief Access the map with option name as key and any associated text as value
190
      const std::map<std::string,std::string>* GetOptions(Option_type opttyp)
191
0
        { return &OptionsArray[opttyp];};
192
193
      ///@brief Set an option of specified type, with optional text
194
      void AddOption(const char* opt, Option_type opttyp=OUTOPTIONS, const char* txt=nullptr);
195
196
      bool RemoveOption(const char* opt, Option_type optype);
197
198
      ///@brief Set several single character options of specified type from string like ab"btext"c"ctext"
199
      void SetOptions(const char* options, Option_type opttyp);
200
201
      ///@brief For example -h takes 0 parameters; -f takes 1. Call in a format constructor.
202
      static void RegisterOptionParam(std::string name, OBFormat* pFormat,
203
                                      int numberParams=0, Option_type typ=OUTOPTIONS);
204
205
      /// \return the number of parameters registered for the option, or 0 if not found
206
      static int GetOptionParams(std::string name, Option_type typ);
207
      //@}
208
209
      ///@brief Copies the options (by default of all types) from one OBConversion Object to another.
210
      void CopyOptions(OBConversion* pSourceConv, Option_type typ=ALL);
211
212
      /// @name Supported file format
213
      //@{
214
      /// @brief Set and return the list of supported input formats
215
      std::vector<std::string> GetSupportedInputFormat();
216
      /// @brief Set and return the list of supported output formats
217
      std::vector<std::string> GetSupportedOutputFormat();
218
      //@}
219
220
      /// @name Conversion
221
      //@{
222
      /// @brief Conversion for single input and output stream
223
      int         Convert(std::istream* is, std::ostream* os);
224
225
      /// @brief Conversion with existing streams
226
      int         Convert();
227
228
      /// @brief Conversion with multiple input/output files:
229
      /// makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
230
      int         FullConvert(std::vector<std::string>& FileList,
231
                              std::string& OutputFileName, std::vector<std::string>& OutputFileList);
232
      //@}
233
234
      /// @name Conversion loop control
235
      //@{
236
      int     AddChemObject(OBBase* pOb);///< @brief Adds to internal array during input
237
      OBBase*  GetChemObject(); ///< @brief Retrieve from internal array during output
238
      bool     IsLast();///< @brief True if no more objects to be output
239
      bool     IsFirstInput();///< @brief True if the first input object is being processed
240
      void     SetFirstInput(bool b=true);///< @brief Set whether or not this is the first input
241
      int      GetOutputIndex() const ;///< @brief Retrieves number of ChemObjects that have been actually output
242
      void     SetOutputIndex(int indx);///< @brief Sets output index (maybe to control whether seen as first object)
243
      void     SetMoreFilesToCome();///<@brief Used with multiple input files. Off by default.
244
      void     SetOneObjectOnly(bool b=true);///< @brief Used with multiple input files. Off by default.
245
0
      /// @brief Synonym for SetOneObjectOnly()
      /// Forwards b unchanged to SetOneObjectOnly().
      void     SetLast(bool b)
      {
        SetOneObjectOnly(b);
      }
246
0
      /// @brief True if no more files to be read
      /// \return the negation of the MoreFilesToCome flag
      bool     IsLastFile()
      {
        return !MoreFilesToCome;
      }
247
      /// @brief Number of objects read and processed
248
      /// Incremented after options are processed, so 0 for first object.  Returns -1 if Convert interface not used. 
249
0
      int      GetCount()const { return Count; }
250
      //@}
251
      /// @name Convenience functions
252
      //@{
253
      ///The default format is set in a single OBFormat class (generally it is OBMol)
254
0
      static OBFormat* GetDefaultFormat(){return OBFormat::FindType(nullptr);};
255
256
      /// @brief Outputs an object of a class derived from OBBase.
257
258
      /// Part of "API" interface.
259
      /// The output stream can be specified and the change is retained in the OBConversion instance
260
      bool        Write(OBBase* pOb, std::ostream* pout=nullptr);
261
262
      /// @brief Outputs an object of a class derived from OBBase as a string
263
264
      /// Part of "API" interface.
265
      /// The output stream is temporarily changed to the string and then restored
266
      /// This method is primarily intended for scripting languages without "stream" classes
267
      /// The optional "trimWhitespace" parameter allows trailing whitespace to be removed
268
      /// (e.g., in a SMILES string or InChI, etc.)
269
      std::string                     WriteString(OBBase* pOb, bool trimWhitespace = false);
270
271
      /// @brief Outputs an object of a class derived from OBBase as a file (with the supplied path)
272
273
      /// Part of "API" interface.
274
      /// The output stream is changed to the supplied file and the change is retained in the
275
      /// OBConversion instance.
276
      /// This method is primarily intended for scripting languages without "stream" classes
277
      bool                            WriteFile(OBBase* pOb, std::string filePath);
278
279
      /// @brief Manually closes and deletes the output stream
280
      /// The file is closed anyway when in the OBConversion destructor or when WriteFile
281
      /// is called again.
282
      /// \since version 2.1
283
      void CloseOutFile();
284
285
      /// @brief Reads an object of a class derived from OBBase into pOb.
286
287
      /// Part of "API" interface.
288
      /// The input stream can be specified and the change is retained in the OBConversion instance
289
      /// \return false and pOb=NULL on error
290
      bool  Read(OBBase* pOb, std::istream* pin=nullptr);
291
292
      /// Part of "API" interface.
293
      /// The input stream can be specified and the change is retained in the OBConversion instance
294
      /// \return NULL on error
295
//      OBBase* ReadObject(std::istream* pin=NULL);
296
297
      /// @brief Reads an object of a class derived from OBBase into pOb from the supplied string
298
299
      /// Part of "API" interface.
300
      /// \return false and pOb=NULL on error
301
      /// This method is primarily intended for scripting languages without "stream" classes
302
      /// Any existing input stream will be replaced by stringstream.
303
      bool  ReadString(OBBase* pOb, std::string input);
304
305
      /// @brief Reads an object of a class derived from OBBase into pOb from the file specified
306
307
      /// Part of "API" interface.
308
      /// The input stream is changed to the supplied file and the change is retained in the
309
      /// OBConversion instance. For multi-molecule files, the remaining molecules
310
      /// can be read by repeatedly calling the Read() method.
311
      /// \return false and pOb=NULL on error
312
      /// This method is primarily intended for scripting languages without "stream" classes
313
      bool  ReadFile(OBBase* pOb, std::string filePath);
314
315
      /// Part of the "Convert" interface.
316
      /// Open the files and update the streams in the OBConversion object.
317
      /// This method is primarily intended for scripting languages without "stream" classes
318
      /// and will usually be followed by a call to Convert().
319
      /// Will set format from file extension if format has not already been set.
320
      /// Files will be opened even if format cannot be determined, but not if file path is empty.
321
      /// \return false if unsuccessful.
322
      bool OpenInAndOutFiles(std::string infilepath, std::string outfilepath);
323
324
      /// @brief Sends a message like "2 molecules converted" to clog
325
      /// The type of object is taken from the TargetClassDescription
326
      /// of the specified class (or the output format if not specified)and
327
      /// is appropriately singular or plural.
328
      void ReportNumberConverted(int count, OBFormat* pFormat=nullptr);
329
330
      /// \return the number of objects in the inputstream,
331
      /// or -1 if error or if SkipObjects for the input format is not implemented
332
      /// Adjusts for the value of -f and -l options (first and last objects).
333
      int NumInputObjects();
334
335
336
protected:
337
      ///Replaces * in BaseName by InFile without extension and path
338
      static std::string BatchFileName(std::string& BaseName, std::string& InFile);
339
      ///Replaces * in BaseName by Count
340
      static std::string IncrementedFileName(std::string& BaseName, const int Count);
341
      ///Checks for misunderstandings when using the -m option
342
      static bool CheckForUnintendedBatch(const std::string& infile, const std::string& outfile);
343
344
      void ClearInStreams();
345
      //@}
346
347
    protected:
348
349
      //helper class for saving stream state
350
      struct StreamState
351
      {
352
          std::ios *pStream; //active stream
353
          std::vector<std::ios *> ownedStreams; //streams we own the memory to
354
355
0
          StreamState(): pStream(nullptr) {}
356
          ~StreamState()
357
0
          {
358
0
            assert(ownedStreams.size() == 0); //should be popped
359
0
          }
360
361
          void pushInput(OBConversion& conv);
362
          void popInput(OBConversion& conv);
363
364
          void pushOutput(OBConversion& conv);
365
          void popOutput(OBConversion& conv);
366
367
0
          bool isSet() const { return pStream != nullptr; }
368
      };
369
370
      bool             SetStartAndEnd();
371
//      static FMapType& FormatsMap();///<contains ID and pointer to all OBFormat classes
372
//      static FMapType& FormatsMIMEMap();///<contains MIME and pointer to all OBFormat classes
373
      typedef std::map<std::string,int> OPAMapType;
374
      static OPAMapType& OptionParamArray(Option_type typ);
375
      bool             OpenAndSetFormat(bool SetFormat, std::ifstream* is, std::stringstream* ss=nullptr);
376
377
      std::string   InFilename, OutFilename; //OutFileName added v2.4.0
378
379
      typedef   FilteringInputStream< LineEndingExtractor > LEInStream;
380
381
      std::istream *pInput; //input stream, may be filtered
382
      std::vector<std::istream *> ownedInStreams; //streams we own the memory to
383
384
      std::ostream *pOutput; //output stream, may have filters applied
385
      std::vector<std::ostream *> ownedOutStreams; //streams we own the memory to
386
387
388
      static OBFormat*  pDefaultFormat;
389
      OBFormat*     pInFormat;
390
      OBFormat*   pOutFormat;
391
392
      std::map<std::string,std::string> OptionsArray[3];
393
394
      int     Index;
395
      unsigned int    StartNumber;
396
      unsigned int    EndNumber;
397
      int           Count;
398
      bool      m_IsFirstInput;
399
      bool      m_IsLast;
400
      bool      MoreFilesToCome;
401
      bool      OneObjectOnly;
402
      bool      ReadyToInput;
403
      bool      SkippedMolecules;    ///< skip molecules using -f and -l
404
405
      //unlike the z and zin options, these are not sticky - setting formats will reset them
406
      bool inFormatGzip;
407
      bool outFormatGzip;
408
409
      OBBase*     pOb1;
410
      std::streampos wInpos; ///<position in the input stream of the object being written
411
      std::streampos rInpos; ///<position in the input stream of the object being read
412
      size_t wInlen; ///<length in the input stream of the object being written
413
      size_t rInlen; ///<length in the input stream of the object being read
414
415
      OBConversion* pAuxConv;///<Way to extend OBConversion
416
417
      std::vector<std::string> SupportedInputFormat; ///< list of supported input format
418
      std::vector<std::string> SupportedOutputFormat; ///< list of supported output format
419
420
    };
421
422
} //namespace OpenBabel
423
#endif //OB_CONV_H
424
425
//! \file
426
//! \brief Handle file conversions. Declaration of OBFormat, OBConversion.
427
428