Coverage Report

Created: 2025-10-10 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openbabel/include/openbabel/inchiformat.h
Line
Count
Source
1
/**********************************************************************
2
Copyright (C) 2005,2006,2007 Chris Morley
3
4
Based on the IUPAC InChI reference software, which is distributed
5
under the GNU LGPL:
6
Copyright (C) 2005 The International Union of Pure and Applied Chemistry
7
IUPAC International Chemical Identifier (InChI) (contact:secretariat@iupac.org)
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
***********************************************************************/
18
#ifndef OB_INCHIFORMAT_H
19
#define OB_INCHIFORMAT_H
20
#include <openbabel/babelconfig.h>
21
#include <openbabel/obconversion.h>
22
#include <openbabel/obmolecformat.h>
23
24
#include "inchi_api.h"
25
#ifdef HAVE_SSTREAM
26
#include <sstream>
27
#else
28
#include <strstream>
29
#endif
30
#include <set>
31
#include <vector>
32
#include <cstdlib>
33
#include <algorithm>
34
35
namespace OpenBabel
36
{
37
extern std::string GetInChI(std::istream& is);
38
39
class InChIFormat : public OBMoleculeFormat
40
{
41
public:
42
  InChIFormat()
43
0
  {
44
0
    OBConversion::RegisterFormat("inchi",this);
45
0
    OBConversion::RegisterOptionParam("n", this, 0, OBConversion::INOPTIONS);
46
0
    OBConversion::RegisterOptionParam("t", this);
47
0
    OBConversion::RegisterOptionParam("l", this);
48
0
    OBConversion::RegisterOptionParam("X", this, 1, OBConversion::OUTOPTIONS);
49
0
    OBConversion::RegisterOptionParam("K", this, 0, OBConversion::OUTOPTIONS);
50
0
    OBConversion::RegisterOptionParam("F", this, 0, OBConversion::OUTOPTIONS);
51
0
    OBConversion::RegisterOptionParam("X", this, 1, OBConversion::INOPTIONS);
52
0
    OBConversion::RegisterOptionParam("T", this, 1, OBConversion::OUTOPTIONS);
53
0
  }
54
55
  const char* Description() override
56
0
  {
57
0
    return
58
0
    "InChI format\n"
59
0
    "IUPAC/NIST molecular identifier\n\n"
60
0
61
0
    "Write Options, e.g. -xa\n"
62
0
    "    Standard InChI is written unless certain InChI options are used\n \n"
63
0
    " K output InChIKey only\n"
64
0
    " t add molecule name after InChI\n"
65
0
    " w ignore less important warnings\n"
66
0
    "    These are:\n"
67
0
    "    \'Omitted undefined stereo\'\n"
68
0
    "    \'Charges were rearranged\'\n"
69
0
    "    \'Proton(s) added/removed\'\n"
70
0
    "    \'Metal was disconnected\'\n"
71
0
    " a output auxiliary information\n"
72
0
    " l display InChI log\n"
73
0
    " r recalculate InChI; normally an input InChI is reused\n"
74
0
    " s recalculate wedge and hash bonds(2D structures only)\n \n"
75
0
    "    **Uniqueness options** (see also ``--unique`` and ``--sort`` which are more versatile)\n"
76
0
    " u output only unique molecules\n"
77
0
    " U output only unique molecules and sort them\n"
78
0
    " e compare first molecule to others\n"
79
0
    "    This can also be done with :ref:`InChICompare format <Compare_molecules_using_InChI>`::\n \n"
80
0
    "      babel first.smi second.mol third.cml -ok\n \n"
81
0
    " T <param> truncate InChI according to various parameters\n"
82
0
    "    See below for possible truncation parameters.\n"
83
0
    
84
0
    " X <Option string> Additional InChI options\n"
85
0
    "    See InChI documentation.\n"
86
0
    "    These options should be space delimited in a single quoted string.\n \n"
87
0
    "    - Structure perception (compatible with stdInChI): ``NEWPSOFF``, ``DoNotAddH``, ``SNon``\n"
88
0
    "    - Stereo interpretation (produces non-standard InChI): ``SRel``, ``SRac``,\n"
89
0
    "      ``SUCF``, ``ChiralFlagON``, ``ChiralFlagOFF``\n"
90
0
    "    - InChI creation options (produces non-standard InChI): ``SUU``, ``SLUUD``,\n"
91
0
    "      ``FixedH``, ``RecMet``, ``KET``, ``15T``\n \n"
92
0
    "    The following options are for convenience, e.g. ``-xF``\n"
93
0
    "    but produce non-standard InChI.\n"
94
0
    " F include fixed hydrogen layer\n"
95
0
    " M include bonds to metal\n\n"
96
0
97
0
    "Read Options, e.g. -an\n"
98
0
    " X <Option string> List of InChI options\n"
99
0
    " n molecule name follows InChI on same line\n"
100
0
    " a add InChI string to molecule name\n\n"
101
0
102
0
    "Truncation parameters used with ``-xT``:\n\n"
103
0
    "/formula   formula only\n"
104
0
    "/connect   formula and connectivity only\n"
105
0
    "/nostereo  ignore E/Z and sp3 stereochemistry\n"
106
0
    "/nosp3       ignore sp3 stereochemistry\n"
107
0
    "/noEZ      ignore E/Z steroeochemistry\n"
108
0
    "/nochg     ignore charge and protonation\n"
109
0
    "/noiso     ignore isotopes\n\n"
110
0
    "Note that these can also be combined, e.g. ``/nochg/noiso``\n"
111
0
;
112
0
  };
113
114
  const char* SpecificationURL() override
115
0
  { return "http://www.iupac.org/inchi/";};
116
117
  bool ReadMolecule(OBBase* pOb, OBConversion* pConv) override;
118
  bool WriteMolecule(OBBase* pOb, OBConversion* pConv) override;
119
  int  SkipObjects(int n, OBConversion* pConv) override;
120
121
  static char CompareInchi(const std::string& Inchi1, const std::string& Inchi2);
122
  static std::string InChIErrorMessage(const char ch);
123
124
  ///Removes layers or truncates InChi, according to \param spec
125
  ///which can contain any number of:/formula /connect /nostereo /nosp3 /noEZ /nochg /noiso
126
  /// @param inchi The inchi string
127
  static bool EditInchi(std::string& inchi, std::string& spec);
128
129
  /// Ordering predicate for InChI-like strings in which embedded runs of
  /// digits are compared by numeric value instead of character by character,
  /// so that
  ///  - "a6b" (or "a06b") is less than "a15b"
  ///  - "CH4" is less than "C2H6"
  ///  - "CH4" is less than "ClH" (hydrogen chloride)
  /// Only the text before the first space (or the whole string when there is
  /// no space) takes part in the comparison.
  struct InchiLess
  {
    /// @param s1 left-hand string
    /// @param s2 right-hand string
    /// @return true when s1 orders before s2; false when it orders after s2
    ///         or when both compare as equivalent.
    bool operator()(const std::string& s1, const std::string& s2) const
    {
      typedef std::string::const_iterator Iter;

      //stop at the first space or the end of the strings
      const Iter end1 = std::find(s1.begin(), s1.end(), ' ');
      const Iter end2 = std::find(s2.begin(), s2.end(), ' ');
      Iter p1 = s1.begin();
      Iter p2 = s2.begin();

      while (p1 != end1 && p2 != end2)
      {
        const bool num1 = IsDigit(*p1);
        const bool num2 = IsDigit(*p2);

        if (num1 && num2)
        {
          //both numbers: find the end of each digit run
          Iter stop1 = p1;
          while (stop1 != end1 && IsDigit(*stop1))
            ++stop1;
          Iter stop2 = p2;
          while (stop2 != end2 && IsDigit(*stop2))
            ++stop2;

          //leading zeros do not change the value ("06" == "6")
          while (p1 != stop1 && *p1 == '0')
            ++p1;
          while (p2 != stop2 && *p2 == '0')
            ++p2;

          //Without leading zeros the longer run is the larger number; runs of
          //equal length compare digit by digit. Working on the digits rather
          //than converting to int keeps very long runs from overflowing.
          const std::string::difference_type len1 = stop1 - p1;
          const std::string::difference_type len2 = stop2 - p2;
          if (len1 != len2)
            return len1 < len2;
          for (; p1 != stop1; ++p1, ++p2)
          {
            if (*p1 != *p2)
              return *p1 < *p2;
          }
          //Equal numbers: both iterators now sit on the first character after
          //the number, which must be compared on the next pass, not skipped.
          continue;
        }

        if (num1)
        {
          //A count on the left only: it comes first when the right-hand
          //character is lower case (e.g. "C2H6" before "ClH"), otherwise the
          //right-hand string has an implicit count of one and comes first.
          return IsLower(*p2);
        }
        if (num2)
        {
          //mirror image of the case above (e.g. "CH4" before "C2H6")
          return !IsLower(*p1);
        }

        //neither numbers
        if (*p1 != *p2)
          return *p1 < *p2;
        ++p1;
        ++p2;
      }

      //One or both strings are exhausted. A string that is a proper prefix of
      //the other orders first; otherwise the two are identical.
      return p1 == end1 && p2 != end2;
    }

  private:
    //Locale-independent character tests; unlike isdigit()/islower() they are
    //well defined for negative char values.
    static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
    static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
  };
177
178
private:
179
  ///Erases the layer starting with \p str and, if \p all is true, all the subsequent ones
180
  static void RemoveLayer (std::string& inchi, const std::string& str, bool all=false);
181
182
private:
183
  OBAtom* GetCommonAtom(OBBond* pb1, OBBond* pb2);
184
  char* GetInChIOptions(OBConversion* pConv, bool Reading);
185
  void SaveInchi(OBMol* pmol, const std::string& s);
186
187
  typedef std::set<std::string, InchiLess> nSet;
188
  nSet allInchi;
189
  std::string firstInchi;
190
  std::string firstID;
191
};
192
193
//*****************************************************
194
class InChICompareFormat : public OBMoleculeFormat
195
{
196
public:
197
  InChICompareFormat()
198
0
  {
199
0
      OBConversion::RegisterFormat("k",this);
200
0
  }
201
  virtual const char* Description() //required
202
0
  {
203
0
    return
204
0
      "Compare molecules using InChI\n"
205
0
      "A utility format that allows you to compare molecules using their InChIs\n"
206
0
      "The first molecule is compared with the rest, e.g.::\n\n"
207
0
208
0
      "  babel first.smi second.mol third.cml -ok\n\n"
209
0
210
0
      "This is the same as using ``-oinchi -xet`` and can take the same options as InChI format\n"
211
0
      "(see :ref:`InChI_format`).\n";
212
0
  }
213
  virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
214
0
  virtual unsigned int Flags() { return NOTREADABLE;};
215
};
216
217
//*****************************************************
218
class InChIKeyFormat : public OBMoleculeFormat
219
{
220
public:
221
  InChIKeyFormat()
222
0
  {
223
0
      OBConversion::RegisterFormat("inchikey",this);
224
0
  }
225
  virtual const char* Description() //required
226
0
  {
227
0
    return
228
0
      "InChIKey\n"
229
0
      "A hashed representation of the InChI.\n\n"
230
0
231
0
      "The InChIKey is a fixed-length (27-character) condensed digital\n"
232
0
      "representation of an InChI, developed to make it easy to perform\n"
233
0
      "web searches for chemical structures.\n\n"
234
0
235
0
      "An InChIKey consists of 14 characters (derived from the connectivity\n"
236
0
      "layer in the InChI), a hyphen, 9 characters (derived from the\n"
237
0
      "remaining layers), a character indicating the InChI version, a hyphen\n"
238
0
      "and a final checksum character. Contrast the InChI and InChIKey of the\n"
239
0
      "molecule represented by the SMILES string `CC(=O)Cl`::\n\n"
240
0
241
0
      "  obabel -:CC(=O)Cl -oinchi\n"
242
0
      "  InChI=1S/C2H3ClO/c1-2(3)4/h1H3\n\n"
243
0
244
0
      "  obabel -:CC(=O)Cl -oinchikey\n"
245
0
      "  WETWJCDKMRHUPV-UHFFFAOYSA-N\n\n"
246
0
      
247
0
      "This is the same as using ``-oinchi -xK`` and can take the same options\n"
248
0
      "as the InChI format (see :ref:`InChI_format`)::\n\n"
249
0
250
0
      "  obabel -:CC(=O)Cl -oinchi -xK\n"
251
0
      "  WETWJCDKMRHUPV-UHFFFAOYSA-N\n\n"
252
0
253
0
      "Note that while a molecule with a particular InChI will always give the\n"
254
0
      "same InChIKey, the reverse is not true; there may exist more than one\n"
255
0
      "molecule which have different InChIs but yield the same InChIKey.\n";
256
0
  }
257
  virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
258
0
  virtual unsigned int Flags() { return NOTREADABLE;};
259
};
260
261
}//namespace OpenBabel
262
263
#endif
264