Coverage Report

Created: 2026-03-31 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rdkit/Code/GraphMol/SmilesParse/SmilesWrite.h
Line
Count
Source
1
//
2
//  Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3
//
4
//   @@ All Rights Reserved @@
5
//  This file is part of the RDKit.
6
//  The contents are covered by the terms of the BSD license
7
//  which is included in the file license.txt, found at the root
8
//  of the RDKit source tree.
9
//
10
#include <RDGeneral/export.h>
11
#ifndef RD_SMILESWRITE_H_012020
12
#define RD_SMILESWRITE_H_012020
13
14
#include <string>
15
#include <vector>
16
#include <memory>
17
#include <cstdint>
18
#include <limits>
19
#include <RDGeneral/BetterEnums.h>
20
21
#include <boost/shared_ptr.hpp>
22
23
namespace RDKit {
24
class Atom;
25
class Bond;
26
class ROMol;
27
28
typedef std::vector<boost::shared_ptr<ROMol>> MOL_SPTR_VECT;
29
30
struct RDKIT_SMILESPARSE_EXPORT SmilesWriteParams {
31
  bool doIsomericSmiles =
32
      true;              /**< include stereochemistry and isotope information */
33
  bool doKekule = false; /**< kekulize the molecule before generating the SMILES
34
                            and output single/double bonds. NOTE that the output
35
                            is not canonical and that this will throw an
36
                            exception if the molecule cannot be kekulized. */
37
  bool canonical = true; /**< generate canonical SMILES */
38
  bool cleanStereo = true;       /**< clean up stereo */
39
  bool allBondsExplicit = false; /**< include symbols for all bonds */
40
  bool allHsExplicit = false;    /**< provide hydrogen counts for every atom */
41
  bool doRandom = false; /**< randomize the output order. The resulting SMILES
42
                              is not canonical and the value of the canonical
43
                              parameter will be ignored. */
44
  int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
45
                             atom. The resulting SMILES is not canonical and
46
                             the value of the canonical parameter will be
47
                             ignored. */
48
  bool includeDativeBonds =
49
      true; /**< include the RDKit extension for dative bonds. Otherwise dative
50
               bonds will be written as single bonds*/
51
  bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers
52
                                        when canonicalizing the molecule */
53
};
54
55
namespace SmilesWrite {
56
57
BETTER_ENUM(CXSmilesFields, uint32_t,  // clang-format off
58
  CX_NONE = 0,
59
  CX_ATOM_LABELS = 1 << 0,
60
  CX_MOLFILE_VALUES = 1 << 1,
61
  CX_COORDS = 1 << 2,
62
  CX_RADICALS = 1 << 3,
63
  CX_ATOM_PROPS = 1 << 4,
64
  CX_LINKNODES = 1 << 5,
65
  CX_ENHANCEDSTEREO = 1 << 6,
66
  CX_SGROUPS = 1 << 7,
67
  CX_POLYMER = 1 << 8,
68
  CX_BOND_CFG = 1 << 9,
69
  CX_BOND_ATROPISOMER = 1 << 10,
70
  CX_COORDINATE_BONDS = 1 << 11,
71
  CX_HYDROGEN_BONDS = 1 << 12,
72
  CX_ZERO_BONDS = 1 << 13,
73
  CX_ALL = 0x7fffffff,
74
  CX_ALL_BUT_COORDS = CX_ALL ^ CX_COORDS
75
);
76
77
//! \brief returns the cxsmiles data for a molecule
78
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(
79
    const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
80
81
//! \brief returns the cxsmiles data for a vector of molecules
82
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(
83
  const std::vector<ROMol *> &mols, std::uint32_t flags);
84
  
85
//! \brief returns true if the atomic number is in the SMILES organic subset
86
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber);
87
88
//! \brief returns the SMILES for an atom
89
/*!
90
  \param atom : the atom to work with
91
  \param ps : the parameters controlling the SMILES generation
92
*/
93
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom,
94
                                                   const SmilesWriteParams &ps);
95
96
//! \brief returns the SMILES for an atom
97
/*!
98
  \param atom : the atom to work with
99
  \param doKekule : we're doing kekulized smiles (e.g. don't use
100
    lower case for the atom label)
101
  \param bondIn : the bond we came into the atom on (unused)
102
  \param allHsExplicit : if true, hydrogen counts will be provided for every
103
  atom.
104
  \param isomericSmiles : if true, isomeric SMILES will be generated
105
*/
106
inline std::string GetAtomSmiles(const Atom *atom, bool doKekule = false,
107
                                 const Bond * = nullptr,
108
                                 bool allHsExplicit = false,
109
0
                                 bool isomericSmiles = true) {
110
0
  // RDUNUSED_PARAM(bondIn);
111
0
  SmilesWriteParams ps;
112
0
  ps.doIsomericSmiles = isomericSmiles;
113
0
  ps.doKekule = doKekule;
114
0
  ps.allHsExplicit = allHsExplicit;
115
0
  return GetAtomSmiles(atom, ps);
116
0
};
117
118
//! \brief returns the SMILES for a bond
119
/*!
120
  \param bond : the bond to work with
121
  \param ps : the parameters controlling the SMILES generation
122
  \param atomToLeftIdx : the index of the atom preceding \c bond
123
    in the SMILES
124
*/
125
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond,
126
                                                   const SmilesWriteParams &ps,
127
                                                   int atomToLeftIdx = -1);
128
//! \brief returns the SMILES for a bond
129
/*!
130
  \param bond : the bond to work with
131
  \param atomToLeftIdx : the index of the atom preceding \c bond
132
    in the SMILES
133
  \param doKekule : we're doing kekulized smiles (e.g. write out
134
    bond orders for aromatic bonds)
135
  \param allBondsExplicit : if true, symbols will be included for all bonds.
136
*/
137
inline std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx = -1,
138
                                 bool doKekule = false,
139
0
                                 bool allBondsExplicit = false) {
140
0
  SmilesWriteParams ps;
141
0
  ps.doKekule = doKekule;
142
0
  ps.allBondsExplicit = allBondsExplicit;
143
0
  ps.doIsomericSmiles = false;
144
0
  return GetBondSmiles(bond, ps, atomToLeftIdx);
145
0
};
146
147
namespace detail {
148
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
149
    const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles, bool includeStereoGroups=true);
150
}
151
152
}  // namespace SmilesWrite
153
154
//! \brief returns canonical SMILES for a molecule
155
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
156
    const ROMol &mol, const SmilesWriteParams &params);
157
158
//! \brief returns SMILES for a molecule, canonical by default
159
/*!
160
  \param mol : the molecule in question.
161
  \param doIsomericSmiles : include stereochemistry and isotope information
162
      in the SMILES
163
164
  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
165
      this will throw an exception if the molecule cannot be kekulized.
166
167
  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
168
      The resulting SMILES is not, of course, canonical.
169
  \param canonical : if false, no attempt will be made to canonicalize the
170
  SMILES
171
  \param allBondsExplicit : if true, symbols will be included for all bonds.
172
  \param allHsExplicit : if true, hydrogen counts will be provided for every
173
  atom.
174
  \param doRandom : if true, the first atom in the SMILES string will be
175
  selected at random and the SMILES string will not be canonical
176
  \param ignoreAtomMapNumbers : if true, ignores any atom map numbers when
177
  canonicalizing the molecule
178
 */
179
inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
180
                               bool doKekule = false, int rootedAtAtom = -1,
181
                               bool canonical = true,
182
                               bool allBondsExplicit = false,
183
                               bool allHsExplicit = false,
184
                               bool doRandom = false,
185
0
                               bool ignoreAtomMapNumbers = false) {
186
0
  SmilesWriteParams ps;
187
0
  ps.doIsomericSmiles = doIsomericSmiles;
188
0
  ps.doKekule = doKekule;
189
0
  ps.rootedAtAtom = rootedAtAtom;
190
0
  ps.canonical = canonical;
191
0
  ps.allBondsExplicit = allBondsExplicit;
192
0
  ps.allHsExplicit = allHsExplicit;
193
0
  ps.doRandom = doRandom;
194
0
  ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers;
195
0
  return MolToSmiles(mol, ps);
196
0
};
197
198
//! \brief returns a vector of random SMILES for a molecule (may contain
199
//! duplicates)
200
/*!
201
  \param mol : the molecule in question.
202
  \param numSmiles : the number of SMILES to return
203
  \param randomSeed : if >0, will be used to seed the random number generator
204
  \param doIsomericSmiles : include stereochemistry and isotope information
205
      in the SMILES
206
  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
207
  \param allBondsExplicit : if true, symbols will be included for all bonds.
208
  \param allHsExplicit : if true, hydrogen counts will be provided for every
209
  atom.
210
 */
211
RDKIT_SMILESPARSE_EXPORT std::vector<std::string> MolToRandomSmilesVect(
212
    const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
213
    bool doIsomericSmiles = true, bool doKekule = false,
214
    bool allBondsExplicit = false, bool allHsExplicit = false);
215
216
//! \brief returns canonical SMILES for part of a molecule
217
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(
218
    const ROMol &mol, const SmilesWriteParams &params,
219
    const std::vector<int> &atomsToUse,
220
    const std::vector<int> *bondsToUse = nullptr,
221
    const std::vector<std::string> *atomSymbols = nullptr,
222
    const std::vector<std::string> *bondSymbols = nullptr);
223
224
//! \brief returns canonical SMILES for part of a molecule
225
/*!
226
  \param mol : the molecule in question.
227
  \param atomsToUse : indices of the atoms in the fragment
228
  \param bondsToUse : indices of the bonds in the fragment. If this is not
229
  provided,
230
                      all bonds between the atoms in atomsToUse will be included
231
  \param atomSymbols : symbols to use for the atoms in the output SMILES
232
  \param bondSymbols : symbols to use for the bonds in the output SMILES
233
  \param doIsomericSmiles : include stereochemistry and isotope information
234
      in the SMILES
235
  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
236
  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
237
      The resulting SMILES is not, of course, canonical.
238
  \param canonical : if false, no attempt will be made to canonicalize the
239
  SMILES
240
  \param allBondsExplicit : if true, symbols will be included for all bonds.
241
  \param allHsExplicit : if true, hydrogen counts will be provided for every
242
  atom.
243
  \param doRandom : generate a randomized smiles string by randomly choosing
244
                    the priority to follow in the DFS traversal. [default false]
245
246
  \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
247
248
 */
249
inline std::string MolFragmentToSmiles(
250
    const ROMol &mol, const std::vector<int> &atomsToUse,
251
    const std::vector<int> *bondsToUse = nullptr,
252
    const std::vector<std::string> *atomSymbols = nullptr,
253
    const std::vector<std::string> *bondSymbols = nullptr,
254
    bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
255
    bool canonical = true, bool allBondsExplicit = false,
256
0
    bool allHsExplicit = false) {
257
0
  SmilesWriteParams ps;
258
0
  ps.doIsomericSmiles = doIsomericSmiles;
259
0
  ps.doKekule = doKekule;
260
0
  ps.rootedAtAtom = rootedAtAtom;
261
0
  ps.canonical = canonical;
262
0
  ps.allBondsExplicit = allBondsExplicit;
263
0
  ps.allHsExplicit = allHsExplicit;
264
0
  return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
265
0
                             bondSymbols);
266
0
}
267
268
BETTER_ENUM(RestoreBondDirOption, unsigned int,
269
  RestoreBondDirOptionTrue = 0,  //!< DO restore bond dirs
270
  RestoreBondDirOptionClear = 1  //!< clear all bond dir information
271
);
272
273
//! \brief returns canonical CXSMILES for a molecule
274
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(
275
    const ROMol &mol, const SmilesWriteParams &ps,
276
    std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL,
277
    RestoreBondDirOption restoreBondDirs =
278
        RestoreBondDirOption::RestoreBondDirOptionClear);
279
280
//! \brief returns canonical CXSMILES for a molecule
281
/*!
282
  \param mol : the molecule in question.
283
  \param doIsomericSmiles : include stereochemistry and isotope information
284
      in the SMILES
285
  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
286
  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
287
      The resulting SMILES is not, of course, canonical.
288
  \param canonical : if false, no attempt will be made to canonicalize the
289
  SMILES
290
  \param allBondsExplicit : if true, symbols will be included for all bonds.
291
  \param allHsExplicit : if true, hydrogen counts will be provided for every
292
  atom.
293
  \param doRandom : generate a randomized smiles string by randomly choosing
294
                    the priority to follow in the DFS traversal. [default false]
295
 */
296
inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
297
                                 bool doKekule = false, int rootedAtAtom = -1,
298
                                 bool canonical = true,
299
                                 bool allBondsExplicit = false,
300
                                 bool allHsExplicit = false,
301
0
                                 bool doRandom = false) {
302
0
  SmilesWriteParams ps;
303
0
  ps.doIsomericSmiles = doIsomericSmiles;
304
0
  ps.doKekule = doKekule;
305
0
  ps.rootedAtAtom = rootedAtAtom;
306
0
  ps.canonical = canonical;
307
0
  ps.allBondsExplicit = allBondsExplicit;
308
0
  ps.allHsExplicit = allHsExplicit;
309
0
  ps.doRandom = doRandom;
310
0
  return MolToCXSmiles(mol, ps, SmilesWrite::CXSmilesFields::CX_ALL);
311
0
};
312
313
//! \brief returns canonical CXSMILES for part of a molecule
314
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(
315
    const ROMol &mol, const SmilesWriteParams &params,
316
    const std::vector<int> &atomsToUse,
317
    const std::vector<int> *bondsToUse = nullptr,
318
    const std::vector<std::string> *atomSymbols = nullptr,
319
    const std::vector<std::string> *bondSymbols = nullptr);
320
321
//! \brief returns canonical CXSMILES for part of a molecule
322
/*!
323
  \param mol : the molecule in question.
324
  \param atomsToUse : indices of the atoms in the fragment
325
  \param bondsToUse : indices of the bonds in the fragment. If this is not
326
  provided,
327
                      all bonds between the atoms in atomsToUse will be included
328
  \param atomSymbols : symbols to use for the atoms in the output SMILES
329
  \param bondSymbols : symbols to use for the bonds in the output SMILES
330
  \param doIsomericSmiles : include stereochemistry and isotope information
331
      in the SMILES
332
  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
333
  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
334
      The resulting SMILES is not, of course, canonical.
335
  \param canonical : if false, no attempt will be made to canonicalize the
336
  SMILES
337
  \param allBondsExplicit : if true, symbols will be included for all bonds.
338
  \param allHsExplicit : if true, hydrogen counts will be provided for every
339
  atom.
340
341
  \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
342
343
 */
344
inline std::string MolFragmentToCXSmiles(
345
    const ROMol &mol, const std::vector<int> &atomsToUse,
346
    const std::vector<int> *bondsToUse = nullptr,
347
    const std::vector<std::string> *atomSymbols = nullptr,
348
    const std::vector<std::string> *bondSymbols = nullptr,
349
    bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
350
    bool canonical = true, bool allBondsExplicit = false,
351
0
    bool allHsExplicit = false) {
352
0
  SmilesWriteParams ps;
353
0
  ps.doIsomericSmiles = doIsomericSmiles;
354
0
  ps.doKekule = doKekule;
355
0
  ps.rootedAtAtom = rootedAtAtom;
356
0
  ps.canonical = canonical;
357
0
  ps.allBondsExplicit = allBondsExplicit;
358
0
  ps.allHsExplicit = allHsExplicit;
359
0
  return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
360
0
                               bondSymbols);
361
0
}
362
363
}  // namespace RDKit
364
#endif