/src/rdkit/Code/GraphMol/Atom.h
Line | Count | Source |
1 | | // |
2 | | // Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors |
3 | | // |
4 | | // @@ All Rights Reserved @@ |
5 | | // This file is part of the RDKit. |
6 | | // The contents are covered by the terms of the BSD license |
7 | | // which is included in the file license.txt, found at the root |
8 | | // of the RDKit source tree. |
9 | | // |
10 | | /*! \file Atom.h |
11 | | |
12 | | \brief Defines the Atom class and associated typedefs |
13 | | |
14 | | */ |
15 | | #include <RDGeneral/export.h> |
16 | | #ifndef _RD_ATOM_H |
17 | | #define _RD_ATOM_H |
18 | | |
19 | | #include <limits> |
20 | | |
21 | | // ours |
22 | | #include <RDGeneral/Invariant.h> |
23 | | #include <Query/QueryObjects.h> |
24 | | #include <RDGeneral/types.h> |
25 | | #include <RDGeneral/RDProps.h> |
26 | | #include <GraphMol/details.h> |
27 | | |
28 | | namespace RDKit { |
29 | | class Atom; |
30 | | } |
31 | | //! allows Atom objects to be dumped to streams |
32 | | RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target, |
33 | | const RDKit::Atom &at); |
34 | | |
35 | | namespace RDKit { |
36 | | class ROMol; |
37 | | class RWMol; |
38 | | class AtomMonomerInfo; |
39 | | |
40 | | //! The class for representing atoms |
41 | | /*! |
42 | | |
43 | | <b>Notes:</b> |
44 | | - many of the methods of Atom require that the Atom be associated |
45 | | with a molecule (an ROMol). |
46 | | - each Atom maintains a Dict of \c properties: |
47 | | - Each \c property is keyed by name and can store an |
48 | | arbitrary type. |
49 | | - \c Properties can be marked as \c calculated, in which case |
50 | | they will be cleared when the \c clearComputedProps() method |
51 | | is called. |
52 | | - Because they have no impact upon chemistry, all \c property |
53 | | operations are \c const, this allows extra flexibility for |
54 | | clients who need to store extra data on Atom objects. |
55 | | - Atom objects are lazy about computing their explicit and implicit valence |
56 | | values. These will not be computed until their values are requested. |
57 | | |
58 | | <b>Chirality:</b> |
59 | | |
60 | | The chirality of an Atom is determined by two things: |
61 | | - its \c chiralTag |
62 | | - the input order of its bonds (see note below for handling of |
63 | | implicit Hs) |
64 | | |
65 | | For tetrahedral coordination, the \c chiralTag tells you what |
66 | | direction you have to rotate to get from bond 2 to bond 3 while looking |
67 | | down bond 1. This is pretty much identical to the SMILES representation of |
68 | | chirality. |
69 | | |
70 | | NOTE: if an atom has an implicit H, the bond to that H is considered to be |
71 | | at the *end* of the list of other bonds. |
72 | | |
73 | | */ |
74 | | class RDKIT_GRAPHMOL_EXPORT Atom : public RDProps { |
75 | | friend class MolPickler; //!< the pickler needs access to our privates |
76 | | friend class ROMol; |
77 | | friend class RWMol; |
78 | | friend std::ostream &(::operator<<)(std::ostream &target, |
79 | | const ::RDKit::Atom &at); |
80 | | friend int calculateImplicitValence(const Atom &, bool, bool); |
81 | | |
82 | | public: |
83 | | // used to mark missing atoms, e.g. in Chirality::StereoInfo |
84 | | // and the _CIPNeighborOrder in CIP labeler |
85 | | inline static constexpr unsigned int NOATOM = |
86 | | std::numeric_limits<unsigned int>::max(); |
87 | | |
88 | | // FIX: grn... |
89 | | typedef Queries::Query<int, Atom const *, true> QUERYATOM_QUERY; |
90 | | |
91 | | //! store hybridization |
92 | | typedef enum { |
93 | | UNSPECIFIED = 0, //!< hybridization that hasn't been specified |
94 | | S, |
95 | | SP, |
96 | | SP2, |
97 | | SP3, |
98 | | SP2D, |
99 | | SP3D, |
100 | | SP3D2, |
101 | | OTHER //!< unrecognized hybridization |
102 | | } HybridizationType; |
103 | | |
104 | | //! store type of chirality |
105 | | typedef enum { |
106 | | CHI_UNSPECIFIED = 0, //!< chirality that hasn't been specified |
107 | | CHI_TETRAHEDRAL_CW, //!< tetrahedral: clockwise rotation (SMILES \@\@) |
108 | | CHI_TETRAHEDRAL_CCW, //!< tetrahedral: counter-clockwise rotation (SMILES |
109 | | //!< \@) |
110 | | CHI_OTHER, //!< some unrecognized type of chirality |
111 | | CHI_TETRAHEDRAL, //!< tetrahedral, use permutation flag |
112 | | CHI_ALLENE, //!< allene, use permutation flag |
113 | | CHI_SQUAREPLANAR, //!< square planar, use permutation flag |
114 | | CHI_TRIGONALBIPYRAMIDAL, //!< trigonal bipyramidal, use permutation flag |
115 | | CHI_OCTAHEDRAL //!< octahedral, use permutation flag |
116 | | } ChiralType; |
117 | | |
118 | | enum class ValenceType : std::uint8_t { |
119 | | IMPLICIT = 0, |
120 | | EXPLICIT |
121 | | }; |
122 | | |
123 | | Atom(); |
124 | | //! construct an Atom with a particular atomic number |
125 | | explicit Atom(unsigned int num); |
126 | | //! construct an Atom with a particular symbol (looked up in the |
127 | | /// PeriodicTable) |
128 | | explicit Atom(const std::string &what); |
129 | | Atom(const Atom &other); |
130 | | Atom &operator=(const Atom &other); |
131 | | // NOTE: the move methods are somewhat fraught for atoms associated with |
132 | | // molecules since the molecule will still be pointing to the original object |
133 | | Atom(Atom &&other) = default; |
134 | | Atom &operator=(Atom &&other) = default; |
135 | | |
136 | | virtual ~Atom(); |
137 | | |
138 | | //! makes a copy of this Atom and returns a pointer to it. |
139 | | /*! |
140 | | <b>Note:</b> the caller is responsible for <tt>delete</tt>ing the result |
141 | | */ |
142 | | virtual Atom *copy() const; |
143 | | |
144 | | //! returns our atomic number |
145 | 1.01G | int getAtomicNum() const { return d_atomicNum; } |
146 | | //! sets our atomic number |
147 | 228k | void setAtomicNum(int newNum) { d_atomicNum = newNum; } |
148 | | |
149 | | //! returns our symbol (determined by our atomic number) |
150 | | std::string getSymbol() const; |
151 | | |
152 | | //! returns whether or not this instance belongs to a molecule |
153 | 80.6M | bool hasOwningMol() const { return dp_mol != nullptr; } |
154 | | |
155 | | //! returns a reference to the ROMol that owns this instance |
156 | 573M | ROMol &getOwningMol() const { |
157 | 573M | PRECONDITION(dp_mol, "no owner"); |
158 | 573M | return *dp_mol; |
159 | 573M | } |
160 | | |
161 | | //! returns our index within the ROMol |
162 | 523M | unsigned int getIdx() const { return d_index; } |
163 | | //! sets our index within the ROMol |
164 | | /*! |
165 | | <b>Notes:</b> |
166 | | - this makes no sense if we do not have an owning molecule |
167 | | - the index should be <tt>< this->getOwningMol().getNumAtoms()</tt> |
168 | | */ |
169 | 29.5M | void setIdx(unsigned int index) { d_index = index; } |
170 | | //! overload |
171 | | template <class U> |
172 | 18.9M | void setIdx(const U index) { |
173 | 18.9M | setIdx(rdcast<unsigned int>(index)); |
174 | 18.9M | } |
175 | | //! returns the explicit degree of the Atom (number of bonded |
176 | | //! neighbors in the graph) |
177 | | unsigned int getDegree() const; |
178 | | |
179 | | //! returns the total degree of the Atom (number of bonded |
180 | | //! neighbors + number of Hs) |
181 | | unsigned int getTotalDegree() const; |
182 | | |
183 | | //! \brief returns the total number of Hs (implicit and explicit) that |
184 | | //! this Atom is bound to |
185 | | unsigned int getTotalNumHs(bool includeNeighbors = false) const; |
186 | | |
187 | | //! \brief returns the total valence (implicit and explicit) |
188 | | //! for an atom |
189 | | unsigned int getTotalValence() const; |
190 | | |
191 | | //! returns the number of implicit Hs this Atom is bound to |
192 | | unsigned int getNumImplicitHs() const; |
193 | | |
194 | | //! returns the valence (explicit or implicit) of this atom |
195 | | unsigned int getValence(ValenceType which) const; |
196 | | |
197 | | //! returns the explicit valence (including Hs) of this atom |
198 | | [[deprecated("please use getValence(true)")]] int getExplicitValence() const; |
199 | | |
200 | | //! returns the implicit valence for this Atom |
201 | | [[deprecated("please use getValence(false)")]] int getImplicitValence() const; |
202 | | |
203 | | //! returns whether the atom has a valency violation or not |
204 | | bool hasValenceViolation() const; |
205 | | |
206 | | //! returns the number of radical electrons for this Atom |
207 | 6.37M | unsigned int getNumRadicalElectrons() const { return d_numRadicalElectrons; } |
208 | 101k | void setNumRadicalElectrons(unsigned int num) { d_numRadicalElectrons = num; } |
209 | | |
210 | | //! returns the formal charge of this atom |
211 | 690M | int getFormalCharge() const { return d_formalCharge; } |
212 | | //! sets the formal charge of this atom |
213 | 262k | void setFormalCharge(int what) { d_formalCharge = what; } |
214 | | |
215 | | //! \brief sets our \c noImplicit flag, indicating whether or not |
216 | | //! we are allowed to have implicit Hs |
217 | 760k | void setNoImplicit(bool what) { df_noImplicit = what; } |
218 | | //! returns the \c noImplicit flag |
219 | 5.43M | bool getNoImplicit() const { return df_noImplicit; } |
220 | | |
221 | | //! sets our number of explicit Hs |
222 | 572k | void setNumExplicitHs(unsigned int what) { d_numExplicitHs = what; } |
223 | | //! returns our number of explicit Hs |
224 | 362M | unsigned int getNumExplicitHs() const { return d_numExplicitHs; } |
225 | | |
226 | | //! sets our \c isAromatic flag, indicating whether or not we are aromatic |
227 | 13.9M | void setIsAromatic(bool what) { df_isAromatic = what; } |
228 | | //! returns our \c isAromatic flag |
229 | 101M | bool getIsAromatic() const { return df_isAromatic; } |
230 | | |
231 | | //! returns our mass |
232 | | double getMass() const; |
233 | | |
234 | | //! sets our isotope number |
235 | | void setIsotope(unsigned int what); |
236 | | //! returns our isotope number |
237 | 574M | unsigned int getIsotope() const { return d_isotope; } |
238 | | |
239 | | //! sets our \c chiralTag |
240 | 1.91M | void setChiralTag(ChiralType what) { d_chiralTag = what; } |
241 | | //! inverts our \c chiralTag, returns whether or not a change was made |
242 | | bool invertChirality(); |
243 | | //! returns our \c chiralTag |
244 | 647M | ChiralType getChiralTag() const { |
245 | 647M | return static_cast<ChiralType>(d_chiralTag); |
246 | 647M | } |
247 | | |
248 | | //! sets our hybridization |
249 | 3.54M | void setHybridization(HybridizationType what) { d_hybrid = what; } |
250 | | //! returns our hybridization |
251 | 2.79M | HybridizationType getHybridization() const { |
252 | 2.79M | return static_cast<HybridizationType>(d_hybrid); |
253 | 2.79M | } |
254 | | |
255 | | // ------------------------------------ |
256 | | // Some words of explanation before getting down into |
257 | | // the query stuff. |
258 | | // These query functions are really only here so that they |
259 | | // can have real functionality in subclasses (like QueryAtoms). |
260 | | // Since pretty much it's gonna be a mistake to call any of these |
261 | | // (ever), we're saddling them all with a precondition which |
262 | | // is guaranteed to fail. I'd like to have them be pure virtual, |
263 | | // but that doesn't work since we need to be able to instantiate |
264 | | // Atoms. |
265 | | // ------------------------------------ |
266 | | |
267 | | // This method can be used to distinguish query atoms from standard atoms: |
268 | 283M | virtual bool hasQuery() const { return false; } |
269 | | |
270 | 0 | virtual std::string getQueryType() const { return ""; } |
271 | | |
272 | | //! NOT CALLABLE |
273 | | virtual void setQuery(QUERYATOM_QUERY *what); |
274 | | |
275 | | //! NOT CALLABLE |
276 | | virtual QUERYATOM_QUERY *getQuery() const; |
277 | | //! NOT CALLABLE |
278 | | virtual void expandQuery( |
279 | | QUERYATOM_QUERY *what, |
280 | | Queries::CompositeQueryType how = Queries::COMPOSITE_AND, |
281 | | bool maintainOrder = true); |
282 | | |
283 | | //! returns whether or not we match the argument |
284 | | /*! |
285 | | <b>Notes:</b> |
286 | | The general rule is that if a property on this atom has a non-default |
287 | | value, |
288 | | the property on the other atom must have the same value. |
289 | | The exception to this is H counts, which are ignored. These turn out to |
290 | | be |
291 | | impossible to handle generally, so rather than having odd and |
292 | | hard-to-explain |
293 | | exceptions, we ignore them entirely. |
294 | | |
295 | | Here are the rules for atom-atom matching: |
296 | | | This | Other | Match | Reason |
297 | | | CCO | CCO | Yes | |
298 | | | CCO | CC[O-] | Yes | |
299 | | | CC[O-] | CCO | No | Charge |
300 | | | CC[O-] | CC[O-] | Yes | |
301 | | | CC[OH] | CC[O-] | Yes | |
302 | | | CC[OH] | CCOC | Yes | |
303 | | | CCO | CCOC | Yes | |
304 | | | CCC | CCC | Yes | |
305 | | | CCC | CC[14C] | Yes | |
306 | | | CC[14C] | CCC | No | Isotope |
307 | | | CC[14C] | CC[14C] | Yes | |
308 | | | C | OCO | Yes | |
309 | | | [CH] | OCO | Yes | |
310 | | | [CH2] | OCO | Yes | |
311 | | | [CH3] | OCO | No | Radical |
312 | | | C | O[CH2]O | Yes | |
313 | | | [CH2] | O[CH2]O | Yes | |
314 | | */ |
315 | | virtual bool Match(Atom const *what) const; |
316 | | |
317 | | //! returns the perturbation order for a list of integers |
318 | | /*! |
319 | | |
320 | | This value is associated with chirality. |
321 | | |
322 | | \param probe a list of bond indices. This must be the same |
323 | | length as our number of incoming bonds (our degree). |
324 | | |
325 | | \return the number of swaps required to convert the ordering |
326 | | of the probe list to match the order of our incoming bonds: |
327 | | e.g. if our incoming bond order is: <tt>[0,1,2,3]</tt> |
328 | | \verbatim |
329 | | getPerturbationOrder([1,0,2,3]) = 1 |
330 | | getPerturbationOrder([1,2,3,0]) = 3 |
331 | | getPerturbationOrder([1,2,0,3]) = 2 |
332 | | \endverbatim |
333 | | |
334 | | See the class documentation for a more detailed description |
335 | | of our representation of chirality. |
336 | | |
337 | | <b>Notes:</b> |
338 | | - requires an owning molecule |
339 | | |
340 | | */ |
341 | | int getPerturbationOrder(const INT_LIST &probe) const; |
342 | | |
343 | | //! calculates any of our lazy \c properties |
344 | | /*! |
345 | | <b>Notes:</b> |
346 | | - requires an owning molecule |
347 | | - the current lazy \c properties are implicit and explicit valence |
348 | | */ |
349 | | void updatePropertyCache(bool strict = true); |
350 | | |
351 | | bool needsUpdatePropertyCache() const; |
352 | | void clearPropertyCache(); |
353 | | |
354 | | //! calculates and returns our explicit valence |
355 | | /*! |
356 | | <b>Notes:</b> |
357 | | - requires an owning molecule |
358 | | */ |
359 | | int calcExplicitValence(bool strict = true); |
360 | | |
361 | | //! calculates and returns our implicit valence |
362 | | /*! |
363 | | <b>Notes:</b> |
364 | | - requires an owning molecule |
365 | | */ |
366 | | int calcImplicitValence(bool strict = true); |
367 | | |
368 | 0 | AtomMonomerInfo *getMonomerInfo() { return dp_monomerInfo; } |
369 | 0 | const AtomMonomerInfo *getMonomerInfo() const { return dp_monomerInfo; } |
370 | | //! takes ownership of the pointer |
371 | | void setMonomerInfo(AtomMonomerInfo *info); |
372 | | |
373 | | //! Set the atom map Number of the atom |
374 | 0 | void setAtomMapNum(int mapno, bool strict = true) { |
375 | 0 | PRECONDITION( |
376 | 0 | !strict || (mapno >= 0 && mapno < 1000), |
377 | 0 | "atom map number out of range [0..1000], use strict=false to override"); |
378 | 0 | if (mapno) { |
379 | 0 | setProp(common_properties::molAtomMapNumber, mapno); |
380 | 0 | } else if (hasProp(common_properties::molAtomMapNumber)) { |
381 | 0 | clearProp(common_properties::molAtomMapNumber); |
382 | 0 | } |
383 | 0 | } |
384 | | //! Gets the atom map Number of the atom, if no atom map exists, 0 is |
385 | | //! returned. |
386 | 0 | int getAtomMapNum() const { |
387 | 0 | int mapno = 0; |
388 | 0 | getPropIfPresent(common_properties::molAtomMapNumber, mapno); |
389 | 0 | return mapno; |
390 | 0 | } |
391 | | |
392 | | //! Flags that can be used to store information on atoms. |
393 | | //! These are not serialized and should be treated as temporary values. |
394 | | //! No guarantees are made about preserving these flags across library |
395 | | //! calls. |
396 | 0 | void setFlags(std::uint64_t flags) { d_flags = flags; } |
397 | 0 | std::uint64_t getFlags() const { return d_flags; } |
398 | 2.83M | std::uint64_t &getFlags() { return d_flags; } |
399 | | |
400 | | protected: |
401 | | //! sets our owning molecule |
402 | | void setOwningMol(ROMol *other); |
403 | | //! sets our owning molecule |
404 | 0 | void setOwningMol(ROMol &other) { setOwningMol(&other); } |
405 | | |
406 | | bool df_isAromatic; |
407 | | bool df_noImplicit; |
408 | | std::uint8_t d_numExplicitHs; |
409 | | std::int8_t d_formalCharge; |
410 | | std::uint8_t d_atomicNum; |
411 | | // NOTE that these cannot be unsigned, they are calculated using |
412 | | // a lazy scheme and are initialized to -1 to indicate that the |
413 | | // calculation has not yet been done. |
414 | | std::int8_t d_implicitValence, d_explicitValence; |
415 | | std::uint8_t d_numRadicalElectrons; |
416 | | std::uint8_t d_chiralTag; |
417 | | std::uint8_t d_hybrid; |
418 | | |
419 | | std::uint16_t d_isotope; |
420 | | atomindex_t d_index; |
421 | | std::uint64_t d_flags = 0ul; |
422 | | |
423 | | ROMol *dp_mol; |
424 | | AtomMonomerInfo *dp_monomerInfo; |
425 | | void initAtom(); |
426 | | void initFromOther(const Atom &other); |
427 | | }; |
428 | | |
429 | | //! Set the atom's MDL integer RLabel |
430 | | /// Setting to 0 clears the rlabel. Rlabel must be in the range [0..99] |
431 | | RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel); |
432 | | RDKIT_GRAPHMOL_EXPORT int getAtomRLabel(const Atom *atm); |
433 | | |
434 | | //! Set the atom's MDL atom alias |
435 | | /// Setting to an empty string clears the alias |
436 | | RDKIT_GRAPHMOL_EXPORT void setAtomAlias(Atom *atom, const std::string &alias); |
437 | | RDKIT_GRAPHMOL_EXPORT std::string getAtomAlias(const Atom *atom); |
438 | | |
439 | | //! Set the atom's MDL atom value |
440 | | /// Setting to an empty string clears the value |
441 | | /// This is where recursive smarts get stored in MolBlock Queries |
442 | | RDKIT_GRAPHMOL_EXPORT void setAtomValue(Atom *atom, const std::string &value); |
443 | | RDKIT_GRAPHMOL_EXPORT std::string getAtomValue(const Atom *atom); |
444 | | |
445 | | //! Sets the supplemental label that will follow the atom when writing |
446 | | /// smiles strings. |
447 | | RDKIT_GRAPHMOL_EXPORT void setSupplementalSmilesLabel(Atom *atom, |
448 | | const std::string &label); |
449 | | RDKIT_GRAPHMOL_EXPORT std::string getSupplementalSmilesLabel(const Atom *atom); |
450 | | |
451 | | //! returns true if the atom is to the left of C |
452 | | RDKIT_GRAPHMOL_EXPORT bool isEarlyAtom(int atomicNum); |
453 | | //! returns true if the atom is aromatic or has an aromatic bond |
454 | | RDKIT_GRAPHMOL_EXPORT bool isAromaticAtom(const Atom &atom); |
455 | | //! returns the number of pi electrons on the atom |
456 | | RDKIT_GRAPHMOL_EXPORT unsigned int numPiElectrons(const Atom &atom); |
457 | | }; // namespace RDKit |
458 | | |
459 | | //! allows Atom objects to be dumped to streams |
460 | | RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target, |
461 | | const RDKit::Atom &at); |
462 | | #endif |