Coverage Report

Created: 2023-06-07 06:20

/src/openbabel/src/phmodel.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
phmodel.cpp - Read pH rules and assign charges.
3
4
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
6
7
This file is part of the Open Babel project.
8
For more information, see <http://openbabel.org/>
9
10
This program is free software; you can redistribute it and/or modify
11
it under the terms of the GNU General Public License as published by
12
the Free Software Foundation version 2 of the License.
13
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
***********************************************************************/
19
#include <openbabel/babelconfig.h>
20
21
#include <openbabel/mol.h>
22
#include <openbabel/atom.h>
23
#include <openbabel/bond.h>
24
#include <openbabel/typer.h>
25
#include <openbabel/obiter.h>
26
#include <openbabel/oberror.h>
27
#include <openbabel/phmodel.h>
28
#include <openbabel/obfunctions.h>
29
30
#include <cstdlib>
31
32
// private data header with default parameters
33
#include "phmodeldata.h"
34
35
#ifdef WIN32
36
#pragma warning (disable : 4786)
37
#endif
38
39
using namespace std;
40
41
namespace OpenBabel
42
{
43
44
  // Global OBPhModel for assigning formal charges and hydrogen addition rules
45
  THREAD_LOCAL OBPhModel phmodel;
46
47
  OBPhModel::OBPhModel()
48
0
  {
49
0
    _init = false;
50
0
    _dir = BABEL_DATADIR;
51
0
    _envvar = "BABEL_DATADIR";
52
0
    _filename = "phmodel.txt";
53
0
    _subdir = "data";
54
0
    _dataptr = PhModelData;
55
0
  }
56
57
  OBPhModel::~OBPhModel()
58
0
  {
59
0
    vector<OBChemTsfm*>::iterator k;
60
0
    for (k = _vtsfm.begin();k != _vtsfm.end();++k)
61
0
      delete *k;
62
63
0
    vector<pair<OBSmartsPattern*,vector<double> > >::iterator m;
64
0
    for (m = _vschrg.begin();m != _vschrg.end();++m)
65
0
      delete m->first;
66
0
  }
67
68
  void OBPhModel::ParseLine(const char *buffer)
69
0
  {
70
0
    vector<string> vs;
71
0
    OBSmartsPattern *sp;
72
73
0
    if (buffer[0] == '#')
74
0
      return;
75
76
0
    if (EQn(buffer,"TRANSFORM",7))
77
0
      {
78
0
        tokenize(vs,buffer);
79
0
        if (vs.size() < 5)
80
0
          {
81
0
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
82
0
            return;
83
0
          }
84
85
0
        OBChemTsfm *tsfm = new OBChemTsfm;
86
0
        if (!tsfm->Init(vs[1],vs[3]))
87
0
          {
88
0
            delete tsfm;
89
0
            tsfm = nullptr;
90
0
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
91
0
            return;
92
0
          }
93
94
0
        _vtsfm.push_back(tsfm);
95
0
        _vpKa.push_back(atof(vs[4].c_str()));
96
0
      }
97
0
    else if (EQn(buffer,"SEEDCHARGE",10))
98
0
      {
99
0
        tokenize(vs,buffer);
100
0
        if (vs.size() < 2)
101
0
          {
102
0
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
103
0
            return;
104
0
          }
105
106
0
        sp = new OBSmartsPattern;
107
0
        if (!sp->Init(vs[1]) || (vs.size()-2) != sp->NumAtoms())
108
0
          {
109
0
            delete sp;
110
0
            sp = nullptr;
111
0
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
112
0
            return;
113
0
          }
114
115
0
        vector<double> vf;
116
0
        vector<string>::iterator i;
117
0
        for (i = vs.begin()+2;i != vs.end();++i)
118
0
          vf.push_back(atof((char*)i->c_str()));
119
120
0
        _vschrg.push_back(pair<OBSmartsPattern*,vector<double> > (sp,vf));
121
0
      }
122
0
  }
123
124
  void OBPhModel::AssignSeedPartialCharge(OBMol &mol)
125
0
  {
126
0
    if (!_init)
127
0
      Init();
128
129
0
    mol.SetPartialChargesPerceived();
130
0
    if (!mol.AutomaticPartialCharge())
131
0
      return;
132
133
0
    vector<pair<OBSmartsPattern*,vector<double> > >::iterator i;
134
0
    for (i = _vschrg.begin(); i != _vschrg.end(); ++i) {
135
0
      std::vector<std::vector<int> > mlist;
136
0
      if (i->first->Match(mol, mlist, OBSmartsPattern::AllUnique))
137
0
      {
138
0
        unsigned int k;
139
0
        vector<vector<int> >::iterator j;
140
141
0
        for (j = mlist.begin(); j != mlist.end(); ++j)
142
0
          for (k = 0; k < j->size(); ++k)
143
0
            mol.GetAtom((*j)[k])->SetPartialCharge(i->second[k]);
144
0
      }
145
0
    }
146
0
  }
147
148
  void OBPhModel::CorrectForPH(OBMol &mol, double pH)
149
0
  {
150
0
    if (!_init)
151
0
      Init();
152
0
    if (mol.IsCorrectedForPH())
153
0
      return;
154
0
    if (mol.GetDimension() > 0 && !mol.AutomaticFormalCharge())
155
0
      return;
156
157
0
    mol.SetCorrectedForPH();
158
159
0
    obErrorLog.ThrowError(__FUNCTION__,
160
0
                          "Ran OpenBabel::CorrectForPH", obAuditMsg);
161
162
0
    mol.DeleteHydrogens();
163
164
0
    for (unsigned int i = 0; i < _vtsfm.size(); ++i) {
165
166
0
      if (_vpKa[i] > 1E+9) {
167
        // always apply when pKa is > 1e+9
168
0
        _vtsfm[i]->Apply(mol);
169
0
      } else {
170
        // 10^(pKa - pH) = [HA] / [A-]
171
        //
172
        // > 1 : [HA] > [A-]
173
        // < 1 : [HA] < [A-]
174
0
        if (_vtsfm[i]->IsAcid()) {
175
          //cout << "IsAcid == " << _vtsfm[i]->IsAcid() << endl;
176
          //cout << "pKa == " << _vpKa[i] << endl;
177
          //cout << "pow(10, _vpKa[i] - pH) == " << pow(10, _vpKa[i] - pH) << endl;
178
0
          if (pow(10, _vpKa[i] - pH) < 1.0) {
179
            //cout << "APPLY!!" << endl;
180
0
            _vtsfm[i]->Apply(mol);
181
0
          }
182
0
        }
183
184
        // 10^(pKa - pH) = [BH+] / [B:]
185
        //
186
        // > 1 : [BH+] > [B:]
187
        // < 1 : [BH+] < [B:]
188
0
        if (_vtsfm[i]->IsBase()) {
189
          //cout << "IsBase == " << _vtsfm[i]->IsBase() << endl;
190
          //cout << "pKa == " << _vpKa[i] << endl;
191
          //cout << "pow(10, _vpKa[i] - pH) == " << pow(10, _vpKa[i] - pH) << endl;
192
0
          if (pow(10, _vpKa[i] - pH) > 1.0) {
193
            //cout << "APPLY!!" << endl;
194
0
            _vtsfm[i]->Apply(mol);
195
0
          }
196
0
        }
197
0
      }
198
0
    }
199
200
0
  }
201
202
203
  // Portions of this documentation adapted from the JOELib docs, written by
204
  // Joerg Wegner
205
  /** \class OBChemTsfm phmodel.h <openbabel/phmodel.h>
206
      \brief SMARTS based structural modification (chemical transformation)
207
208
      Transformation of chemical structures can be used for pH value correction
209
      (i.e. via OBPhModel and OBMol::CorrectForPH()). The OBChemTsfm class
210
      defines SMARTS based TRANSFORM patterns to delete atoms, change atom types,
211
      atom formal charges, and bond types.
212
213
      For storing and converting chemical reaction files, use the OBReaction class.
214
  **/
215
  bool OBChemTsfm::Init(string &bgn,string &end)
216
0
  {
217
0
    if (!_bgn.Init(bgn))
218
0
      return(false);
219
0
    if (!end.empty())
220
0
      if (!_end.Init(end))
221
0
        return(false);
222
223
    //find atoms to be deleted
224
0
    unsigned int i,j;
225
0
    int vb;
226
0
    bool found;
227
0
    for (i = 0;i < _bgn.NumAtoms();++i)
228
0
      if ((vb = _bgn.GetVectorBinding(i)))
229
0
        {
230
0
          found = false;
231
0
          for (j = 0;j < _end.NumAtoms();++j)
232
0
            if (vb == _end.GetVectorBinding(j))
233
0
              {
234
0
                found = true;
235
0
                break;
236
0
              }
237
238
0
          if (!found)
239
0
            _vadel.push_back(i);
240
0
        }
241
242
    //find elements to be changed
243
0
    int ele;
244
0
    for (i = 0;i < _bgn.NumAtoms();++i)
245
      // Allow single-atom transformations without vector bindings
246
0
      if ((vb = _bgn.GetVectorBinding(i)) || _bgn.NumAtoms() == 1)
247
0
        {
248
0
          ele = _bgn.GetAtomicNum(i);
249
0
          for (j = 0;j < _end.NumAtoms();++j)
250
0
            if (vb == _end.GetVectorBinding(j))
251
0
              if (ele != _end.GetAtomicNum(j))
252
0
                {
253
0
                  _vele.push_back(pair<int,int> (i,_end.GetAtomicNum(j)));
254
0
                  break;
255
0
                }
256
0
        }
257
258
    //find charges to modify
259
0
    int chrg;
260
0
    for (i = 0;i < _bgn.NumAtoms();++i)
261
      // Allow single-atom transformations without vector bindings
262
      // PR#2802980.
263
0
      if ((vb = _bgn.GetVectorBinding(i)) || _bgn.NumAtoms() == 1)
264
0
        {
265
0
          chrg = _bgn.GetCharge(i);
266
0
          for (j = 0;j < _end.NumAtoms();++j)
267
0
            if (vb == _end.GetVectorBinding(j))
268
0
              if (chrg != _end.GetCharge(j))
269
0
                _vchrg.push_back(pair<int,int> (i,_end.GetCharge(j)));
270
0
        }
271
272
    //find bonds to be modified
273
    //find bonds to be modified
274
0
    int bsrc,bdst,bord,bvb1,bvb2;
275
0
    int esrc,edst,eord,evb1,evb2;
276
277
0
    for (i = 0;i < _bgn.NumBonds();++i)
278
0
      {
279
0
        _bgn.GetBond(bsrc,bdst,bord,i);
280
0
        bvb1 = _bgn.GetVectorBinding(bsrc);
281
0
        bvb2 = _bgn.GetVectorBinding(bdst);
282
0
        if (!bvb1 || !bvb2)
283
0
          continue;
284
285
0
        for (j = 0;j < _end.NumBonds();++j)
286
0
          {
287
0
            _end.GetBond(esrc,edst,eord,j);
288
0
            evb1 = _end.GetVectorBinding(esrc);
289
0
            evb2 = _end.GetVectorBinding(edst);
290
0
            if ((bvb1 == evb1 && bvb2 == evb2) || (bvb1 == evb2 && bvb2 == evb1))
291
0
              {
292
0
                if (bord == eord)
293
0
                  break; //nothing to modify if bond orders identical
294
0
                _vbond.push_back(pair<pair<int,int>,int> (pair<int,int> (bsrc,bdst),eord));
295
0
                break;
296
0
              }
297
0
          }
298
0
      }
299
300
    //make sure there is some kind of transform to do here
301
0
    if (_vadel.empty() && _vchrg.empty() && _vbond.empty() && _vele.empty())
302
0
      return(false);
303
304
0
    return(true);
305
0
  }
306
307
  bool OBChemTsfm::Apply(OBMol &mol)
308
0
  {
309
0
    if (!_bgn.Match(mol))
310
0
      return(false);
311
0
    mol.BeginModify();
312
0
    vector<vector<int> > mlist = _bgn.GetUMapList();
313
314
0
    obErrorLog.ThrowError(__FUNCTION__,
315
0
                          "Ran OpenBabel::OBChemTransform", obAuditMsg);
316
317
0
    if (!_vchrg.empty()) //modify charges
318
0
      {
319
0
        vector<vector<int> >::iterator i;
320
0
        vector<pair<int,int> >::iterator j;
321
322
0
        for (i = mlist.begin();i != mlist.end();++i)
323
0
          for (j = _vchrg.begin();j != _vchrg.end();++j)
324
0
            if (j->first < (signed)i->size()) { //goof proofing
325
0
              OBAtom *atom = mol.GetAtom((*i)[j->first]);
326
0
              int old_charge = atom->GetFormalCharge();
327
0
              if(j->second != old_charge) {
328
0
                atom->SetFormalCharge(j->second);
329
0
                OBAtomAssignTypicalImplicitHydrogens(atom); //update with new charge info
330
0
              }
331
0
            }
332
0
      }
333
334
0
    if (!_vbond.empty()) //modify bond orders
335
0
      {
336
0
        OBBond *bond;
337
0
        vector<vector<int> >::iterator i;
338
0
        vector<pair<pair<int,int>,int> >::iterator j;
339
0
        for (i = mlist.begin();i != mlist.end();++i)
340
0
          for (j = _vbond.begin();j != _vbond.end();++j)
341
0
            {
342
0
              bond = mol.GetBond((*i)[j->first.first],(*i)[j->first.second]);
343
0
              if (!bond)
344
0
                {
345
0
                  obErrorLog.ThrowError(__FUNCTION__, "unable to find bond", obDebug);
346
0
                  continue;
347
0
                }
348
0
              unsigned int old_bond_order = bond->GetBondOrder();
349
0
              bond->SetBondOrder(j->second);
350
0
              for (int k = 0; k < 2; ++k) {
351
0
                OBAtom* atom = k == 0 ? bond->GetBeginAtom() : bond->GetEndAtom();
352
0
                int new_hcount = atom->GetImplicitHCount() - (j->second - old_bond_order);
353
0
                if (new_hcount < 0)
354
0
                  new_hcount = 0;
355
0
                atom->SetImplicitHCount(new_hcount);
356
0
              }
357
0
            }
358
0
      }
359
360
0
    if (!_vadel.empty() || !_vele.empty()) //delete atoms and change elements
361
0
      {
362
0
        vector<int>::iterator j;
363
0
        vector<vector<int> >::iterator i;
364
365
0
        if (!_vele.empty())
366
0
          {
367
0
            vector<pair<int,int> >::iterator k;
368
0
            for (i = mlist.begin();i != mlist.end();++i)
369
0
              for (k = _vele.begin();k != _vele.end();++k)
370
0
                mol.GetAtom((*i)[k->first])->SetAtomicNum(k->second);
371
0
          }
372
373
        //make sure same atom isn't deleted twice
374
0
        vector<bool> vda;
375
0
        vector<OBAtom*> vdel;
376
0
        vda.resize(mol.NumAtoms()+1,false);
377
0
        for (i = mlist.begin();i != mlist.end();++i)
378
0
          for (j = _vadel.begin();j != _vadel.end();++j)
379
0
            if (!vda[(*i)[*j]])
380
0
              {
381
0
                vda[(*i)[*j]] = true;
382
0
                vdel.push_back(mol.GetAtom((*i)[*j]));
383
0
              }
384
385
0
        vector<OBAtom*>::iterator k;
386
0
        for (k = vdel.begin();k != vdel.end();++k)
387
0
          mol.DeleteAtom((OBAtom*)*k);
388
0
      }
389
390
0
    mol.EndModify();
391
0
    return(true);
392
0
  }
393
394
  bool OBChemTsfm::IsAcid()
395
0
  {
396
0
    if (_bgn.NumAtoms() > _end.NumAtoms())  // O=CO[#1:1] >> O=CO
397
0
      return true;
398
399
0
    for (unsigned int i = 0; i < _end.NumAtoms(); ++i) {
400
0
      if (_end.GetCharge(i) < 0)
401
0
        return true;
402
0
    }
403
404
0
    return false;
405
0
  }
406
407
  bool OBChemTsfm::IsBase()
408
0
  {
409
0
    for (unsigned int i = 0; i < _end.NumAtoms(); ++i) {
410
0
      if (_end.GetCharge(i) > 0)
411
0
        return true;
412
0
    }
413
414
0
    return false;
415
0
  }
416
417
} //namespace OpenBabel
418
419
//! \file phmodel.cpp
420
//! \brief Read pH rules and assign charges.