/src/openbabel/src/phmodel.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | phmodel.cpp - Read pH rules and assign charges. |
3 | | |
4 | | Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 | | Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
6 | | |
7 | | This file is part of the Open Babel project. |
8 | | For more information, see <http://openbabel.org/> |
9 | | |
10 | | This program is free software; you can redistribute it and/or modify |
11 | | it under the terms of the GNU General Public License as published by |
12 | | the Free Software Foundation version 2 of the License. |
13 | | |
14 | | This program is distributed in the hope that it will be useful, |
15 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | GNU General Public License for more details. |
18 | | ***********************************************************************/ |
19 | | #include <openbabel/babelconfig.h> |
20 | | |
21 | | #include <openbabel/mol.h> |
22 | | #include <openbabel/atom.h> |
23 | | #include <openbabel/bond.h> |
24 | | #include <openbabel/typer.h> |
25 | | #include <openbabel/obiter.h> |
26 | | #include <openbabel/oberror.h> |
27 | | #include <openbabel/phmodel.h> |
28 | | #include <openbabel/obfunctions.h> |
29 | | |
30 | | #include <cstdlib> |
31 | | |
32 | | // private data header with default parameters |
33 | | #include "phmodeldata.h" |
34 | | |
35 | | #ifdef WIN32 |
36 | | #pragma warning (disable : 4786) |
37 | | #endif |
38 | | |
39 | | using namespace std; |
40 | | |
41 | | namespace OpenBabel |
42 | | { |
43 | | |
44 | | // Global OBPhModel for assigning formal charges and hydrogen addition rules |
45 | | THREAD_LOCAL OBPhModel phmodel; |
46 | | |
47 | | OBPhModel::OBPhModel() |
48 | 0 | { |
49 | 0 | _init = false; |
50 | 0 | _dir = BABEL_DATADIR; |
51 | 0 | _envvar = "BABEL_DATADIR"; |
52 | 0 | _filename = "phmodel.txt"; |
53 | 0 | _subdir = "data"; |
54 | 0 | _dataptr = PhModelData; |
55 | 0 | } |
56 | | |
57 | | OBPhModel::~OBPhModel() |
58 | 0 | { |
59 | 0 | vector<OBChemTsfm*>::iterator k; |
60 | 0 | for (k = _vtsfm.begin();k != _vtsfm.end();++k) |
61 | 0 | delete *k; |
62 | |
|
63 | 0 | vector<pair<OBSmartsPattern*,vector<double> > >::iterator m; |
64 | 0 | for (m = _vschrg.begin();m != _vschrg.end();++m) |
65 | 0 | delete m->first; |
66 | 0 | } |
67 | | |
68 | | void OBPhModel::ParseLine(const char *buffer) |
69 | 0 | { |
70 | 0 | vector<string> vs; |
71 | 0 | OBSmartsPattern *sp; |
72 | |
|
73 | 0 | if (buffer[0] == '#') |
74 | 0 | return; |
75 | | |
76 | 0 | if (EQn(buffer,"TRANSFORM",7)) |
77 | 0 | { |
78 | 0 | tokenize(vs,buffer); |
79 | 0 | if (vs.size() < 5) |
80 | 0 | { |
81 | 0 | obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo); |
82 | 0 | return; |
83 | 0 | } |
84 | | |
85 | 0 | OBChemTsfm *tsfm = new OBChemTsfm; |
86 | 0 | if (!tsfm->Init(vs[1],vs[3])) |
87 | 0 | { |
88 | 0 | delete tsfm; |
89 | 0 | tsfm = nullptr; |
90 | 0 | obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo); |
91 | 0 | return; |
92 | 0 | } |
93 | | |
94 | 0 | _vtsfm.push_back(tsfm); |
95 | 0 | _vpKa.push_back(atof(vs[4].c_str())); |
96 | 0 | } |
97 | 0 | else if (EQn(buffer,"SEEDCHARGE",10)) |
98 | 0 | { |
99 | 0 | tokenize(vs,buffer); |
100 | 0 | if (vs.size() < 2) |
101 | 0 | { |
102 | 0 | obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo); |
103 | 0 | return; |
104 | 0 | } |
105 | | |
106 | 0 | sp = new OBSmartsPattern; |
107 | 0 | if (!sp->Init(vs[1]) || (vs.size()-2) != sp->NumAtoms()) |
108 | 0 | { |
109 | 0 | delete sp; |
110 | 0 | sp = nullptr; |
111 | 0 | obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo); |
112 | 0 | return; |
113 | 0 | } |
114 | | |
115 | 0 | vector<double> vf; |
116 | 0 | vector<string>::iterator i; |
117 | 0 | for (i = vs.begin()+2;i != vs.end();++i) |
118 | 0 | vf.push_back(atof((char*)i->c_str())); |
119 | |
|
120 | 0 | _vschrg.push_back(pair<OBSmartsPattern*,vector<double> > (sp,vf)); |
121 | 0 | } |
122 | 0 | } |
123 | | |
124 | | void OBPhModel::AssignSeedPartialCharge(OBMol &mol) |
125 | 0 | { |
126 | 0 | if (!_init) |
127 | 0 | Init(); |
128 | |
|
129 | 0 | mol.SetPartialChargesPerceived(); |
130 | 0 | if (!mol.AutomaticPartialCharge()) |
131 | 0 | return; |
132 | | |
133 | 0 | vector<pair<OBSmartsPattern*,vector<double> > >::iterator i; |
134 | 0 | for (i = _vschrg.begin(); i != _vschrg.end(); ++i) { |
135 | 0 | std::vector<std::vector<int> > mlist; |
136 | 0 | if (i->first->Match(mol, mlist, OBSmartsPattern::AllUnique)) |
137 | 0 | { |
138 | 0 | unsigned int k; |
139 | 0 | vector<vector<int> >::iterator j; |
140 | |
|
141 | 0 | for (j = mlist.begin(); j != mlist.end(); ++j) |
142 | 0 | for (k = 0; k < j->size(); ++k) |
143 | 0 | mol.GetAtom((*j)[k])->SetPartialCharge(i->second[k]); |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } |
147 | | |
148 | | void OBPhModel::CorrectForPH(OBMol &mol, double pH) |
149 | 0 | { |
150 | 0 | if (!_init) |
151 | 0 | Init(); |
152 | 0 | if (mol.IsCorrectedForPH()) |
153 | 0 | return; |
154 | 0 | if (mol.GetDimension() > 0 && !mol.AutomaticFormalCharge()) |
155 | 0 | return; |
156 | | |
157 | 0 | mol.SetCorrectedForPH(); |
158 | |
|
159 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
160 | 0 | "Ran OpenBabel::CorrectForPH", obAuditMsg); |
161 | |
|
162 | 0 | mol.DeleteHydrogens(); |
163 | |
|
164 | 0 | for (unsigned int i = 0; i < _vtsfm.size(); ++i) { |
165 | |
|
166 | 0 | if (_vpKa[i] > 1E+9) { |
167 | | // always apply when pKa is > 1e+9 |
168 | 0 | _vtsfm[i]->Apply(mol); |
169 | 0 | } else { |
170 | | // 10^(pKa - pH) = [HA] / [A-] |
171 | | // |
172 | | // > 1 : [HA] > [A-] |
173 | | // < 1 : [HA] < [A-] |
174 | 0 | if (_vtsfm[i]->IsAcid()) { |
175 | | //cout << "IsAcid == " << _vtsfm[i]->IsAcid() << endl; |
176 | | //cout << "pKa == " << _vpKa[i] << endl; |
177 | | //cout << "pow(10, _vpKa[i] - pH) == " << pow(10, _vpKa[i] - pH) << endl; |
178 | 0 | if (pow(10, _vpKa[i] - pH) < 1.0) { |
179 | | //cout << "APPLY!!" << endl; |
180 | 0 | _vtsfm[i]->Apply(mol); |
181 | 0 | } |
182 | 0 | } |
183 | | |
184 | | // 10^(pKa - pH) = [BH+] / [B:] |
185 | | // |
186 | | // > 1 : [BH+] > [B:] |
187 | | // < 1 : [BH+] < [B:] |
188 | 0 | if (_vtsfm[i]->IsBase()) { |
189 | | //cout << "IsBase == " << _vtsfm[i]->IsBase() << endl; |
190 | | //cout << "pKa == " << _vpKa[i] << endl; |
191 | | //cout << "pow(10, _vpKa[i] - pH) == " << pow(10, _vpKa[i] - pH) << endl; |
192 | 0 | if (pow(10, _vpKa[i] - pH) > 1.0) { |
193 | | //cout << "APPLY!!" << endl; |
194 | 0 | _vtsfm[i]->Apply(mol); |
195 | 0 | } |
196 | 0 | } |
197 | 0 | } |
198 | 0 | } |
199 | |
|
200 | 0 | } |
201 | | |
202 | | |
203 | | // Portions of this documentation adapted from the JOELib docs, written by |
204 | | // Joerg Wegner |
205 | | /** \class OBChemTsfm phmodel.h <openbabel/phmodel.h> |
206 | | \brief SMARTS based structural modification (chemical transformation) |
207 | | |
208 | | Transformation of chemical structures can be used for pH value correction |
209 | | (i.e. via OBPhModel and OBMol::CorrectForPH()). The OBChemTsfm class |
210 | | defines SMARTS based TRANSFORM patterns to delete atoms, change atom types, |
211 | | atom formal charges, and bond types. |
212 | | |
213 | | For storing and converting chemical reaction files, use the OBReaction class. |
214 | | **/ |
215 | | bool OBChemTsfm::Init(string &bgn,string &end) |
216 | 0 | { |
217 | 0 | if (!_bgn.Init(bgn)) |
218 | 0 | return(false); |
219 | 0 | if (!end.empty()) |
220 | 0 | if (!_end.Init(end)) |
221 | 0 | return(false); |
222 | | |
223 | | //find atoms to be deleted |
224 | 0 | unsigned int i,j; |
225 | 0 | int vb; |
226 | 0 | bool found; |
227 | 0 | for (i = 0;i < _bgn.NumAtoms();++i) |
228 | 0 | if ((vb = _bgn.GetVectorBinding(i))) |
229 | 0 | { |
230 | 0 | found = false; |
231 | 0 | for (j = 0;j < _end.NumAtoms();++j) |
232 | 0 | if (vb == _end.GetVectorBinding(j)) |
233 | 0 | { |
234 | 0 | found = true; |
235 | 0 | break; |
236 | 0 | } |
237 | |
|
238 | 0 | if (!found) |
239 | 0 | _vadel.push_back(i); |
240 | 0 | } |
241 | | |
242 | | //find elements to be changed |
243 | 0 | int ele; |
244 | 0 | for (i = 0;i < _bgn.NumAtoms();++i) |
245 | | // Allow single-atom transformations without vector bindings |
246 | 0 | if ((vb = _bgn.GetVectorBinding(i)) || _bgn.NumAtoms() == 1) |
247 | 0 | { |
248 | 0 | ele = _bgn.GetAtomicNum(i); |
249 | 0 | for (j = 0;j < _end.NumAtoms();++j) |
250 | 0 | if (vb == _end.GetVectorBinding(j)) |
251 | 0 | if (ele != _end.GetAtomicNum(j)) |
252 | 0 | { |
253 | 0 | _vele.push_back(pair<int,int> (i,_end.GetAtomicNum(j))); |
254 | 0 | break; |
255 | 0 | } |
256 | 0 | } |
257 | | |
258 | | //find charges to modify |
259 | 0 | int chrg; |
260 | 0 | for (i = 0;i < _bgn.NumAtoms();++i) |
261 | | // Allow single-atom transformations without vector bindings |
262 | | // PR#2802980. |
263 | 0 | if ((vb = _bgn.GetVectorBinding(i)) || _bgn.NumAtoms() == 1) |
264 | 0 | { |
265 | 0 | chrg = _bgn.GetCharge(i); |
266 | 0 | for (j = 0;j < _end.NumAtoms();++j) |
267 | 0 | if (vb == _end.GetVectorBinding(j)) |
268 | 0 | if (chrg != _end.GetCharge(j)) |
269 | 0 | _vchrg.push_back(pair<int,int> (i,_end.GetCharge(j))); |
270 | 0 | } |
271 | | |
272 | | //find bonds to be modified |
273 | | //find bonds to be modified |
274 | 0 | int bsrc,bdst,bord,bvb1,bvb2; |
275 | 0 | int esrc,edst,eord,evb1,evb2; |
276 | |
|
277 | 0 | for (i = 0;i < _bgn.NumBonds();++i) |
278 | 0 | { |
279 | 0 | _bgn.GetBond(bsrc,bdst,bord,i); |
280 | 0 | bvb1 = _bgn.GetVectorBinding(bsrc); |
281 | 0 | bvb2 = _bgn.GetVectorBinding(bdst); |
282 | 0 | if (!bvb1 || !bvb2) |
283 | 0 | continue; |
284 | | |
285 | 0 | for (j = 0;j < _end.NumBonds();++j) |
286 | 0 | { |
287 | 0 | _end.GetBond(esrc,edst,eord,j); |
288 | 0 | evb1 = _end.GetVectorBinding(esrc); |
289 | 0 | evb2 = _end.GetVectorBinding(edst); |
290 | 0 | if ((bvb1 == evb1 && bvb2 == evb2) || (bvb1 == evb2 && bvb2 == evb1)) |
291 | 0 | { |
292 | 0 | if (bord == eord) |
293 | 0 | break; //nothing to modify if bond orders identical |
294 | 0 | _vbond.push_back(pair<pair<int,int>,int> (pair<int,int> (bsrc,bdst),eord)); |
295 | 0 | break; |
296 | 0 | } |
297 | 0 | } |
298 | 0 | } |
299 | | |
300 | | //make sure there is some kind of transform to do here |
301 | 0 | if (_vadel.empty() && _vchrg.empty() && _vbond.empty() && _vele.empty()) |
302 | 0 | return(false); |
303 | | |
304 | 0 | return(true); |
305 | 0 | } |
306 | | |
307 | | bool OBChemTsfm::Apply(OBMol &mol) |
308 | 0 | { |
309 | 0 | if (!_bgn.Match(mol)) |
310 | 0 | return(false); |
311 | 0 | mol.BeginModify(); |
312 | 0 | vector<vector<int> > mlist = _bgn.GetUMapList(); |
313 | |
|
314 | 0 | obErrorLog.ThrowError(__FUNCTION__, |
315 | 0 | "Ran OpenBabel::OBChemTransform", obAuditMsg); |
316 | |
|
317 | 0 | if (!_vchrg.empty()) //modify charges |
318 | 0 | { |
319 | 0 | vector<vector<int> >::iterator i; |
320 | 0 | vector<pair<int,int> >::iterator j; |
321 | |
|
322 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
323 | 0 | for (j = _vchrg.begin();j != _vchrg.end();++j) |
324 | 0 | if (j->first < (signed)i->size()) { //goof proofing |
325 | 0 | OBAtom *atom = mol.GetAtom((*i)[j->first]); |
326 | 0 | int old_charge = atom->GetFormalCharge(); |
327 | 0 | if(j->second != old_charge) { |
328 | 0 | atom->SetFormalCharge(j->second); |
329 | 0 | OBAtomAssignTypicalImplicitHydrogens(atom); //update with new charge info |
330 | 0 | } |
331 | 0 | } |
332 | 0 | } |
333 | |
|
334 | 0 | if (!_vbond.empty()) //modify bond orders |
335 | 0 | { |
336 | 0 | OBBond *bond; |
337 | 0 | vector<vector<int> >::iterator i; |
338 | 0 | vector<pair<pair<int,int>,int> >::iterator j; |
339 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
340 | 0 | for (j = _vbond.begin();j != _vbond.end();++j) |
341 | 0 | { |
342 | 0 | bond = mol.GetBond((*i)[j->first.first],(*i)[j->first.second]); |
343 | 0 | if (!bond) |
344 | 0 | { |
345 | 0 | obErrorLog.ThrowError(__FUNCTION__, "unable to find bond", obDebug); |
346 | 0 | continue; |
347 | 0 | } |
348 | 0 | unsigned int old_bond_order = bond->GetBondOrder(); |
349 | 0 | bond->SetBondOrder(j->second); |
350 | 0 | for (int k = 0; k < 2; ++k) { |
351 | 0 | OBAtom* atom = k == 0 ? bond->GetBeginAtom() : bond->GetEndAtom(); |
352 | 0 | int new_hcount = atom->GetImplicitHCount() - (j->second - old_bond_order); |
353 | 0 | if (new_hcount < 0) |
354 | 0 | new_hcount = 0; |
355 | 0 | atom->SetImplicitHCount(new_hcount); |
356 | 0 | } |
357 | 0 | } |
358 | 0 | } |
359 | |
|
360 | 0 | if (!_vadel.empty() || !_vele.empty()) //delete atoms and change elements |
361 | 0 | { |
362 | 0 | vector<int>::iterator j; |
363 | 0 | vector<vector<int> >::iterator i; |
364 | |
|
365 | 0 | if (!_vele.empty()) |
366 | 0 | { |
367 | 0 | vector<pair<int,int> >::iterator k; |
368 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
369 | 0 | for (k = _vele.begin();k != _vele.end();++k) |
370 | 0 | mol.GetAtom((*i)[k->first])->SetAtomicNum(k->second); |
371 | 0 | } |
372 | | |
373 | | //make sure same atom isn't deleted twice |
374 | 0 | vector<bool> vda; |
375 | 0 | vector<OBAtom*> vdel; |
376 | 0 | vda.resize(mol.NumAtoms()+1,false); |
377 | 0 | for (i = mlist.begin();i != mlist.end();++i) |
378 | 0 | for (j = _vadel.begin();j != _vadel.end();++j) |
379 | 0 | if (!vda[(*i)[*j]]) |
380 | 0 | { |
381 | 0 | vda[(*i)[*j]] = true; |
382 | 0 | vdel.push_back(mol.GetAtom((*i)[*j])); |
383 | 0 | } |
384 | |
|
385 | 0 | vector<OBAtom*>::iterator k; |
386 | 0 | for (k = vdel.begin();k != vdel.end();++k) |
387 | 0 | mol.DeleteAtom((OBAtom*)*k); |
388 | 0 | } |
389 | |
|
390 | 0 | mol.EndModify(); |
391 | 0 | return(true); |
392 | 0 | } |
393 | | |
394 | | bool OBChemTsfm::IsAcid() |
395 | 0 | { |
396 | 0 | if (_bgn.NumAtoms() > _end.NumAtoms()) // O=CO[#1:1] >> O=CO |
397 | 0 | return true; |
398 | | |
399 | 0 | for (unsigned int i = 0; i < _end.NumAtoms(); ++i) { |
400 | 0 | if (_end.GetCharge(i) < 0) |
401 | 0 | return true; |
402 | 0 | } |
403 | | |
404 | 0 | return false; |
405 | 0 | } |
406 | | |
407 | | bool OBChemTsfm::IsBase() |
408 | 0 | { |
409 | 0 | for (unsigned int i = 0; i < _end.NumAtoms(); ++i) { |
410 | 0 | if (_end.GetCharge(i) > 0) |
411 | 0 | return true; |
412 | 0 | } |
413 | | |
414 | 0 | return false; |
415 | 0 | } |
416 | | |
417 | | } //namespace OpenBabel |
418 | | |
419 | | //! \file phmodel.cpp |
420 | | //! \brief Read pH rules and assign charges. |