RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
LinkNode.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/Invariant.h>
11
12#include <map>
13#include <boost/lexical_cast.hpp>
14#include <boost/tokenizer.hpp>
15#include <boost/format.hpp>
16#include <algorithm>
17
18typedef boost::tokenizer<boost::char_separator<char>> tokenizer;
19
20namespace RDKit {
21namespace MolEnumerator {
22
//! Plain record describing a single link node read from a molecule.
struct LinkNode {
  //! minimum repeat count (first value of a LINKNODE entry)
  unsigned int minRep = 0;
  //! maximum repeat count (second value of a LINKNODE entry)
  unsigned int maxRep = 0;
  //! number of bonds declared for the link node (third value of the entry)
  unsigned int nBonds = 0;
  //! one (begin atom, end atom) index pair per bond of the link node
  std::vector<std::pair<unsigned int, unsigned int>> bondAtoms;
};
29
30namespace utils {
31inline std::vector<LinkNode> getMolLinkNodes(
32 const ROMol &mol, bool strict = true,
33 const std::map<unsigned, Atom *> *atomIdxMap = nullptr) {
34 std::vector<LinkNode> res;
35 std::string pval;
37 return res;
38 }
39 std::vector<int> mapping;
40
41 boost::char_separator<char> pipesep("|");
42 boost::char_separator<char> spacesep(" ");
43 for (auto linknodetext : tokenizer(pval, pipesep)) {
46 std::vector<unsigned int> data;
47 try {
48 std::transform(tokens.begin(), tokens.end(), std::back_inserter(data),
49 [](const std::string &token) -> unsigned int {
50 return boost::lexical_cast<unsigned int>(token);
51 });
52 } catch (boost::bad_lexical_cast &) {
53 std::ostringstream errout;
54 errout << "Cannot convert values in LINKNODE '" << linknodetext
55 << "' to unsigned ints";
56 if (strict) {
57 throw ValueErrorException(errout.str());
58 } else {
59 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
60 continue;
61 }
62 }
63 // the second test here is for the atom-pairs defining the bonds
64 // data[2] contains the number of bonds
65 if (data.size() < 5 || data.size() < 3 + 2 * data[2]) {
66 std::ostringstream errout;
67 errout << "not enough values in LINKNODE '" << linknodetext << "'";
68 if (strict) {
69 throw ValueErrorException(errout.str());
70 } else {
71 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
72 continue;
73 }
74 }
75
76 node.minRep = data[0];
77 node.maxRep = data[1];
78 if (node.minRep == 0 || node.maxRep < node.minRep) {
79 std::ostringstream errout;
80 errout << "bad counts in LINKNODE '" << linknodetext << "'";
81 if (strict) {
82 throw ValueErrorException(errout.str());
83 } else {
84 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
85 continue;
86 }
87 }
88 node.nBonds = data[2];
89 if (node.nBonds != 2) {
90 if (strict) {
92 "only link nodes with 2 bonds are currently supported");
93 } else {
95 << "only link nodes with 2 bonds are currently supported"
96 << std::endl;
97 continue;
98 }
99 }
100 // both bonds must start from the same atom:
101 if (data[3] != data[5]) {
102 std::ostringstream errout;
103 errout << "bonds don't start at the same atom for LINKNODE '"
104 << linknodetext << "'";
105 if (strict) {
106 throw ValueErrorException(errout.str());
107 } else {
108 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
109 continue;
110 }
111 }
112
113 if (atomIdxMap) {
114 // map the indices back to the original atom numbers
115 for (unsigned int i = 3; i <= 6; ++i) {
116 const auto aidx = atomIdxMap->find(data[i] - 1);
117 if (aidx == atomIdxMap->end()) {
118 std::ostringstream errout;
119 errout << "atom index " << data[i]
120 << " cannot be found in molecule for LINKNODE '"
121 << linknodetext << "'";
122 if (strict) {
123 throw ValueErrorException(errout.str());
124 } else {
125 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
126 continue;
127 }
128 } else {
129 data[i] = aidx->second->getIdx();
130 }
131 }
132 } else {
133 for (unsigned int i = 3; i <= 6; ++i) {
134 --data[i];
135 }
136 }
137 node.bondAtoms.push_back(std::make_pair(data[3], data[4]));
138 node.bondAtoms.push_back(std::make_pair(data[5], data[6]));
139 if (!mol.getBondBetweenAtoms(data[4], data[3]) ||
140 !mol.getBondBetweenAtoms(data[6], data[5])) {
141 std::ostringstream errout;
142 errout << "bond not found between atoms in LINKNODE '" << linknodetext
143 << "'";
144 if (strict) {
145 throw ValueErrorException(errout.str());
146 } else {
147 BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
148 continue;
149 }
150 }
151 res.push_back(std::move(node));
152 }
153 return res;
154}
155
156} // namespace utils
157} // namespace MolEnumerator
158
159} // namespace RDKit
#define UNDER_CONSTRUCTION(fn)
Definition Invariant.h:125
boost::tokenizer< boost::char_separator< char > > tokenizer
Definition LinkNode.h:18
#define BOOST_LOG(__arg__)
Definition RDLog.h:110
RDKIT_RDGENERAL_EXPORT RDLogger rdWarningLog
bool getPropIfPresent(const std::string &key, T &res) const
Definition RDProps.h:121
Bond * getBondBetweenAtoms(unsigned int idx1, unsigned int idx2)
returns a pointer to the bond between two atoms, Null on failure
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition Exceptions.h:40
std::vector< LinkNode > getMolLinkNodes(const ROMol &mol, bool strict=true, const std::map< unsigned, Atom * > *atomIdxMap=nullptr)
Definition LinkNode.h:31
RDKIT_RDGENERAL_EXPORT const std::string molFileLinkNodes
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
std::vector< std::pair< unsigned int, unsigned int > > bondAtoms
Definition LinkNode.h:27