RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ScaffoldNetwork.h
Go to the documentation of this file.
1//
2// Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SCAFFOLDNETWORK_H
12#define RD_SCAFFOLDNETWORK_H
13
14#include <vector>
15#include <map>
16#include <string>
17#include <sstream>
18#include <memory>
19
20#ifdef RDK_USE_BOOST_SERIALIZATION
21#include <RDGeneral/Invariant.h>
23#include <boost/archive/text_oarchive.hpp>
24#include <boost/archive/text_iarchive.hpp>
25#include <boost/serialization/vector.hpp>
26#include <boost/serialization/shared_ptr.hpp>
27#include <boost/serialization/version.hpp>
29#endif
30
31namespace RDKit {
32class ROMol;
34
35namespace ScaffoldNetwork {
36
39 true; ///< include scaffolds with all atoms replaced by dummies
41 false; ///< include scaffolds with all bonds replaced by single bonds
43 true; ///< remove attachment points from scaffolds and include the result
45 true; ///< Include the version of the scaffold with attachment points
47 false; ///< Include molecules names of the input molecules
49 true; ///< keep only the first fragment from the bond breaking rule
51 true; ///< Do a pruning/flattening step before starting fragmenting
52 bool flattenIsotopes = true; ///< remove isotopes when flattening
54 true; ///< remove chirality and bond stereo when flattening
56 true; ///< keep only the largest fragment when doing flattening
57 bool collectMolCounts = true; ///< keep track of the number of molecules each
58 ///< scaffold was reached from
59
60 std::vector<std::shared_ptr<ChemicalReaction>>
61 bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
62 ///< single reactant and produce two products
64 : ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
65 ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
66};
67
//! The kind of transformation an edge of the scaffold network represents.
//! The numeric values are explicit and are what gets written when an edge is
//! serialized, so they must not be renumbered.
enum class EdgeType {
  Fragment = 1,     ///< molecule -> fragment
  Generic = 2,      ///< molecule -> generic molecule (all atoms are dummies)
  GenericBond = 3,  ///< molecule -> generic bond molecule (all bonds single)
  RemoveAttachment = 4,  ///< molecule -> molecule with no attachment points
  Initialize = 5         ///< molecule -> flattened molecule
};
75
77 size_t beginIdx;
78 size_t endIdx;
81 NetworkEdge(size_t bi, size_t ei, EdgeType typ)
82 : beginIdx(bi), endIdx(ei), type(typ) {}
84 return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
85 }
87 return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
88 }
89#ifdef RDK_USE_BOOST_SERIALIZATION
90 private:
91 friend class boost::serialization::access;
92 template <class Archive>
93 void serialize(Archive &ar, const unsigned int version) {
94 RDUNUSED_PARAM(version);
95 ar & beginIdx;
96 ar & endIdx;
97 ar & type;
98 }
99#endif
100};
101
103 std::vector<std::string> nodes; ///< SMILES for the scaffolds
104 std::vector<unsigned>
105 counts; ///< number of times each scaffold was encountered
106 std::vector<unsigned>
107 molCounts; ///< number of molecules each scaffold was found in
108 std::vector<NetworkEdge> edges; ///< edges in the network
110#ifdef RDK_USE_BOOST_SERIALIZATION
111 ScaffoldNetwork(const std::string &pkl) {
112 std::stringstream iss(pkl);
113 boost::archive::text_iarchive ia(iss);
114 ia >> *this;
115 }
116
117 private:
118 friend class boost::serialization::access;
119 template <class Archive>
120 void serialize(Archive &ar, const unsigned int version) {
121 RDUNUSED_PARAM(version);
122 ar & nodes;
123 ar & counts;
124 if (version > 0) {
125 ar & molCounts;
126 }
127 ar & edges;
128 }
129#endif
130};
131
132//! update an existing ScaffoldNetwork using a set of molecules
133template <typename T>
134void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
135 const ScaffoldNetworkParams &params);
136
137//! create a new ScaffoldNetwork for a set of molecules
138template <typename T>
140 const ScaffoldNetworkParams &params) {
141 ScaffoldNetwork res;
142 updateScaffoldNetwork(mols, res, params);
143 return res;
144}
145//! allows nodes to output nicely as strings
146inline std::ostream &operator<<(std::ostream &ostr,
148 switch (e) {
150 ostr << "Fragment";
151 break;
153 ostr << "Generic";
154 break;
156 ostr << "GenericBond";
157 break;
159 ostr << "RemoveAttachment";
160 break;
162 ostr << "Initialize";
163 break;
164 default:
165 ostr << "UNKNOWN";
166 break;
167 }
168 return ostr;
169}
170//! allows edges to output nicely as strings
171inline std::ostream &operator<<(std::ostream &ostr,
173 ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
174 << ", type:" << e.type << " )";
175 return ostr;
176}
177
178//! returns parameters for constructing scaffold networks using BRICS
179//! fragmentation
181
182} // namespace ScaffoldNetwork
183} // namespace RDKit
184
185#ifdef RDK_USE_BOOST_SERIALIZATION
186namespace boost {
187namespace serialization {
188template <>
189struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
190 BOOST_STATIC_CONSTANT(int, value = 1);
191};
192} // namespace serialization
193} // namespace boost
194#endif
195
196#endif
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::Atom &at)
allows Atom objects to be dumped to streams
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
This is a class for storing and applying general chemical reactions.
Definition Reaction.h:121
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition export.h:513
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
@ Initialize
molecule -> flattened molecule
@ Generic
molecule -> generic molecule (all atoms are dummies)
@ RemoveAttachment
molecule -> molecule with no attachment points
@ GenericBond
molecule -> generic bond molecule (all bonds single)
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
Definition RDLog.h:24
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
bool includeGenericBondScaffolds
include scaffolds with all bonds replaced by single bonds
bool includeNames
Include molecule names of the input molecules.
bool pruneBeforeFragmenting
Do a pruning/flattening step before starting fragmenting.
bool includeGenericScaffolds
include scaffolds with all atoms replaced by dummies
bool flattenKeepLargest
keep only the largest fragment when doing flattening
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
bool includeScaffoldsWithAttachments
Include the version of the scaffold with attachment points.
bool flattenIsotopes
remove isotopes when flattening
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
bool flattenChirality
remove chirality and bond stereo when flattening
bool keepOnlyFirstFragment
keep only the first fragment from the bond breaking rule
bool includeScaffoldsWithoutAttachments
remove attachment points from scaffolds and include the result
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered