RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RascalResult.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2023
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9
10// A class to hold the results of a RASCAL MCES determination
11// between 2 molecules. Contains the bonds and atoms that
12// correspond between the molecules, and also a SMARTS pattern
13// defining the MCES.
14//
15#include <RDGeneral/export.h>
16
17#ifndef RASCALRESULT_H
18#define RASCALRESULT_H
19
20#include <vector>
21
22#include <GraphMol/ROMol.h>
23
24namespace RDKit {
25
26namespace RascalMCES {
27
29 public:
30 RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
31 const std::vector<std::vector<int>> &adjMatrix1,
32 const std::vector<std::vector<int>> &adjMatrix2,
33 const std::vector<unsigned int> &clique,
34 const std::vector<std::pair<int, int>> &vtx_pairs, bool timedOut,
35 bool swapped, double tier1Sim, double tier2Sim,
36 bool ringMatchesRingOnly, bool singleLargestFrag,
37 int minFragSep);
38 // For when the tier[12]Sim didn't hit the threshold, but it
39 // might be of interest what the estimates of similarity were.
40 RascalResult(double tier1Sim, double tier2Sim);
41
43
44 RascalResult(RascalResult &&other) = default;
45
46 ~RascalResult() = default;
47
49
50 RascalResult &operator=(RascalResult &&other) = default;
51
52 // Cut the result down to the single largest fragment. This is
53 // irrecoverably destructive.
55 void largestFragsOnly(unsigned int numFrags = 2);
56 void trimSmallFrags(unsigned int minFragSize = 3);
57
58 std::vector<std::pair<int, int>> getBondMatches() const {
59 return d_bondMatches;
60 }
61
62 std::vector<std::pair<int, int>> getAtomMatches() const {
63 return d_atomMatches;
64 }
65
66 // The following 5 functions are used in resultCompare to rank
67 // 2 MCES of the same size for the same pair of molecules.
68 // returns the number of contiguous fragments in the MCES.
69 int getNumFrags() const;
70
71 // returns how many bonds in the clique don't match
72 // cyclic/non-cyclic i.e. count as a match in the MCES but
73 // are ring bonds in one of the molecules and not in the other.
75
76 // returns a score for how well the atoms in the clique from mol1 match the
77 // atoms for the clique in mol2. Currently, the atom scores are the
78 // difference in H count for matching atoms, and summed for the molecule. It's
79 // so that, for example, an OH in mol1 that could match an OH or OMe matches
80 // the OH for preference.
81 int getAtomMatchScore() const;
82
83 // returns a score for the maximum difference in through-bond distance for
84 // pairs of matching atoms in the 2 molecules. An MCES where 2 atoms
85 // are far apart in one molecule and the corresponding atoms are close
86 // together in the other will get a high score by this measure.
88
89 // returns the number of atoms in the largest contiguous fragment
90 // in the MCES.
91 unsigned int getLargestFragSize() const;
92
93 std::string getSmarts() const;
94 const std::shared_ptr<ROMol> getMcesMol() const;
95 bool getTimedOut() const { return d_timedOut; };
96
97 double getTier1Sim() const { return d_tier1Sim; }
98 double getTier2Sim() const { return d_tier2Sim; }
99 double getSimilarity() const;
100
101 private:
102 std::shared_ptr<ROMol> d_mol1;
103 std::shared_ptr<ROMol> d_mol2;
104 mutable std::shared_ptr<ROMol> d_mcesMol;
105 std::vector<std::pair<int, int>> d_bondMatches;
106 std::vector<std::pair<int, int>> d_atomMatches;
107
108 mutable std::string d_smarts;
109 bool d_timedOut{false};
110 double d_tier1Sim;
111 double d_tier2Sim;
112 bool d_ringMatchesRingOnly{false};
113 int d_maxFragSep{-1};
114
115 // These are used for sorting the results.
116 mutable int d_numFrags{-1};
117 mutable int d_ringNonRingBondScore{-1};
118 mutable int d_atomMatchScore{-1};
119 mutable int d_maxDeltaAtomAtomDist{-1};
120 mutable int d_largestFragSize{-1};
121
122 // Assuming the frags are all part of the original MCES, just cut it
123 // down to what's in the frags.
124 void rebuildFromFrags(const std::vector<boost::shared_ptr<ROMol>> &frags);
125
126 std::string createSmartsString() const;
127
128 void matchCliqueAtoms(const std::vector<std::vector<int>> &mol1_adj_matrix);
129
130 // If the clique involves a fragment that is more than d_maxFragSep from
131 // any other frag in either molecule, discard the smaller frag.
132 void applyMaxFragSep();
133
134 // Make the fragments for either mol1 or mol2. If molNum is not 1 or 2,
135 // returns nullptr.
136 RDKit::ROMol *makeMolFrags(int molNum) const;
137
138 int calcRingNonRingScore() const;
139
140 int calcAtomMatchScore() const;
141
142 int calcLargestFragSize() const;
143
144 // If there are multiple fragments, can be helpful as a tie-breaker. It's the
145 // maximum difference between through-bond distances between matching atoms in
146 // the 2 molecules.
147 int calcMaxDeltaAtomAtomDistScore() const;
148};
149
150} // namespace RascalMCES
151} // namespace RDKit
152
153#endif // RASCALRESULT_H
Defines the primary molecule class ROMol as well as associated typedefs.
std::string getSmarts() const
std::vector< std::pair< int, int > > getAtomMatches() const
RascalResult(double tier1Sim, double tier2Sim)
void trimSmallFrags(unsigned int minFragSize=3)
unsigned int getLargestFragSize() const
std::vector< std::pair< int, int > > getBondMatches() const
RascalResult & operator=(const RascalResult &other)
RascalResult & operator=(RascalResult &&other)=default
const std::shared_ptr< ROMol > getMcesMol() const
RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2, const std::vector< std::vector< int > > &adjMatrix1, const std::vector< std::vector< int > > &adjMatrix2, const std::vector< unsigned int > &clique, const std::vector< std::pair< int, int > > &vtx_pairs, bool timedOut, bool swapped, double tier1Sim, double tier2Sim, bool ringMatchesRingOnly, bool singleLargestFrag, int minFragSep)
RascalResult(RascalResult &&other)=default
void largestFragsOnly(unsigned int numFrags=2)
RascalResult(const RascalResult &other)
#define RDKIT_RASCALMCES_EXPORT
Definition export.h:425
Std stuff.