RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RascalClusterOptions.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2023
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10// Options for Rascal Clustering. In general, the option names and defaults
11// are taken from the paper:
12// 'A Line Graph Algorithm for Clustering Chemical Structures Based
13// on Common Substructural Cores', JW Raymond, PW Willett.
14// https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
15// https://eprints.whiterose.ac.uk/77598/
16
17#include <RDGeneral/export.h>
18
19#ifndef RASCALCLUSTEROPTIONS_H
20#define RASCALCLUSTEROPTIONS_H
21
22namespace RDKit {
23namespace RascalMCES {
24
26 double similarityCutoff = 0.7; /* Similarity cutoff for clustering. The initial
27 clusters will contain molecule pairs of at
28 least this similarity. */
29 double a = 0.05; /* Penalty score for each unconnected component in the MCES. */
30 double b = 2.0; /* Weight of matched bonds over matched atoms. */
31 unsigned int minFragSize =
32 3; /* Minimum number of atoms in a fragment for it to
33 be included in the MCES. This is p in the paper. */
34 double minIntraClusterSim = 0.9; /* Two pairs of molecules are included in the
35 same cluster if the similarity between
36 their MCESs is greater than this. This is
37 S_a in the paper. */
38 double clusterMergeSim = 0.6; /* Two clusters are merged if the fraction of
39 molecules they have in common is greater than
40 this. This is S_b in the paper. */
41 unsigned int maxNumFrags = 2; /* The maximum number of fragments in any MCES.
42 Without this limit the MCES can be a lot of small
43 fragments scattered across the molecule - it
44 sometimes tries too hard to find a match. */
45 int numThreads = -1; /* The number of threads to use. If > 0, will use that
46 number. If <= 0, will use the number of hardware
47 threads plus this number. So if the number of
48 hardware threads is 8, and numThreads is -1, it will
49 use 7 threads. */
50};
51} // namespace RascalMCES
52} // namespace RDKit
53#endif // RASCALCLUSTEROPTIONS_H
#define RDKIT_RASCALMCES_EXPORT
Definition export.h:425
Std stuff.