RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
CorrMatGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2006 Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_CORRMATGENERATOR_H_
12#define _RD_CORRMATGENERATOR_H_
13
14#include <RDGeneral/types.h>
16#include <boost/dynamic_bitset.hpp>
17
18namespace RDInfoTheory {
19// FIX: won't worry about it now, but this class can be templated by the type of
20// container for the bit list and type of descriptors (fingerprint vs. real
21// valued)
/*! \brief A class to generate a correlation matrix for a bunch of
 * fingerprints
 *
 * The correlation matrix is computed only for the bit IDs that are set by a
 * call to the function setBitIdList
 *
 *   cr = BitCorrMatGenerator();
 *   cr.setBitIdList(bitIdList);
 *   for each fingerprint in list of fingerprints {
 *     cr.collectVotes(fingerprint);
 *   }
 *   double *corrMat = cr.getCorrMat()
 *
 * The resulting correlation matrix is a one-dimensional array holding only
 * the lower-triangle elements (diagonal excluded) of the symmetric matrix
 */
41 public:
43
44 ~BitCorrMatGenerator() { delete[] dp_corrMat; }
45
47 dp_corrMat = nullptr;
48 d_descs.resize(0);
49 d_nExamples = 0;
50 }
51
/*! \brief Set the list of bits that we are interested in correlating
 *
 * \param bitIdList is a list of bit ids that need to be correlated, e.g. a
 * list of the top-ranked ensemble of bits
 */
58 void setBitIdList(const RDKit::INT_VECT &bitIdList) {
59 d_descs = bitIdList;
60 int i, nd = d_descs.size();
61 int nelem = nd * (nd - 1) / 2;
62 delete[] dp_corrMat;
63
64 dp_corrMat = new double[nd * (nd - 1) / 2];
65 for (i = 0; i < nelem; i++) {
66 dp_corrMat[i] = 0.0;
67 }
68 }
69
70 //! \brief get the number of examples we used so far to compute the
71 /// correlation matrix
72 int getNumExamples() const { return d_nExamples; }
73
//! \brief Get the list of bit IDs that are used to generate the correlation
/// matrix
76 RDKit::INT_VECT getCorrBitList() const { return d_descs; }
77
78 //! \brief Gets a pointer to the correlation matrix
79 double *getCorrMat() { return dp_corrMat; }
80
81 //! \brief For each pair of on bits (bi, bj) in fp increase the correlation
82 /// count
83 /// for the pair by 1
84 void collectVotes(const BitVect &fp) {
85 unsigned int nd = d_descs.size();
86 // use a temporary bit vector to first mask the fingerprint
87 ExplicitBitVect ebv(nd);
88 int bi;
89 for (unsigned int i = 0; i < nd; i++) {
90 bi = d_descs[i];
91 if (fp[bi]) {
92 ebv.setBit(i);
93 }
94 }
95 for (unsigned i = 1; i < nd; i++) {
96 unsigned int itab = i * (i - 1) / 2;
97 if (ebv[i]) {
98 for (unsigned int j = 0; j < i; j++) {
99 if (ebv[j]) {
100 dp_corrMat[itab + j] += 1;
101 }
102 }
103 }
104 }
105 d_nExamples++;
106 }
107
108 private:
109 RDKit::INT_VECT d_descs;
110 double *dp_corrMat;
111 int d_nExamples;
112};
113} // namespace RDInfoTheory
114
115#endif
Pulls in all the BitVect classes.
Abstract base class for storing BitVectors.
Definition BitVect.h:24
a class for bit vectors that are densely occupied
bool setBit(const unsigned int which) override
sets a particular bit and returns its original value
int getNumExamples() const
get the number of examples we used so far to compute the correlation matrix
double * getCorrMat()
Gets a pointer to the correlation matrix.
void setBitIdList(const RDKit::INT_VECT &bitIdList)
Set the list of bits that we are interested in correlating.
BitCorrMatGenerator()
A class to generate a correlation matrix for a bunch of fingerprints.
void collectVotes(const BitVect &fp)
For each pair of on bits (bi, bj) in fp increase the correlation count for the pair by 1.
RDKit::INT_VECT getCorrBitList() const
Get the list of bit IDs that are used to generate the correlation matrix.
Class used to rank bits based on a specified measure of information.
std::vector< int > INT_VECT
Definition types.h:284