RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <iostream>
23#include <cassert>
24#include <cstring>
25#include <vector>
26
27// #define VERBOSE_CANON 1
28
29namespace RDKit {
30namespace Canon {
31struct canon_atom;
32
34 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35 unsigned int bondStereo{
36 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37 unsigned int nbrSymClass{0};
38 unsigned int nbrIdx{0};
39 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41 const std::string *p_symbol{
42 nullptr}; // if provided, this is used to order bonds
43 unsigned int bondIdx{0};
44
47 unsigned int nsc, unsigned int bidx)
48 : bondType(bt),
49 bondStereo(static_cast<unsigned int>(bs)),
50 nbrSymClass(nsc),
51 nbrIdx(ni),
52 bondIdx(bidx) {}
53 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54 unsigned int nsc, unsigned int bidx)
55 : bondType(bt),
56 bondStereo(bs),
57 nbrSymClass(nsc),
58 nbrIdx(ni),
59 bondIdx(bidx) {}
60
61 int compareStereo(const bondholder &o) const;
62
63 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64 static bool greater(const bondholder &lhs, const bondholder &rhs) {
65 return compare(lhs, rhs) > 0;
66 }
67
68 static int compare(const bondholder &x, const bondholder &y,
69 unsigned int div = 1) {
70 if (x.p_symbol && y.p_symbol) {
71 if ((*x.p_symbol) < (*y.p_symbol)) {
72 return -1;
73 } else if ((*x.p_symbol) > (*y.p_symbol)) {
74 return 1;
75 }
76 }
77 if (x.bondType < y.bondType) {
78 return -1;
79 } else if (x.bondType > y.bondType) {
80 return 1;
81 }
82 if (x.bondStereo < y.bondStereo) {
83 return -1;
84 } else if (x.bondStereo > y.bondStereo) {
85 return 1;
86 }
87 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88 if (scdiv) {
89 return scdiv;
90 }
91 if (x.bondStereo && y.bondStereo) {
92 auto cs = x.compareStereo(y);
93 if (cs) {
94 return cs;
95 }
96 }
97 return 0;
98 }
99};
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 unsigned int whichStereoGroup{0};
108 StereoGroupType typeOfStereoGroup{StereoGroupType::STEREO_ABSOLUTE};
109 std::unique_ptr<int[]> nbrIds;
110 const std::string *p_symbol{
111 nullptr}; // if provided, this is used to order atoms
112 std::vector<int> neighborNum;
113 std::vector<int> revistedNeighbors;
114 std::vector<bondholder> bonds;
115};
116
118 canon_atom *atoms, std::vector<bondholder> &nbrs);
119
121 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
122 std::vector<std::pair<unsigned int, unsigned int>> &result);
123
124/*
125 * Different types of atom compare functions:
126 *
127 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
128 *dependent chirality
129 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
130 *highly symmetrical graphs/molecules
131 * - AtomCompareFunctor: Basic atom compare function which also allows to
132 *include neighbors within the ranking
133 */
134
136 public:
137 Canon::canon_atom *dp_atoms{nullptr};
138 const ROMol *dp_mol{nullptr};
139 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
140 *dp_bondsInPlay{nullptr};
141
144 Canon::canon_atom *atoms, const ROMol &m,
145 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
146 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
147 : dp_atoms(atoms),
148 dp_mol(&m),
149 dp_atomsInPlay(atomsInPlay),
150 dp_bondsInPlay(bondsInPlay) {}
151 int operator()(int i, int j) const {
152 PRECONDITION(dp_atoms, "no atoms");
153 PRECONDITION(dp_mol, "no molecule");
154 PRECONDITION(i != j, "bad call");
155 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
156 return 0;
157 }
158
159 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
161 }
162 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
164 }
165 for (unsigned int ii = 0;
166 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
167 int cmp =
168 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
169 if (cmp) {
170 return cmp;
171 }
172 }
173
174 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
175 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
176 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
177 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
178 }
179 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
180 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
181 }
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184 if (cmp) {
185 return cmp;
186 }
187 }
188 return 0;
189 }
190};
191
193 public:
194 Canon::canon_atom *dp_atoms{nullptr};
195 const ROMol *dp_mol{nullptr};
196 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
197 *dp_bondsInPlay{nullptr};
198
201 Canon::canon_atom *atoms, const ROMol &m,
202 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
203 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
204 : dp_atoms(atoms),
205 dp_mol(&m),
206 dp_atomsInPlay(atomsInPlay),
207 dp_bondsInPlay(bondsInPlay) {}
208 int operator()(int i, int j) const {
209 PRECONDITION(dp_atoms, "no atoms");
210 PRECONDITION(dp_mol, "no molecule");
211 PRECONDITION(i != j, "bad call");
212 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
213 return 0;
214 }
215
216 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
217 return -1;
218 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
219 return 1;
220 }
221
222 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
223 return -1;
224 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
225 return 1;
226 }
227
228 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
229 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
230 }
231 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
232 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
233 }
234 for (unsigned int ii = 0;
235 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
236 int cmp =
237 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
238 if (cmp) {
239 return cmp;
240 }
241 }
242
243 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
244 return -1;
245 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
246 return 1;
247 }
248 return 0;
249 }
250};
251
252namespace {
253unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
254 unsigned int i) {
255 unsigned int res = 0;
256 std::vector<unsigned int> perm;
257 perm.reserve(dp_atoms[i].atom->getDegree());
258 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
259 auto rnk = dp_atoms[nbr->getIdx()].index;
260 // make sure we don't have duplicate ranks
261 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
262 break;
263 } else {
264 perm.push_back(rnk);
265 }
266 }
267 if (perm.size() == dp_atoms[i].atom->getDegree()) {
268 auto ctag = dp_atoms[i].atom->getChiralTag();
271 auto sortedPerm = perm;
272 std::sort(sortedPerm.begin(), sortedPerm.end());
275 if (nswaps % 2) {
276 res = res == 2 ? 1 : 2;
277 }
278 }
279 }
280 return res;
281}
282} // namespace
284 unsigned int getAtomRingNbrCode(unsigned int i) const {
285 if (!dp_atoms[i].hasRingNbr) {
286 return 0;
287 }
288
289 auto nbrs = dp_atoms[i].nbrIds.get();
290 unsigned int code = 0;
291 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
292 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
293 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
294 }
295 }
296 return code;
297 }
298
299 int basecomp(int i, int j) const {
300 unsigned int ivi, ivj;
301
302 // always start with the current class:
303 ivi = dp_atoms[i].index;
304 ivj = dp_atoms[j].index;
305 if (ivi < ivj) {
306 return -1;
307 } else if (ivi > ivj) {
308 return 1;
309 }
310
311 if (df_useNonStereoRanks) {
312 // use the non-stereo ranks if they were assigned
313 int rankingNumber_i = 0;
314 int rankingNumber_j = 0;
315 dp_atoms[i].atom->getPropIfPresent(
316 common_properties::_CanonicalRankingNumber, rankingNumber_i);
317 dp_atoms[j].atom->getPropIfPresent(
318 common_properties::_CanonicalRankingNumber, rankingNumber_j);
319 if (rankingNumber_i < rankingNumber_j) {
320 return -1;
321 } else if (rankingNumber_i > rankingNumber_j) {
322 return 1;
323 }
324 }
325
326 if (df_useAtomMaps || df_useAtomMapsOnDummies) {
327 // use the atom-mapping numbers if they were assigned
328 int molAtomMapNumber_i = 0;
329 int molAtomMapNumber_j = 0;
330 if (df_useAtomMaps ||
331 (df_useAtomMapsOnDummies && dp_atoms[i].atom->getAtomicNum() == 0)) {
332 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
333 molAtomMapNumber_i);
334 }
335 if (df_useAtomMaps ||
336 (df_useAtomMapsOnDummies && dp_atoms[j].atom->getAtomicNum() == 0)) {
337 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
338 molAtomMapNumber_j);
339 }
340 if (molAtomMapNumber_i < molAtomMapNumber_j) {
341 return -1;
342 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
343 return 1;
344 }
345 }
346 // start by comparing degree
347 ivi = dp_atoms[i].degree;
348 ivj = dp_atoms[j].degree;
349 if (ivi < ivj) {
350 return -1;
351 } else if (ivi > ivj) {
352 return 1;
353 }
354 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
355 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
356 return -1;
357 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
358 return 1;
359 } else {
360 return 0;
361 }
362 }
363
364 // move onto atomic number
365 ivi = dp_atoms[i].atom->getAtomicNum();
366 ivj = dp_atoms[j].atom->getAtomicNum();
367 if (ivi < ivj) {
368 return -1;
369 } else if (ivi > ivj) {
370 return 1;
371 }
372 // isotopes if we're using them
373 if (df_useIsotopes) {
374 ivi = dp_atoms[i].atom->getIsotope();
375 ivj = dp_atoms[j].atom->getIsotope();
376 if (ivi < ivj) {
377 return -1;
378 } else if (ivi > ivj) {
379 return 1;
380 }
381 }
382
383 // nHs
384 ivi = dp_atoms[i].totalNumHs;
385 ivj = dp_atoms[j].totalNumHs;
386 if (ivi < ivj) {
387 return -1;
388 } else if (ivi > ivj) {
389 return 1;
390 }
391 // charge
392 ivi = dp_atoms[i].atom->getFormalCharge();
393 ivj = dp_atoms[j].atom->getFormalCharge();
394 if (ivi < ivj) {
395 return -1;
396 } else if (ivi > ivj) {
397 return 1;
398 }
399 // presence of specified chirality if it's being used
400 if (df_useChiralPresence) {
401 ivi =
402 dp_atoms[i].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
403 ivj =
404 dp_atoms[j].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
405 if (ivi < ivj) {
406 return -1;
407 } else if (ivi > ivj) {
408 return 1;
409 }
410 }
411 // chirality if we're using it
412 if (df_useChirality) {
413 // look at enhanced stereo - whichStereoGroup == 0 means no stereo
414 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
415 // it's set then we're in an SG
416 ivj = dp_atoms[j].whichStereoGroup;
417 if (ivi || ivj) {
418 if (ivi && !ivj) {
419 return 1;
420 } else if (ivj && !ivi) {
421 return -1;
422 } else if (ivi && ivj) {
423 auto iType = dp_atoms[i].typeOfStereoGroup;
424 auto jType = dp_atoms[j].typeOfStereoGroup;
425 if (iType < jType) {
426 return -1;
427 } else if (iType > jType) {
428 return 1;
429 }
430 if (ivi != ivj) {
431 // now check the current classes of the other members of the SG
432 std::set<unsigned int> sgi;
433 for (const auto sgat :
434 dp_mol->getStereoGroups()[ivi - 1].getAtoms()) {
435 sgi.insert(dp_atoms[sgat->getIdx()].index);
436 }
437 std::set<unsigned int> sgj;
438 for (const auto sgat :
439 dp_mol->getStereoGroups()[ivj - 1].getAtoms()) {
440 sgj.insert(dp_atoms[sgat->getIdx()].index);
441 }
442 if (sgi < sgj) {
443 return -1;
444 } else if (sgi > sgj) {
445 return 1;
446 }
447 } else { // same stereo group
448 if (iType == StereoGroupType::STEREO_ABSOLUTE) {
449 ivi = getChiralRank(dp_mol, dp_atoms, i);
450 ivj = getChiralRank(dp_mol, dp_atoms, j);
451 if (ivi < ivj) {
452 return -1;
453 } else if (ivi > ivj) {
454 return 1;
455 }
456 }
457 }
458 }
459 } else {
460 // if there's no stereogroup, then use whatever atom stereochem is
461 // specfied:
462 ivi = 0;
463 ivj = 0;
464 // can't actually use values here, because they are arbitrary
465 ivi = dp_atoms[i].atom->getChiralTag() != 0;
466 ivj = dp_atoms[j].atom->getChiralTag() != 0;
467 if (ivi < ivj) {
468 return -1;
469 } else if (ivi > ivj) {
470 return 1;
471 }
472 // stereo set
473 if (ivi && ivj) {
474 if (ivi) {
475 ivi = getChiralRank(dp_mol, dp_atoms, i);
476 }
477 if (ivj) {
478 ivj = getChiralRank(dp_mol, dp_atoms, j);
479 }
480 if (ivi < ivj) {
481 return -1;
482 } else if (ivi > ivj) {
483 return 1;
484 }
485 }
486 }
487 }
488
489 if (df_useChiralityRings) {
490 // ring stereochemistry
491 ivi = getAtomRingNbrCode(i);
492 ivj = getAtomRingNbrCode(j);
493 if (ivi < ivj) {
494 return -1;
495 } else if (ivi > ivj) {
496 return 1;
497 } // bond stereo is taken care of in the neighborhood comparison
498 }
499 return 0;
500 }
501
502 public:
503 Canon::canon_atom *dp_atoms{nullptr};
504 const ROMol *dp_mol{nullptr};
505 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
506 *dp_bondsInPlay{nullptr};
507 bool df_useNbrs{false};
508 bool df_useIsotopes{true};
509 bool df_useChirality{true};
510 bool df_useChiralityRings{true};
511 bool df_useAtomMaps{true};
512 bool df_useNonStereoRanks{false};
513 bool df_useChiralPresence{true};
514 bool df_useAtomMapsOnDummies{true};
515
518 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
519 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
520 : dp_atoms(atoms),
521 dp_mol(&m),
522 dp_atomsInPlay(atomsInPlay),
523 dp_bondsInPlay(bondsInPlay) {}
524
525 int operator()(int i, int j) const {
526 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
527 return 0;
528 }
529 int v = basecomp(i, j);
530 if (v) {
531 return v;
532 }
533
534 if (df_useNbrs) {
535 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
536 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
537 }
538 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
539 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
540 }
541
542 for (unsigned int ii = 0;
543 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
544 ++ii) {
545 int cmp =
546 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
547 if (cmp) {
548 return cmp;
549 }
550 }
551
552 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
553 return -1;
554 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
555 return 1;
556 }
557 }
558 return 0;
559 }
560};
561
562/*
563 * A compare function to discriminate chiral atoms, similar to the CIP rules.
564 * This functionality is currently not used.
565 */
566
567const unsigned int ATNUM_CLASS_OFFSET = 10000;
569 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
570 for (unsigned j = 0; j < nbrs.size(); ++j) {
571 unsigned int nbrIdx = nbrs[j].nbrIdx;
572 if (nbrIdx == ATNUM_CLASS_OFFSET) {
573 // Ignore the Hs
574 continue;
575 }
576 const Atom *nbr = dp_atoms[nbrIdx].atom;
577 nbrs[j].nbrSymClass =
578 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
579 }
580 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
581 // FIX: don't want to be doing this long-term
582 }
583
584 int basecomp(int i, int j) const {
585 PRECONDITION(dp_atoms, "no atoms");
586 unsigned int ivi, ivj;
587
588 // always start with the current class:
589 ivi = dp_atoms[i].index;
590 ivj = dp_atoms[j].index;
591 if (ivi < ivj) {
592 return -1;
593 } else if (ivi > ivj) {
594 return 1;
595 }
596
597 // move onto atomic number
598 ivi = dp_atoms[i].atom->getAtomicNum();
599 ivj = dp_atoms[j].atom->getAtomicNum();
600 if (ivi < ivj) {
601 return -1;
602 } else if (ivi > ivj) {
603 return 1;
604 }
605
606 // isotopes:
607 ivi = dp_atoms[i].atom->getIsotope();
608 ivj = dp_atoms[j].atom->getIsotope();
609 if (ivi < ivj) {
610 return -1;
611 } else if (ivi > ivj) {
612 return 1;
613 }
614
615 // atom stereochem:
616 ivi = 0;
617 ivj = 0;
618 std::string cipCode;
619 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
620 cipCode)) {
621 ivi = cipCode == "R" ? 2 : 1;
622 }
623 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
624 cipCode)) {
625 ivj = cipCode == "R" ? 2 : 1;
626 }
627 if (ivi < ivj) {
628 return -1;
629 } else if (ivi > ivj) {
630 return 1;
631 }
632
633 // bond stereo is taken care of in the neighborhood comparison
634 return 0;
635 }
636
637 public:
638 Canon::canon_atom *dp_atoms{nullptr};
639 const ROMol *dp_mol{nullptr};
640 bool df_useNbrs{false};
643 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
644 int operator()(int i, int j) const {
645 PRECONDITION(dp_atoms, "no atoms");
646 PRECONDITION(dp_mol, "no molecule");
647 PRECONDITION(i != j, "bad call");
648 int v = basecomp(i, j);
649 if (v) {
650 return v;
651 }
652
653 if (df_useNbrs) {
654 getAtomNeighborhood(dp_atoms[i].bonds);
655 getAtomNeighborhood(dp_atoms[j].bonds);
656
657 // we do two passes through the neighbor lists. The first just uses the
658 // atomic numbers (by passing the optional 10000 to bondholder::compare),
659 // the second takes the already-computed index into account
660 for (unsigned int ii = 0;
661 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
662 ++ii) {
663 int cmp = bondholder::compare(
664 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
665 if (cmp) {
666 return cmp;
667 }
668 }
669 for (unsigned int ii = 0;
670 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
671 ++ii) {
672 int cmp =
673 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
674 if (cmp) {
675 return cmp;
676 }
677 }
678 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
679 return -1;
680 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
681 return 1;
682 }
683 }
684 return 0;
685 }
686};
687
688/*
689 * Basic canonicalization function to organize the partitions which will be
690 * sorted next.
691 * */
692
693template <typename CompareFunc>
695 int mode, int *order, int *count, int &activeset,
696 int *next, int *changed, char *touchedPartitions) {
697 unsigned int nAtoms = mol.getNumAtoms();
698 int partition;
699 int symclass = 0;
700 int *start;
701 int offset;
702 int index;
703 int len;
704 int i;
705 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
706
707 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
708 while (activeset != -1) {
709 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
710 // std::cerr<<" next: ";
711 // for(unsigned int ii=0;ii<nAtoms;++ii){
712 // std::cerr<<ii<<":"<<next[ii]<<" ";
713 // }
714 // std::cerr<<std::endl;
715 // for(unsigned int ii=0;ii<nAtoms;++ii){
716 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
717 // "<<atoms[order[ii]].index<<std::endl;
718 // }
719
721 activeset = next[partition];
722 next[partition] = -2;
723
725 offset = atoms[partition].index;
726 start = order + offset;
727 // std::cerr<<"\n\n**************************************************************"<<std::endl;
728 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
729 // for(unsigned int ii=0;ii<len;++ii){
730 // std::cerr<<" "<<order[offset+ii]+1;
731 // }
732 // std::cerr<<std::endl;
733 // for(unsigned int ii=0;ii<nAtoms;++ii){
734 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
735 // "<<atoms[order[ii]].index<<std::endl;
736 // }
738 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
739 // std::cerr<<" result:";
740 // for(unsigned int ii=0;ii<nAtoms;++ii){
741 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
742 // "<<atoms[order[ii]].index<<std::endl;
743 // }
744 for (int k = 0; k < len; ++k) {
745 changed[start[k]] = 0;
746 }
747
748 index = start[0];
749 // std::cerr<<" len:"<<len<<" index:"<<index<<"
750 // count:"<<count[index]<<std::endl;
751 for (i = count[index]; i < len; i++) {
752 index = start[i];
753 if (count[index]) {
754 symclass = offset + i;
755 }
756 atoms[index].index = symclass;
757 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
758 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
759 // activeset=index;
760 //}
761 for (unsigned j = 0; j < atoms[index].degree; ++j) {
762 changed[atoms[index].nbrIds[j]] = 1;
763 }
764 }
765 // std::cerr<<std::endl;
766
767 if (mode) {
768 index = start[0];
769 for (i = count[index]; i < len; i++) {
770 index = start[i];
771 for (unsigned j = 0; j < atoms[index].degree; ++j) {
772 unsigned int nbor = atoms[index].nbrIds[j];
773 touchedPartitions[atoms[nbor].index] = 1;
774 }
775 }
776 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
777 if (touchedPartitions[ii]) {
778 partition = order[ii];
779 if ((count[partition] > 1) && (next[partition] == -2)) {
780 next[partition] = activeset;
782 }
784 }
785 }
786 }
787 }
788} // end of RefinePartitions()
789
790template <typename CompareFunc>
791void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
792 int mode, int *order, int *count, int &activeset, int *next,
793 int *changed, char *touchedPartitions) {
794 unsigned int nAtoms = mol.getNumAtoms();
795 int partition;
796 int offset;
797 int index;
798 int len;
799 int oldPart = 0;
800
801 for (unsigned int i = 0; i < nAtoms; i++) {
802 partition = order[i];
803 oldPart = atoms[partition].index;
804 while (count[partition] > 1) {
806 offset = atoms[partition].index + len - 1;
807 index = order[offset];
808 atoms[index].index = offset;
809 count[partition] = len - 1;
810 count[index] = 1;
811
812 // test for ions, water molecules with no
813 if (atoms[index].degree < 1) {
814 continue;
815 }
816 for (unsigned j = 0; j < atoms[index].degree; ++j) {
817 unsigned int nbor = atoms[index].nbrIds[j];
818 touchedPartitions[atoms[nbor].index] = 1;
819 changed[nbor] = 1;
820 }
821
822 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
823 if (touchedPartitions[ii]) {
824 int npart = order[ii];
825 if ((count[npart] > 1) && (next[npart] == -2)) {
826 next[npart] = activeset;
828 }
830 }
831 }
832 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
834 }
835 // not sure if this works each time
836 if (atoms[partition].index != oldPart) {
837 i -= 1;
838 }
839 }
840} // end of BreakTies()
841
843 int *order, int *count,
844 canon_atom *atoms);
845
846RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
847 int *count, int &activeset,
848 int *next, int *changed);
849
850//! Note that atom maps on dummy atoms will always be used
852 const ROMol &mol, std::vector<unsigned int> &res, bool breakTies = true,
853 bool includeChirality = true, bool includeIsotopes = true,
854 bool includeAtomMaps = true, bool includeChiralPresence = false,
855 bool includeStereoGroups = true, bool useNonStereoRanks = false);
856
857//! Note that atom maps on dummy atoms will always be used
859 const ROMol &mol, std::vector<unsigned int> &res,
860 const boost::dynamic_bitset<> &atomsInPlay,
861 const boost::dynamic_bitset<> &bondsInPlay,
862 const std::vector<std::string> *atomSymbols,
863 const std::vector<std::string> *bondSymbols, bool breakTies,
866
867//! Note that atom maps on dummy atoms will always be used
869 const ROMol &mol, std::vector<unsigned int> &res,
870 const boost::dynamic_bitset<> &atomsInPlay,
871 const boost::dynamic_bitset<> &bondsInPlay,
872 const std::vector<std::string> *atomSymbols = nullptr,
873 bool breakTies = true, bool includeChirality = true,
874 bool includeIsotopes = true, bool includeAtomMaps = true,
875 bool includeChiralPresence = false) {
876 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
879};
880
882 std::vector<unsigned int> &res);
883
885 std::vector<Canon::canon_atom> &atoms,
886 bool includeChirality = true,
887 bool includeStereoGroups = true);
888
889namespace detail {
891 std::vector<Canon::canon_atom> &atoms,
892 bool includeChirality,
893 const std::vector<std::string> *atomSymbols,
894 const std::vector<std::string> *bondSymbols,
895 const boost::dynamic_bitset<> &atomsInPlay,
896 const boost::dynamic_bitset<> &bondsInPlay,
897 bool needsInit);
898template <typename T>
899void rankWithFunctor(T &ftor, bool breakTies, int *order,
900 bool useSpecial = false, bool useChirality = false,
901 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
902 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
903
904} // namespace detail
905
906} // namespace Canon
907} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:75
int getAtomicNum() const
returns our atomic number
Definition Atom.h:141
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:103
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:104
BondType
the type of Bond
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:517
int operator()(int i, int j) const
Definition new_canon.h:525
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:642
int operator()(int i, int j) const
Definition new_canon.h:644
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:143
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:200
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition ROMol.h:790
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:421
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true, bool includeStereoGroups=true)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps, bool includeChiralPresence)
Note that atom maps on dummy atoms will always be used.
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:567
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:791
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true, bool includeChiralPresence=false, bool includeStereoGroups=true, bool useNonStereoRanks=false)
Note that atom maps on dummy atoms will always be used.
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:694
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
Std stuff.
StereoGroupType
Definition StereoGroup.h:31
bool rdvalue_is(const RDValue_cast_t)
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:41
Bond::BondType bondType
Definition new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:64
bool operator<(const bondholder &o) const
Definition new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:46
unsigned int bondStereo
Definition new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:68
unsigned int nbrSymClass
Definition new_canon.h:37
std::vector< bondholder > bonds
Definition new_canon.h:114
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:109
std::vector< int > revistedNeighbors
Definition new_canon.h:113
std::vector< int > neighborNum
Definition new_canon.h:112