RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <cassert>
23#include <cstring>
24#include <vector>
25
26// #define VERBOSE_CANON 1
27
28namespace RDKit {
29namespace Canon {
30struct canon_atom;
31
34 unsigned int bondStereo{
35 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
36 unsigned int nbrSymClass{0};
37 unsigned int nbrIdx{0};
39 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
40 const std::string *p_symbol{
41 nullptr}; // if provided, this is used to order bonds
42 unsigned int bondIdx{0};
43
46 unsigned int nsc, unsigned int bidx)
47 : bondType(bt),
48 bondStereo(static_cast<unsigned int>(bs)),
49 nbrSymClass(nsc),
50 nbrIdx(ni),
51 bondIdx(bidx) {}
52 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
53 unsigned int nsc, unsigned int bidx)
54 : bondType(bt),
55 bondStereo(bs),
56 nbrSymClass(nsc),
57 nbrIdx(ni),
58 bondIdx(bidx) {}
59
60 int compareStereo(const bondholder &o) const;
61
62 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
63 static bool greater(const bondholder &lhs, const bondholder &rhs) {
64 return compare(lhs, rhs) > 0;
65 }
66
67 static int compare(const bondholder &x, const bondholder &y,
68 unsigned int div = 1) {
69 if (x.p_symbol && y.p_symbol) {
70 if ((*x.p_symbol) < (*y.p_symbol)) {
71 return -1;
72 } else if ((*x.p_symbol) > (*y.p_symbol)) {
73 return 1;
74 }
75 }
76 if (x.bondType < y.bondType) {
77 return -1;
78 } else if (x.bondType > y.bondType) {
79 return 1;
80 }
81 if (x.bondStereo < y.bondStereo) {
82 return -1;
83 } else if (x.bondStereo > y.bondStereo) {
84 return 1;
85 }
86 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
87 if (scdiv) {
88 return scdiv;
89 }
90 if (x.bondStereo && y.bondStereo) {
91 auto cs = x.compareStereo(y);
92 if (cs) {
93 return cs;
94 }
95 }
96 return 0;
97 }
98};
100 const Atom *atom{nullptr};
101 int index{-1};
102 unsigned int degree{0};
103 unsigned int totalNumHs{0};
104 bool hasRingNbr{false};
105 bool isRingStereoAtom{false};
106 unsigned int whichStereoGroup{0};
108 std::unique_ptr<int[]> nbrIds;
109 const std::string *p_symbol{
110 nullptr}; // if provided, this is used to order atoms
111 std::vector<int> neighborNum;
112 std::vector<int> revistedNeighbors;
113 std::vector<bondholder> bonds;
114};
115
117 canon_atom *atoms, std::vector<bondholder> &nbrs);
118
120 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
121 std::vector<std::pair<unsigned int, unsigned int>> &result);
122
123/*
124 * Different types of atom compare functions:
125 *
126 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
127 *dependent chirality
128 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
129 *highly symmetrical graphs/molecules
130 * - AtomCompareFunctor: Basic atom compare function which also allows to
131 *include neighbors within the ranking
132 */
133
135 public:
137 const ROMol *dp_mol{nullptr};
138 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
139 *dp_bondsInPlay{nullptr};
140
143 Canon::canon_atom *atoms, const ROMol &m,
144 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
145 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
146 : dp_atoms(atoms),
147 dp_mol(&m),
148 dp_atomsInPlay(atomsInPlay),
149 dp_bondsInPlay(bondsInPlay) {}
150 int operator()(int i, int j) const {
151 PRECONDITION(dp_atoms, "no atoms");
152 PRECONDITION(dp_mol, "no molecule");
153 PRECONDITION(i != j, "bad call");
154 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
155 return 0;
156 }
157
158 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 }
161 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 }
164 for (unsigned int ii = 0;
165 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
166 int cmp =
167 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
168 if (cmp) {
169 return cmp;
170 }
171 }
172
173 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
174 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
175 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
176 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
177 }
178 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
179 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
180 }
181
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184
185 if (cmp) {
186 return cmp;
187 }
188 }
189
190 return 0;
191 }
192};
193
195 public:
197 const ROMol *dp_mol{nullptr};
198 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
199 *dp_bondsInPlay{nullptr};
200
203 Canon::canon_atom *atoms, const ROMol &m,
204 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
205 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
206 : dp_atoms(atoms),
207 dp_mol(&m),
208 dp_atomsInPlay(atomsInPlay),
209 dp_bondsInPlay(bondsInPlay) {}
210 int operator()(int i, int j) const {
211 PRECONDITION(dp_atoms, "no atoms");
212 PRECONDITION(dp_mol, "no molecule");
213 PRECONDITION(i != j, "bad call");
214 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
215 return 0;
216 }
217
218 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
219 return -1;
220 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
221 return 1;
222 }
223
224 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
225 return -1;
226 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
227 return 1;
228 }
229
230 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
232 }
233 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
235 }
236 for (unsigned int ii = 0;
237 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
238 int cmp =
239 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
240 if (cmp) {
241 return cmp;
242 }
243 }
244
245 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
246 return -1;
247 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
248 return 1;
249 }
250 return 0;
251 }
252};
253
254namespace {
255unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
256 unsigned int i) {
257 unsigned int res = 0;
258 std::vector<unsigned int> perm;
259 perm.reserve(dp_atoms[i].atom->getDegree());
260 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
261 auto rnk = dp_atoms[nbr->getIdx()].index;
262 // make sure we don't have duplicate ranks
263 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
264 break;
265 } else {
266 perm.push_back(rnk);
267 }
268 }
269 if (perm.size() == dp_atoms[i].atom->getDegree()) {
270 auto ctag = dp_atoms[i].atom->getChiralTag();
273 auto sortedPerm = perm;
274 std::sort(sortedPerm.begin(), sortedPerm.end());
275 auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
276 res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
277 if (nswaps % 2) {
278 res = res == 2 ? 1 : 2;
279 }
280 }
281 }
282 return res;
283}
284} // namespace
286 unsigned int getAtomRingNbrCode(unsigned int i) const {
287 if (!dp_atoms[i].hasRingNbr) {
288 return 0;
289 }
290
291 auto nbrs = dp_atoms[i].nbrIds.get();
292 unsigned int code = 0;
293 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
294 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
295 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
296 }
297 }
298 return code;
299 }
300
301 int basecomp(int i, int j) const {
302 unsigned int ivi, ivj;
303
304 // always start with the current class:
305 ivi = dp_atoms[i].index;
306 ivj = dp_atoms[j].index;
307 if (ivi < ivj) {
308 return -1;
309 } else if (ivi > ivj) {
310 return 1;
311 }
312
314 // use the non-stereo ranks if they were assigned
315 int rankingNumber_i = 0;
316 int rankingNumber_j = 0;
317 dp_atoms[i].atom->getPropIfPresent(
319 dp_atoms[j].atom->getPropIfPresent(
321 if (rankingNumber_i < rankingNumber_j) {
322 return -1;
323 } else if (rankingNumber_i > rankingNumber_j) {
324 return 1;
325 }
326 }
327
329 // use the atom-mapping numbers if they were assigned
330 int molAtomMapNumber_i = 0;
331 int molAtomMapNumber_j = 0;
332 if (df_useAtomMaps ||
333 (df_useAtomMapsOnDummies && dp_atoms[i].atom->getAtomicNum() == 0)) {
334 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
335 molAtomMapNumber_i);
336 }
337 if (df_useAtomMaps ||
338 (df_useAtomMapsOnDummies && dp_atoms[j].atom->getAtomicNum() == 0)) {
339 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
340 molAtomMapNumber_j);
341 }
342 if (molAtomMapNumber_i < molAtomMapNumber_j) {
343 return -1;
344 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
345 return 1;
346 }
347 }
348 // start by comparing degree
349 ivi = dp_atoms[i].degree;
350 ivj = dp_atoms[j].degree;
351 if (ivi < ivj) {
352 return -1;
353 } else if (ivi > ivj) {
354 return 1;
355 }
356 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
357 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
358 return -1;
359 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
360 return 1;
361 } else {
362 return 0;
363 }
364 }
365
366 // move onto atomic number
367 ivi = dp_atoms[i].atom->getAtomicNum();
368 ivj = dp_atoms[j].atom->getAtomicNum();
369 if (ivi < ivj) {
370 return -1;
371 } else if (ivi > ivj) {
372 return 1;
373 }
374 // isotopes if we're using them
375 if (df_useIsotopes) {
376 ivi = dp_atoms[i].atom->getIsotope();
377 ivj = dp_atoms[j].atom->getIsotope();
378 if (ivi < ivj) {
379 return -1;
380 } else if (ivi > ivj) {
381 return 1;
382 }
383 }
384
385 // nHs
386 ivi = dp_atoms[i].totalNumHs;
387 ivj = dp_atoms[j].totalNumHs;
388 if (ivi < ivj) {
389 return -1;
390 } else if (ivi > ivj) {
391 return 1;
392 }
393 // charge
394 ivi = dp_atoms[i].atom->getFormalCharge();
395 ivj = dp_atoms[j].atom->getFormalCharge();
396 if (ivi < ivj) {
397 return -1;
398 } else if (ivi > ivj) {
399 return 1;
400 }
401 // presence of specified chirality if it's being used
403 ivi =
404 dp_atoms[i].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
405 ivj =
406 dp_atoms[j].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
407 if (ivi < ivj) {
408 return -1;
409 } else if (ivi > ivj) {
410 return 1;
411 }
412 }
413 // chirality if we're using it
414 if (df_useChirality) {
415 // look at enhanced stereo - whichStereoGroup == 0 means no stereo
416 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
417 // it's set then we're in an SG
418 ivj = dp_atoms[j].whichStereoGroup;
419 if (ivi || ivj) {
420 if (ivi && !ivj) {
421 return 1;
422 } else if (ivj && !ivi) {
423 return -1;
424 } else if (ivi && ivj) {
425 auto iType = dp_atoms[i].typeOfStereoGroup;
426 auto jType = dp_atoms[j].typeOfStereoGroup;
427 if (iType < jType) {
428 return -1;
429 } else if (iType > jType) {
430 return 1;
431 }
432 if (ivi != ivj) {
433 // now check the current classes of the other members of the SG
434 std::set<unsigned int> sgi;
435 for (const auto sgat :
436 dp_mol->getStereoGroups()[ivi - 1].getAtoms()) {
437 sgi.insert(dp_atoms[sgat->getIdx()].index);
438 }
439 std::set<unsigned int> sgj;
440 for (const auto sgat :
441 dp_mol->getStereoGroups()[ivj - 1].getAtoms()) {
442 sgj.insert(dp_atoms[sgat->getIdx()].index);
443 }
444 if (sgi < sgj) {
445 return -1;
446 } else if (sgi > sgj) {
447 return 1;
448 }
449 } else { // same stereo group
451 ivi = getChiralRank(dp_mol, dp_atoms, i);
452 ivj = getChiralRank(dp_mol, dp_atoms, j);
453 if (ivi < ivj) {
454 return -1;
455 } else if (ivi > ivj) {
456 return 1;
457 }
458 }
459 }
460 }
461 } else {
462 // if there's no stereogroup, then use whatever atom stereochem is
463 // specfied:
464 ivi = 0;
465 ivj = 0;
466 // can't actually use values here, because they are arbitrary
467 ivi = dp_atoms[i].atom->getChiralTag() != 0;
468 ivj = dp_atoms[j].atom->getChiralTag() != 0;
469 if (ivi < ivj) {
470 return -1;
471 } else if (ivi > ivj) {
472 return 1;
473 }
474 // stereo set
475 if (ivi && ivj) {
476 if (ivi) {
477 ivi = getChiralRank(dp_mol, dp_atoms, i);
478 }
479 if (ivj) {
480 ivj = getChiralRank(dp_mol, dp_atoms, j);
481 }
482 if (ivi < ivj) {
483 return -1;
484 } else if (ivi > ivj) {
485 return 1;
486 }
487 }
488 }
489 }
490
492 // ring stereochemistry
493 ivi = getAtomRingNbrCode(i);
494 ivj = getAtomRingNbrCode(j);
495 if (ivi < ivj) {
496 return -1;
497 } else if (ivi > ivj) {
498 return 1;
499 } // bond stereo is taken care of in the neighborhood comparison
500 }
501 return 0;
502 }
503
504 public:
506 const ROMol *dp_mol{nullptr};
507 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
508 *dp_bondsInPlay{nullptr};
509 bool df_useNbrs{false};
510 bool df_useIsotopes{true};
511 bool df_useChirality{true};
513 bool df_useAtomMaps{true};
517
520 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
521 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
522 : dp_atoms(atoms),
523 dp_mol(&m),
524 dp_atomsInPlay(atomsInPlay),
525 dp_bondsInPlay(bondsInPlay) {}
526
527 int operator()(int i, int j) const {
528 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
529 return 0;
530 }
531 int v = basecomp(i, j);
532 if (v) {
533 return v;
534 }
535
536 if (df_useNbrs) {
537 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
539 }
540 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
542 }
543
544 for (unsigned int ii = 0;
545 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
546 ++ii) {
547 int cmp =
548 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
549 if (cmp) {
550 return cmp;
551 }
552 }
553
554 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
555 return -1;
556 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
557 return 1;
558 }
559 }
560 return 0;
561 }
562};
563
564/*
565 * A compare function to discriminate chiral atoms, similar to the CIP rules.
566 * This functionality is currently not used.
567 */
568
569const unsigned int ATNUM_CLASS_OFFSET = 10000;
571 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
572 for (unsigned j = 0; j < nbrs.size(); ++j) {
573 unsigned int nbrIdx = nbrs[j].nbrIdx;
574 if (nbrIdx == ATNUM_CLASS_OFFSET) {
575 // Ignore the Hs
576 continue;
577 }
578 const Atom *nbr = dp_atoms[nbrIdx].atom;
579 nbrs[j].nbrSymClass =
580 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
581 }
582 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
583 // FIX: don't want to be doing this long-term
584 }
585
586 int basecomp(int i, int j) const {
587 PRECONDITION(dp_atoms, "no atoms");
588 unsigned int ivi, ivj;
589
590 // always start with the current class:
591 ivi = dp_atoms[i].index;
592 ivj = dp_atoms[j].index;
593 if (ivi < ivj) {
594 return -1;
595 } else if (ivi > ivj) {
596 return 1;
597 }
598
599 // move onto atomic number
600 ivi = dp_atoms[i].atom->getAtomicNum();
601 ivj = dp_atoms[j].atom->getAtomicNum();
602 if (ivi < ivj) {
603 return -1;
604 } else if (ivi > ivj) {
605 return 1;
606 }
607
608 // isotopes:
609 ivi = dp_atoms[i].atom->getIsotope();
610 ivj = dp_atoms[j].atom->getIsotope();
611 if (ivi < ivj) {
612 return -1;
613 } else if (ivi > ivj) {
614 return 1;
615 }
616
617 // atom stereochem:
618 ivi = 0;
619 ivj = 0;
620 std::string cipCode;
621 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
622 cipCode)) {
623 ivi = cipCode == "R" ? 2 : 1;
624 }
625 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
626 cipCode)) {
627 ivj = cipCode == "R" ? 2 : 1;
628 }
629 if (ivi < ivj) {
630 return -1;
631 } else if (ivi > ivj) {
632 return 1;
633 }
634
635 // bond stereo is taken care of in the neighborhood comparison
636 return 0;
637 }
638
639 public:
641 const ROMol *dp_mol{nullptr};
642 bool df_useNbrs{false};
645 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
646 int operator()(int i, int j) const {
647 PRECONDITION(dp_atoms, "no atoms");
648 PRECONDITION(dp_mol, "no molecule");
649 PRECONDITION(i != j, "bad call");
650 int v = basecomp(i, j);
651 if (v) {
652 return v;
653 }
654
655 if (df_useNbrs) {
656 getAtomNeighborhood(dp_atoms[i].bonds);
657 getAtomNeighborhood(dp_atoms[j].bonds);
658
659 // we do two passes through the neighbor lists. The first just uses the
660 // atomic numbers (by passing the optional 10000 to bondholder::compare),
661 // the second takes the already-computed index into account
662 for (unsigned int ii = 0;
663 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
664 ++ii) {
665 int cmp = bondholder::compare(
666 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
667 if (cmp) {
668 return cmp;
669 }
670 }
671 for (unsigned int ii = 0;
672 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
673 ++ii) {
674 int cmp =
675 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
676 if (cmp) {
677 return cmp;
678 }
679 }
680 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
681 return -1;
682 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
683 return 1;
684 }
685 }
686 return 0;
687 }
688};
689
690/*
691 * Basic canonicalization function to organize the partitions which will be
692 * sorted next.
693 * */
694
695template <typename CompareFunc>
696void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
697 int mode, int *order, int *count, int &activeset,
698 int *next, int *changed, char *touchedPartitions) {
699 unsigned int nAtoms = mol.getNumAtoms();
700 int partition;
701 int symclass = 0;
702 int *start;
703 int offset;
704 int index;
705 int len;
706 int i;
707 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
708
709 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
710 while (activeset != -1) {
711 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
712 // std::cerr<<" next: ";
713 // for(unsigned int ii=0;ii<nAtoms;++ii){
714 // std::cerr<<ii<<":"<<next[ii]<<" ";
715 // }
716 // std::cerr<<std::endl;
717 // for(unsigned int ii=0;ii<nAtoms;++ii){
718 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
719 // "<<atoms[order[ii]].index<<std::endl;
720 // }
721
722 partition = activeset;
723 activeset = next[partition];
724 next[partition] = -2;
725
726 len = count[partition];
727 offset = atoms[partition].index;
728 start = order + offset;
729 // std::cerr<<"\n\n**************************************************************"<<std::endl;
730 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
731 // for(unsigned int ii=0;ii<len;++ii){
732 // std::cerr<<" "<<order[offset+ii]+1;
733 // }
734 // std::cerr<<std::endl;
735 // for(unsigned int ii=0;ii<nAtoms;++ii){
736 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
737 // "<<atoms[order[ii]].index<<std::endl;
738 // }
739 hanoisort(start, len, count, changed, compar);
740 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
741 // std::cerr<<" result:";
742 // for(unsigned int ii=0;ii<nAtoms;++ii){
743 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
744 // "<<atoms[order[ii]].index<<std::endl;
745 // }
746 for (int k = 0; k < len; ++k) {
747 changed[start[k]] = 0;
748 }
749
750 index = start[0];
751 // std::cerr<<" len:"<<len<<" index:"<<index<<"
752 // count:"<<count[index]<<std::endl;
753 for (i = count[index]; i < len; i++) {
754 index = start[i];
755 if (count[index]) {
756 symclass = offset + i;
757 }
758 atoms[index].index = symclass;
759 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
760 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
761 // activeset=index;
762 //}
763 for (unsigned j = 0; j < atoms[index].degree; ++j) {
764 changed[atoms[index].nbrIds[j]] = 1;
765 }
766 }
767 // std::cerr<<std::endl;
768
769 if (mode) {
770 index = start[0];
771 for (i = count[index]; i < len; i++) {
772 index = start[i];
773 for (unsigned j = 0; j < atoms[index].degree; ++j) {
774 unsigned int nbor = atoms[index].nbrIds[j];
775 touchedPartitions[atoms[nbor].index] = 1;
776 }
777 }
778 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
779 if (touchedPartitions[ii]) {
780 partition = order[ii];
781 if ((count[partition] > 1) && (next[partition] == -2)) {
782 next[partition] = activeset;
783 activeset = partition;
784 }
785 touchedPartitions[ii] = 0;
786 }
787 }
788 }
789 }
790} // end of RefinePartitions()
791
792template <typename CompareFunc>
793void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
794 int mode, int *order, int *count, int &activeset, int *next,
795 int *changed, char *touchedPartitions) {
796 unsigned int nAtoms = mol.getNumAtoms();
797 int partition;
798 int offset;
799 int index;
800 int len;
801 int oldPart = 0;
802
803 for (unsigned int i = 0; i < nAtoms; i++) {
804 partition = order[i];
805 oldPart = atoms[partition].index;
806 while (count[partition] > 1) {
807 len = count[partition];
808 offset = atoms[partition].index + len - 1;
809 index = order[offset];
810 atoms[index].index = offset;
811 count[partition] = len - 1;
812 count[index] = 1;
813
814 // test for ions, water molecules with no
815 if (atoms[index].degree < 1) {
816 continue;
817 }
818 for (unsigned j = 0; j < atoms[index].degree; ++j) {
819 unsigned int nbor = atoms[index].nbrIds[j];
820 touchedPartitions[atoms[nbor].index] = 1;
821 changed[nbor] = 1;
822 }
823
824 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
825 if (touchedPartitions[ii]) {
826 int npart = order[ii];
827 if ((count[npart] > 1) && (next[npart] == -2)) {
828 next[npart] = activeset;
829 activeset = npart;
830 }
831 touchedPartitions[ii] = 0;
832 }
833 }
834 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
835 changed, touchedPartitions);
836 }
837 // not sure if this works each time
838 if (atoms[partition].index != oldPart) {
839 i -= 1;
840 }
841 }
842} // end of BreakTies()
843
845 int *order, int *count,
846 canon_atom *atoms);
847
848RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
849 int *count, int &activeset,
850 int *next, int *changed);
851
852//! Note that atom maps on dummy atoms will always be used
854 const ROMol &mol, std::vector<unsigned int> &res, bool breakTies = true,
855 bool includeChirality = true, bool includeIsotopes = true,
856 bool includeAtomMaps = true, bool includeChiralPresence = false,
857 bool includeStereoGroups = true, bool useNonStereoRanks = false,
858 bool includeRingStereo = true);
859
860//! Note that atom maps on dummy atoms will always be used
862 const ROMol &mol, std::vector<unsigned int> &res,
863 const boost::dynamic_bitset<> &atomsInPlay,
864 const boost::dynamic_bitset<> &bondsInPlay,
865 const std::vector<std::string> *atomSymbols,
866 const std::vector<std::string> *bondSymbols, bool breakTies,
867 bool includeChirality, bool includeIsotope, bool includeAtomMaps,
868 bool includeChiralPresence, bool includeRingStereo = true);
869
870//! Note that atom maps on dummy atoms will always be used
872 const ROMol &mol, std::vector<unsigned int> &res,
873 const boost::dynamic_bitset<> &atomsInPlay,
874 const boost::dynamic_bitset<> &bondsInPlay,
875 const std::vector<std::string> *atomSymbols = nullptr,
876 bool breakTies = true, bool includeChirality = true,
877 bool includeIsotopes = true, bool includeAtomMaps = true,
878 bool includeChiralPresence = false, bool includeRingStereo = true) {
879 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
880 breakTies, includeChirality, includeIsotopes,
881 includeAtomMaps, includeChiralPresence, includeRingStereo);
882};
883
885 std::vector<unsigned int> &res);
886
888 std::vector<Canon::canon_atom> &atoms,
889 bool includeChirality = true,
890 bool includeStereoGroups = true);
891
892namespace detail {
894 std::vector<Canon::canon_atom> &atoms,
895 bool includeChirality,
896 const std::vector<std::string> *atomSymbols,
897 const std::vector<std::string> *bondSymbols,
898 const boost::dynamic_bitset<> &atomsInPlay,
899 const boost::dynamic_bitset<> &bondsInPlay,
900 bool needsInit);
901template <typename T>
902void rankWithFunctor(T &ftor, bool breakTies, int *order,
903 bool useSpecial = false, bool useChirality = false,
904 bool includeRingStereo = true,
905 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
906 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
907
908} // namespace detail
909
910} // namespace Canon
911} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:74
int getAtomicNum() const
returns our atomic number
Definition Atom.h:145
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:107
@ CHI_UNSPECIFIED
chirality that hasn't been specified
Definition Atom.h:106
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:108
BondType
the type of Bond
Definition Bond.h:55
@ UNSPECIFIED
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:94
@ STEREONONE
Definition Bond.h:95
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:508
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:519
int operator()(int i, int j) const
Definition new_canon.h:527
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:507
Canon::canon_atom * dp_atoms
Definition new_canon.h:505
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:644
int operator()(int i, int j) const
Definition new_canon.h:646
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:138
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:139
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:142
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:199
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:198
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:202
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:421
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition ROMol.h:284
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:249
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, bool includeRingStereo=true, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true, bool includeStereoGroups=true)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:569
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:793
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:696
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true, bool includeChiralPresence=false, bool includeStereoGroups=true, bool useNonStereoRanks=false, bool includeRingStereo=true)
Note that atom maps on dummy atoms will always be used.
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps, bool includeChiralPresence, bool includeRingStereo=true)
Note that atom maps on dummy atoms will always be used.
RDKIT_RDGENERAL_EXPORT const std::string _CanonicalRankingNumber
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
StereoGroupType
Definition StereoGroup.h:30
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:150
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:40
Bond::BondType bondType
Definition new_canon.h:33
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:63
bool operator<(const bondholder &o) const
Definition new_canon.h:62
Bond::BondStereo stype
Definition new_canon.h:38
const canon_atom * controllingAtoms[4]
Definition new_canon.h:39
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:52
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:45
unsigned int bondStereo
Definition new_canon.h:34
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:67
unsigned int nbrSymClass
Definition new_canon.h:36
std::vector< bondholder > bonds
Definition new_canon.h:113
StereoGroupType typeOfStereoGroup
Definition new_canon.h:107
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:108
std::vector< int > revistedNeighbors
Definition new_canon.h:112
std::vector< int > neighborNum
Definition new_canon.h:111
unsigned int totalNumHs
Definition new_canon.h:103
const std::string * p_symbol
Definition new_canon.h:109
unsigned int whichStereoGroup
Definition new_canon.h:106