Package ML :: Package Cluster :: Module Murtagh
[hide private]
[frames] | no frames]

Source Code for Module ML.Cluster.Murtagh

  1  # $Id: Murtagh.py 742 2008-07-05 07:42:38Z glandrum $ 
  2  # 
  3  # Copyright (C) 2001-2008  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Interface to the C++ Murtagh hierarchic clustering code 
  8   
  9  """ 
 10  from ML.Cluster import Clusters 
 11  from ML.Cluster.Clustering import MurtaghCluster,MurtaghDistCluster 
 12  import numpy 
 13   
 14   
 15  # constants to select the clustering algorithm 
 16  WARDS=1 
 17  SLINK=2 
 18  CLINK=3 
 19  UPGMA=4 
 20  MCQUITTY=5 
 21  GOWER=6 
 22  CENTROID=7 
 23   
 24  # descriptions of the methods: 
 25  methods = [ 
 26    ("Ward's Minimum Variance",WARDS,"Ward's Minimum Variance"), 
 27    ('Average Linkage',UPGMA,'Group Average Linkage (UPGMA)'), 
 28    ('Single Linkage',SLINK,'Single Linkage (SLINK)'), 
 29    ('Complete Linkage',CLINK,'Complete Linkage (CLINK)'), 
 30  #  ("McQuitty",MCQUITTY,"McQuitty's method"), 
 31  #  ("Gower",GOWER,"Gower's median method"), 
 32    ("Centroid",CENTROID,"Centroid method"), 
 33    ] 
 34   
35 -def _LookupDist(dists,i,j,n):
36 """ *Internal Use Only* 37 38 returns the distance between points i and j in the symmetric 39 distance matrix _dists_ 40 41 """ 42 if i==j: return 0.0 43 if i > j: i,j=j,i 44 return dists[j*(j-1)/2+i]
45 46 47
48 -def _ToClusters(data,nPts,ia,ib,crit,isDistData=0):
49 """ *Internal Use Only* 50 51 Converts the results of the Murtagh clustering code into 52 a cluster tree, which is returned in a single-entry list 53 54 """ 55 cs = [None]*nPts 56 for i in range(nPts): 57 cs[i] = Clusters.Cluster(metric=0.0,data=i,index=(i+1)) 58 59 nClus = len(ia)-1 60 for i in range(nClus): 61 idx1 = ia[i]-1 62 idx2 = ib[i]-1 63 c1 = cs[idx1] 64 c2 = cs[idx2] 65 newClust = Clusters.Cluster(metric=crit[i],children=[c1,c2], 66 index=nPts+i+1) 67 cs[idx1] = newClust 68 69 return [newClust]
70 71
72 -def ClusterData(data,nPts,method,isDistData=0):
73 """ clusters the data points passed in and returns the cluster tree 74 75 **Arguments** 76 77 - data: a list of lists (or array, or whatever) with the input 78 data (see discussion of _isDistData_ argument for the exception) 79 80 - nPts: the number of points to be used 81 82 - method: determines which clustering algorithm should be used. 83 The defined constants for these are: 84 'WARDS, SLINK, CLINK, UPGMA' 85 86 - isDistData: set this toggle when the data passed in is a 87 distance matrix. The distance matrix should be stored 88 symmetrically so that _LookupDist (above) can retrieve 89 the results: 90 for i<j: d_ij = dists[j*(j-1)/2 + i] 91 92 93 **Returns** 94 95 - a single entry list with the cluster tree 96 """ 97 data = numpy.array(data) 98 if not isDistData: 99 sz = data.shape[1] 100 ia,ib,crit = MurtaghCluster(data,nPts,sz,method) 101 else: 102 ia,ib,crit = MurtaghDistCluster(data,nPts,method) 103 c = _ToClusters(data,nPts,ia,ib,crit,isDistData=isDistData) 104 105 return c
106