Package ML :: Package Composite :: Module AdjustComposite
[hide private]
[frames | no frames]

Source Code for Module ML.Composite.AdjustComposite

 1  # $Id: AdjustComposite.py 778 2008-07-30 09:51:55Z glandrum $ 
 2  # 
 3  #  Copyright (C) 2003  greg Landrum and Rational Discovery LLC 
 4  #   All Rights Reserved 
 5  # 
 6  """ functionality to allow adjusting composite model contents 
 7   
 8  """ 
 9  import numpy 
10  import copy 
11   
def _AccumulateVoteScores(model, dataSet, names, ptWeight, scores, quantize):
    """ adds _ptWeight_ to the score of each sub-model voting correctly on a point

      **Arguments**:

        - model: the composite model doing the classification

        - dataSet: a sequence of points; the last element of each point is
          taken to be its activity

        - names: (optional) column names passed to _model.SetInputOrder()_
          before classifying; skipped when None

        - ptWeight: the amount added to a sub-model's score for each point
          it gets right

        - scores: per-sub-model score array, modified in place

        - quantize: if true, the reference answer for each point is taken
          from _model.QuantizeActivity()_ instead of the raw point

    """
    if names is not None:
        model.SetInputOrder(names)
    nModels = len(scores)
    for pt in dataSet:
        # the return value is not needed, but the call has to happen before
        # the per-model votes are read back with GetVoteDetails()
        model.ClassifyExample(pt)
        if quantize:
            ans = model.QuantizeActivity(pt)[-1]
        else:
            ans = pt[-1]
        votes = model.GetVoteDetails()
        for i in range(nModels):
            if votes[i] == ans:
                scores[i] += ptWeight


def BalanceComposite(model, set1, set2, weight, targetSize, names1=None, names2=None):
    """ adjusts the contents of the composite model so as to maximize
      the weighted classification accuracy across the two data sets.

      The resulting composite model, with _targetSize_ models, is returned.

      **Arguments**:

        - model: the composite model to draw sub-models from; it is only
          modified through _SetInputOrder()_ and classification calls

        - set1, set2: the two data sets; the last element of each point is
          its activity

        - weight: relative weight of _set2_; _set1_ gets (1 - _weight_)

        - targetSize: the number of sub-models to keep

        - names1, names2: (optional) column names for _set1_ and _set2_

      **Returns**:

        a shallow copy of _model_ whose _modelList_, _errList_, _countList_
        and _quantizationRequirements_ hold only the _targetSize_ best
        scoring sub-models (best first).  Each _errList_ entry is
        1 - (normalized score).

      **Notes**:

        - if _names1_ and _names2_ are not provided, _set1_ and _set2_ should
          have the same ordering of columns and _model_ should already
          have had _SetInputOrder()_ called.

        - an empty data set contributes nothing to the scores (instead of
          causing a ZeroDivisionError)

        - an IndexError is raised if _targetSize_ is larger than the number
          of sub-models in _model_

    """
    #
    # adjust the weights to be proportional to the size of the two data sets
    # The normalization we do here assures that a perfect model contributes
    #  a score of S1+S2 to the final
    #
    S1 = len(set1)
    S2 = len(set2)
    nPts = S1 + S2
    # an empty set has no points to weight, so any value works; use 0 to
    # avoid dividing by zero
    weight1 = float(nPts) * (1 - weight) / S1 if S1 else 0.0
    weight2 = float(nPts) * weight / S2 if S2 else 0.0

    # start with a copy so that we get all the additional schnick-schnack
    res = copy.copy(model)
    res.modelList = []
    res.errList = []
    res.countList = []
    res.quantizationRequirements = []

    startSize = len(model)
    # the builtin float is used because the numpy.float alias was removed
    # from NumPy
    scores = numpy.zeros(startSize, float)
    actQuantBounds = model.GetActivityQuantBounds()
    _AccumulateVoteScores(model, set1, names1, weight1, scores, actQuantBounds)
    _AccumulateVoteScores(model, set2, names2, weight2, scores, actQuantBounds)

    # normalize the scores (nothing to normalize when there is no data)
    if nPts:
        scores /= nPts

    # sort them, best first; tolist() yields plain Python ints
    bestOrder = numpy.argsort(scores)[::-1].tolist()
    # formatted as a single string so the output is the same under
    # Python 2 and Python 3
    print('\tTAKE: %s' % (bestOrder[:targetSize],))

    # and now take the best set:
    for i in range(targetSize):
        idx = bestOrder[i]
        mdl = model.modelList[idx]
        res.modelList.append(mdl)
        res.errList.append(1. - scores[idx])
        res.countList.append(1)
        # FIX: this should probably be more general:
        res.quantizationRequirements.append(0)
    return res
85