1
2 import numpy
3 import math
4
5 from PyML.utils import misc
6 from PyML.datagen import sample
7 from PyML.evaluators import assess
8 from baseClassifiers import Classifier, IteratorClassifier
9 import svm
10
11 __docformat__ = "restructuredtext en"
12
14
15 '''A base class for creating composite classifiers
16
17 A composite classifier has an attribute called "classifier", and by default
18 requests are forwarded to the appropriate function of the classifier
19 (including the "test" function).
20 For logging purposes, use the log attribute of the classifier rather
21 than the composite log.
22 See for example the FeatureSelect object.'''
23
24 deepcopy = True
25
def __init__(self, classifier, **args):
    """Wrap a classifier, storing a private copy of it.

    :Parameters:
      - `classifier` - a Classifier instance; a string argument is
        allowed and simply skips the wrapping step (base-class handling)

    Raises ValueError if the argument is not tagged as a classifier.
    """
    Classifier.__init__(self, classifier, **args)
    # a string argument means construction is deferred to the base class
    if isinstance(classifier, str):
        return
    if (not hasattr(classifier, 'type')) or classifier.type != 'classifier':
        raise ValueError('argument should be a classifier')
    if classifier.__class__ == self.__class__:
        # copy construction from another composite: copy the *wrapped*
        # classifier rather than nesting composite inside composite
        self.classifier = classifier.classifier.__class__(
            classifier.classifier)
    else:
        self.classifier = classifier.__class__(classifier)
37
def __repr__(self):
    """Return a printable summary: the composite's class name followed by
    the representation of the wrapped classifier."""
    lines = ['<' + self.__class__.__name__ + ' instance>',
             'Classifier:',
             repr(self.classifier)]
    return '\n'.join(lines)
44
def classify(self, data, i):
    """Delegate classification of pattern ``i`` to the wrapped classifier."""
    return self.classifier.classify(data, i)
56
57
58
59
60
61
def getTest(self):
    """Return the test function of the wrapped classifier."""
    return self.classifier.test

def setTest(self, value):
    """Reject assignment: the test function of a composite is read-only."""
    raise ValueError('do not call this function')

# 'test' forwards to the wrapped classifier's test function; assigning to
# it is an error by design.
test = property(getTest, setTest,
                None, 'the test function of the underlying classifier')
74
75
76
class Chain (CompositeClassifier):
    '''A chain is a list of actions to be performed on a dataset,
    the last of which is assumed to be a classifier.
    The actions can be for example a chain of preprocessing steps or
    a step of feature selection (same as using the FeatureSelect class)
    Each action in the chain is assumed to have a "train" method and is
    assumed to have a copy constructor'''

    deepcopy = True

    def __init__(self, arg, **args):
        """
        :Parameters:
          - `arg` - a Chain object or a list of objects, each of which
            implements 'train', 'test' and has a copy constructor

        Raises ValueError when `arg` is neither a Chain nor a list.
        """
        Classifier.__init__(self)
        if arg.__class__ == self.__class__:
            # copy construction: duplicate every chain component and the
            # final classifier
            other = arg
            self.classifier = other.classifier.__class__(other.classifier)
            self.chain = [component.__class__(component)
                          for component in other.chain]
        elif isinstance(arg, list):
            # the last list element is the classifier; the rest are the
            # preprocessing / feature-selection steps
            self.classifier = arg[-1].__class__(arg[-1])
            self.chain = [arg[i].__class__(arg[i])
                          for i in range(len(arg) - 1)]
        else:
            # previously this fell through silently, leaving the object
            # half-constructed; fail early instead
            raise ValueError('expecting a Chain instance or a list of actions')

    def train(self, data, **args):
        """Train each chain component in order, then the final classifier."""
        Classifier.train(self, data, **args)
        for component in self.chain:
            component.train(data, **args)
        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()

    def test(self, data, **args):
        """Apply each chain component to the data, then test the classifier."""
        for component in self.chain:
            component.test(data, **args)
        # debug print statements removed
        return self.classifier.test(data, **args)
127
129
130 """A method for combining a feature selector and classifier;
131 training consists of performing feature selection and afterwards training
132 the classifier on the selected features;
133 use this classifier to test the accuracy of a feature selector/classifier
134 combination.
135 USAGE:
136 construction :
137 featureSelect(classifier, featureSelector)
138 featureSelect(otherFeatureSelectInstance) - copy construction
139 """
140
141 deepcopy = True
142
def __init__(self, arg1, arg2=None):
    """Build a feature-selector/classifier combination.

    :Parameters:
      - `arg1`, `arg2` - a classifier and a feature selector, supplied in
        either order; alternatively `arg1` alone may be another instance,
        in which case this is copy construction

    Raises ValueError when an argument is neither a classifier nor a
    feature selector.
    """
    Classifier.__init__(self)

    if arg1.__class__ == self.__class__:
        # copy construction
        other = arg1
        self.classifier = other.classifier.__class__(other.classifier)
        self.featureSelector = other.featureSelector.__class__(
            other.featureSelector)
    else:
        # the two arguments may arrive in either order; dispatch on the
        # 'type' tag each PyML object carries
        for arg in (arg1, arg2):
            if arg.type == 'classifier':
                self.classifier = arg.__class__(arg)
            elif arg.type == 'featureSelector':
                self.featureSelector = arg.__class__(arg)
            else:
                raise ValueError(
                    'argument should be either classifier or featureSelector')
162
def __repr__(self):
    """Summarize the feature-selecting classifier: class name, number of
    selected features (if trained), wrapped classifier and selector."""
    pieces = ['<' + self.__class__.__name__ + ' instance>\n']
    if hasattr(self, 'numFeatures'):
        pieces.append('number of features trained on:' +
                      str(self.numFeatures) + '\n')
    pieces.append('Classifier:\n')
    pieces.append(repr(self.classifier))
    pieces.append('Feature Selector:\n')
    pieces.append(repr(self.featureSelector))
    return ''.join(pieces)
174
175
176 - def train(self, data, **args) :
187
188
190
191 '''A method for combining a feature selector and classifier;
192 the difference from FeatureSelect is that it is specifically
193 designed for computing the accuracy while varying the
194 number of features.
195 '''
196
197 deepcopy = True
198
def __init__(self, arg1, arg2=None):
    """Build a feature-selector/classifier combination (all-features variant).

    :Parameters:
      - `arg1`, `arg2` - a classifier and a feature selector, supplied in
        either order; alternatively `arg1` alone may be another instance,
        in which case this is copy construction

    Raises ValueError when an argument is neither a classifier nor a
    feature selector.
    """
    Classifier.__init__(self)

    if arg1.__class__ == self.__class__:
        # copy construction
        other = arg1
        self.classifier = other.classifier.__class__(other.classifier)
        self.featureSelector = other.featureSelector.__class__(
            other.featureSelector)
    else:
        # the two arguments may arrive in either order; dispatch on the
        # 'type' tag each PyML object carries
        for arg in (arg1, arg2):
            if arg.type == 'classifier':
                self.classifier = arg.__class__(arg)
            elif arg.type == 'featureSelector':
                self.featureSelector = arg.__class__(arg)
            else:
                raise ValueError(
                    'argument should be either classifier or featureSelector')
218
219 - def train(self, data, **args) :
242
243
244
246
247 """
248 classifier combines the predictions of classifiers trained on
249 different datasets.
250 The datasets are presented as a DataAggregate dataset container.
251 """
252
262
def train(self, data, **args):
    """Train each sub-classifier on its corresponding dataset.

    :Parameters:
      - `data` - a DataAggregate container holding one dataset per
        sub-classifier

    Raises ValueError when `data` is not a DataAggregate.
    """
    Classifier.train(self, data, **args)
    if data.__class__.__name__ != 'DataAggregate':
        raise ValueError('train requires a DataAggregate dataset')

    # classifier i is trained on dataset i of the aggregate
    for i in range(len(self.classifiers)):
        self.classifiers[i].train(data.datas[i], **args)
    self.log.trainingTime = self.getTrainingTime()
272
def classify(self, data, p):
    """Combine the decision functions of the sub-classifiers on pattern ``p``.

    Returns ``(1, sum of decision values)`` when the classifiers agree on a
    positive prediction, and ``(0, minimum decision value)`` otherwise.
    NOTE(review): the agreement test inspects only the first two decision
    values, so the rule assumes exactly two sub-classifiers — confirm
    before using with more.

    Raises ValueError when `data` is not a DataAggregate.
    """
    if data.__class__.__name__ != 'DataAggregate':
        raise ValueError('classify requires a DataAggregate dataset')

    decisionFuncs = [self.classifiers[i].decisionFunc(data.datas[i], p)
                     for i in range(len(self.classifiers))]

    if decisionFuncs[0] > 0 and decisionFuncs[1] > 0:
        return 1, numpy.sum(decisionFuncs)
    else:
        return 0, min(decisionFuncs)
289