1
2 import numpy
3 from PyML.containers.ext import caggregate
4 from PyML.containers.baseDatasets import BaseDataSet, WrapperDataSet
5
class Aggregate(WrapperDataSet, caggregate.Aggregate):
    """
    Combine several C++ dataset objects into a single dataset.

    The dot product of an aggregate is a weighted sum of the kernels of the
    individual dataset objects.

    Construction of an aggregate requires a list of dataset objects.
    It is assumed that all datasets refer to the same underlying objects, so
    in particular they have the same labels and the same number of patterns
    (the labels object is initialized using the labels of the first dataset
    in the list).
    """

    # NOTE(review): `_addDatas`, `copyConstruct` and `attachKernel` are called
    # below but are not defined in the visible part of this class; presumably
    # they are defined elsewhere in the class or provided by a base class.

    isVector = False

    def __init__(self, arg, **args):
        """
        :Parameters:
          - `arg` - either an Aggregate object (for copy construction) or a
            list of C++ dataset objects

        :Keywords:
          - `weights` - a sequence of weights used for computing the dot
            product; element i is the weight for dataset i in the aggregate.
            When omitted, every dataset gets the weight
            1 / (number of datasets).

        Raises ValueError when `arg` is neither an Aggregate nor a list, or
        when the datasets fail the checks performed by `checkDatas`.
        """
        BaseDataSet.__init__(self)
        if arg.__class__ == self.__class__:
            self.copyConstruct(arg, **args)
        elif isinstance(arg, list):
            self.checkDatas(arg)
            self.pydatas = arg
            if 'weights' in args:
                # Build a fresh list of floats rather than converting in
                # place: the caller's sequence is left untouched and tuples
                # (or any other iterable) of weights are accepted.
                self.pyweights = [float(weight) for weight in args['weights']]
                assert len(self.pyweights) == len(self.pydatas), \
                    'number of weights must equal number of datasets'
            else:
                num_datas = len(self.pydatas)
                self.pyweights = [1.0 / num_datas for _ in range(num_datas)]
            self._addDatas()
            WrapperDataSet.attachLabels(self, self.pydatas[0].labels)
            self.attachKernel('linear')
        else:
            raise ValueError('wrong type of input for constructor')

    def copy(self, other, patterns, deepcopy):
        """
        Populate this aggregate from `other`.

        The weights list is copied, and each dataset of `other` is rebuilt by
        calling its own class with the dataset and `patterns` as the
        `patterns` keyword.  `deepcopy` is accepted but not used here.
        """
        self.pyweights = other.pyweights[:]
        self.pydatas = [data.__class__(data, patterns=patterns)
                        for data in other.pydatas]
        self._addDatas()

    def checkDatas(self, datas):
        """
        Verify that `datas` can be combined into an aggregate.

        Raises ValueError when the list is empty, when the datasets differ in
        length, or when any dataset's `labels.patternID` differs from that of
        the first dataset.
        """
        if not datas:
            raise ValueError('cannot aggregate an empty list of datasets')
        lengths = [len(data) for data in datas]
        # Plain-Python comparison; avoids numpy.alltrue, which newer NumPy
        # releases no longer provide.
        if any(length != lengths[0] for length in lengths):
            raise ValueError('datasets not equal lengths')
        reference_ids = datas[0].labels.patternID
        for data in datas[1:]:
            if data.labels.patternID != reference_ids:
                raise ValueError('datasets not have the same pattern IDs')
85
86
87
# NOTE(review): the class header and the headers of the last two methods are
# missing from the visible source.  The class name comes from the docstring
# and the error message; the base class is inferred from the explicit
# BaseDataSet.__init__(self) call; `__len__` / `__repr__` are inferred from
# the method bodies.  Confirm against the original file.
class DataAggregate(BaseDataSet):
    """An aggregate of datasets.

    A DataAggregate object contains a list of datasets in its datas attribute,
    and behaves like a dataset when it comes to copy construction, so it can
    be used as a dataset object when it comes to testing classifiers.

    USAGE:
    DataAggregate(list) - construct an object out of a list of datasets
    (they do not have to be of the same kind!).
    It is assumed that all datasets are the same length, and have the same
    labels.
    DataAggregate(other[,optional arguments]) - copy construction - all
    options supported by the dataset classes can be used.
    """

    def __init__(self, arg, *opt, **args):
        """
        Build the aggregate from a list of datasets or from another
        DataAggregate.

        For copy construction every dataset of the other aggregate is rebuilt
        by calling its own class with the dataset plus `opt` and `args`.
        The labels are taken from the first dataset, so the list must not be
        empty.  Raises ValueError for any other type of `arg`.
        """
        BaseDataSet.__init__(self)
        if arg.__class__ == self.__class__:
            self.datas = [data.__class__(data, *opt, **args)
                          for data in arg.datas]
        elif isinstance(arg, list):
            self.datas = arg
        else:
            raise ValueError('wrong type of input for DataAggregate')
        self.labels = self.datas[0].labels

    def __len__(self):
        """Return the length of the first dataset."""
        return len(self.datas[0])

    def __repr__(self):
        """Return the string forms of all datasets, one per line."""
        return ''.join(str(data) + '\n' for data in self.datas)
126