Package PyML :: Package containers :: Module pairData
[frames] | no frames]

Source Code for Module PyML.containers.pairData

  1   
  2  from PyML.utils import misc 
  3  from PyML.utils import arrayWrap 
  4  from PyML.containers.ext import cpairdataset 
  5  from PyML.containers.baseDatasets import WrapperDataSet, BaseDataSet 
  6  from PyML.containers.labels import Labels 
  7   
  8  """ 
  9  classes for dealing with data that is composed of pairs of simpler objects 
 10  for which a kernel is available 
 11  """ 
 12   
 13  __docformat__ = "restructuredtext en" 
 14   
class PairDataSet (WrapperDataSet, cpairdataset.PairDataSet) :

    """
    DataSet container for pairs of objects.

    The kernel between a pair is defined via the kernel between the
    members of the pair:
    K((X_1,X_2), (X'_1, X'_2)) = K'(X_1, X'_1) K'(X_2, X'_2) +
                                 K'(X_1, X'_2) K'(X_2, X'_1)

    file format::

      id1_id2 label,... (can have additional fields that are ignored)

    """

    isVector = False

    def __init__(self, arg, **args) :
        """
        :Parameters:
          - `arg` - a file name or another PairDataSet object.
            if a file name is supplied the constructor expects a dataset
            object as a keyword argument 'data'
        :Keywords:
          - `data` - a dataset object from which the kernel between the pairs
            of patterns is derived.
          - `patterns` - patterns to copy when performing copy construction

        :Raises:
          ValueError if a file name is given without the 'data' keyword.
        """

        BaseDataSet.__init__(self)
        # exact class match (not isinstance): an instance of a different
        # pair-dataset class is not treated as a copy source
        if arg.__class__ == self.__class__ :
            self.copyConstruct(arg, **args)
        elif isinstance(arg, str) :
            if 'data' not in args :
                raise ValueError('missing data object')
            self._data = args['data']
            self.constructFromFile(arg)

        self.attachKernel('linear')

    def copy(self, other, patterns, deepcopy) :
        """
        Make this object a copy of the given patterns of `other`.

        The underlying dataset (`_data`) is shared with `other`, not copied;
        `deepcopy` is accepted but not used by this method.
        """

        self.callCopyConstructor(other, patterns)
        self.pairs = [other.pairs[p] for p in patterns]
        self._data = other._data

    def constructFromFile(self, fileName) :
        """
        Read pair labels from `fileName` and build the pair dataset.

        Each pattern ID in the labels file has the form ``id1_id2``; a pair
        is kept only when both IDs are present in the underlying dataset,
        otherwise a message is printed and the pair is skipped.
        """

        # lookup table built from the IDs of the underlying dataset and
        # their positions
        patternIDdict = misc.list2dict(self._data.labels.patternID,
                                       range(len(self._data)))

        labels = Labels(fileName)
        patterns = []
        pairs = []
        for i in range(len(labels)) :
            p1,p2 = labels.patternID[i].split('_')
            # add only pairs for which we have kernel data:
            if p1 in patternIDdict and p2 in patternIDdict :
                pairs.append((patternIDdict[p1],patternIDdict[p2]))
                patterns.append(i)
            else :
                print('%s  or  %s not found' % (p1, p2))
        # keep only the labels of the pairs that were retained
        labels = labels.__class__(labels, patterns = patterns)

        self.pairs = pairs

        firstVector = arrayWrap.intVector([pair[0] for pair in pairs])
        secondVector = arrayWrap.intVector([pair[1] for pair in pairs])
        self.callConstructor(firstVector, secondVector)

        WrapperDataSet.attachLabels(self, labels)

    def callConstructor(self, firstVector, secondVector) :
        """
        Initialize the cpairdataset.PairDataSet part of this object from
        the two index vectors and the underlying dataset.
        """

        cpairdataset.PairDataSet.__init__(self, firstVector, secondVector,
                                          self._data.castToBase())

    def callCopyConstructor(self, other, patterns) :
        """
        Initialize the cpairdataset.PairDataSet part of this object as a
        copy of the given patterns of `other`.
        """

        cpairdataset.PairDataSet.__init__(self, other, patterns)

    def __len__(self) :
        """Return the number of pairs in the dataset."""

        return len(self.pairs)

    def getPair(self, i) :
        """
        Return the IDs of the members of pair `i` as a tuple.

        Pattern IDs have the form ``id1_id2`` (see constructFromFile), so
        the ID is split on '_' rather than on whitespace.
        """

        return tuple(self.labels.patternID[i].split('_'))
class SimplePairDataSet (BaseDataSet) :

    """
    DataSet container for pairs of objects.

    file format::

      id1_id2, label,... (can have additional fields that are ignored)

    """

    isVector = False

    def __init__(self, arg, **args) :
        """
        :Parameters:
          - `arg` - a file name or another SimplePairDataSet object.
            if a file name is supplied the constructor expects a dataset
            object as a keyword argument 'data'
        :Keywords:
          - `data` - a dataset object whose pattern IDs determine which
            pairs are kept; if it is None all pairs in the file are kept.
          - `patterns` - patterns to copy when performing copy construction
            (defaults to all the patterns of `arg`)

        :Raises:
          ValueError if a file name is given without the 'data' keyword.
        """

        BaseDataSet.__init__(self)
        if arg.__class__ == self.__class__ :
            if 'patterns' in args :
                patterns = args['patterns']
            else :
                patterns = range(len(arg))
            self.copyConstruct(arg, patterns)
        elif isinstance(arg, str) :
            if 'data' not in args :
                raise ValueError('missing data object')
            self.data = args['data']
            self.constructFromFile(arg)

    def copyConstruct(self, other, patterns) :
        """
        Make this object a copy of the given patterns of `other`.

        The `data` object is shared with `other`, not copied.
        """

        self.pairs = [other.pairs[p] for p in patterns]
        self.data = other.data
        self.labels = Labels(other.labels, patterns = patterns)

    def constructFromFile(self, fileName) :
        """
        Read pairs and their labels from a comma-delimited file.

        The first field of each line is ``id1_id2`` and the second is the
        label.  The two IDs are put in sorted order so that a pair has the
        same pattern ID regardless of the order it is listed in.  When a
        data object is available only pairs whose two IDs both appear in
        it are kept; other pairs are reported and skipped.  Blank lines
        are ignored.
        """

        delim = ','
        if self.data is not None :
            patternIDdict = misc.list2dict(self.data.labels.patternID,
                                           range(len(self.data)))
        else :
            patternIDdict = {}

        L = []
        patternID = []
        pairs = []
        # the with statement guarantees the file is closed even if a
        # malformed line raises
        with open(fileName) as fileHandle :
            for line in fileHandle :
                # strip only the line terminator: slicing off the last
                # character would truncate a final line without a newline
                line = line.rstrip('\r\n')
                if not line.strip() :
                    continue
                tokens = line.split(delim)
                p1,p2 = tokens[0].split('_')
                if p1 > p2 : p1,p2 = p2,p1
                # add only pairs for which we have kernel data:
                if (p1 in patternIDdict and p2 in patternIDdict) or self.data is None :
                    pairs.append((p1,p2))
                    L.append(tokens[1])
                    patternID.append('_'.join([p1,p2]))
                else :
                    print('%s  or  %s not found' % (p1, p2))
        self.pairs = pairs
        self.labels = Labels(L, patternID = patternID)

    def __len__(self) :
        """Return the number of pairs in the dataset."""

        return len(self.pairs)

    def getPair(self, i) :
        """
        Return the IDs of the members of pair `i` as a tuple.

        Pattern IDs are stored as ``id1_id2`` (see constructFromFile), so
        the ID is split on '_' rather than on whitespace.
        """

        return tuple(self.labels.patternID[i].split('_'))
class PairDataSetSum (PairDataSet, cpairdataset.PairDataSetSum) :

    """
    PairDataSet variant whose construction and dot product are delegated
    to cpairdataset.PairDataSetSum.
    """

    def __init__(self, arg, **args) :
        """Forward all arguments unchanged to PairDataSet.__init__."""

        PairDataSet.__init__(self, arg, **args)

    def callConstructor(self, firstVector, secondVector) :
        """
        Initialize the cpairdataset.PairDataSetSum part of this object
        from the two index vectors and the underlying dataset.
        """

        construct = cpairdataset.PairDataSetSum.__init__
        construct(self, firstVector, secondVector, self._data.castToBase())

    def callCopyConstructor(self, other, patterns) :
        """
        Initialize the cpairdataset.PairDataSetSum part of this object as
        a copy of the given patterns of `other`.
        """

        construct = cpairdataset.PairDataSetSum.__init__
        construct(self, other, patterns)

    def dotProduct(self, i, j, other = None) :
        """
        Return the result of cpairdataset.PairDataSetSum.dotProduct for
        patterns `i` and `j`, passing `other` through unchanged.
        """

        compute = cpairdataset.PairDataSetSum.dotProduct
        return compute(self, i, j, other)
class PairDataSetOrd (PairDataSet, cpairdataset.PairDataSetOrd) :

    """
    PairDataSet variant whose construction and dot product are delegated
    to cpairdataset.PairDataSetOrd.
    """

    def __init__(self, arg, **args) :
        """Forward all arguments unchanged to PairDataSet.__init__."""

        PairDataSet.__init__(self, arg, **args)

    def callConstructor(self, firstVector, secondVector) :
        """
        Initialize the cpairdataset.PairDataSetOrd part of this object
        from the two index vectors and the underlying dataset.
        """

        construct = cpairdataset.PairDataSetOrd.__init__
        construct(self, firstVector, secondVector, self._data.castToBase())

    def callCopyConstructor(self, other, patterns) :
        """
        Initialize the cpairdataset.PairDataSetOrd part of this object as
        a copy of the given patterns of `other`.
        """

        construct = cpairdataset.PairDataSetOrd.__init__
        construct(self, other, patterns)

    def dotProduct(self, i, j, other = None) :
        """
        Return the result of cpairdataset.PairDataSetOrd.dotProduct for
        patterns `i` and `j`, passing `other` through unchanged.
        """

        compute = cpairdataset.PairDataSetOrd.dotProduct
        return compute(self, i, j, other)