1
2 from PyML.containers.ext import ckerneldata
3 from PyML.containers.baseDatasets import WrapperDataSet
4 from PyML.containers.ext import ckernel
5 from PyML.containers import labels
6 from PyML.utils import misc,myio
7 from PyML.utils import arrayWrap
8
9 -class KernelData (WrapperDataSet, ckerneldata.KernelData) :
10 """
11 A container for holding a dataset with a dot product derived from
12 a pre-computed kernel matrix
13
14 File format:
15 delimited file with the first column interpreted as pattern IDs if
16 it is non-numeric; lines starting with # or % are treated as comments.
17 GIST format is accepted as well.
18
19 Construction::
20
21 Copy construction:
22 KernelData(other) -- optional keyword arguments are the same as
23 for other dataset containers
24
25 Construction from file:
26 KernelData(matrixFile [,labelsFile = labelsFileName, gistFormat = True])
27 matrixFile -- a file with the kernel matrix
28 labelsFile -- keyword argument containing a file name with the labels.
29 The parser tries to automatically guess if the file is in GIST format;
30 if this is not detected, use the 'gistFormat' keyword argument.
31 A matrix file with labels in it is not supported yet.
32 additional keyword arguments are the same as those supporting reading
33 of delimited files.
34 """
35
36
37
38
39 isVector = False
40
41 - def __init__(self, arg = None, **args) :
48
49 - def copy(self, other, patterns, deepcopy) :
52
54
# Body of the KernelData method that loads a pre-computed kernel matrix from
# a delimited text file.  The enclosing `def` line is not visible in this
# view; the statements below read `self`, `fileName` and the keyword
# dictionary `args` from it.
#
# Keyword arguments consulted here:
#   idColumn   -- defaults to 0; pass None for a file without an ID column.
#   headerRow  -- if true, the first data line is skipped even when numeric.
#   gistFormat -- same effect as headerRow.
#   labelsFile -- if present, labels are read from that file; otherwise the
#                 labels object is built from the collected pattern IDs.

matrix = ckernel.KernelMatrix()
# NOTE(review): presumably the SWIG ownership flag, cleared so that the
# Python proxy does not free the matrix handed to KernelData below --
# confirm against the ckernel bindings.
matrix.thisown = 0
patternID = []
delim = misc.getDelim(fileName)

idColumn = args.get('idColumn', 0)
# With an ID column the numeric values start at the second token.
# NOTE(review): any non-None idColumn is treated alike -- the ID is always
# taken from the first token (see below); confirm that is intended.
if idColumn is None:
    firstColumn = 0
else:
    firstColumn = 1

matrixFile = myio.myopen(fileName)
try:
    firstRow = True
    for line in matrixFile:
        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # and comment lines starting with % or #.
        if not line.strip() or line[0] in ("%", "#"):
            continue
        tokens = line.split(delim)

        if firstRow:
            firstRow = False
            # A first line whose last token is not a number is taken to be
            # a header and is skipped.
            try:
                float(tokens[-1])
            except ValueError:
                continue
            # A numeric first line is still skipped when the caller says
            # the file has a header row or is in gist format.
            if args.get('headerRow') or args.get('gistFormat'):
                continue

        values = arrayWrap.floatVector(
            [float(token) for token in tokens[firstColumn:]])
        matrix.addRow(values)
        if idColumn is not None:
            patternID.append(tokens[0])
finally:
    # Release the file handle even if a row fails to parse.
    matrixFile.close()

ckerneldata.KernelData.__init__(self, matrix)
if 'labelsFile' in args:
    self.attachLabels(labels.Labels(args['labelsFile'], **args))
else:
    self.attachLabels(labels.Labels(None, patternID = patternID))
95