def do_pca(self, args):
    '''
    PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
    Runs a PCA (2 output dimensions) on the rows of the coordinates file and
    sends the resulting scatter plot through self._send_plot.

    The optional second argument selects the columns to use and, optionally,
    a multiplier for each of them, e.g. "1,3*50,6,8x10,9". Do not use spaces;
    "*" and "x" mean the same thing. Without a second argument the selection
    is read from the first line of pca_config.txt.

    Side effects: fills self.pca_myArray, self.pca_paths and self.clustplot,
    writes the projected points to coordinate_punti.txt and exports the plot
    through do_export as "png/<selection>".
    (c)Paolo Pancaldi, Massimo Sandal 2009
    '''
    # -- arguments --
    # First argument: coordinates file name. Second (optional): column
    # selection. split() without a separator tolerates repeated blanks.
    parts = args.split()
    if not parts:
        print('Usage: pca <coordinates filename> [columns]')
        return
    file_name = parts[0]
    if len(parts) > 1:
        column_spec = parts[1]
    else:
        # Only touch pca_config.txt when it is really needed; strip the
        # newline so it does not leak into the export file name below.
        conf = open("pca_config.txt")
        try:
            column_spec = conf.readline().strip()
        finally:
            conf.close()

    # Parse the selection once (it does not depend on the row):
    # "3" -> (3, 1.0), "3*50" or "3x50" -> (3, 50.0).
    selection = []
    for token in column_spec.split(","):
        if "*" in token:
            pieces = token.split("*")
        elif "x" in token:
            pieces = token.split("x")
        else:
            pieces = [token, "1"]
        selection.append((int(pieces[0]), float(pieces[1])))

    # -- reading the coordinates --
    n_features = 12     # columns 0..11, see the legend below
    upper_bound = 500   # rows holding a value outside (0, 500) are dropped
    self.pca_myArray = []
    self.pca_paths = {}
    f = open(file_name)
    try:
        for line in f:
            # Data rows start with a blank; rows holding NaNs (either
            # spelling) are discarded.
            if line[0] != " " or 'nan' in line or "-1.#IND" in line:
                continue
            # Skip the first column (number of peaks).
            values = [float(v) for v in line[line.index(";", 2) + 2:].split(" ; ")]
            #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
            #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
            features = values[:n_features]
            if len(features) < n_features:
                continue  # truncated row: cannot be range-checked
            if not all(0 < v < upper_bound for v in features):
                continue
            # NOTE(review): the whole raw line is stored as the "path" and
            # later compared with the names in the log file -- confirm this
            # matches the coordinates file layout.
            self.pca_paths[len(self.pca_myArray)] = line
            self.pca_myArray.append([values[col] * factor for col, factor in selection])
    finally:
        f.close()

    if not self.pca_myArray:
        print('No valid rows found in ' + file_name)
        return

    # array convert, calculate PCA, transpose
    self.pca_myArray = np.array(self.pca_myArray, dtype='float')
    print(self.pca_myArray.shape)
    self.pca_myArray = pca(self.pca_myArray, output_dim=2) #other way -> y = mdp.nodes.PCANode(output_dim=2)(gigi)
    myArrayTr = np.transpose(self.pca_myArray)

    # -- plotting --
    X = list(myArrayTr[0])
    Y = list(myArrayTr[1])

    #FIXME
    #This will go away after testing :)
    goodnamefile = open('dataset_s3sT45base_good_blind50.log', 'r')
    try:
        log_lines = goodnamefile.readlines()
    finally:
        goodnamefile.close()
    # First line of the log is skipped; the first word of every other
    # non-blank line is a "good" name. A set makes the lookups O(1).
    goodnames = set(entry.split()[0] for entry in log_lines[1:] if entry.strip())

    Xsyn = []
    Ysyn = []
    Xbad = []
    Ybad = []
    for index in range(len(self.pca_paths)):
        # Drop only the line terminator before comparing.
        if self.pca_paths[index].rstrip('\r\n') in goodnames:
            Xsyn.append(X[index])
            Ysyn.append(Y[index])
        else:
            Xbad.append(X[index])
            Ybad.append(Y[index])

    print('blath %d %d' % (len(Xsyn), len(Ysyn)))
    clustplot = lhc.PlotObject()
    clustplot.add_set(Xbad, Ybad)
    clustplot.add_set(Xsyn, Ysyn)
    clustplot.normalize_vectors()
    clustplot.styles = ['scatter', 'scatter', 'scatter']
    clustplot.colors = [None, 'red', 'green']
    clustplot.destination = 1
    self._send_plot([clustplot])
    self.clustplot = clustplot

    # -- exporting coordinates and plot! --
    # builds the coordinates file: index, X, Y separated by tabs
    f = open('coordinate_punti.txt', 'w')
    try:
        for i, (x, y) in enumerate(zip(X, Y)):
            f.write(str(i) + "\t" + str(x) + "\t" + str(y) + "\n")
    finally:
        f.close()

    # save plot; "*" is replaced because the selection ends up in a file name
    self.do_export("png/" + column_spec.replace("*", "x") + " 1")

def do_multipca(self, args):
    '''
    MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
    Runs the pca command 50 times on the coordinates file. The column
    selection is read from the first line of pca_config.txt; on every run
    the column given as second argument is multiplied by a factor going
    from 1 to 50 (step 1). Each run goes through do_pca.
    (c)Paolo Pancaldi, Massimo Sandal 2009
    '''
    parts = args.split()
    if len(parts) < 2:
        print('Usage: multipca <coordinates filename> <column>')
        return
    file_name, column = parts[0], parts[1]

    # reads the columns of pca, e.g. "1,2,3"; the newline and stray blanks
    # are removed because the selection is forwarded as a single argument
    conf = open("pca_config.txt")
    try:
        tokens = [token.strip() for token in conf.readline().split(",")]
    finally:
        conf.close()

    # Locate the first entry that refers to the requested column. Entries are
    # compared as whole column numbers (any "*n"/"xn" multiplier is ignored),
    # so asking for column "1" never touches column "11".
    position = None
    for index, token in enumerate(tokens):
        if token.split("*")[0].split("x")[0] == column:
            position = index
            break

    # cycling pca
    for factor in range(1, 51):
        scaled = list(tokens)
        if position is not None:
            scaled[position] = column + "*" + str(factor)
        self.do_pca(file_name + " " + ",".join(scaled))

def do_doublepca(self, args):
    '''
    DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
    Runs the pca command on the coordinates file once for every ordered pair
    of distinct columns "i,j", with i and j going from 1 to 11 (110 runs).
    Each run goes through do_pca with "i,j" as column selection.
    (c)Paolo Pancaldi, Massimo Sandal 2009
    '''
    parts = args.split()
    if not parts:
        print('Usage: doublepca <coordinates filename>')
        return
    file_name = parts[0]

    # cycling pca
    # NOTE(review): column 0 is never used here -- confirm this is intended.
    columns = range(1, 12)
    for i in columns:
        for j in columns:
            if i == j:
                continue
            self.do_pca(file_name + " " + str(i) + "," + str(j))

def do_triplepca(self, args):
    '''
    TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
    Runs the pca command on the coordinates file once for every ordered
    triple of distinct columns "i,j,k", with i, j and k going from 1 to 11
    (990 runs). Each run goes through do_pca with "i,j,k" as column selection.
    (c)Paolo Pancaldi, Massimo Sandal 2009
    '''
    parts = args.split()
    if not parts:
        print('Usage: triplepca <coordinates filename>')
        return
    file_name = parts[0]

    # cycling pca
    # NOTE(review): column 0 is never used here -- confirm this is intended.
    columns = range(1, 12)
    for i in columns:
        for j in columns:
            if i == j:
                continue
            for k in columns:
                if k == i or k == j:
                    continue
                self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
def do_pclick(self,args):