# ---------------------------------------------------
#
# kmeans v.0.5.6 - 23 / 01 / 2013
# Authors:
# Alessio Papini - Department of Plant Biology University of Florence Italy, Via La Pira, 4 Firenze, mail alpapiniATunifi.it
# Ugo Santosuosso - Department of Anatomy, Histology and Forensic Medicine, Largo Brambilla, 1 Firenze, mail ugoATunifi.it
#
# Implements: k-means clustering of 2-D points read from a CSV file
#
# ---------------------------------------------------
"""Cluster 2-D points from a CSV file with k-means and save every cluster.

Command line::

    python <this script> coordinates.csv Number_of_cluster [Graph]

Each CSV row must hold four comma-separated fields: point number, x, y and
weight.  One ``cluster<i>di<k><csv name>.txt`` file is written per requested
cluster plus one ``centroids<k>cluster<csv name>.txt`` file, all in the
current directory.  ``Graph`` is ``G`` (print debug output and show a plot)
or ``N`` (default, no plot).
"""
#
# import the libraries
#
import os
import sys
import math
import numpy
import csv
from numpy import vstack, array
from numpy.random import rand
from scipy.cluster.vq import kmeans, vq, whiten
# NOTE: the former `from numpy import *` was dropped: it shadowed builtins
# such as sum/min/max and only `zeros` was taken from it.
# NOTE: pylab is imported lazily in _plot_clusters() so that runs without a
# graph do not need matplotlib.

#
# -----------------------------------------------------
#
# Minimum length of argv: program name, CSV file, number of clusters.
NUM_ARGS = 3


def _print_usage(nome):
    """Print the command-line help for the program called *nome*."""
    for riga in (
        " ------------------------------- ",
        " ",
        " ------ " + nome + " --------",
        " ",
        " The coordinates must be provided in a simple text file in csv format,",
        " with four columns per row: point number, x coordinate, y coordinate, weight.",
        " Values on a row separated by a comma (0,0) is upper left corner of the image",
        " How the program works: insert parameters following the program name: that is ",
        " ",
        " python " + nome + " coordinates.csv Number_of_cluster Graph",
        " ",
        " Graph = G/N ",
        " G = present a Graphical rapresentation of clusters and centroids",
        " N = No Graph (default)",
        " ",
        " No path specifications = current directory",
        " ",
        " ------------------------------- ",
    ):
        print(riga)


def _open_csv(path):
    """Open *path* in the mode the csv module expects on this Python version."""
    if sys.version_info[0] >= 3:
        # Python 3: text mode, newline translation left to the csv module.
        return open(path, "r", newline="")
    # Python 2: the csv module wants a binary file object.
    return open(path, "rb")


def _read_points(path):
    """Read the ``num,x,y,weight`` rows of *path*.

    Returns a tuple ``(coords, weights)`` where ``coords`` is an (N, 2) float
    array of the integer x/y values and ``weights`` is a list of N ints.
    Empty rows are skipped.

    Raises ValueError when a non-empty row does not have exactly four fields
    or when x, y or weight is not an integer.
    """
    coords = []
    weights = []
    with _open_csv(path) as fcsv:
        for line_no, row in enumerate(csv.reader(fcsv), 1):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            if len(row) != 4:
                raise ValueError(
                    "%s, row %d: expected 4 columns (num,x,y,weight), found %d"
                    % (path, line_no, len(row)))
            _num, x_txt, y_txt, w_txt = row
            coords.append((int(x_txt), int(y_txt)))
            # Convert the weight here so bad data fails before any output
            # file has been created.
            weights.append(int(w_txt))
    # reshape keeps the (N, 2) shape even when no row was read.
    return numpy.array(coords, dtype=float).reshape(-1, 2), weights


def _write_clusters(tag, coords, weights, idx, nclust):
    """Write one ``cluster<i>di<nclust><tag>.txt`` file per requested cluster."""
    for k in range(nclust):
        nome = "cluster" + str(k + 1) + "di" + str(nclust) + tag + ".txt"
        with open(nome, "w") as salva:
            for j in numpy.flatnonzero(idx == k):
                j = int(j)
                # NOTE(review): the first field is the 0-based row index, not
                # the "num" column of the input - kept as in the original.
                salva.write("%d,%d,%d,%d\n" % (
                    j, int(coords[j, 0]), int(coords[j, 1]), weights[j]))


def _write_centroids(tag, centroids, nclust):
    """Write the truncated integer centroid coordinates, one ``x,y`` per line."""
    nome = "centroids" + str(nclust) + "cluster" + tag + ".txt"
    with open(nome, "w") as salva:
        # Iterate over the centroids actually returned: kmeans may return
        # fewer than nclust of them.
        for cx, cy in centroids:
            salva.write("%d,%d\n" % (int(cx), int(cy)))


def _plot_clusters(coords, centroids, idx, nclust):
    """Plot every cluster with its own marker series, then the centroids."""
    from pylab import plot, show
    for k in range(nclust):
        members = idx == k
        plot(coords[members, 0], coords[members, 1], 'o')
    plot(centroids[:, 0], centroids[:, 1], 's', markersize=6)
    show()


def main(argv=None):
    """Run the program; *argv* defaults to ``sys.argv``.

    argv[1] is the CSV file, argv[2] the number of clusters and the optional
    argv[3] the Graph flag ("G" or "N", default "N").

    Returns the process exit status: 0 on success or after printing the
    usage text, 1 when the number of clusters or the data set is unusable.
    Errors raised while reading the CSV file (IOError/ValueError) propagate.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < NUM_ARGS:
        _print_usage(argv[0] if argv else "")
        return 0

    i2csv = argv[1]
    try:
        nclust = int(argv[2])
    except ValueError:
        sys.stderr.write("Number_of_cluster must be an integer, got %r\n" % (argv[2],))
        return 1
    if nclust < 1:
        sys.stderr.write("Number_of_cluster must be at least 1\n")
        return 1
    # Graph is optional; it used to be read before the argument-count check.
    Graph = argv[3] if len(argv) > 3 else "N"

    allcoord, pesi = _read_points(i2csv)
    n_points = allcoord.shape[0]
    if n_points == 0:
        sys.stderr.write("No data rows found in %s\n" % (i2csv,))
        return 1
    if nclust > n_points:
        sys.stderr.write("Cannot build %d clusters from %d points\n" % (nclust, n_points))
        return 1

    #------------------------------------------------------------
    #
    # Cluster generation - it builds the two arrays with results
    #
    #------------------------------------------------------------
    centroids, _ = kmeans(allcoord, nclust)
    idx, _ = vq(allcoord, centroids)
    n_found = centroids.shape[0]
    if n_found < nclust:
        sys.stderr.write(
            "Warning: only %d of the %d requested clusters are non-empty\n"
            % (n_found, nclust))

    #
    # Debug output
    #
    if Graph == "G":
        for j in range(n_points):
            print("Coordinate  %s %s %s" % (allcoord[j, 0], allcoord[j, 1], idx[j]))
        for k in range(n_found):
            print("Centroidi  %s %s" % (centroids[k, 0], centroids[k, 1]))

    #
    # Write the results; only the file name of the input is embedded in the
    # output names so that an input given with a path still yields valid
    # file names in the current directory.
    #
    tag = os.path.basename(i2csv)
    _write_clusters(tag, allcoord, pesi, idx, nclust)
    _write_centroids(tag, centroids, nclust)

    if Graph == "G":
        _plot_clusters(allcoord, centroids, idx, nclust)

    print("")
    print(" End of program")
    print("")
    return 0


if __name__ == "__main__":
    sys.exit(main())