반응형
/*******************************************************************************************************************
-- Title : [Py3.5] K-Means 및 기타 클러스터링 알고리즘 구현/소개
-- Reference : www.kdnuggets.com/2017/03/k-means-clustering-algorithms-intro-python.html
-- Key word : 클러스터링 clustering k-means sklearn scikit-learn networkx matplotlib pyplot cluster 클러스터
spectral clustering model agglomerative clustering model k-means clustering model
affinity propagation clustering model
*******************************************************************************************************************/
■ Figures
■ Scripts
"""Compare clustering algorithms on Zachary's Karate Club graph.

Fits Spectral, Agglomerative, K-means and Affinity Propagation clustering
on the graph's edge matrix, scores each result against the ground truth
with NMI and ARS, and plots the average of the two scores per model.
"""
from sklearn import cluster
import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score


# ------------------------------
# -- Declaration of functions
# ------------------------------

def drawCommunities(G, partition, pos):
    """Draw the nodes of G coloured by community, then the edges and title.

    Args:
        G: Graph in networkx form.
        partition: Dict mapping each node to its community label. Labels
            must be mutually orderable (they are sorted to assign colours).
        pos: Node positions, e.g. from nx.spring_layout(G).
    """
    # Group the nodes by community label
    dictList = defaultdict(list)
    for node, com in partition.items():
        dictList[com].append(node)

    # Iterate over the labels that actually occur, so communities are drawn
    # even when the labels are not exactly 0..k-1.
    for i, com in enumerate(sorted(dictList)):
        # Cycle through the red / green / blue channel; the intensity grows
        # with the community index.
        amplifier = i % 3
        multi = (i / 3) * 0.3
        rgb = [0.0, 0.0, 0.0]
        # Clamp: an RGB channel above 1.0 is not a valid colour
        rgb[amplifier] = min(1.0, 0.1 + multi)

        # Draw Nodes. The colour is wrapped in a one-row sequence so it is
        # read as a single RGB colour for every node rather than as three
        # scalar values (ambiguous when a community has exactly 3 nodes).
        nx.draw_networkx_nodes(G, pos,
                               nodelist=dictList[com],
                               node_color=[tuple(rgb)],
                               node_size=500,
                               alpha=0.8)

    # Draw edges and final plot
    plt.title("Zachary's Karate Club")
    nx.draw_networkx_edges(G, pos, alpha=0.5)


def listToDict(list):
    """Return a dict mapping each index of the sequence to its element."""
    # The parameter name shadows the builtin; kept for interface compatibility
    return dict(enumerate(list))


def graphToEdgeMatrix(G):
    """Return the 0/1 edge matrix of G as a list of lists.

    Row and column order follow the iteration order of G. Entry [a][b] is 1
    when b is a neighbor of a; diagonal elements are set to 1.
    """
    # Map every node to a row index so node ids need not be 0..n-1
    index = {node: position for position, node in enumerate(G)}
    size = len(index)

    # Initialize Edge Matrix
    edgeMat = [[0] * size for _ in range(size)]

    # Set 0 or 1 (diagonal elements are set to 1)
    for node, row in index.items():
        for neighbor in G.neighbors(node):
            edgeMat[row][index[neighbor]] = 1
        edgeMat[row][row] = 1

    return edgeMat


# ------------------------------
# -- Initialization
# ------------------------------

# Initialize some variables to help us with the generalization of the program
kClusters = 2
results = []
nmiResults = []
arsResults = []

# Load and Store both data and groundtruth of Zachary's Karate Club
G = nx.karate_club_graph()
groundTruth = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1,
               0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

# Transform our graph data into matrix form
edgeMat = graphToEdgeMatrix(G)

# Positions the nodes using Fruchterman-Reingold force-directed algorithm
pos = nx.spring_layout(G)

drawCommunities(G, listToDict(groundTruth), pos)

# ------------------------------
# -- Fit Spectral Clustering Model
# ------------------------------

# Spectral Clustering Model
spectral = cluster.SpectralClustering(n_clusters=kClusters,
                                      affinity="precomputed",
                                      n_init=200)
spectral.fit(edgeMat)

# Transform our data to list form and store them in results list
results.append(list(spectral.labels_))

# ------------------------------
# -- Fit Agglomerative Clustering Model
# ------------------------------

# Agglomerative Clustering Model
agglomerative = cluster.AgglomerativeClustering(n_clusters=kClusters,
                                                linkage="ward")
agglomerative.fit(edgeMat)

# Transform our data to list form and store them in results list
results.append(list(agglomerative.labels_))

# ------------------------------
# -- Fit K-means Clustering Model
# ------------------------------

# K-means Clustering Model
kmeans = cluster.KMeans(n_clusters=kClusters, n_init=200)
kmeans.fit(edgeMat)

# Transform our data to list form and store them in results list
results.append(list(kmeans.labels_))

# ------------------------------
# -- Fit Affinity Propagation Clustering Model
# ------------------------------

# Affinity Propagation Clustering Model
affinity = cluster.affinity_propagation(S=edgeMat, max_iter=200, damping=0.6)

# The labels are the second element of the returned tuple;
# transform them to list form and store them in results list
results.append(list(affinity[1]))

# Score every model against the ground truth
for labels in results:
    nmiResults.append(normalized_mutual_info_score(groundTruth, labels))
    arsResults.append(adjusted_rand_score(groundTruth, labels))

# ------------------------------
# -- Show models
# ------------------------------

# Code for plotting results

# Average of NMI and ARS
y = [sum(x) / 2 for x in zip(nmiResults, arsResults)]

xlabels = ['Spectral', 'Agglomerative', 'Kmeans', 'Affinity Propagation']

fig = plt.figure()
ax = fig.add_subplot(111)

# Set parameters for plotting
ind = np.arange(len(y))
width = 0.35

# Create barchart and set the axis limits and titles.
# Bars are explicitly centred on their x position so that the tick labels
# below line up with them.
ax.bar(ind, y, width, color='blue', align='center')
ax.set_xlim(-width, len(ind) + width)
ax.set_ylim(0, 2)
ax.set_ylabel('Average Score (NMI,ARS)')
ax.set_title('Score Evaluation')

# Add the xlabels to the chart (one tick at the centre of each bar)
ax.set_xticks(ind)
xtickNames = ax.set_xticklabels(xlabels)
plt.setp(xtickNames, fontsize=12)

# Add the actual value on top of each bar
for i, v in enumerate(y):
    ax.text(i, v, str(round(v, 2)), color='blue', fontweight='bold',
            ha='center', va='bottom')

# Show the final plot
plt.show()
반응형