2323from matplotlib import pyplot as plt
2424from sklearn .metrics import pairwise_distances
2525
26+
def get_initial_medoids(data, k, seed=None):
    """Pick ``k`` distinct rows of ``data`` at random to serve as starting medoids.

    Args:
        data: Array-like whose first axis indexes the samples. It is converted
            with ``np.asarray``, so nested lists are accepted as well as arrays.
        k: Number of medoids to draw. Must not exceed the number of samples.
        seed: Optional seed forwarded to ``np.random.default_rng``; passing the
            same seed reproduces the same selection.

    Returns:
        numpy.ndarray containing the ``k`` selected rows of ``data``.

    Raises:
        ValueError: If ``k`` is larger than the number of samples.
    """
    data = np.asarray(data)
    n = data.shape[0]
    if k > n:
        # Sampling without replacement cannot produce more rows than exist;
        # fail with a message that names both quantities.
        raise ValueError(
            f"cannot select k={k} distinct medoids from only {n} samples"
        )

    rng = np.random.default_rng(seed)
    # replace=False guarantees that no sample is chosen twice.
    indices = rng.choice(n, k, replace=False)
    return data[indices]
3233
34+
def assign_clusters(data, medoids):
    """Assign every sample to its nearest medoid using Euclidean distance.

    Args:
        data: Samples, one per row.
        medoids: Current medoids, one per row, with the same number of columns
            as ``data``.

    Returns:
        1-D integer array with one entry per sample: the row index into
        ``medoids`` of the closest medoid. On ties ``np.argmin`` picks the
        lowest index.
    """
    # The metric name must be exactly "euclidean"; a padded name such as
    # " euclidean" is rejected by scikit-learn as an unknown metric.
    distances = pairwise_distances(data, medoids, metric="euclidean")
    # Row i of `distances` holds the distance from sample i to every medoid.
    return np.argmin(distances, axis=1)
3739
40+
3841def revise_medoids (data , k , cluster_assignment ):
3942 new_medoids = []
4043 for i in range (k ):
@@ -47,6 +50,7 @@ def revise_medoids(data, k, cluster_assignment):
4750 new_medoids .append (members [medoid_index ])
4851 return np .array (new_medoids )
4952
53+
5054def compute_heterogeneity (data , k , medoids , cluster_assignment ):
5155 heterogeneity = 0.0
5256 for i in range (k ):
@@ -57,14 +61,18 @@ def compute_heterogeneity(data, k, medoids, cluster_assignment):
5761 heterogeneity += np .sum (distances ** 2 )
5862 return heterogeneity
5963
64+
6065def kmedoids (data , k , initial_medoids , maxiter = 100 , verbose = False ):
6166 medoids = initial_medoids .copy ()
6267 prev_assignment = None
6368 for itr in range (maxiter ):
6469 cluster_assignment = assign_clusters (data , medoids )
6570 medoids = revise_medoids (data , k , cluster_assignment )
6671
67- if prev_assignment is not None and (prev_assignment == cluster_assignment ).all ():
72+ if (
73+ prev_assignment is not None
74+ and (prev_assignment == cluster_assignment ).all ()
75+ ):
6876 break
6977
7078 if verbose and prev_assignment is not None :
@@ -75,21 +83,24 @@ def kmedoids(data, k, initial_medoids, maxiter=100, verbose=False):
7583
7684 return medoids , cluster_assignment
7785
86+
# Optional plotting helper (3D scatter of the first three feature columns)
def plot_clusters(data, medoids, cluster_assignment):
    """Show a 3D scatter plot of the samples and the medoids.

    Only columns 0, 1 and 2 of ``data`` and ``medoids`` are drawn, so both
    arrays need at least three columns.

    Args:
        data: 2-D array of samples, one per row.
        medoids: 2-D array of medoids, one per row.
        cluster_assignment: Per-sample cluster labels, used to colour the
            sample points.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ax = plt.axes(projection="3d")

    # Samples, coloured by cluster label. The colormap name must be exactly
    # "viridis" (no padding) to be recognised by matplotlib.
    ax.scatter(
        data[:, 0],
        data[:, 1],
        data[:, 2],
        c=cluster_assignment,
        cmap="viridis",
    )
    # Medoids, drawn as large red crosses on top of the samples.
    ax.scatter(
        medoids[:, 0],
        medoids[:, 1],
        medoids[:, 2],
        c="red",
        s=100,
        marker="x",
    )

    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.set_zlabel("Z")
    ax.set_title("3D K-Medoids Clustering")
    plt.show()
8897
98+
8999# Optional test
90100if __name__ == "__main__" :
91101 from sklearn import datasets
92- X = datasets .load_iris ()['data' ]
102+
103+ X = datasets .load_iris ()["data" ]
93104 k = 3
94105 medoids = get_initial_medoids (X , k , seed = 0 )
95106 medoids , clusters = kmedoids (X , k , medoids , maxiter = 50 , verbose = True )
0 commit comments