Source code for mapyl.clustering._kmeans

import numpy as np

class KMeans:
    """
    KMeans instance

    Clusters samples by repeatedly assigning each one to its nearest
    centroid (Euclidean distance) and then moving every centroid to the
    mean of the samples assigned to it.

    Parameters:
        k (int): The number of clusters. Defaults to 2
        tol (float): The tolerance for centroid movement, measured as the
            summed absolute percent change of a centroid's coordinates
            between two consecutive iterations. Defaults to 0.001

    Attributes:
        cent (dict): Maps each cluster index to its centroid. Set by ``fit``
        classifications (dict): Maps each cluster index to the list of
            samples assigned to it during the last iteration. Set by ``fit``
    """

    def __init__(self, k=2, tol=0.001):
        self.k = k
        self.tol = tol

    def _nearest_centroid(self, point):
        """
        Returns the index of the centroid closest to a single sample

        Ties are resolved in favour of the lowest cluster index.
        """
        distances = [
            np.linalg.norm(point - self.cent[label]) for label in self.cent
        ]
        return int(np.argmin(distances))

    @staticmethod
    def _percent_shift(previous, current):
        """
        Returns the summed absolute percent change between two centroids

        Coordinates whose previous value is exactly zero have no defined
        relative change, so their absolute shift is used instead of
        dividing by zero.
        """
        shift = np.abs(current - previous)
        scale = np.abs(previous)
        denominator = np.where(scale > 0, scale, 1.0)
        return np.sum(shift / denominator * 100.0)

    def fit(self, X, iters=300):
        """
        Fits the instance

        The first ``k`` samples of ``X`` are used as the initial centroids.

        Parameters:
            X (array-like): The X values to be fitted, one sample per row
            iters (int): The maximum number of iterations. Defaults to 300

        Returns:
            The fitted instance itself

        Raises:
            IndexError: If ``X`` holds fewer than ``k`` samples
        """
        X = np.asarray(X)

        # Copy the seeds so the centroids never alias rows of the caller's X.
        self.cent = {i: np.array(X[i], dtype=float) for i in range(self.k)}

        for _ in range(iters):
            self.classifications = {i: [] for i in range(self.k)}
            for feature in X:
                label = self._nearest_centroid(feature)
                self.classifications[label].append(feature)

            prev_cent = dict(self.cent)
            for label, members in self.classifications.items():
                # A cluster that attracted no samples keeps its previous
                # centroid; averaging an empty list would turn it into NaN.
                if members:
                    self.cent[label] = np.average(members, axis=0)

            # Compare magnitudes: a signed sum would let moves in the negative
            # direction (or moves of opposite sign) pass as "converged".
            moved = any(
                self._percent_shift(prev_cent[label], self.cent[label])
                > self.tol
                for label in self.cent
            )
            if not moved:
                break

        return self

    def predict(self, X):
        """
        Predicts the class of an X value

        Parameters:
            X (array-like): A single sample to be classified

        Returns:
            The index (int) of the cluster whose centroid is closest to X
        """
        return self._nearest_centroid(np.asarray(X))