# Source code for mapyl.clustering._kmeans

import numpy as np

class KMeans:
    """
    K-means clustering (Lloyd's algorithm) with deterministic initialisation

    The first ``k`` rows of the training data are used as the initial
    centroids, so fitting the same data always gives the same result.

    Parameters:
        k (int): The number of clusters. Defaults to 2

        tol (float): Convergence threshold. Fitting stops once, for every
            centroid, the summed absolute per-coordinate change (expressed
            in percent of the previous coordinate value) is at most ``tol``.
            Defaults to 0.001

    Attributes (set by ``fit``):
        cent (dict): Maps cluster index -> centroid (ndarray)

        classifications (dict): Maps cluster index -> list of the training
            samples assigned to that cluster in the last iteration
    """
    def __init__(self, k=2, tol=0.001):
        self.k = k
        self.tol = tol

    def _closest(self, sample):
        """Return the index of the centroid nearest (Euclidean) to ``sample``."""
        # self.cent is keyed 0..k-1 in insertion order, so the position in
        # the distance list is also the cluster index.
        dist = [np.linalg.norm(sample - self.cent[c]) for c in self.cent]
        return int(np.argmin(dist))

    def _moved(self, previous, current):
        """Return True if a centroid moved by more than ``tol`` percent."""
        shift = np.abs(np.asarray(current, dtype=float) - previous)
        scale = np.abs(np.asarray(previous, dtype=float))
        # Use the absolute shift for coordinates that were exactly zero:
        # dividing by them would yield nan/inf and corrupt the comparison.
        denom = np.where(scale == 0, 1.0, scale)
        # Absolute values keep moves in opposite directions (or towards
        # smaller values) from cancelling out / looking like "no movement".
        return bool(np.sum(shift / denom * 100.0) > self.tol)

    def fit(self, X, iters=300):
        """
        Fits the instance

        Parameters:
            X (array-like): The X values to be fitted, one sample per row.
                Must contain at least ``k`` samples

            iters (int): The maximum number of iterations. Defaults to 300

        Returns: The fitted instance (``self``)

        Raises:
            IndexError: If ``X`` has fewer than ``k`` samples
        """
        X = np.asarray(X)

        # Copy the seeds so the centroids never alias rows of the caller's data.
        self.cent = {i: np.array(X[i], dtype=float) for i in range(self.k)}

        for _ in range(iters):
            self.classifications = {i: [] for i in range(self.k)}

            # Assignment step: attach every sample to its nearest centroid.
            for feature in X:
                self.classifications[self._closest(feature)].append(feature)

            prev_cent = dict(self.cent)

            # Update step: move each centroid to the mean of its members.
            for label, members in self.classifications.items():
                # An empty cluster has no mean; keep its previous centroid
                # instead of replacing it with nan.
                if members:
                    self.cent[label] = np.mean(members, axis=0)

            if not any(self._moved(prev_cent[c], self.cent[c]) for c in self.cent):
                break
        return self

    def predict(self, X):
        """
        Predicts the class of an X value

        Parameters:
            X (array-like): A single sample to be classified. It is compared
                as a whole against every centroid, so pass one sample at a
                time

        Returns: The index (int) of the cluster whose centroid is nearest
            to the supplied X
        """
        return self._closest(np.asarray(X))