import numpy as np
import pandas as pa
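
# Gaussian Naive Bayes on the Iris dataset: estimate per-class feature
# means, variances, and priors from iris_training.txt, report accuracy,
# precision, recall, and F1 on iris_test.txt, then classify samples
# entered interactively.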



class NaiveBayes:
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # calculate mean, var, and prior for each class
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

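    # score one sample against every class via log-posteriors: under the naive
    # (conditional independence) assumption, log P(y | x) is proportional to
    # log P(y) + sum_i log P(x_i | y); working in log space avoids numerical
    # underflow from multiplying many small densities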
    def _predict(self, x):
        posteriors = []

        # calculate posterior probability for each class
        for idx, c in enumerate(self._classes):
            prior = np.log(self._priors[idx])
            posterior = np.sum(np.log(self._pdf(idx, x)))
            posterior = prior + posterior
            posteriors.append(posterior)

        # return class with highest posterior probability
        return self._classes[np.argmax(posteriors)]

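    # per-feature Gaussian likelihood:
    # f(x) = exp(-(x - mean)^2 / (2 * var)) / sqrt(2 * pi * var)
    # (assumes every per-class feature variance is nonzero, which holds for Iris)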
    def _pdf(self, class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(- (x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator


def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)


def label_to_index(labels):
    # map each distinct label to an integer index (renamed so it no longer
    # shadows the built-in map)
    classes = np.unique(labels)
    return dict(zip(classes, range(len(classes))))
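# e.g. label_to_index(["b", "a", "b"]) returns {"a": 0, "b": 1},
# since np.unique sorts the distinct labels before they are numbered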


def compute_confusion_matrix(true, pred):
    K = len(np.unique(true))  # number of classes
    result = np.zeros((K, K))

    # rows are actual classes, columns are predicted classes
    for i in range(len(true)):
        result[true[i], pred[i]] += 1

    return result
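
# e.g. compute_confusion_matrix([0, 1, 1], [0, 1, 0]) returns [[1., 0.], [1., 1.]]:
# the actual-1 sample predicted as 0 lands in row 1, column 0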


# note: the training file uses "." as its decimal separator, the test file ","
iris_train = pa.read_csv("iris_training.txt", header=None, decimal=".", sep=r"\s+", engine='python')
iris_train = iris_train.rename(columns={len(iris_train.columns) - 1: "iris"}, errors='raise')

iris_test = pa.read_csv("iris_test.txt", header=None, decimal=",", sep=r"\s+", engine='python')
iris_test = iris_test.rename(columns={len(iris_test.columns) - 1: "iris"}, errors='raise')

test_y = iris_test["iris"]

iris_test = iris_test.drop(columns="iris")
test_x = iris_test.to_numpy()

train_y = iris_train["iris"]

iris_train = iris_train.drop(columns="iris")
train_x = iris_train.to_numpy()

nb = NaiveBayes()
nb.fit(train_x, train_y)
pred = nb.predict(test_x)

y_actu = pa.Series(test_y, name='Actual')
y_pred = pa.Series(pred, name='Predicted')
df_confusion = pa.crosstab(y_actu, y_pred)

print(df_confusion)
# recall, precision, accuracy, F1 score

# use one shared label-to-index mapping so actual and predicted labels map to
# the same integers, even if some class never appears among the predictions
mapping = label_to_index(y_actu)
y_actu_mapped = y_actu.replace(mapping)
y_pred_mapped = y_pred.replace(mapping)

conf_matrix = compute_confusion_matrix(y_actu_mapped, y_pred_mapped)
n_classes = len(conf_matrix)
ret_mat = np.zeros((n_classes, 3))  # columns: precision, recall, F1

print("accuracy = ", accuracy(y_actu,y_pred)*100, "%" )


col_sums = np.sum(conf_matrix, axis=0)  # totals per predicted class
row_sums = np.sum(conf_matrix, axis=1)  # totals per actual class
for i in range(n_classes):
    ret_mat[i][0] = conf_matrix[i][i] / col_sums[i]  # precision (the original divided by the row sum here, which is recall's denominator)
    ret_mat[i][1] = conf_matrix[i][i] / row_sums[i]  # recall
    ret_mat[i][2] = 2 * (ret_mat[i][0] * ret_mat[i][1]) / (ret_mat[i][0] + ret_mat[i][1])  # F1

print("                 precision        recall        f1-score")
for name, i in sorted(mapping.items(), key=lambda kv: kv[1]):
    print("%15s   %10.2f %%   %10.2f %%   %10.4f" % (name, ret_mat[i][0] * 100, ret_mat[i][1] * 100, ret_mat[i][2]))



#print(classification_report(y_actu, y_pred))


while True:
    print("Enter", len(iris_test.columns), "space-separated values (e.g. 1.1 2.2 3.3 4.4), or 'stop' to quit")
    user_input = input()
    if user_input.lower() == "stop":
        break
    sample = np.fromstring(user_input, dtype=float, sep=' ')
    if len(sample) != len(iris_test.columns):
        print("expected", len(iris_test.columns), "values, got", len(sample))
        continue
    print(nb.predict([sample]))
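
# example session, assuming the standard 4-feature Iris files:
# entering "5.1 3.5 1.4 0.2" should print something like ['Iris-setosa']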