# Gaussian Naive Bayes classifier for the Iris dataset.
# (Pasted-script metadata header removed.)
import numpy as np
import pandas as pa

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    ``fit`` estimates a per-class Gaussian (mean, variance) for every
    feature plus a class prior; ``predict`` picks the class with the
    highest log-posterior under a conditional-independence assumption.
    """

    def fit(self, X, y):
        """Estimate per-class mean, variance and prior from training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) — class labels.
        """
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # One row of statistics per class.
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            # NOTE(review): a feature with zero variance within a class
            # yields division by zero in the density — TODO consider
            # variance smoothing if that can occur in practice.
            self._var[idx, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        """Return the predicted class label for each row of X."""
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Class with the highest log-posterior for a single sample x."""
        posteriors = []
        for idx in range(len(self._classes)):
            prior = np.log(self._priors[idx])
            # FIX: sum log-densities directly instead of log(pdf(...)) —
            # exp() underflows to 0 for outliers, making log() return -inf
            # (with a RuntimeWarning) and corrupting the comparison.
            likelihood = np.sum(self._log_pdf(idx, x))
            posteriors.append(prior + likelihood)
        return self._classes[np.argmax(posteriors)]

    def _log_pdf(self, class_idx, x):
        """Log of the Gaussian density of x under the given class."""
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        return -0.5 * np.log(2.0 * np.pi * var) - (x - mean) ** 2 / (2.0 * var)

    def _pdf(self, class_idx, x):
        """Gaussian density (kept for backward compatibility)."""
        return np.exp(self._log_pdf(class_idx, x))

def accuracy(y_true, y_pred):
    """Fraction of positions where y_pred matches y_true."""
    matches = np.sum(y_true == y_pred)
    return matches / len(y_true)

def map(labels):
    """Map each distinct label to its index in sorted (np.unique) order.

    NOTE: this shadows the builtin ``map``; the name is kept because the
    script below calls it. FIX: compute np.unique once instead of twice.
    """
    uniques = np.unique(labels)
    return dict(zip(uniques, range(len(uniques))))

def compute_confusion_matrix(true, pred):
    """Build a K x K confusion matrix (rows = actual, columns = predicted).

    ``true`` and ``pred`` must already be integer-encoded in [0, K).
    """
    num_classes = len(np.unique(true))  # K inferred from the true labels
    matrix = np.zeros((num_classes, num_classes))
    for i in range(len(true)):
        matrix[true[i], pred[i]] += 1
    return matrix

# --- Data preparation -------------------------------------------------------
# NOTE(review): `labels` (training frame) and `iris_test` are expected to be
# loaded before this point (e.g. via pa.read_csv); that code is not visible
# here. The last column of each frame is the class label.
labels = labels.rename(columns={len(labels.columns) - 1: "iris"}, errors='raise')
iris_test = iris_test.rename(columns={len(iris_test.columns) - 1: "iris"}, errors='raise')

test_y = iris_test["iris"]
iris_test = iris_test.drop(columns="iris")
test_x = iris_test.to_numpy()

train_y = labels["iris"]
labels = labels.drop(columns="iris")
train_x = labels.to_numpy()

# --- Train and evaluate -----------------------------------------------------
nb = NaiveBayes()
nb.fit(train_x, train_y)
pred = nb.predict(test_x)

y_actu = pa.Series(test_y, name='Actual')
y_pred = pa.Series(pred, name='Predicted')
df_confusion = pa.crosstab(y_actu, y_pred)
print(df_confusion)

# recall, precision, accuracy, F1 score
# BUG FIX: both series must be integer-encoded with the SAME label->index
# mapping. Previously y_pred was encoded with map(y_pred), which disagrees
# with map(y_actu) whenever the predictions miss one of the true classes.
label_map = map(y_actu)
y_actu_mapped = y_actu.replace(label_map)
y_pred_mapped = y_pred.replace(label_map)

conf_matrix = compute_confusion_matrix(y_actu_mapped, y_pred_mapped)
n_classes = len(conf_matrix)
ret_mat = np.zeros((n_classes, 3))  # columns: precision, recall, f1 (was hard-coded 3x3)

print("accuracy = ", accuracy(y_actu,y_pred)*100, "%" )

pred_totals = np.sum(conf_matrix, axis=0)  # column sums: predicted-as-k counts
true_totals = np.sum(conf_matrix, axis=1)  # row sums: actually-k counts
for k in range(n_classes):
    # BUG FIX: precision = TP / column sum. Previously both columns divided
    # by the row sum, so "precision" and "recall" were the same number.
    ret_mat[k][0] = conf_matrix[k][k] / pred_totals[k]
    ret_mat[k][1] = conf_matrix[k][k] / true_totals[k]
    ret_mat[k][2] = 2 * ((ret_mat[k][0] * ret_mat[k][1]) / (ret_mat[k][0] + ret_mat[k][1]))

print( "                    precison            recall      f1-score")
print("    Iris-setosa     ",  ret_mat[0][0]*100 ,  "%          " , ret_mat[0][1]*100  ,"%      ",   ret_mat[0][2]  )
print("Iris-versicolor      ",  ret_mat[1][0]*100 ,  "%          " , ret_mat[1][1]*100  ,"%       ",  ret_mat[1][2]  )
print(" Iris-virginica      ",  ret_mat[2][0]*100 ,  "%          " , ret_mat[2][1]*100  ,"%       ",  ret_mat[2][2]  )

#print(classification_report(y_actu, y_pred))

# --- Interactive classification loop ----------------------------------------
while True:
    print("Enter data in format 1.1 2.2 3.3 4.4 with ", len(iris_test.columns), " values or enter stop to stop")
    inputed = input()
    if inputed.lower() == "stop":
        # FIX: "stop" is a normal termination — break (exit code 0) instead
        # of exit(1), which reported failure to the shell.
        break
    # FIX: np.fromstring text parsing (sep=' ') is deprecated in NumPy;
    # split-and-convert is the supported equivalent.
    input_set = np.array(inputed.split(), dtype=float)
    print(nb.predict([input_set]))