1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Data load
# The UCI iris.data file ships WITHOUT a header row.  Reading it with the
# default header inference silently consumed the first sample as column
# names, leaving 149 rows and shifting every class slice below by one.
iris = pd.read_csv('./Iris/iris.data', header=None)

# Data process: 4 feature columns + 1 species-name column per row.
iris = iris.values.reshape((-1, 5))
xData = iris[:, 0:4].reshape((-1, 4))
# Encode species strings as integer codes 0/1/2 (sorted category order).
yData = pd.Categorical(iris[:, -1])
yData = yData.codes.reshape((-1, 1))

# Classifier 0: rows 0-99 (setosa=0 vs versicolor=1).
# Classifier 1: rows 50-149 (versicolor vs virginica), relabelled to 0/1.
x0 = xData[0:100, :]
x1 = xData[50:150, :]
y0 = yData[:100, :]
y1 = yData[50:150, :] - 1

# Train/test split (fixed seed for reproducibility).
x0Train, x0Test, y0Train, y0Test = train_test_split(x0, y0, random_state=0)
x1Train, x1Test, y1Train, y1Test = train_test_split(x1, y1, random_state=0)


# Init
def init(x):
    """Prepend a bias column of ones to *x* and build zero weights.

    Returns a pair ``(augmented, weights)`` where ``augmented`` is *x* with
    a leading all-ones column and ``weights`` has shape (1, n_features + 1).
    """
    sample_count = x.shape[0]
    bias_column = np.ones([sample_count, 1])
    augmented = np.column_stack((bias_column, x))
    weights = np.zeros(shape=(1, augmented.shape[1]))
    return augmented, weights


def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^-z) for a NumPy array *z*.

    The cast to float guards against object-dtype arrays coming from the
    pandas ``.values`` extraction upstream.
    """
    exp_neg = np.exp(-z.astype(float))
    return 1 / (1 + exp_neg)


def logisticRegression(x, y, learning_rate, epoch):
    """Train binary logistic regression with batch gradient descent.

    Parameters
    ----------
    x : (m, n) feature matrix; a bias column of ones is prepended here.
    y : (m, 1) labels in {0, 1}.
    learning_rate : gradient-descent step size.
    epoch : number of full-batch iterations.

    Returns
    -------
    cost : (epoch, 1) cross-entropy loss recorded at each iteration.
    w : (1, n + 1) learned weights, bias term first.
    """
    m = x.shape[0]
    x = np.column_stack((np.ones([m, 1]), x))
    w = np.zeros(shape=(1, x.shape[1]))
    cost = np.zeros(shape=(epoch, 1))
    for i in range(epoch):
        # Hypothesis g = sigmoid(x w^T), shape (m, 1).  Inlined so the
        # function is self-contained; cast guards object-dtype input.
        g = 1 / (1 + np.exp(-np.dot(x, w.T).astype(float)))
        # BUG FIX: the original re-bound `inner = inner * xj` inside the
        # per-feature loop, so the gradient for feature j was scaled by the
        # product of ALL previously visited feature columns.  The correct
        # batch gradient is x^T (g - y) / m, computed in one step.
        grad = np.dot(x.T, g - y) / m
        w = w - learning_rate * grad.T
        # Cross-entropy loss for the weights that produced g.
        cost[i] = (- 1 / m) * np.sum(y * np.log(g) + (1 - y) * (np.log(1 - g)))
    return cost, w


# predict
def test(x, y, w):
    """Print predicted-minus-true labels for a test set (0 means correct).

    *x* is the raw feature matrix, *y* the (m, 1) ground-truth labels and
    *w* the (1, n + 1) weight row learned by ``logisticRegression``.
    """
    rows = x.shape[0]
    features = np.column_stack((np.ones([rows, 1]), x))
    # Inlined sigmoid; thresholding at 0.5 maps probabilities to 0/1.
    probabilities = 1 / (1 + np.exp(-np.dot(features, w.T).astype(float)))
    labels = (probabilities >= 0.5).astype(np.int16)
    print(list(labels.T - y.T)[0])


def predict(x, w0, w1):
    """Classify one iris sample with the two cascaded binary classifiers.

    *x* is a (1, 4) feature row.  *w0* separates setosa (0) from the other
    two species; *w1* separates versicolor (0) from virginica (1).
    Prints the predicted species name.
    """
    m = x.shape[0]
    x = np.column_stack((np.ones([m, 1]), x))
    # .item() extracts the scalar from the (1, 1) dot product; truth-testing
    # the array directly (as the original did) is fragile and deprecated in
    # newer NumPy releases.
    p0 = 1 / (1 + np.exp(-np.dot(x, w0.T).item()))
    p1 = 1 / (1 + np.exp(-np.dot(x, w1.T).item()))
    y0_predict = 0 if p0 < 0.5 else 1
    y1_predict = 0 if p1 < 0.5 else 1
    if y0_predict == 0 and y1_predict == 0:
        print('The specie of X is Iris-setosa')
    elif y0_predict == 1 and y1_predict == 0:
        print('The specie of X is Iris-versicolor')
    elif y0_predict == 1 and y1_predict == 1:
        print('The specie of X is Iris-virginica')
    # (y0, y1) == (0, 1) is contradictory for this cascade; like the
    # original, nothing is printed in that case.


# Train the two one-vs-rest boundary classifiers, report test-set errors
# and accumulate their learning curves on the current matplotlib figure.
Cost0, W0 = logisticRegression(x0Train, y0Train, 0.01, 500)
test(x0Test, y0Test, W0)
plt.plot(Cost0)

Cost1, W1 = logisticRegression(x1Train, y1Train, 0.01, 500)
test(x1Test, y1Test, W1)
plt.plot(Cost1)

# Two hand-picked samples: X0 resembles a setosa, X1 a versicolor.
X0 = np.array([5.1, 3.4, 1.5, 0.2]).reshape((1, -1))
X1 = np.array([4.9, 2.4, 3.3, 1.0]).reshape((1, -1))
for header, sample in (('y=0', X0), ('__________________\ny=1', X1)):
    print(header)
    predict(sample, W0, W1)