import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize

from ex2.ex2_ import plotdata, map_feature, sigmoid, plot_decision_boundary, predict

# COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
#   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
#   theta as the parameter for regularized logistic regression and the
#   gradient of the cost w.r.t. the parameters.
# TODO: np.log(h) and np.log(1 - h) are numerically unstable when the
# sigmoid saturates; see the stable variant below.
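# The quantity computed (the intercept theta_0 is excluded from the penalty):
#   J(theta) = (1/m) * sum_i [ -y_i*log(h_i) - (1 - y_i)*log(1 - h_i) ]
#              + (lambda / (2*m)) * sum_{j >= 1} theta_j^2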
def cost_function_reg(theta, X, y, ld):
    m = y.shape[0]
    d1 = X.shape[1]
    d2 = y.shape[1]
    theta = theta.reshape((d1, d2))
    h = sigmoid(X.dot(theta))
    # Zero the intercept row so it is not regularized; copy first, because
    # reshape returns a view and writing through it would mutate the
    # optimizer's parameter vector.
    theta2 = theta.copy()
    theta2[0, :] = 0
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m + ld / m / 2 * np.sum(theta2 ** 2)
    grad = (h - y).T.dot(X).T / m + ld / m * theta2
    return J, grad.flatten()
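

# A numerically stable alternative (a sketch; the rest of the script keeps the
# original function): rewriting the cross-entropy in terms of z = X.dot(theta)
# via log(1 + e^z) = np.logaddexp(0, z) avoids taking the log of a saturated
# sigmoid, since -y*log(h) - (1 - y)*log(1 - h) == logaddexp(0, z) - y*z.
def cost_function_reg_stable(theta, X, y, ld):
    m = y.shape[0]
    theta = theta.reshape((X.shape[1], y.shape[1]))
    z = X.dot(theta)
    theta2 = theta.copy()
    theta2[0, :] = 0
    J = np.sum(np.logaddexp(0, z) - y * z) / m + ld / m / 2 * np.sum(theta2 ** 2)
    grad = (sigmoid(z) - y).T.dot(X).T / m + ld / m * theta2
    return J, grad.flatten()
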
## Load Data
# The first two columns contain the X values and the third column
# contains the label (y).
data = np.loadtxt('textdata/ex2data2.txt', delimiter=',')

X = data[:, :2]
y = data[:, 2:3]

plotdata(X, y)

# Labels and Legend
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')

# Specified in plot order
plt.legend(['y = 1', 'y = 0'])
plt.show()

## =========== Part 1: Regularized Logistic Regression ============
# Add Polynomial Features

# Note that map_feature also adds a column of ones for us, so the intercept
# term is handled
X = map_feature(X[:, :1], X[:, 1:2])
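# Note (an assumption about the imported helper, based on the usual exercise
# setup): mapping the two tests to every monomial x1^i * x2^j with i + j <= 6
# gives (6 + 1) * (6 + 2) / 2 = 28 columns, including the bias column of ones.
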
# Initialize fitting parameters
initial_theta = np.zeros((X.shape[1], 1))

# Set regularization parameter lambda to 1
ld = 1

# Compute and display initial cost and gradient for regularized logistic
# regression
cost, grad = cost_function_reg(initial_theta, X, y, ld)

print('Cost at initial theta (zeros): {0:f}\n'.format(cost))
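# Sanity check: at theta = 0 every h equals sigmoid(0) = 0.5 and the penalty
# term vanishes, so the printed value should be ln(2), roughly 0.693.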

print('\nProgram paused. Press enter to continue.\n')
input()

## ============= Part 2: Regularization and Accuracies =============
# Initialize fitting parameters
initial_theta = np.zeros((X.shape[1], 1)) + 0.01

# Set regularization parameter lambda to 1 (you should vary this; a sweep
# sketch is at the end of the script)
ld = 1

# Optimize
res = scipy.optimize.minimize(cost_function_reg, initial_theta, args=(X, y, ld),
                              method='BFGS', jac=True, options={'maxiter': 400})
theta = res['x']
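# Optionally confirm convergence before plotting; 'success' and 'message' are
# standard fields of scipy.optimize.OptimizeResult.
if not res['success']:
    print('Warning: optimizer did not converge: {0}'.format(res['message']))
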
# Plot Boundary
plot_decision_boundary(theta, X, y)
plt.title('lambda = {0:g}'.format(ld))

# Labels and Legend
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')

plt.legend(['y = 1', 'y = 0', 'Decision boundary'])
plt.show()

# Compute accuracy on our training set
p = predict(theta, X)

print('Train Accuracy: {0:f}\n'.format(np.mean(p == y.flatten()) * 100))
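
# A minimal sketch of the lambda sweep suggested above (the 0/1/10/100 values
# are illustrative, not part of the original script). The stable cost variant
# is used so lambda = 0 does not trip log(0) overflow near a saturated fit.
for ld_try in (0.0, 1.0, 10.0, 100.0):
    res_try = scipy.optimize.minimize(cost_function_reg_stable, initial_theta,
                                      args=(X, y, ld_try), method='BFGS',
                                      jac=True, options={'maxiter': 400})
    p_try = predict(res_try['x'], X)
    print('lambda = {0:g}: train accuracy = {1:f}'.format(
        ld_try, np.mean(p_try == y.flatten()) * 100))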