Programming homework assignments from the Stanford Machine Learning open course. The original version is in MATLAB, rewritten here with NumPy.

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize
from ex2.ex2_ import plotdata, map_feature, sigmoid, plot_decision_boundary, predict
# COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
#   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
#   theta as the parameter for regularized logistic regression and the
#   gradient of the cost w.r.t. the parameters.
# TODO: also suffers from numerical instability (log(h) or log(1 - h) can hit
#   log(0) when the hypothesis saturates at 0 or 1)
def cost_function_reg(theta, X, y, ld):
    m = y.shape[0]
    d1 = X.shape[1]
    d2 = y.shape[1]
    theta = theta.reshape((d1, d2))
    h = sigmoid(X.dot(theta))
    # Exclude the intercept term from regularization; use a copy so the caller's
    # theta (e.g. the optimizer's current iterate) is not modified in place.
    theta2 = theta.copy()
    if theta2.size > 0:
        theta2[0, :] = 0
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m + ld / (2 * m) * np.sum(theta2 ** 2)
    grad = X.T.dot(h - y) / m + ld / m * theta2
    return J, grad.flatten()
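# Optional sanity check, not part of the original exercise: compare the analytic
# gradient from cost_function_reg against a central finite-difference estimate on a
# tiny random problem (the _gc_* names below are made up for this check only).
_gc_rng = np.random.RandomState(0)
_gc_X = np.hstack([np.ones((5, 1)), _gc_rng.randn(5, 3)])
_gc_y = _gc_rng.randint(0, 2, size=(5, 1)).astype(float)
_gc_theta = 0.1 * _gc_rng.randn(4)
_, _gc_grad = cost_function_reg(_gc_theta, _gc_X, _gc_y, 1.0)
_gc_eps = 1e-6
_gc_num = np.zeros_like(_gc_grad)
for _gc_i in range(_gc_theta.size):
    _gc_step = np.zeros_like(_gc_theta)
    _gc_step[_gc_i] = _gc_eps
    _gc_Jp, _ = cost_function_reg(_gc_theta + _gc_step, _gc_X, _gc_y, 1.0)
    _gc_Jm, _ = cost_function_reg(_gc_theta - _gc_step, _gc_X, _gc_y, 1.0)
    _gc_num[_gc_i] = (_gc_Jp - _gc_Jm) / (2 * _gc_eps)
print('Gradient check, max abs difference: {0:g}'.format(np.max(np.abs(_gc_grad - _gc_num))))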
## Load Data
# The first two columns contain the X values and the third column
# contains the label (y).
data = np.loadtxt('textdata/ex2data2.txt', delimiter=',')
X = data[:, :2]
y = data[:, 2:3]
plotdata(X, y)
# Put some labels
# Labels and Legend
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
# Specified in plot order
plt.legend(['y = 1', 'y = 0'])
plt.show()
## =========== Part 1: Regularized Logistic Regression ============
# Add Polynomial Features
# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
X = map_feature(X[:,:1], X[:,1:2])
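# Assuming map_feature matches the original mapFeature (all polynomial terms of the
# two features up to degree 6), X now has 28 columns, the first being the bias column.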
# Initialize fitting parameters
initial_theta = np.zeros((X.shape[1], 1))
# Set regularization parameter lambda to 1
ld = 1
# Compute and display initial cost and gradient for regularized logistic
# regression
cost, grad = cost_function_reg(initial_theta, X, y, ld)
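# With theta = 0 the hypothesis is sigmoid(0) = 0.5 for every example, so the expected
# initial cost is -log(0.5) ~ 0.693 regardless of the data (the regularization term is
# zero for theta = 0).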
print('Cost at initial theta (zeros): {0:f}\n'.format(cost))
print('\nProgram paused. Press enter to continue.\n')
input()
## ============= Part 2: Regularization and Accuracies =============
# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1]) + 0.01  # 1-D, since scipy.optimize.minimize expects x0 of shape (n,)
# Set regularization parameter lambda to 1 (you should vary this)
ld = 1
# Optimize
res = scipy.optimize.minimize(cost_function_reg, initial_theta,
                              args=(X, y, ld), method='BFGS', jac=True,
                              options={'maxiter': 400})
theta = res['x']
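# cost_function_reg returns (cost, gradient), which is what jac=True expects here;
# res is a scipy OptimizeResult, so res['success'] and res['message'] can be checked
# if BFGS stops before converging.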
# Plot Boundary
plot_decision_boundary(theta, X, y)
plt.title('lambda = {0:g}'.format(ld))
# Labels and Legend
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend(['y = 1', 'y = 0', 'Decision boundary'])
plt.show()
# Compute accuracy on our training set
p = predict(theta, X)
print('Train Accuracy: {0:f}\n'.format(np.mean(p == y.flatten()) * 100))
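# A minimal sketch for the "you should vary this" suggestion above: refit with a few
# lambda values and compare training accuracy. Large lambda should underfit (high bias);
# lambda = 0 may overfit and can trigger the numerical instability noted in the TODO,
# since some predictions get pushed toward exactly 0 or 1.
for _ld in (0.0, 1.0, 10.0, 100.0):
    _res = scipy.optimize.minimize(cost_function_reg, np.zeros(X.shape[1]) + 0.01,
                                   args=(X, y, _ld), method='BFGS', jac=True,
                                   options={'maxiter': 400})
    _p = predict(_res['x'], X)
    print('lambda = {0:g}: train accuracy = {1:.1f}%'.format(
        _ld, np.mean(_p == y.flatten()) * 100))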