## Machine Learning Online Class - Exercise 2: Logistic Regression

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize


# PLOTDATA Plots the data points X and y into a new figure
# PLOTDATA(x,y) plots the data points with + for the positive examples
# and o for the negative examples. X is assumed to be an Mx2 matrix.
def plotdata(X, y):
    plt.figure()
    pos = y == 1
    neg = y == 0
    plt.plot(X[:, :1][pos], X[:, 1:2][pos], 'k+', linewidth=2, markersize=7)
    plt.plot(X[:, :1][neg], X[:, 1:2][neg], 'ko', markerfacecolor='y', markersize=7)


# SIGMOID Compute sigmoid function
# J = SIGMOID(z) computes the sigmoid of z.
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g
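
# For reference: sigmoid operates elementwise on arrays, and sigmoid(0) == 0.5,
# e.g. sigmoid(np.array([-10.0, 0.0, 10.0])) is roughly [4.54e-05, 0.5, 0.99995].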


# COSTFUNCTION Compute cost and gradient for logistic regression
# J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
# parameter for logistic regression and the gradient of the cost
# w.r.t. the parameters.
# TODO The logistic function used here seems to be unstable
# (see the note after this function)
def cost_function(theta, X, y):
    m = y.shape[0]
    d1 = X.shape[1]
    d2 = y.shape[1]
    # scipy.optimize passes theta as a flat vector, so restore its column shape
    theta = theta.reshape((d1, d2))
    h = sigmoid(X.dot(theta))
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m
    grad = (h - y).T.dot(X) / m
    return J, grad.flatten()
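

# Note on the TODO above: with the plain formulation, exp(-z) can overflow for
# large negative z, and log(h) / log(1 - h) become -inf once h saturates at 0 or 1.
# The sketch below is one possible stable replacement (not used by the script);
# it relies on the identity -y*log(h) - (1-y)*log(1-h) = logaddexp(0, z) - y*z
# with z = X.dot(theta), and on scipy.special.expit as an overflow-safe sigmoid.
def cost_function_stable(theta, X, y):
    from scipy.special import expit  # numerically stable sigmoid
    m = y.shape[0]
    theta = theta.reshape((X.shape[1], y.shape[1]))
    z = X.dot(theta)
    # log(1 + exp(z)) - y*z, computed without forming exp(z) explicitly
    J = np.sum(np.logaddexp(0, z) - y * z) / m
    grad = (expit(z) - y).T.dot(X) / m
    return J, grad.flatten()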


# MAPFEATURE Feature mapping function to polynomial features
#
# MAPFEATURE(X1, X2) maps the two input features
# to quadratic features used in the regularization exercise.
#
# Returns a new feature array with more features, comprising
# X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.
#
# Inputs X1, X2 must be the same size
def map_feature(X1, X2):
    degree = 6
    out = np.ones(X1[:, :1].shape)
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            out = np.hstack((out, (X1 ** (i - j)) * (X2 ** j)))
    return out
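
# With degree = 6 and two Mx1 inputs, map_feature returns an Mx28 array: the
# all-ones column plus every monomial X1**(i-j) * X2**j for i = 1..6, j = 0..i
# (2 + 3 + 4 + 5 + 6 + 7 = 27 terms in addition to the ones column).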


# PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with
# the decision boundary defined by theta
# PLOTDECISIONBOUNDARY(theta, X, y) plots the data points with + for the
# positive examples and o for the negative examples. X is assumed to be
# either
# 1) an Mx3 matrix, where the first column is an all-ones column for the
#    intercept.
# 2) an MxN, N>3 matrix, where the first column is all-ones
def plot_decision_boundary(theta, X, y):
    # Plot Data
    plotdata(X[:, 1:3], y)

    if theta.ndim == 2 and (theta.shape[0] == 1 or theta.shape[1] == 1):
        theta = theta.flatten()

    if X.shape[1] <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])

        # Calculate the decision boundary line
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])

        # Plot, and adjust axes for better viewing
        plt.plot(plot_x, plot_y)

        # Legend, specific for the exercise
        plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'])
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50).reshape(-1, 1)
        v = np.linspace(-1, 1.5, 50).reshape(-1, 1)

        z = np.zeros((u.size, v.size))
        # Evaluate z = theta*x over the grid
        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = map_feature(u[i:i + 1, :1], v[j:j + 1, :1]).dot(theta)

        z = z.T  # important to transpose z before calling contour

        # Plot z = 0
        # Notice you need to specify the single contour level [0]
        plt.contour(u.flatten(), v.flatten(), z, [0], linewidths=2)
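
# In the else branch above, the decision boundary is wherever
# map_feature(x1, x2).dot(theta) crosses zero, which is exactly the single
# contour level [0] drawn by plt.contour over the (u, v) grid.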


# PREDICT Predict whether the label is 0 or 1 using learned logistic
# regression parameters theta
# p = PREDICT(theta, X) computes the predictions for X using a
# threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)
def predict(theta, X):
    m = X.shape[0]
    p = sigmoid(X.dot(theta)) >= 0.5
    return p
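
# For example, predict(np.array([0.0, 1.0, 1.0]), np.array([[1.0, 2.0, -1.0]]))
# evaluates sigmoid(1.0), roughly 0.73, and therefore returns array([ True]).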


## ===================== Start Main ======================
if __name__ == "__main__":
    ## Load Data
    # The first two columns contain the exam scores and the third column
    # contains the label.
    data = np.loadtxt('textdata/ex2data1.txt', delimiter=',')
    X = data[:, :2]
    y = data[:, 2:3]

    ## ==================== Part 1: Plotting ====================
    print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')

    plotdata(X, y)

    # Labels and legend
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')

    # Specified in plot order
    plt.legend(['Admitted', 'Not admitted'])
    plt.show()

    print('\nProgram paused. Press enter to continue.\n')
    input()

    ## ============ Part 2: Compute Cost and Gradient ============
    # Setup the data matrix appropriately, and add ones for the intercept term
    m, n = X.shape

    # Add intercept term to X
    X = np.hstack((np.ones((m, 1)), X))

    # Initialize fitting parameters
    initial_theta = np.zeros((n + 1, 1))

    # Compute and display initial cost and gradient
    cost, grad = cost_function(initial_theta, X, y)

    print('Cost at initial theta (zeros): {0:f}'.format(cost))
    print('Gradient at initial theta (zeros): ')
    print(np.array_str(grad.reshape(-1, 1)).replace('[', ' ').replace(']', ' '))
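
    # Sanity check: with theta = 0 every prediction is sigmoid(0) = 0.5, so the
    # cost above should be -log(0.5) = ln 2, i.e. approximately 0.693.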

    print('\nProgram paused. Press enter to continue.\n')
    input()

    ## ============= Part 3: Optimizing using scipy.optimize.minimize =============

    # Run scipy.optimize.minimize (BFGS, using the analytic gradient via jac=True)
    # to obtain the optimal theta. It returns both theta and the final cost.
    # Start from a small non-zero theta rather than exact zeros.
    initial_theta += 0.01
    res = scipy.optimize.minimize(cost_function, initial_theta, args=(X, y), method='BFGS', jac=True,
                                  options={'maxiter': 400})
    theta = res['x']

    # Print theta to screen
    print('Cost at theta found by optimization function: {0:f}\n'.format(res['fun']))
    print('theta: \n')
    print(np.array_str(theta).replace('[', ' ').replace(']', ' '))
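
    # If the optimizer struggles, res['success'] and res['message'] (standard
    # fields on scipy's OptimizeResult) report whether and why BFGS stopped.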

    # Plot Boundary
    plot_decision_boundary(theta, X, y)

    # Labels and legend
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')

    # Specified in plot order
    plt.legend(['Admitted', 'Not admitted'])
    plt.show()

    print('\nProgram paused. Press enter to continue.')
    input()

    ## ============== Part 4: Predict and Accuracies ==============

    # Predict probability for a student with score 45 on exam 1
    # and score 85 on exam 2
    prob = sigmoid(np.array([1, 45, 85]).dot(theta))
    print('For a student with scores 45 and 85, we predict an admission probability of {0:f}\n'.format(prob))

    # Compute accuracy on our training set
    p = predict(theta, X)

    print('Train Accuracy: {0:f}\n'.format(np.mean(p == y.flatten()) * 100))
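
    # The comparison above works because p holds booleans and numpy treats
    # True as 1 and False as 0 when comparing against the 0/1 labels in y.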

    print('\nProgram paused. Press enter to continue.\n')
    input()