
2 changed files with 175 additions and 0 deletions

@@ -0,0 +1,173 @@
## Machine Learning Online Class - Exercise 2: Logistic Regression

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize


# PLOTDATA Plots the data points X and y into a new figure
#   PLOTDATA(x, y) plots the data points with + for the positive examples
#   and o for the negative examples. X is assumed to be an Mx2 matrix.
def plotdata(X, y):
    plt.figure()
    pos = (y == 1).flatten()
    neg = (y == 0).flatten()
    plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=2, markersize=7)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', markerfacecolor='y', markersize=7)


# SIGMOID Compute sigmoid function
#   G = SIGMOID(z) computes the sigmoid of z.
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g
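
# Note (an aside, not in the original exercise): np.exp(-z) overflows for
# large negative z. A hedged alternative is SciPy's numerically stable
# sigmoid, scipy.special.expit; sigmoid_stable below is a hypothetical name.
#
#     from scipy.special import expit
#     def sigmoid_stable(z):
#         return expit(z)  # == 1 / (1 + exp(-z)), without overflow warnings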


# COSTFUNCTION Compute cost and gradient for logistic regression
#   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
#   parameter for logistic regression and the gradient of the cost
#   w.r.t. the parameters.
#   TODO The log-loss used here is numerically unstable: np.log(h) evaluates
#   to -inf when h saturates at 0 or 1. See the stable sketch below.
def cost_function(theta, X, y):
    m = y.shape[0]
    # scipy.optimize passes theta as a flat vector; restore the column shape
    theta = theta.reshape((X.shape[1], y.shape[1]))
    h = sigmoid(X.dot(theta))
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m
    grad = (h - y).T.dot(X) / m
    return J, grad.flatten()
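
# Addressing the TODO above (a sketch; cost_function_stable is a hypothetical
# name, not part of the exercise): the same J can be computed without
# evaluating log(h) directly, via the identity
# -y*log(h) - (1-y)*log(1-h) = logaddexp(0, z) - y*z  for z = X.dot(theta).
def cost_function_stable(theta, X, y):
    m = y.shape[0]
    theta = theta.reshape((X.shape[1], y.shape[1]))
    z = X.dot(theta)
    J = np.sum(np.logaddexp(0, z) - y * z) / m   # stable log-loss
    grad = (sigmoid(z) - y).T.dot(X) / m         # same gradient as above
    return J, grad.flatten()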


# MAPFEATURE Feature mapping function to polynomial features
#
#   MAPFEATURE(X1, X2) maps the two input features
#   to quadratic features used in the regularization exercise.
#
#   Returns a new feature array with more features, comprising
#   X1, X2, X1**2, X2**2, X1*X2, X1*X2**2, etc.
#
#   Inputs X1, X2 must be the same size
def map_feature(X1, X2):
    degree = 6
    X1 = np.atleast_1d(X1).reshape(-1, 1)
    X2 = np.atleast_1d(X2).reshape(-1, 1)
    out = np.ones((X1.shape[0], 1))
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            # Octave original: out(:, end+1) = (X1.^(i-j)).*(X2.^j)
            out = np.hstack((out, (X1 ** (i - j)) * (X2 ** j)))
    return out
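
# Quick sanity check (an illustration, not part of the original script):
# for degree 6 the map yields (6+1)*(6+2)/2 = 28 columns, bias included.
#
#     map_feature(np.array([0.5]), np.array([-0.5])).shape   # -> (1, 28)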


# PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with
#   the decision boundary defined by theta
#   PLOTDECISIONBOUNDARY(theta, X, y) plots the data points with + for the
#   positive examples and o for the negative examples. X is assumed to be
#   either
#   1) an Mx3 matrix, where the first column is an all-ones column for the
#      intercept, or
#   2) an MxN, N>3 matrix, where the first column is all-ones
def plot_decision_boundary(theta, X, y):
    # Plot Data (skip the intercept column)
    plotdata(X[:, 1:3], y)
    if theta.ndim == 2 and (theta.shape[0] == 1 or theta.shape[1] == 1):
        theta = theta.flatten()

    if X.shape[1] <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])

        # Calculate the decision boundary line:
        # theta[0] + theta[1]*x1 + theta[2]*x2 = 0
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])

        # Plot, and adjust axes for better viewing
        plt.plot(plot_x, plot_y)

        # Legend, specific for the exercise
        plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'])
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)

        z = np.zeros((u.size, v.size))
        # Evaluate z = theta*x over the grid
        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = map_feature(u[i], v[j]).dot(theta)

        z = z.T  # important to transpose z before calling contour

        # Plot z = 0; a single contour level at 0 traces the boundary
        plt.contour(u, v, z, levels=[0], linewidths=2)


## Load Data
#  The first two columns contain the exam scores and the third column
#  contains the label.
data = np.loadtxt('textdata/ex2data1.txt', delimiter=',')
X = data[:, :2]
y = data[:, 2:3]

## ==================== Part 1: Plotting ====================
print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')

plotdata(X, y)

# Labels and Legend
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')

# Specified in plot order
plt.legend(['Admitted', 'Not admitted'])
# plt.show()

print('\nProgram paused. Press enter to continue.\n')
input()

## ============ Part 2: Compute Cost and Gradient ============
#  Setup the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to X
X = np.concatenate((np.ones((m, 1)), X), axis=1)

# Initialize fitting parameters
initial_theta = np.zeros((n + 1, 1))

# Compute and display initial cost and gradient
cost, grad = cost_function(initial_theta, X, y)
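# With theta = 0 every prediction is h = 0.5, so the initial cost should be
# -log(0.5) ≈ 0.693; this is a handy correctness check.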

print('Cost at initial theta (zeros): {0:f}'.format(cost))
print('Gradient at initial theta (zeros): ')
print(np.array_str(grad.reshape(-1, 1)).replace('[', ' ').replace(']', ' '))

print('\nProgram paused. Press enter to continue.\n')
input()


## ============= Part 3: Optimizing using scipy.optimize.minimize =============

# Run an unconstrained minimizer (the Octave original used fminunc) to obtain
# the optimal theta. jac=True tells SciPy that cost_function returns both the
# cost and its gradient; the result carries the final theta and cost.
res = scipy.optimize.minimize(cost_function, initial_theta, args=(X, y),
                              method='L-BFGS-B', jac=True,
                              options={'maxiter': 400})
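
# Optional sanity check (an illustration, not in the original script):
# scipy.optimize.check_grad compares the analytic gradient against finite
# differences and should print a value near zero.
#
#     f = lambda t: cost_function(t, X, y)[0]
#     g = lambda t: cost_function(t, X, y)[1]
#     print(scipy.optimize.check_grad(f, g, initial_theta.flatten()))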

# Print theta to screen
print('Cost at theta found by scipy.optimize.minimize: {0:f}'.format(res['fun']))
print('theta: ')
print(np.array_str(res['x'].reshape(-1, 1)).replace('[', ' ').replace(']', ' '))

# Plot Boundary
theta = res['x']
plot_decision_boundary(theta, X, y)

# Labels and Legend
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')

# Specified in plot order
plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'])

print('\nProgram paused. Press enter to continue.')
input()
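
# A hedged follow-up (a sketch, not part of this script): with the fitted
# theta one can score a new applicant and measure training accuracy, e.g.
#
#     prob = sigmoid(np.array([1, 45, 85]).dot(theta))  # admission probability
#     p = sigmoid(X.dot(theta)) >= 0.5                  # class predictions
#     print('Train accuracy: {0:f}'.format(np.mean(p == y.flatten()) * 100))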
@@ -0,0 +1,2 @@
import numpy as np
import matplotlib.pyplot as plt