Programming homework from the Stanford Machine Learning open course. The original version is in MATLAB, rewritten with NumPy.

## Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all
import numpy as np
import matplotlib.pyplot as plt
import scipy, scipy.io, scipy.optimize
import math
# DISPLAYDATA Display 2D data in a nice grid
# DISPLAYDATA(X, example_width) displays the 2D data stored in X in a
# nice grid. Unlike the MATLAB original, this version does not return
# the figure handle or the display array; it only draws the image.
def displaydata(X, example_width=None):
    # Set example_width automatically if not passed in
    if example_width is None:
        example_width = round(math.sqrt(X.shape[1]))
    # Gray Image
    # colormap(gray)
    # Compute rows, cols
    m, n = X.shape
    example_height = int(n / example_width)
    # Compute number of items to display
    display_rows = math.floor(math.sqrt(m))
    display_cols = math.ceil(m / display_rows)
    # Between images padding
    pad = 1
    # Setup blank display
    display_array = - np.ones((pad + display_rows * (example_height + pad),
                               pad + display_cols * (example_width + pad)))
    # Copy each example into a patch on the display array
    curr_ex = 0
    for j in range(display_rows):
        for i in range(display_cols):
            if curr_ex >= m:
                break
            # Copy the patch, normalized by the max absolute value of this example
            max_val = np.max(np.abs(X[curr_ex, :]))
            display_array[pad + j * (example_height + pad):pad + j * (example_height + pad) + example_height,
                          pad + i * (example_width + pad):pad + i * (example_width + pad) + example_width] = \
                X[curr_ex, :].reshape((example_height, example_width)).T / max_val
            curr_ex += 1
        if curr_ex >= m:
            break
    # Display Image
    plt.imshow(display_array, vmin=-1, vmax=1, cmap='gray')
    # Do not show axis
    plt.axis('off')
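# Example usage (assuming X rows are flattened 20x20 digit images, as in ex3data1.mat):
#   displaydata(X[:100, :])
#   plt.show()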
# SIGMOID Compute sigmoid function
# J = SIGMOID(z) computes the sigmoid of z.
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g
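# Quick sanity check (illustrative, not part of the original exercise): sigmoid is
# vectorized over arrays, sigmoid(0) is exactly 0.5, and large |z| saturates to 0 or 1.
# assert sigmoid(0) == 0.5
# assert np.allclose(sigmoid(np.array([-20.0, 20.0])), [0.0, 1.0], atol=1e-8)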
# LRCOSTFUNCTION Compute cost and gradient for logistic regression with
# regularization
# J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
# theta as the parameter for regularized logistic regression and the
# gradient of the cost w.r.t. to the parameters.
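# Vectorized formulas implemented below, where h = sigmoid(X.dot(theta)) and the
# bias row theta[0, :] is excluded from regularization:
#   J    = (1/m) * sum( -y*log(h) - (1-y)*log(1-h) ) + (ld/(2*m)) * sum(theta[1:, :]**2)
#   grad = (1/m) * X.T.dot(h - y) + (ld/m) * theta, with the bias row of theta zeroed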
def lr_cost_function(theta, X, y, ld):
    # Shape variables
    m = y.shape[0]
    d1 = X.shape[1]
    d2 = y.shape[1]
    theta = theta.reshape((d1, d2))
    if y.dtype == bool:
        y = y.astype(int)
    h = sigmoid(X.dot(theta))
    # Exclude the bias row from regularization; work on a copy so the optimizer's
    # parameter vector is not modified in place (theta is a view of scipy's x).
    theta_reg = theta.copy()
    theta_reg[0, :] = 0
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m + ld / (2 * m) * np.sum(theta_reg ** 2)
    grad = X.T.dot(h - y) / m + ld / m * theta_reg
    return J, grad.flatten()
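# Optional numerical gradient check (an illustrative sketch, not part of the original
# exercise): compare the analytic gradient returned above against centered finite
# differences on a tiny problem. `numerical_grad` is a helper added here for illustration;
# y must be a 2-D 0/1 array, as lr_cost_function expects.
def numerical_grad(theta, X, y, ld, eps=1e-4):
    theta = theta.flatten().astype(float)
    num = np.zeros_like(theta)
    for k in range(theta.size):
        tp, tm = theta.copy(), theta.copy()
        tp[k] += eps
        tm[k] -= eps
        # Only the cost (first return value) is needed for the finite difference
        num[k] = (lr_cost_function(tp, X, y, ld)[0] - lr_cost_function(tm, X, y, ld)[0]) / (2 * eps)
    return num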
# ONEVSALL trains multiple logistic regression classifiers and returns all
# the classifiers in a matrix all_theta. In this NumPy version, column j of
# all_theta corresponds to the classifier for label j+1.
# all_theta = ONEVSALL(X, y, num_labels, lambda) trains num_labels
# logistic regression classifiers and returns each of these classifiers
# as a column of the (n+1) x num_labels matrix all_theta.
def one_vs_all(X, y, num_labels, ld):
    # Some useful variables
    m, n = X.shape
    # One-hot encode y: column j is 1 where y == j+1
    y_map = (y.reshape(-1, 1) == np.array(range(1, num_labels + 1))).astype(int)
    X = np.hstack((np.ones((m, 1)), X))
    initial_theta = np.zeros((n + 1, num_labels)) + 0.01
    ret = scipy.optimize.minimize(lr_cost_function, initial_theta, args=(X, y_map, ld), jac=True,
                                  options={'maxiter': 500, 'disp': True}, method='CG')  # BFGS is too slow here
    return ret['x'].reshape((n + 1, num_labels))
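# Note (added for clarity): because lr_cost_function sums the cost over all columns of
# y_map, this single minimize() call fits all num_labels one-vs-all classifiers at once;
# each column of the returned matrix is one binary classifier's theta.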
# Loop version of the one_vs_all function above
def one_vs_all2(X, y, num_labels, ld):
    # Some useful variables
    m, n = X.shape
    X = np.hstack((np.ones((m, 1)), X))
    all_theta = np.zeros((n + 1, num_labels))
    for i in range(num_labels):
        initial_theta = np.zeros((n + 1, 1))
        ret = scipy.optimize.minimize(lr_cost_function, initial_theta, args=(X, (y == i + 1), ld), jac=True,
                                      options={'maxiter': 50, 'disp': True}, method='CG')  # BFGS is too slow here
        all_theta[:, i:i + 1] = ret['x'].reshape(-1, 1)
    return all_theta
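# Note (added for clarity): this loop version solves num_labels separate binary problems,
# using (y == label) as the 0/1 target for each class. Results should be roughly
# comparable to one_vs_all above, though maxiter differs (50 here vs 500 above).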
# PREDICT Predict the label for a trained one-vs-all classifier. The labels
# are in the range 1..K, where K is the number of columns of all_theta.
# p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
# for each example in the matrix X. Note that X contains the examples in
# rows. In this NumPy version, all_theta is a matrix whose j-th column is a
# trained logistic regression theta vector for label j+1. p is a vector of
# values from 1..K (e.g., p = [1, 3, 1, 2] predicts classes 1, 3, 1, 2
# for 4 examples).
def predict_one_vs_all(all_theta, X):
    m = X.shape[0]
    num_labels = all_theta.shape[1]
    # Add ones to the X data matrix
    X = np.hstack((np.ones((m, 1)), X))
    # For each example, pick the class whose classifier gives the highest probability
    temp = sigmoid(X.dot(all_theta))
    p = temp.argmax(axis=1) + 1
    return p
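# Illustrative usage (hypothetical shapes): with all_theta of shape (n+1, 10) and X of
# shape (m, n), predict_one_vs_all(all_theta, X) returns an integer array of shape (m,)
# with entries in 1..10.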
## ===================== Start Main ======================
if __name__ == "__main__":
    np.set_printoptions(formatter={'float': '{: 0.5f}'.format}, edgeitems=20, linewidth=150)
    ## Setup the parameters you will use for this part of the exercise
    input_layer_size = 400  # 20x20 Input Images of Digits
    num_labels = 10  # 10 labels, from 1 to 10
    # (note that we have mapped "0" to label 10)
    ## =========== Part 1: Loading and Visualizing Data =============
    # We start the exercise by first loading and visualizing the dataset.
    # You will be working with a dataset that contains handwritten digits.
    # Load Training Data
    print('Loading and Visualizing Data ...\n')
    data = scipy.io.loadmat('mat/ex3data1.mat', matlab_compatible=True)  # training data stored in arrays X, y
    X = data['X']
    y = data['y']
    m = X.shape[0]
    # Randomly select 100 data points to display
    rand_indices = np.random.permutation(m)
    sel = X[rand_indices[:100], :]
    displaydata(sel)
    plt.show()
    print('Program paused. Press enter to continue.\n')
    input()
    ## ============ Part 2: Vectorize Logistic Regression ============
    # In this part of the exercise, you will reuse your logistic regression
    # code from the last exercise. Your task here is to make sure that your
    # regularized logistic regression implementation is vectorized. After
    # that, you will implement one-vs-all classification for the handwritten
    # digit dataset.
    print('\nTraining One-vs-All Logistic Regression...\n')
    ld = 0.1
    all_theta = one_vs_all(X, y, num_labels, ld)
    print('Program paused. Press enter to continue.\n')
    input()
    ## ================ Part 3: Predict for One-Vs-All ================
    # After ...
    pred = predict_one_vs_all(all_theta, X)
    print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))