## Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all

import math

import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import scipy.optimize


# DISPLAYDATA Display 2D data in a nice grid
#   [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
#   stored in X in a nice grid. It returns the figure handle h and the
#   displayed array if requested.
def displaydata(X, example_width=None):
    # Set example_width automatically if not passed in
    if example_width is None:
        example_width = round(math.sqrt(X.shape[1]))

    # Gray Image
    # colormap(gray)

    # Compute rows, cols
    m, n = X.shape
    example_height = int(n / example_width)

    # Compute number of items to display
    display_rows = math.floor(math.sqrt(m))
    display_cols = math.ceil(m / display_rows)

    # Between images padding
    pad = 1

    # Setup blank display
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    # Copy each example into a patch on the display array
    curr_ex = 0
    for j in range(display_rows):
        for i in range(display_cols):
            if curr_ex >= m:  # curr_ex is 0-based, so stop once it reaches m
                break

            # Copy the patch

            # Get the max value of the patch
            max_val = np.max(np.abs(X[curr_ex, :]))
            display_array[pad + j * (example_height + pad):pad + j * (example_height + pad) + example_height,
                          pad + i * (example_width + pad):pad + i * (example_width + pad) + example_width] = \
                X[curr_ex, :].reshape((example_height, example_width)).T / max_val
            curr_ex += 1
        if curr_ex >= m:
            break

    # Display Image
    plt.imshow(display_array, vmin=-1, vmax=1, cmap='gray')

    # Do not show axis
    plt.axis('off')


# SIGMOID Compute sigmoid function
#   J = SIGMOID(z) computes the sigmoid of z.
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g
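

# A quick numeric sanity check for sigmoid (the values below are illustrative
# choices, not part of the exercise): sigmoid(0) is exactly 0.5 and the
# function saturates toward 0 and 1 for large |z|.
def _check_sigmoid():
    assert sigmoid(0) == 0.5
    assert sigmoid(35) > 1 - 1e-14
    assert sigmoid(-35) < 1e-14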


# LRCOSTFUNCTION Compute cost and gradient for logistic regression with
# regularization
#   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
#   theta as the parameter for regularized logistic regression and the
#   gradient of the cost w.r.t. to the parameters.
def lr_cost_function(theta, X, y, ld):
    # Shape variables
    m = y.shape[0]
    d1 = X.shape[1]
    d2 = y.shape[1]

    theta = theta.reshape((d1, d2))
    if y.dtype == bool:
        y = y.astype(int)

    h = sigmoid(X.dot(theta))

    # Exclude the bias row from regularization. Work on a copy: reshape
    # returns a view, so zeroing in place would corrupt the optimizer's
    # parameter vector.
    theta = theta.copy()
    theta[0, :] = 0

    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m + ld / (2 * m) * np.sum(theta ** 2)
    grad = X.T.dot(h - y) / m + ld / m * theta
    return J, grad.flatten()
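

# In math form, the regularized cost implemented above is
#   J(theta) = (1/m) * sum(-y * log(h) - (1 - y) * log(1 - h))
#              + (lambda / (2m)) * sum(theta[1:] ** 2),
# with the bias row excluded from the penalty. A minimal sanity check on toy
# data (the numbers below are made up for illustration): with theta = 0 every
# prediction is sigmoid(0) = 0.5, so the unregularized cost must equal
# -log(0.5) = log(2) no matter what the labels are.
def _check_lr_cost_function():
    X_toy = np.array([[1.0, 0.5], [1.0, -1.5], [1.0, 2.0]])  # bias column + 1 feature
    y_toy = np.array([[1], [0], [1]])
    J, _ = lr_cost_function(np.zeros((2, 1)), X_toy, y_toy, 0.0)
    assert abs(J - math.log(2)) < 1e-12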


# ONEVSALL trains multiple logistic regression classifiers and returns all
# the classifiers in a matrix all_theta, where the i-th column of all_theta
# corresponds to the classifier for label i.
#   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels
#   logistic regression classifiers and returns each of these classifiers
#   in a matrix all_theta, where the i-th column of all_theta corresponds
#   to the classifier for label i.
def one_vs_all(X, y, num_labels, ld):
    # Some useful variables
    m, n = X.shape
    # One-hot encode y: column k is the binary target "label == k + 1"
    y_map = (y.reshape(-1, 1) == np.arange(1, num_labels + 1)).astype(int)
    X = np.hstack((np.ones((m, 1)), X))
    initial_theta = np.zeros((n + 1, num_labels)) + 0.01
    # minimize expects a 1-D x0; lr_cost_function reshapes it back to 2-D
    ret = scipy.optimize.minimize(lr_cost_function, initial_theta.flatten(),
                                  args=(X, y_map, ld), jac=True,
                                  options={'maxiter': 500, 'disp': True},
                                  method='CG')  # BFGS is too slow here
    return ret['x'].reshape((n + 1, num_labels))
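

# An illustrative check of the label mapping used by one_vs_all (toy labels,
# assumed here purely for demonstration): y_map one-hot encodes y so that
# column k is 1 exactly where the example's label is k + 1.
def _check_label_mapping():
    y_toy = np.array([1, 3, 2])
    y_map = (y_toy.reshape(-1, 1) == np.arange(1, 4)).astype(int)
    assert np.array_equal(y_map, np.array([[1, 0, 0],
                                           [0, 0, 1],
                                           [0, 1, 0]]))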


# Loop version of the one_vs_all function above
def one_vs_all2(X, y, num_labels, ld):
    # Some useful variables
    m, n = X.shape
    X = np.hstack((np.ones((m, 1)), X))
    all_theta = np.zeros((n + 1, num_labels))
    for i in range(num_labels):
        initial_theta = np.zeros(n + 1)
        # (y == i + 1) is a boolean column; lr_cost_function casts it to int
        ret = scipy.optimize.minimize(lr_cost_function, initial_theta,
                                      args=(X, (y == i + 1), ld), jac=True,
                                      options={'maxiter': 50, 'disp': True},
                                      method='CG')  # BFGS is too slow here
        all_theta[:, i:i + 1] = ret['x'].reshape(-1, 1)
    return all_theta
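

# Note: one_vs_all and one_vs_all2 should produce similar classifiers. The
# joint call in one_vs_all works because lr_cost_function sums the cost over
# columns and its gradient is column-separable, so the num_labels binary
# problems are effectively optimized independently within one minimize() run.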


# PREDICT Predict the label for a trained one-vs-all classifier. The labels
# are in the range 1..K, where K = all_theta.shape[1].
#   p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
#   for each example in the matrix X. Note that X contains the examples in
#   rows. all_theta is a matrix where the i-th column is a trained logistic
#   regression theta vector for the i-th class. You should set p to a vector
#   of values from 1..K (e.g., p = [1, 3, 1, 2] predicts classes 1, 3, 1, 2
#   for 4 examples).
def predict_one_vs_all(all_theta, X):
    m = X.shape[0]
    num_labels = all_theta.shape[1]

    # Add ones to the X data matrix
    X = np.hstack((np.ones((m, 1)), X))

    # Pick the class whose classifier gives the highest score (1-based labels)
    temp = sigmoid(X.dot(all_theta))
    p = temp.argmax(axis=1) + 1
    return p
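

# Minimal illustration of the prediction rule above (made-up scores): the
# class with the largest score wins, and the returned labels are 1-based.
def _check_predict_rule():
    scores = np.array([[0.1, 0.9, 0.3],
                       [0.8, 0.2, 0.5]])
    assert np.array_equal(scores.argmax(axis=1) + 1, np.array([2, 1]))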


## ===================== Start Main ======================
if __name__ == "__main__":
    np.set_printoptions(formatter={'float': '{: 0.5f}'.format}, edgeitems=20, linewidth=150)

    ## Setup the parameters you will use for this part of the exercise
    input_layer_size = 400  # 20x20 Input Images of Digits
    num_labels = 10         # 10 labels, from 1 to 10
                            # (note that we have mapped "0" to label 10)

    ## =========== Part 1: Loading and Visualizing Data =============
    # We start the exercise by first loading and visualizing the dataset.
    # You will be working with a dataset that contains handwritten digits.

    # Load Training Data
    print('Loading and Visualizing Data ...\n')

    data = scipy.io.loadmat('mat/ex3data1.mat', matlab_compatible=True)  # training data stored in arrays X, y
    X = data['X']
    y = data['y']
    m = X.shape[0]

    # Randomly select 100 data points to display
    rand_indices = np.random.permutation(m)
    sel = X[rand_indices[:100], :]

    displaydata(sel)
    plt.show()

    print('Program paused. Press enter to continue.\n')
    input()

    ## ============ Part 2: Vectorize Logistic Regression ============
    # In this part of the exercise, you will reuse your logistic regression
    # code from the last exercise. Your task here is to make sure that your
    # regularized logistic regression implementation is vectorized. After
    # that, you will implement one-vs-all classification for the handwritten
    # digit dataset.

    print('\nTraining One-vs-All Logistic Regression...\n')

    ld = 0.1

    all_theta = one_vs_all(X, y, num_labels, ld)

    print('Program paused. Press enter to continue.\n')
    input()

    ## ================ Part 3: Predict for One-Vs-All ================
    # After ...
    pred = predict_one_vs_all(all_theta, X)

    print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))