
Changed filename ex3 to ex3_ because it is shadowing dir name. Finished ex4.
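The rename matters because the module ex3/ex3.py shares its name with the ex3/ directory, so an import like "from ex3 import displaydata, sigmoid" can resolve to either the package or the module depending on the working directory. A minimal sketch of the package-qualified imports the rename enables (assuming the repository root is on sys.path):

    from ex3.ex3_ import displaydata, sigmoid   # helpers now defined in ex3/ex3_.py
    from ex3.ex3_nn import predict              # reused by ex4/ex4.py below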

master · wchen342 · 6 years ago · commit 172e4be4b4
  1. ex3/ex3_.py (1 change)
  2. ex3/ex3_nn.py (95 changes)
  3. ex4/ex4.py (309 changes)

ex3/ex3.py → ex3/ex3_.py (1 change)

@@ -55,7 +55,6 @@ def displaydata(X, example_width=None):
    # Do not show axis
    plt.axis('off')
    plt.pause(0.0001)
# SIGMOID Compute sigmoid function

ex3/ex3_nn.py (95 changes)

@@ -2,8 +2,7 @@
import numpy as np
import matplotlib.pyplot as plt
import scipy, scipy.io, scipy.optimize
import math
from ex3 import displaydata, sigmoid
from ex3.ex3_ import displaydata, sigmoid
# PREDICT Predict the label of an input given a trained neural network
@@ -32,65 +31,67 @@ def predict(Theta1, Theta2, X):
## ===================== Start Main ======================
if __name__ == "__main__":
    ## Setup the parameters you will use for this exercise
    input_layer_size = 400  # 20x20 Input Images of Digits
    hidden_layer_size = 25  # 25 hidden units
    num_labels = 10  # 10 labels, from 1 to 10

    ## =========== Part 1: Loading and Visualizing Data =============
    # Load Training Data
    print('Loading and Visualizing Data ...\n')

    data = scipy.io.loadmat('mat/ex3data1.mat', matlab_compatible=True)  # training data stored in arrays X, y
    X = data['X']
    y = data['y']
    m = X.shape[0]

    # Randomly select 100 data points to display
    sel = np.random.permutation(m)
    sel = X[sel[:100], :]

    displaydata(sel)
    plt.show()

    print('Program paused. Press enter to continue.\n')
    input()
    ## ================ Part 2: Loading Parameters ================
    print('\nLoading Saved Neural Network Parameters ...\n')

    # Load the weights into variables Theta1 and Theta2
    data = scipy.io.loadmat('mat/ex3weights.mat', matlab_compatible=True)
    Theta1 = data['Theta1']
    Theta2 = data['Theta2']

    ## ================= Part 3: Implement Predict =================
    pred = predict(Theta1, Theta2, X)

    print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))

    print('Program paused. Press enter to continue.\n')
    input()

    # To give you an idea of the network's output, you can also run
    # through the examples one at a time to see what it is predicting.

    # Randomly permute examples
    rp = np.random.permutation(m)

    for i in range(0, m):
        ind = rp[i]  # Chosen index in X
        # Display
        print('\nDisplaying Example Image\n')
        displaydata(X[ind:ind+1, :])
        plt.pause(0.0001)
        plt.show(block=False)
        pred = predict(Theta1, Theta2, X[ind:ind+1, :])
        print('\nNeural Network Prediction: {0:d} (digit {1:d})\n'.format(pred[0], np.mod(pred, 10)[0]))

        # Pause
        print('Program paused. Press enter to continue.\n')
        input()

ex4/ex4.py (309 changes)

@@ -0,0 +1,309 @@
## Machine Learning Online Class - Exercise 4 Neural Network Learning
import numpy as np
import matplotlib.pyplot as plt
import scipy, scipy.io, scipy.optimize
from ex3.ex3_ import displaydata, sigmoid
from ex3.ex3_nn import predict
# SIGMOIDGRADIENT returns the gradient of the sigmoid function
# evaluated at z
# g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
# evaluated at z. This should work regardless if z is a matrix or a
# vector. In particular, if z is a vector or matrix, you should return
# the gradient for each element.
def sigmoid_gradient(z):
    gz = sigmoid(z)
    g = gz * (1 - gz)
    return g
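# Reference check: sigmoid(0) = 0.5, so sigmoid_gradient(0) = 0.5 * (1 - 0.5) = 0.25,
# which is the middle value printed in Part 5 below for the input [1 -0.5 0 0.5 1].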
# NNCOSTFUNCTION Implements the neural network cost function for a two layer
# neural network which performs classification
# [J grad] = NNCOSTFUNCTION(nn_params, input_layer_size, hidden_layer_size, num_labels, ...
# X, y, lambda) computes the cost and gradient of the neural network. The
# parameters for the neural network are "unrolled" into the vector
# nn_params and need to be converted back into the weight matrices.
#
# The returned parameter grad should be an "unrolled" vector of the
# partial derivatives of the neural network.
def nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], (num_labels, hidden_layer_size + 1))

    # Setup some useful variables
    m = X.shape[0]

    # Forward propagation
    X = np.hstack((np.ones((m, 1)), X))
    z2 = X.dot(Theta1.T)
    a2 = sigmoid(z2)  # Hidden layer
    a2 = np.hstack((np.ones((m, 1)), a2))
    out = sigmoid(a2.dot(Theta2.T))  # Output layer

    # Get rid of parameters for constants
    Theta1 = Theta1[:, 1:]
    Theta2 = Theta2[:, 1:]
    y = (y.reshape(-1, 1) == np.array(range(1, num_labels + 1))).astype(int)  # Map numerical y value to 0 and 1s

    # Cost function
    J = 1 / m * np.sum(- y * np.log(out) - (1 - y) * np.log(1 - out)) + ld / 2 / m * (
            np.sum(Theta1 ** 2) + np.sum(Theta2 ** 2))

    # Back propagation
    delta3 = out - y
    delta2 = delta3.dot(Theta2) * sigmoid_gradient(z2)

    # Gradients
    Theta2_grad = (delta3.T.dot(a2) + ld * np.hstack((np.zeros((num_labels, 1)), Theta2))) / m
    Theta1_grad = (delta2.T.dot(X) + ld * np.hstack((np.zeros((hidden_layer_size, 1)), Theta1))) / m

    return J, np.hstack((Theta1_grad.flatten(), Theta2_grad.flatten()))
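# In equation form, the regularized cost computed above is
#   J = (1/m) * sum_{i,k} [ -y_ik * log(h_ik) - (1 - y_ik) * log(1 - h_ik) ]
#       + (ld/(2*m)) * ( sum(Theta1[:, 1:] ** 2) + sum(Theta2[:, 1:] ** 2) )
# where h = out is the output-layer activation; the bias columns are excluded from
# the regularization term by stripping them before the squared sums.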
# RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
# incoming connections and L_out outgoing connections
# W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
# of a layer with L_in incoming connections and L_out outgoing
# connections.
#
# Note that W should be set to a matrix of size (L_out, 1 + L_in) as
# the first column of W handles the "bias" terms
def rand_initialize_weights(L_in, L_out):
    # Randomly initialize the weights to small values
    epsilon_init = 0.12
    W = np.random.random_sample((L_out, 1 + L_in)) * 2 * epsilon_init - epsilon_init
    return W
# DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
# incoming connections and fan_out outgoing connections using a fixed
# strategy, this will help you later in debugging
# W = DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights
# of a layer with fan_in incoming connections and fan_out outgoing
# connections using a fix set of values
#
# Note that W should be set to a matrix of size (fan_out, 1 + fan_in) as
# the first column of W handles the "bias" terms
def debug_initialize_weights(fan_out, fan_in):
    # Set W to zeros
    W = np.zeros((fan_out, 1 + fan_in))
    # Initialize W using "sin", this ensures that W is always of the same
    # values and will be useful for debugging
    W = np.reshape(np.sin(np.array(range(1, W.size + 1))), W.shape) / 10
    return W
# COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
# and gives us a numerical estimate of the gradient.
# numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
# gradient of the function J around theta. Calling y = J(theta) should
# return the function value at theta.
def compute_numerical_gradient(J, theta):
    numgrad = np.zeros(theta.shape)
    perturb = np.zeros(theta.shape)
    e = 1e-4
    for p in range(0, theta.size):
        # Set perturbation vector
        perturb[p] = e
        loss1 = J(theta - perturb)[0]
        loss2 = J(theta + perturb)[0]
        # Compute Numerical Gradient
        numgrad[p] = (loss2 - loss1) / (2 * e)
        perturb[p] = 0
    return numgrad
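# Minimal usage sketch with a hypothetical cost function (illustration only):
#   f = lambda t: (np.sum(t ** 2), 2 * t)                 # returns (cost, analytic gradient)
#   compute_numerical_gradient(f, np.array([1.0, -2.0]))  # ~= array([ 2., -4.])
# which matches the analytic gradient 2 * t of the quadratic above.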
# CHECKNNGRADIENTS Creates a small neural network to check the
# backpropagation gradients
# CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
# backpropagation gradients, it will output the analytical gradients
# produced by your backprop code and the numerical gradients (computed
# using computeNumericalGradient). These two gradient computations should
# result in very similar values.
def check_nn_gradients(ld=None):
    if ld is None:
        ld = 0
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
    Theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debug_initialize_weights(m, input_layer_size - 1)
    y = 1 + np.mod(np.array(range(1, m + 1)), num_labels).T

    # Unroll parameters
    nn_params = np.hstack((Theta1.flatten(), Theta2.flatten()))

    # Short hand for cost function
    costfunc = lambda p: nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, ld)
    cost, grad = costfunc(nn_params)
    numgrad = compute_numerical_gradient(costfunc, nn_params)

    # Visually examine the two gradient computations. The two columns
    # you get should be very similar.
    print(np.hstack((numgrad.reshape(-1, 1), grad.reshape(-1, 1))))
    print(
        'The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print(
        'If your backpropagation implementation is correct, then \nthe relative difference will be small (less than 1e-9). \n\nRelative Difference: {0:g}\n'.format(
            diff))
np.set_printoptions(formatter={'float': '{: 0.5f}'.format}, edgeitems=50, linewidth=150)
## Setup the parameters you will use for this exercise
input_layer_size = 400 # 20x20 Input Images of Digits
hidden_layer_size = 25 # 25 hidden units
num_labels = 10 # 10 labels, from 1 to 10
## =========== Part 1: Loading and Visualizing Data =============
# Load Training Data
print('Loading and Visualizing Data ...\n')
data = scipy.io.loadmat('mat/ex4data1.mat', matlab_compatible=True)
X = data['X']
y = data['y']
m = X.shape[0]
# Randomly select 100 data points to display
sel = np.random.permutation(m)
sel = sel[:100]
displaydata(X[sel, :])
plt.show()
print('Program paused. Press enter to continue.\n')
input()
## ================ Part 2: Loading Parameters ================
print('\nLoading Saved Neural Network Parameters ...\n')
# Load the weights into variables Theta1 and Theta2
data = scipy.io.loadmat('mat/ex4weights.mat', matlab_compatible=True)
Theta1 = data['Theta1']
Theta2 = data['Theta2']
# Unroll parameters
nn_params = np.concatenate((Theta1.flatten(), Theta2.flatten()))
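# (For reference: with the sizes above, Theta1 is 25 x 401 and Theta2 is 10 x 26,
#  so nn_params has 25 * 401 + 10 * 26 = 10285 entries.)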
## ================ Part 3: Compute Cost (Feedforward) ================
print('\nFeedforward Using Neural Network ...\n')
# Weight regularization parameter (we set this to 0 here).
ld = 0
J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
print('Cost at parameters (loaded from ex4weights): {0:f} \n(this value should be about 0.287629)\n'.format(J))
print('\nProgram paused. Press enter to continue.\n')
input()
## =============== Part 4: Implement Regularization ===============
print('\nChecking Cost Function (w/ Regularization) ... \n')
# Weight regularization parameter (we set this to 1 here).
ld = 1
J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
print('Cost at parameters (loaded from ex4weights): {0:f} \n(this value should be about 0.383770)\n'.format(J))
print('Program paused. Press enter to continue.\n')
input()
## ================ Part 5: Sigmoid Gradient ================
print('\nEvaluating sigmoid gradient...\n')
g = sigmoid_gradient(np.array([1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n ')
print(np.array_str(g).replace('[', ' ').replace(']', ' '))
print('\n\n')
print('Program paused. Press enter to continue.\n')
input()
## ================ Part 6: Initializing Parameters ================
print('\nInitializing Neural Network Parameters ...\n')
initial_Theta1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
initial_Theta2 = rand_initialize_weights(hidden_layer_size, num_labels)
# Unroll parameters
initial_nn_params = np.hstack((initial_Theta1.flatten(), initial_Theta2.flatten()))
## =============== Part 7: Implement Backpropagation ===============
print('\nChecking Backpropagation... \n')
# Check gradients by running checkNNGradients
check_nn_gradients()
print('\nProgram paused. Press enter to continue.\n')
input()
## =============== Part 8: Implement Regularization ===============
print('\nChecking Backpropagation (w/ Regularization) ... \n')
# Check gradients by running checkNNGradients
ld = 3
check_nn_gradients(ld)
# Also output the costFunction debugging values
debug_J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
print('\n\nCost at (fixed) debugging parameters (w/ lambda = 3): {0:f} \n(this value should be about 0.576051)\n\n'.format(debug_J))
print('Program paused. Press enter to continue.\n')
input()
## =================== Part 9: Training NN ===================
print('\nTraining Neural Network... \n')
# After you have completed the assignment, change the MaxIter to a larger
# value to see how more training helps.
options = {'maxiter': 50, 'disp': True}
# You should also try different values of lambda
ld = 1
# Create "short hand" for the cost function to be minimized
cost_function = lambda p: nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, ld)
# Now, costFunction is a function that takes in only one argument (the
# neural network parameters)
ret = scipy.optimize.minimize(cost_function, initial_nn_params, jac=True, options=options, method='CG')
# Obtain Theta1 and Theta2 back from the optimized parameters returned by the solver
Theta1 = np.reshape(ret.x[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, input_layer_size + 1))
Theta2 = np.reshape(ret.x[hidden_layer_size * (input_layer_size + 1):], (num_labels, hidden_layer_size + 1))
print('Program paused. Press enter to continue.\n')
# input()
## ================= Part 10: Visualize Weights =================
print('\nVisualizing Neural Network... \n')
displaydata(Theta1[:, 1:])
plt.show()
print('\nProgram paused. Press enter to continue.\n')
input()
## ================= Part 11: Implement Predict =================
pred = predict(Theta1, Theta2, X)
print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))