diff --git a/ex3/ex3.py b/ex3/ex3_.py
similarity index 99%
rename from ex3/ex3.py
rename to ex3/ex3_.py
index 4dc46d8..a9b67de 100644
--- a/ex3/ex3.py
+++ b/ex3/ex3_.py
@@ -55,7 +55,6 @@ def displaydata(X, example_width=None):
 
     # Do not show axis
     plt.axis('off')
-    plt.pause(0.0001)
 
 
 # SIGMOID Compute sigmoid function
diff --git a/ex3/ex3_nn.py b/ex3/ex3_nn.py
index f589c6e..311cd98 100644
--- a/ex3/ex3_nn.py
+++ b/ex3/ex3_nn.py
@@ -2,8 +2,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import scipy, scipy.io, scipy.optimize
-import math
-from ex3 import displaydata, sigmoid
+from ex3.ex3_ import displaydata, sigmoid
 
 
 # PREDICT Predict the label of an input given a trained neural network
@@ -32,65 +31,67 @@ def predict(Theta1, Theta2, X):
+## ===================== Start Main ======================
+if __name__ == "__main__":
+    ## Setup the parameters you will use for this exercise
+    input_layer_size = 400  # 20x20 Input Images of Digits
+    hidden_layer_size = 25  # 25 hidden units
+    num_labels = 10  # 10 labels, from 1 to 10
-## Setup the parameters you will use for this exercise
-input_layer_size = 400  # 20x20 Input Images of Digits
-hidden_layer_size = 25  # 25 hidden units
-num_labels = 10  # 10 labels, from 1 to 10
+    ## =========== Part 1: Loading and Visualizing Data =============
+    # Load Training Data
+    print('Loading and Visualizing Data ...\n')
-## =========== Part 1: Loading and Visualizing Data =============
-# Load Training Data
-print('Loading and Visualizing Data ...\n')
-
-data = scipy.io.loadmat('mat/ex3data1.mat', matlab_compatible=True)  # training data stored in arrays X, y
-X = data['X']
-y = data['y']
-m = X.shape[0]
+    data = scipy.io.loadmat('mat/ex3data1.mat', matlab_compatible=True)  # training data stored in arrays X, y
+    X = data['X']
+    y = data['y']
+    m = X.shape[0]
-# Randomly select 100 data points to display
-sel = np.random.permutation(m)
-sel = X[sel[:100], :]
+    # Randomly select 100 data points to display
+    sel = np.random.permutation(m)
+    sel = X[sel[:100], :]
-displaydata(sel)
-plt.show()
+    displaydata(sel)
+    plt.show()
-print('Program paused. Press enter to continue.\n')
-input()
+    print('Program paused. Press enter to continue.\n')
+    input()
-## ================ Part 2: Loading Pameters ================
-print('\nLoading Saved Neural Network Parameters ...\n')
+    ## ================ Part 2: Loading Parameters ================
+    print('\nLoading Saved Neural Network Parameters ...\n')
-# Load the weights into variables Theta1 and Theta2
-data = scipy.io.loadmat('mat/ex3weights.mat', matlab_compatible=True)
-Theta1 = data['Theta1']
-Theta2 = data['Theta2']
+    # Load the weights into variables Theta1 and Theta2
+    data = scipy.io.loadmat('mat/ex3weights.mat', matlab_compatible=True)
+    Theta1 = data['Theta1']
+    Theta2 = data['Theta2']
-## ================= Part 3: Implement Predict =================
-pred = predict(Theta1, Theta2, X)
+    ## ================= Part 3: Implement Predict =================
+    pred = predict(Theta1, Theta2, X)
-print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))
+    print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))
-print('Program paused. Press enter to continue.\n')
-input()
+    print('Program paused. Press enter to continue.\n')
+    input()
-# To give you an idea of the network's output, you can also run
-# through the examples one at the a time to see what it is predicting.
+    # To give you an idea of the network's output, you can also run
+    # through the examples one at a time to see what it is predicting.
-# Randomly permute examples
-rp = np.random.permutation(m)
+    # Randomly permute examples
+    rp = np.random.permutation(m)
-for i in range(0, m):
-    ind = rp[i]  # Chosen index in X
+    for i in range(0, m):
+        ind = rp[i]  # Chosen index in X
-    # Display
-    print('\nDisplaying Example Image\n')
-    displaydata(X[ind:ind+1, :])
-    plt.show(block=False)
+        # Display
+        print('\nDisplaying Example Image\n')
+        displaydata(X[ind:ind+1, :])
+        plt.pause(0.0001)
+        plt.show(block=False)
-    pred = predict(Theta1, Theta2, X[ind:ind+1,:])
-    print('\nNeural Network Prediction: {0:d} (digit {1:d})\n'.format(pred[0], np.mod(pred, 10)[0]))
+        pred = predict(Theta1, Theta2, X[ind:ind+1,:])
+        print('\nNeural Network Prediction: {0:d} (digit {1:d})\n'.format(pred[0], np.mod(pred, 10)[0]))
-    # Pause
-    print('Program paused. Press enter to continue.\n')
-    input()
+        # Pause
+        print('Program paused. Press enter to continue.\n')
+        input()
diff --git a/ex4/ex4.py b/ex4/ex4.py
new file mode 100644
index 0000000..be46edd
--- /dev/null
+++ b/ex4/ex4.py
@@ -0,0 +1,309 @@
+## Machine Learning Online Class - Exercise 4 Neural Network Learning
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy, scipy.io, scipy.optimize
+from ex3.ex3_ import displaydata, sigmoid
+from ex3.ex3_nn import predict
+
+
+# SIGMOIDGRADIENT returns the gradient of the sigmoid function
+# evaluated at z
+# g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
+# evaluated at z. This should work regardless of whether z is a matrix or a
+# vector. In particular, if z is a vector or matrix, you should return
+# the gradient for each element.
+def sigmoid_gradient(z):
+    gz = sigmoid(z)
+    g = gz * (1 - gz)
+    return g
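+
+# Note (illustrative): since g'(z) = g(z) * (1 - g(z)), the gradient is largest
+# at z = 0, where sigmoid_gradient(np.array([0.0])) evaluates to 0.25, and it
+# approaches 0 for large |z|.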
+
+
+# NNCOSTFUNCTION Implements the neural network cost function for a two layer
+# neural network which performs classification
+# [J grad] = NNCOSTFUNCTION(nn_params, hidden_layer_size, num_labels, ...
+# X, y, lambda) computes the cost and gradient of the neural network. The
+# parameters for the neural network are "unrolled" into the vector
+# nn_params and need to be converted back into the weight matrices.
+#
+# The returned parameter grad should be an "unrolled" vector of the
+# partial derivatives of the neural network.
+def nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld):
+    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
+    # for our 2 layer neural network
+    Theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)],
+                        (hidden_layer_size, input_layer_size + 1))
+    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], (num_labels, hidden_layer_size + 1))
+
+    # Setup some useful variables
+    m = X.shape[0]
+
+    # Forward propagation
+    X = np.hstack((np.ones((m, 1)), X))
+    z2 = X.dot(Theta1.T)
+    a2 = sigmoid(z2)  # Hidden layer
+    a2 = np.hstack((np.ones((m, 1)), a2))
+    out = sigmoid(a2.dot(Theta2.T))  # Output layer
+
+    # Get rid of parameters for constants
+    Theta1 = Theta1[:, 1:]
+    Theta2 = Theta2[:, 1:]
+    y = (y.reshape(-1, 1) == np.array(range(1, num_labels + 1))).astype(int)  # Map numerical y value to 0 and 1s
+
+    # Cost function
+    J = 1 / m * np.sum(- y * np.log(out) - (1 - y) * np.log(1 - out)) + ld / 2 / m * (
+        np.sum(Theta1 ** 2) + np.sum(Theta2 ** 2))
+
+    # Back propagation
+    delta3 = out - y
+    delta2 = delta3.dot(Theta2) * sigmoid_gradient(z2)
+    # Gradients
+    Theta2_grad = (delta3.T.dot(a2) + ld * np.hstack((np.zeros((num_labels, 1)), Theta2))) / m
+    Theta1_grad = (delta2.T.dot(X) + ld * np.hstack((np.zeros((hidden_layer_size, 1)), Theta1))) / m
+
+    return J, np.hstack((Theta1_grad.flatten(), Theta2_grad.flatten()))
+
+
+# RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
+# incoming connections and L_out outgoing connections
+# W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
+# of a layer with L_in incoming connections and L_out outgoing
+# connections.
+#
+# Note that W should be set to a matrix of size(L_out, 1 + L_in) as
+# the first column of W handles the "bias" terms
+def rand_initialize_weights(L_in, L_out):
+    # Randomly initialize the weights to small values
+    epsilon_init = 0.12
+    W = np.random.random_sample((L_out, 1 + L_in)) * 2 * epsilon_init - epsilon_init
+    return W
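+
+# Note (illustrative): one common heuristic is epsilon_init = sqrt(6) / sqrt(L_in + L_out);
+# for the 400-unit input layer and 25-unit hidden layer used below this comes out
+# to roughly 0.12, the value hard-coded above.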
+
+
+# DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
+# incoming connections and fan_out outgoing connections using a fixed
+# strategy; this will help you later in debugging
+# W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
+# of a layer with fan_in incoming connections and fan_out outgoing
+# connections using a fixed set of values
+#
+# Note that W should be set to a matrix of size(fan_out, 1 + fan_in) as
+# the first column of W handles the "bias" terms
+def debug_initialize_weights(fan_out, fan_in):
+    # Set W to zeros
+    W = np.zeros((fan_out, 1 + fan_in))
+
+    # Initialize W using "sin", this ensures that W is always of the same
+    # values and will be useful for debugging
+    W = np.reshape(np.sin(np.array(range(1, W.size + 1))), W.shape) / 10
+    return W
+
+
+# COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
+# and gives us a numerical estimate of the gradient.
+# numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
+# gradient of the function J around theta. Calling y = J(theta) should
+# return the function value at theta.
+def compute_numerical_gradient(J, theta):
+    numgrad = np.zeros(theta.shape)
+    perturb = np.zeros(theta.shape)
+    e = 1e-4
+    for p in range(0, theta.size):
+        # Set perturbation vector
+        perturb[p] = e
+        loss1 = J(theta - perturb)[0]
+        loss2 = J(theta + perturb)[0]
+        # Compute Numerical Gradient
+        numgrad[p] = (loss2 - loss1) / (2 * e)
+        perturb[p] = 0
+    return numgrad
+
+
+# CHECKNNGRADIENTS Creates a small neural network to check the
+# backpropagation gradients
+# CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
+# backpropagation gradients; it will output the analytical gradients
+# produced by your backprop code and the numerical gradients (computed
+# using computeNumericalGradient). These two gradient computations should
+# result in very similar values.
+def check_nn_gradients(ld=None):
+    if ld is None:
+        ld = 0
+
+    input_layer_size = 3
+    hidden_layer_size = 5
+    num_labels = 3
+    m = 5
+
+    # We generate some 'random' test data
+    Theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
+    Theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
+    # Reusing debugInitializeWeights to generate X
+    X = debug_initialize_weights(m, input_layer_size - 1)
+    y = 1 + np.mod(np.array(range(1, m + 1)), num_labels).T
+
+    # Unroll parameters
+    nn_params = np.hstack((Theta1.flatten(), Theta2.flatten()))
+
+    # Short hand for cost function
+    costfunc = lambda p: nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, ld)
+
+    cost, grad = costfunc(nn_params)
+    numgrad = compute_numerical_gradient(costfunc, nn_params)
+
+    # Visually examine the two gradient computations. The two columns
+    # you get should be very similar.
+    print(np.hstack((numgrad.reshape(-1, 1), grad.reshape(-1, 1))))
+    print(
+        'The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')
+
+    # Evaluate the norm of the difference between two solutions.
+    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
+    # in compute_numerical_gradient, then diff below should be less than 1e-9
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
+
+    print(
+        'If your backpropagation implementation is correct, then \nthe relative difference will be small (less than 1e-9). \n\nRelative Difference: {0:g}\n'.format(
+            diff))
+
+
+np.set_printoptions(formatter={'float': '{: 0.5f}'.format}, edgeitems=50, linewidth=150)
+## Setup the parameters you will use for this exercise
+input_layer_size = 400  # 20x20 Input Images of Digits
+hidden_layer_size = 25  # 25 hidden units
+num_labels = 10  # 10 labels, from 1 to 10
+
+## =========== Part 1: Loading and Visualizing Data =============
+# Load Training Data
+print('Loading and Visualizing Data ...\n')
+
+data = scipy.io.loadmat('mat/ex4data1.mat', matlab_compatible=True)
+X = data['X']
+y = data['y']
+m = X.shape[0]
+
+# Randomly select 100 data points to display
+sel = np.random.permutation(m)
+sel = sel[:100]
+
+displaydata(X[sel, :])
+plt.show()
+
+print('Program paused. Press enter to continue.\n')
+input()
+
+## ================ Part 2: Loading Parameters ================
+print('\nLoading Saved Neural Network Parameters ...\n')
+
+# Load the weights into variables Theta1 and Theta2
+data = scipy.io.loadmat('mat/ex4weights.mat', matlab_compatible=True)
+Theta1 = data['Theta1']
+Theta2 = data['Theta2']
+
+# Unroll parameters
+nn_params = np.concatenate((Theta1.flatten(), Theta2.flatten()))
+
+## ================ Part 3: Compute Cost (Feedforward) ================
+print('\nFeedforward Using Neural Network ...\n')
+
+# Weight regularization parameter (we set this to 0 here).
+ld = 0
+
+J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
+
+print('Cost at parameters (loaded from ex4weights): {0:f} \n(this value should be about 0.287629)\n'.format(J))
+
+print('\nProgram paused. Press enter to continue.\n')
+input()
+
+## =============== Part 4: Implement Regularization ===============
+print('\nChecking Cost Function (w/ Regularization) ... \n')
+
+# Weight regularization parameter (we set this to 1 here).
+ld = 1
+
+J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
+
+print('Cost at parameters (loaded from ex4weights): {0:f} \n(this value should be about 0.383770)\n'.format(J))
+
+print('Program paused. Press enter to continue.\n')
+input()
+
+## ================ Part 5: Sigmoid Gradient ================
+print('\nEvaluating sigmoid gradient...\n')
+
+g = sigmoid_gradient(np.array([1, -0.5, 0, 0.5, 1]))
+print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n ')
+print(np.array_str(g).replace('[', ' ').replace(']', ' '))
+print('\n\n')
+
+print('Program paused. Press enter to continue.\n')
+input()
+
+## ================ Part 6: Initializing Parameters ================
+print('\nInitializing Neural Network Parameters ...\n')
+
+initial_Theta1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
+initial_Theta2 = rand_initialize_weights(hidden_layer_size, num_labels)
+
+# Unroll parameters
+initial_nn_params = np.hstack((initial_Theta1.flatten(), initial_Theta2.flatten()))
+
+## =============== Part 7: Implement Backpropagation ===============
+print('\nChecking Backpropagation... \n')
+
+# Check gradients by running checkNNGradients
+check_nn_gradients()
+
+print('\nProgram paused. Press enter to continue.\n')
+input()
+
+## =============== Part 8: Implement Regularization ===============
+print('\nChecking Backpropagation (w/ Regularization) ... \n')
+
+# Check gradients by running checkNNGradients
+ld = 3
+check_nn_gradients(ld)
+
+# Also output the costFunction debugging values
+debug_J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, ld)[0]
+
+print('\n\nCost at (fixed) debugging parameters (w/ lambda = 3): {0:f} \n(this value should be about 0.576051)\n\n'.format(debug_J))
+
+print('Program paused. Press enter to continue.\n')
+input()
+
+## =================== Part 9: Training NN ===================
+print('\nTraining Neural Network... \n')
+
+# After you have completed the assignment, change the MaxIter to a larger
+# value to see how more training helps.
+options = {'maxiter': 50, 'disp': True}
+
+# You should also try different values of lambda
+ld = 1
+
+# Create "short hand" for the cost function to be minimized
+cost_function = lambda p: nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, ld)
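+
+# Note: jac=True below tells scipy.optimize.minimize that cost_function returns a
+# (cost, gradient) tuple (exactly what nn_cost_function returns), so no separate
+# gradient callable is needed.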
+
+# Now, costFunction is a function that takes in only one argument (the
+# neural network parameters)
+ret = scipy.optimize.minimize(cost_function, initial_nn_params, jac=True, options=options, method='CG')
+
+# Obtain Theta1 and Theta2 back from the optimized parameters
+nn_params = ret.x
+Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, input_layer_size + 1))
+Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], (num_labels, hidden_layer_size + 1))
+
+print('Program paused. Press enter to continue.\n')
+# input()
+
+## ================= Part 10: Visualize Weights =================
+print('\nVisualizing Neural Network... \n')
+
+displaydata(Theta1[:, 1:])
+plt.show()
+
+print('\nProgram paused. Press enter to continue.\n')
+input()
+
+## ================= Part 11: Implement Predict =================
+pred = predict(Theta1, Theta2, X)
+
+print('\nTraining Set Accuracy: {0:f}\n'.format(np.mean(pred == y.flatten()) * 100))
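+
+# Optional sketch: the learned weights could be saved for later reuse without
+# retraining, e.g. (the filename below is only a suggestion):
+# scipy.io.savemat('mat/ex4weights_trained.mat', {'Theta1': Theta1, 'Theta2': Theta2})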