### Finish debugging ex5. Add numpy version of fmincg.

Branch: `master` · wchen342 committed 6 years ago · commit `46de43741a`
2 changed files with 241 additions and 8 deletions
1. ex5/ex5.py (58 lines changed)
2. extra/fmincg.py (191 lines changed)

#### ex5/ex5.py (58 lines changed)

```diff
@@ -2,6 +2,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import scipy, scipy.io, scipy.optimize
+from extra.fmincg import fmincg
 
 
 # LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear
@@ -18,7 +19,7 @@ def linear_reg_cost_function(X, y, theta, ld):
     theta = theta.reshape((d1, d2))
 
     J = 0.5 / m * (np.sum((X.dot(theta) - y) ** 2) + ld * np.sum(theta[1:, :] ** 2))
-    grad = 1 / m * X.T.dot((X.dot(theta) - y)) + ld / m * np.vstack((np.zeros((1, theta.shape[1])), theta[1:, :]))
+    grad = 1 / m * X.T.dot(X.dot(theta) - y) + ld / m * np.vstack((np.zeros((1, theta.shape[1])), theta[1:, :]))
     return J, grad.flatten()
 
 
@@ -35,12 +36,16 @@ def train_linear_reg(X, y, ld):
     cost_function = lambda t: linear_reg_cost_function(X, y, t, ld)
 
     # Now, costFunction is a function that takes in only one argument
-    options = {'maxiter': 2000, 'norm': 3, 'disp': True}
+    options = {'maxiter': 200, 'disp': True, 'gtol': 0.05}
 
     # Minimize using minimize
-    # Because the implementation of Scipy, the result looks somehow different
-    theta = scipy.optimize.minimize(cost_function, initial_theta, options=options, jac=True, method='BFGS')['x']
-    return theta
+    # Because of the implementation of Scipy's minimize function, the result looks a little different
+    ret = scipy.optimize.minimize(cost_function, initial_theta, options=options, jac=True, method='BFGS')
+    return ret['x']
+
+    # Used this minimization method provided in MATLAB to verify my implementation
+    # theta = fmincg(cost_function, initial_theta.flatten(), {'MaxIter': 200}, ())
+    # return theta
 
 
 # LEARNINGCURVE Generates the train and cross validation set errors needed
@@ -116,7 +121,30 @@ def plotfit(min_x, max_x, mu, sigma, theta, p):
     plt.plot(x, X_poly.dot(theta), '--', linewidth=2)
 
 
-np.set_printoptions(formatter={'float': '{: 0.5f}'.format}, edgeitems=50, linewidth=150)
+# VALIDATIONCURVE Generate the train and validation errors needed to
+# plot a validation curve that we can use to select lambda
+#   [lambda_vec, error_train, error_val] = ...
+#       VALIDATIONCURVE(X, y, Xval, yval) returns the train
+#       and validation errors (in error_train, error_val)
+#       for different values of lambda. You are given the training set (X,
+#       y) and validation set (Xval, yval).
+def validation_curve(X, y, Xval, yval):
+    # Selected values of lambda (you should not change this)
+    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]).T
+
+    # Initialize return values
+    error_train = np.zeros((lambda_vec.size, 1))
+    error_val = np.zeros((lambda_vec.size, 1))
+
+    for i in range(0, lambda_vec.size):
+        ld = lambda_vec[i]
+        theta = train_linear_reg(X, y, ld).reshape(X.shape[1], y.shape[1])
+        error_train[i] = 0.5 / X.shape[0] * np.sum((X.dot(theta) - y) ** 2)
+        error_val[i] = 0.5 / yval.shape[0] * np.sum((Xval.dot(theta) - yval) ** 2)
+    return lambda_vec, error_train, error_val
+
+
+np.set_printoptions(formatter={'float': '{: 0.64f}'.format, 'longfloat': '{: 0.64f}'.format}, edgeitems=50, linewidth=150)
 ## =========== Part 1: Loading and Visualizing Data =============
 # Load Training Data
 print('Loading and Visualizing Data ...\n')
@@ -201,7 +229,6 @@ p = 8
 
 # Map X onto Polynomial Features and Normalize
 X_poly = poly_features(X, p)
-X_poly2 = np.copy(X_poly)
 X_poly, mu, sigma = feature_normalize(X_poly)  # Normalize
 X_poly = np.hstack((np.ones((m, 1)), X_poly))  # Add Ones
 
@@ -213,7 +240,6 @@ X_poly_test = np.hstack((np.ones((X_poly_test.shape[0], 1)), X_poly_test))  # Ad
 
 # Map X_poly_val and normalize (using mu and sigma)
 X_poly_val = poly_features(Xval, p)
-X_poly_val2 = np.copy(X_poly_val)
 X_poly_val = X_poly_val - mu
 X_poly_val = X_poly_val / sigma
 X_poly_val = np.hstack((np.ones((X_poly_val.shape[0], 1)), X_poly_val))  # Add Ones
@@ -253,3 +279,19 @@ for i in range(0, m):
     print(' \t{0:d}\t\t{1:f}\t{2:f}\n'.format(i + 1, error_train[i, 0], error_val[i, 0]))
 
 print('Program paused. Press enter to continue.\n')
+input()
+
+## =========== Part 8: Validation for Selecting Lambda =============
+lambda_vec, error_train, error_val = validation_curve(X_poly, y, X_poly_val, yval)
+
+plt.plot(lambda_vec, error_train, lambda_vec, error_val)
+plt.legend(['Train', 'Cross Validation'])
+plt.xlabel('lambda')
+plt.ylabel('Error')
+plt.show()
+
+print('lambda\t\tTrain Error\tValidation Error\n')
+for i in range(0, lambda_vec.size):
+    print(' {0:f}\t{1:f}\t{2:f}\n'.format(lambda_vec[i], error_train[i, 0], error_val[i, 0]))
+
+print('Program paused. Press enter to continue.\n')
```
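For reference, the regularized cost and gradient that `linear_reg_cost_function` implements (the gradient edit above only drops a redundant pair of parentheses; the computed value is identical, and the zero row added by `np.vstack` is what excludes the bias term from regularization):

```math
J(\theta) = \frac{1}{2m}\left[\sum_{i=1}^{m}\left(\theta^{T}x^{(i)} - y^{(i)}\right)^{2} + \lambda\sum_{j=1}^{n}\theta_{j}^{2}\right]
```

```math
\frac{\partial J}{\partial\theta_{j}} = \frac{1}{m}\sum_{i=1}^{m}\left(\theta^{T}x^{(i)} - y^{(i)}\right)x_{j}^{(i)} + \frac{\lambda}{m}\theta_{j}\quad (j \geq 1;\ \text{no } \tfrac{\lambda}{m}\theta_{0} \text{ term for } j = 0)
```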
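A finite-difference check is a quick way to confirm the analytic gradient after edits like the one above. This is a minimal standalone sketch, not part of the commit: `linear_reg_cost` mirrors the function in the diff so the snippet runs on its own, and the helper name, shapes, and seed are all illustrative.

```python
import numpy as np

def linear_reg_cost(X, y, theta, ld):
    # Same math as linear_reg_cost_function in ex5.py, taking a flat theta.
    m = y.shape[0]
    theta = theta.reshape((X.shape[1], y.shape[1]))
    J = 0.5 / m * (np.sum((X.dot(theta) - y) ** 2) + ld * np.sum(theta[1:, :] ** 2))
    grad = 1 / m * X.T.dot(X.dot(theta) - y) + ld / m * np.vstack((np.zeros((1, theta.shape[1])), theta[1:, :]))
    return J, grad.flatten()

def numeric_grad(cost_fn, theta, eps=1e-4):
    # Central-difference approximation of the gradient at theta.
    g = np.zeros_like(theta)
    for j in range(theta.size):
        e = np.zeros_like(theta)
        e[j] = eps
        g[j] = (cost_fn(theta + e)[0] - cost_fn(theta - e)[0]) / (2 * eps)
    return g

rng = np.random.default_rng(0)                                 # seed chosen arbitrarily
X = np.hstack((np.ones((5, 1)), rng.standard_normal((5, 2))))  # bias column + 2 features
y = rng.standard_normal((5, 1))
theta0 = rng.standard_normal(3)

cost_fn = lambda t: linear_reg_cost(X, y, t, 1.0)
# Analytic and numeric gradients should agree to ~1e-8 or better.
print(np.max(np.abs(cost_fn(theta0)[1] - numeric_grad(cost_fn, theta0))))
```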

#### extra/fmincg.py (191 lines added, new file)

```python
import numpy as np
import math


# Minimize a continuous differentiable multivariate function. Starting point
# is given by "X" (D by 1), and the function named in the string "f", must
# return a function value and a vector of partial derivatives. The Polack-
# Ribiere flavour of conjugate gradients is used to compute search directions,
# and a line search using quadratic and cubic polynomial approximations and the
# Wolfe-Powell stopping criteria is used together with the slope ratio method
# for guessing initial step sizes. Additionally a bunch of checks are made to
# make sure that exploration is taking place and that extrapolation will not
# be unboundedly large. The "length" gives the length of the run: if it is
# positive, it gives the maximum number of line searches, if negative its
# absolute gives the maximum allowed number of function evaluations. You can
# (optionally) give "length" a second component, which will indicate the
# reduction in function value to be expected in the first line-search (defaults
# to 1.0). The function returns when either its length is up, or if no further
# progress can be made (ie, we are at a minimum, or so close that due to
# numerical problems, we cannot get any closer). If the function terminates
# within a few iterations, it could be an indication that the function value
# and derivatives are not consistent (ie, there may be a bug in the
# implementation of your "f" function). The function returns the found
# solution "X", a vector of function values "fX" indicating the progress made
# and "i" the number of iterations (line searches or function evaluations,
# depending on the sign of "length") used.
#
# Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
#
# See also: checkgrad
#
# Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
#
#
# (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
#
# Permission is granted for anyone to copy, use, or modify these
# programs and accompanying documents for purposes of research or
# education, provided this copyright notice is retained, and note is
# made of any changes that have been made.
#
# These programs and documents are distributed without any warranty,
# express or implied. As the programs were written for research
# purposes only, they have not been tested to the degree that would be
# advisable in any important application. All use of these programs is
# entirely at the user's own risk.
#
# [ml-class] Changes Made:
# 1) Function name and argument specifications
# 2) Output display
#
def fmincg(f, X, options, args):
    # Read options
    if options is not None and type(options) is dict and options.get('MaxIter') is not None:
        length = options.get('MaxIter')
    else:
        length = 100

    RHO = 0.01   # a bunch of constants for line searches
    SIG = 0.5    # RHO and SIG are the constants in the Wolfe-Powell conditions
    INT = 0.1    # don't reevaluate within 0.1 of the limit of the current bracket
    EXT = 3.0    # extrapolate maximum 3 times the current bracket
    MAX = 20     # max 20 function evaluations per line search
    RATIO = 100  # maximum allowed slope ratio

    red = 1
    S = 'Iteration '

    i = 0               # zero the run length counter
    ls_failed = 0       # no previous line search has failed
    fX = np.array([])
    f1, df1 = f(*((X,) + args))  # get function value and gradient
    i += length < 0     # count epochs?!
    s = -df1            # search direction is steepest
    d1 = -s.T.dot(s)    # this is the slope
    z1 = red / (1 - d1) # initial step is red/(|s|+1)

    while i < abs(length):  # while not finished
        i += length > 0     # count iterations?!

        X0 = X
        f0 = f1
        df0 = df1           # make a copy of current values
        X = X + z1 * s      # begin line search
        f2, df2 = f(*((X,) + args))
        i += length < 0     # count epochs?!
        d2 = df2.T.dot(s)
        f3 = f1
        d3 = d1
        z3 = -z1            # initialize point 3 equal to point 1
        if length > 0:
            M = MAX
        else:
            M = min(MAX, -length - i)

        success = 0
        limit = -1          # initialize quantities
        while True:
            while ((f2 > f1 + z1 * RHO * d1) or (d2 > -SIG * d1)) and (M > 0):
                limit = z1  # tighten the bracket
                if f2 > f1:
                    z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3)  # quadratic fit
                else:
                    A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)  # cubic fit
                    B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
                    z2 = (np.sqrt(B * B - A * d2 * z3 * z3) - B) / A  # numerical error possible - ok!

                if math.isnan(z2) or math.isinf(z2):
                    z2 = z3 / 2  # if we had a numerical problem then bisect

                z2 = np.amax(
                    np.array([np.amin(np.array([z2, INT * z3])), (1 - INT) * z3]))  # don't accept too close to limits
                z1 = z1 + z2  # update the step
                X = X + z2 * s
                f2, df2 = f(*((X,) + args))
                M = M - 1
                i = i + (length < 0)  # count epochs?!
                d2 = df2.T.dot(s)
                z3 = z3 - z2  # z3 is now relative to the location of z2

            if f2 > f1 + z1 * RHO * d1 or d2 > -SIG * d1:
                break  # this is a failure
            elif d2 > SIG * d1:
                success = 1
                break  # success
            elif M == 0:
                break  # failure

            A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)  # make cubic extrapolation
            B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
            z2 = -d2 * z3 * z3 / (B + np.sqrt(B * B - A * d2 * z3 * z3))  # num. error possible - ok!
            if not np.isreal(z2) or np.isnan(z2) or np.isinf(z2) or z2 < 0:  # num prob or wrong sign?
                if limit < -0.5:           # if we have no upper limit
                    z2 = z1 * (EXT - 1)    # then extrapolate the maximum amount
                else:
                    z2 = (limit - z1) / 2  # otherwise bisect
            elif (limit > -0.5) and (z2 + z1 > limit):     # extrapolation beyond max?
                z2 = (limit - z1) / 2      # bisect
            elif (limit < -0.5) and (z2 + z1 > z1 * EXT):  # extrapolation beyond limit
                z2 = z1 * (EXT - 1.0)      # set to extrapolation limit
            elif z2 < -z3 * INT:
                z2 = -z3 * INT
            elif (limit > -0.5) and (z2 < (limit - z1) * (1.0 - INT)):  # too close to limit?
                z2 = (limit - z1) * (1.0 - INT)

            f3 = f2
            d3 = d2
            z3 = -z2        # set point 3 equal to point 2
            z1 = z1 + z2
            X = X + z2 * s  # update current estimates
            f2, df2 = f(*((X,) + args))
            M = M - 1
            i = i + (length < 0)  # count epochs?!
            d2 = df2.T.dot(s)
        # end of line search

        if success:  # if line search succeeded
            f1 = f2
            fX = np.hstack((fX, f1))
            print('{0:s} {1:4d} | Cost: {2:4.6e}\r'.format(S, i, f1))
            s = (df2.dot(df2) - df1.dot(df2)) / (df1.dot(df1)) * s - df2  # Polack-Ribiere direction
            tmp = df1
            df1 = df2
            df2 = tmp       # swap derivatives
            d2 = df1.dot(s)
            if d2 > 0:      # new slope must be negative
                s = -df1    # otherwise use steepest direction
                d2 = -s.dot(s)

            z1 = z1 * min(RATIO, d1 / (d2 - np.finfo(np.double).tiny))  # slope ratio but max RATIO
            d1 = d2
            ls_failed = 0   # this line search did not fail
        else:
            X = X0
            f1 = f0
            df1 = df0       # restore point from before failed line search
            if ls_failed or i > np.abs(length):  # line search failed twice in a row
                break       # or we ran out of time, so we give up

            tmp = df1
            df1 = df2
            df2 = tmp       # swap derivatives
            s = -df1        # try steepest
            d1 = -s.dot(s)
            z1 = 1 / (1 - d1)
            ls_failed = 1   # this line search failed

    print('\n')
    return X
```
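A minimal smoke test for the port, not part of the commit: the quadratic objective and matrices below are made up for illustration. Any callable returning a `(cost, gradient)` pair for a flat array works, invoked through the same `(f, X, options, args)` signature used by the commented-out path in `train_linear_reg`.

```python
import numpy as np
from extra.fmincg import fmincg

# f(x) = 0.5 * x^T A x - b^T x has gradient A x - b and its minimum at A x = b.
A = np.array([[3.0, 1.0], [1.0, 2.0]])  # symmetric positive definite, chosen arbitrarily
b = np.array([1.0, -1.0])

def quad(x):
    return 0.5 * x.dot(A).dot(x) - b.dot(x), A.dot(x) - b

x_opt = fmincg(quad, np.zeros(2), {'MaxIter': 50}, ())
print(x_opt)                  # should be close to...
print(np.linalg.solve(A, b))  # ...the direct solution of A x = b
```

Running it should print two nearly identical vectors, and the per-iteration cost lines that `fmincg` emits make it easy to see the line search converging.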