### Completed the first two parts of ex2.

master · wchen342 · 6 years ago
commit 2870f35511
2 changed files with 175 additions and 0 deletions
1. ex2/ex2.py (+173)
2. ex2/ex2_reg.py (+2)

#### ex2/ex2.py (+173)

`@@ -0,0 +1,173 @@`

```python
## Machine Learning Online Class - Exercise 2: Logistic Regression

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize


# PLOTDATA Plots the data points X and y into a new figure
#   PLOTDATA(x,y) plots the data points with + for the positive examples
#   and o for the negative examples. X is assumed to be a Mx2 matrix.
def plotdata(X, y):
    plt.figure()
    pos = (y == 1).flatten()
    neg = (y == 0).flatten()
    plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=2, markersize=7)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', markerfacecolor='y', markersize=7)


# SIGMOID Compute sigmoid function
#   G = SIGMOID(z) computes the sigmoid of z.
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g


# COSTFUNCTION Compute cost and gradient for logistic regression
#   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
#   parameter for logistic regression and the gradient of the cost
#   w.r.t. to the parameters.
#   TODO The logistic function used here seems to be unstable
def cost_function(theta, X, y):
    m = y.size
    # minimize() passes theta as a flat vector; restore the column shape
    theta = theta.reshape((X.shape[1], y.shape[1]))
    h = sigmoid(X.dot(theta))
    J = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m
    grad = (h - y).T.dot(X) / m
    return J, grad.flatten()


# MAPFEATURE Feature mapping function to polynomial features
#
#   MAPFEATURE(X1, X2) maps the two input features
#   to quadratic features used in the regularization exercise.
#
#   Returns a new feature array with more features, comprising of
#   X1, X2, X1**2, X2**2, X1*X2, X1*X2**2, etc.
#
#   Inputs X1, X2 must be the same size
def map_feature(X1, X2):
    degree = 6
    X1 = np.atleast_1d(X1).flatten()
    X2 = np.atleast_1d(X2).flatten()
    out = np.ones((X1.size, 1))
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            out = np.column_stack((out, (X1 ** (i - j)) * (X2 ** j)))
    return out


# PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with
#   the decision boundary defined by theta
#   PLOTDECISIONBOUNDARY(theta, X, y) plots the data points with + for the
#   positive examples and o for the negative examples. X is assumed to be
#   either
#   1) Mx3 matrix, where the first column is an all-ones column for the
#      intercept.
#   2) MxN, N>3 matrix, where the first column is all-ones
def plot_decision_boundary(theta, X, y):
    # Plot Data (skip the intercept column)
    plotdata(X[:, 1:3], y)
    if theta.ndim == 2 and (theta.shape[0] == 1 or theta.shape[1] == 1):
        theta = theta.flatten()

    if X.shape[1] <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])

        # Calculate the decision boundary line
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])

        # Plot, and adjust axes for better viewing
        plt.plot(plot_x, plot_y)

        # Legend, specific for the exercise
        plt.legend(['Admitted', 'Not admitted', 'Decision Boundary'])
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)

        z = np.zeros((u.size, v.size))
        # Evaluate z = theta*x over the grid
        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = map_feature(u[i], v[j]).dot(theta)

        z = z.T  # important to transpose z before calling contour

        # Plot z = 0
        # Notice you need to specify the single level [0]
        plt.contour(u, v, z, levels=[0], linewidths=2)


## Load Data
#  The first two columns contain the exam scores and the third column
#  contains the label.
data = np.loadtxt('textdata/ex2data1.txt', delimiter=',')
X = data[:, :2]
y = data[:, 2:3]

## ==================== Part 1: Plotting ====================
print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')

plotdata(X, y)

# Put some labels
# Labels and Legend
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')

# Specified in plot order
plt.legend(['Admitted', 'Not admitted'])
# plt.show()

print('\nProgram paused. Press enter to continue.\n')
input()

## ============ Part 2: Compute Cost and Gradient ============
#  Setup the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to x and X_test
X = np.concatenate((np.ones((m, 1)), X), axis=1)

# Initialize fitting parameters
initial_theta = np.zeros((n + 1, 1))

# Compute and display initial cost and gradient
cost, grad = cost_function(initial_theta, X, y)

print('Cost at initial theta (zeros): {0:f}'.format(cost))
print('Gradient at initial theta (zeros): ')
print(np.array_str(grad.reshape(-1, 1)).replace('[', ' ').replace(']', ' '))

print('\nProgram paused. Press enter to continue.\n')
input()

## ============= Part 3: Optimizing using minimize =============

# Run scipy.optimize.minimize to obtain the optimal theta
# This function will return theta and the cost
res = scipy.optimize.minimize(cost_function, initial_theta, args=(X, y),
                              method='L-BFGS-B', jac=True,
                              options={'maxiter': 400})
theta = res['x']

# Print theta to screen
print('Cost at theta found by minimize: {0:f}'.format(res['fun']))
print('theta: ')
print(np.array_str(theta).replace('[', ' ').replace(']', ' '))

# Plot Boundary
plot_decision_boundary(theta, X, y)

# Put some labels
# Labels and Legend
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')

# Specified in plot order
plt.legend(['Admitted', 'Not admitted'])

print('\nProgram paused. Press enter to continue.')
input()
```
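The TODO in `cost_function` is real: `np.log(1 - h)` hits `log(0)` once the sigmoid saturates at large `|z|`. A minimal sketch of a stabler variant, assuming `scipy.special` is an acceptable dependency here (`stable_cost` is a hypothetical name, not part of this commit):

```python
import numpy as np
from scipy.special import expit  # scipy's numerically safe sigmoid

# Hypothetical stable replacement for cost_function above.
def stable_cost(theta, X, y):
    m = y.size
    z = X.dot(theta.reshape(-1, 1))
    # log(1 + exp(z)) - y*z equals -y*log(h) - (1-y)*log(1-h) for
    # h = sigmoid(z), but never takes log of an exact 0 when the
    # sigmoid saturates; logaddexp(0, z) avoids overflow in exp(z).
    J = np.sum(np.logaddexp(0, z) - y * z) / m
    grad = X.T.dot(expit(z) - y) / m
    return J, grad.flatten()
```

This is a drop-in shape-compatible sketch: it can be passed to `scipy.optimize.minimize` with `jac=True` exactly like `cost_function`.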

#### ex2/ex2_reg.py (+2)

`@@ -0,0 +1,2 @@`

```python
import numpy as np
import matplotlib.pyplot as plt
```
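`ex2_reg.py` is only imports at this commit. For orientation, the regularized exercise (which `map_feature` above feeds) uses the same cross-entropy plus an L2 penalty that skips the intercept term; a sketch under that standard formulation (`cost_function_reg` and `lam` are hypothetical names, not from this commit):

```python
import numpy as np

# Sketch of the regularized cost/gradient the exercise calls for:
# J = cross-entropy + lam/(2m) * sum(theta[1:]**2), intercept unpenalized.
def cost_function_reg(theta, X, y, lam):
    m = y.size
    theta = theta.reshape(-1, 1)
    h = 1 / (1 + np.exp(-X.dot(theta)))
    J = (np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m
         + lam / (2 * m) * np.sum(theta[1:] ** 2))
    grad = X.T.dot(h - y) / m
    grad[1:] += lam / m * theta[1:]  # penalize all but theta[0]
    return J, grad.flatten()
```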