
completed ex1

master
wchen342 6 years ago
parent commit 31cb7c7848
  1. 112  ex1/ex1.py
  2. 119  ex1/ex1_multi.py

112  ex1/ex1.py

@@ -1,19 +1,125 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection (needed on older matplotlib)

# PLOTDATA(x,y) plots the data points and gives the figure axes labels of
# population and profit.
def plotdata(x, y):
    plt.plot(x, y, 'rx', markersize=10)
    plt.pause(0.0001)
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')

# COMPUTECOST Compute cost for linear regression
# J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
# parameter for linear regression to fit the data points in X and y
def compute_cost(X, y, theta):
    m = y.shape[0]
    return 0.5 / m * np.sum((np.dot(X, theta) - y) ** 2)

# GRADIENTDESCENT Performs gradient descent to learn theta
# theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by
# taking num_iters gradient steps with learning rate alpha
def gradient_descent(X, y, theta, alpha, num_iters):
    # Initialize some useful values
    m = y.shape[0]  # number of training examples
    J_history = np.zeros((num_iters, 1))
    for iter in range(0, num_iters):
        theta = theta - alpha / m * (np.dot(X.T, (np.dot(X, theta) - y)))
        # Save the cost J in every iteration
        J_history[iter, 0] = compute_cost(X, y, theta)
    return theta, J_history
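
# (Note) In vectorized form, the two helpers above compute
#     J(theta)  = 1 / (2 * m) * sum((X @ theta - y) ** 2)
#     theta    := theta - (alpha / m) * X.T @ (X @ theta - y)
# i.e. batch gradient descent on the least-squares cost J.
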
## ======================= Part 2: Plotting =======================
print('Plotting Data ...\n')
data = np.loadtxt('textdata/ex1data1.txt', delimiter=',')
X = data[:, :1]
y = data[:, 1:2]
m = y.shape[0]  # number of training examples
# Plot Data
# Note: the plotting code lives in the plotdata function defined above
plt.close()
plotdata(X, y)
plt.show(block=False)
print('Program paused. Press enter to continue.\n')
input()

## =================== Part 3: Gradient Descent ===================
print('Running Gradient Descent ...\n')
X = np.concatenate((np.ones((m, 1)), X), axis=1) # Add a column of ones to x
theta = np.zeros((2, 1)) # initialize fitting parameters
# Some gradient descent settings
iterations = 1500
alpha = 0.01
# compute and display initial cost
print(compute_cost(X, y, theta))
# run gradient descent
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)
# print theta to screen
print('Theta found by gradient descent: ')
print('{0:f} {1:f} \n'.format(theta[0, 0], theta[1, 0]))
# Plot the linear fit
# plt.close()
plt.plot(X[:, 1], np.dot(X, theta), '-')
plt.legend(['Training data', 'Linear regression'])
plt.pause(0.0001)
plt.show()
# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot(np.array([1, 3.5]), theta)
print('For population = 35,000, we predict a profit of {0:f}\n'.format(predict1[0] * 10000))
predict2 = np.dot(np.array([1, 7]), theta)
print('For population = 70,000, we predict a profit of {0:f}\n'.format(predict2[0] * 10000))
print('Program paused. Press enter to continue.\n')
input()
## ============= Part 4: Visualizing J(theta_0, theta_1) =============
print('Visualizing J(theta_0, theta_1) ...\n')
# Grid over which we will calculate J
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
# initialize J_vals to a matrix of 0's
J_vals = np.zeros((theta0_vals.shape[0], theta1_vals.shape[0]))
# Fill out J_vals
for i in range(0, theta0_vals.shape[0]):
    for j in range(0, theta1_vals.shape[0]):
        t = np.array([[theta0_vals[i]], [theta1_vals[j]]])
        J_vals[i, j] = compute_cost(X, y, t)
# Because of the way meshgrids work in the surf command, we need to
# transpose J_vals before calling surf, or else the axes will be flipped
J_vals = J_vals.T
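# (Why) np.meshgrid(theta0_vals, theta1_vals) below yields grids whose rows index
# theta1 and whose columns index theta0, whereas J_vals was filled as
# J_vals[i, j] = J(theta0_vals[i], theta1_vals[j]); transposing lines the two up.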
plt.close()
# Surface plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
ax.plot_surface(theta0_vals, theta1_vals, J_vals)
plt.xlabel('theta_0')
plt.ylabel('theta_1')
# Contour plot
plt.figure()
# Plot J_vals as 20 contours spaced logarithmically between 0.01 and 1000
plt.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 20))
plt.xlabel('theta_0')
plt.ylabel('theta_1')
plt.plot(theta[0], theta[1], 'rx', markersize=10, linewidth=2)
plt.show()

119  ex1/ex1_multi.py

@@ -0,0 +1,119 @@
import numpy as np
import matplotlib.pyplot as plt
# FEATURENORMALIZE Normalizes the features in X
# FEATURENORMALIZE(X) returns a normalized version of X where
# the mean value of each feature is 0 and the standard deviation
# is 1. This is often a good preprocessing step to do when
# working with learning algorithms.
def feature_normalize(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma
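
# (Worked example, as a sketch) feature_normalize(np.array([[1., 2.], [3., 4.]]))
# returns X_norm = [[-1., -1.], [1., 1.]], mu = [2., 3.], sigma = [1., 1.]
# (note that np.std defaults to the population standard deviation, ddof=0).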

def compute_cost_multi(X, y, theta):
    m = y.shape[0]  # number of training examples
    return 0.5 / m * np.sum((np.dot(X, theta) - y) ** 2)

def gradient_descent_multi(X, y, theta, alpha, num_iters):
    m = y.shape[0]  # number of training examples
    J_history = np.zeros((num_iters, 1))
    for iter in range(0, num_iters):
        theta = theta - alpha / m * np.dot(X.T, (np.dot(X, theta) - y))
        J_history[iter] = compute_cost_multi(X, y, theta)
    return theta, J_history

def normaleqn(X, y):
    theta = np.dot(np.dot(np.linalg.pinv(np.dot(X.T, X)), X.T), y)
    return theta
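
# (Note) normaleqn implements the closed-form normal equation
#     theta = pinv(X' * X) * X' * y
# the pseudoinverse keeps the solve well-defined even if X' * X is singular
# (for example, with linearly dependent features).
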
## ================ Part 1: Feature Normalization ================
print('Loading data ...\n')
## Load Data
data = np.loadtxt('textdata/ex1data2.txt', delimiter=',')
X = data[:, :2]
y = data[:, 2:3]
m = y.shape[0]
# Print out some data points
print('First 10 examples from the dataset: \n')
for tp in np.concatenate((X[0:10, :], y[0:10, :]), axis=1):
    print(' x = [{0:.0f} {1:.0f}], y = {2:.0f}'.format(tp[0], tp[1], tp[2]))
print('Program paused. Press enter to continue.\n')
input()
# Scale features and set them to zero mean
print('Normalizing Features ...\n')
X, mu, sigma = feature_normalize(X)
# Add intercept term to X
X = np.concatenate((np.ones((m, 1)), X), axis=1)
## ================ Part 2: Gradient Descent ================
print('Running gradient descent ...\n')
# Choose some alpha value
alpha = 0.1
num_iters = 100
# Init Theta and Run Gradient Descent
theta = np.zeros((X.shape[1], 1))
theta, J_history = gradient_descent_multi(X, y, theta, alpha, num_iters)
# Plot the convergence graph
plt.close()
plt.figure()
plt.plot(list(range(1, J_history.size + 1)), J_history, '-b', linewidth=2)
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()
# Display gradient descent's result
print('Theta computed from gradient descent: ')
print(np.array_str(theta).replace('[', ' ').replace(']', ' '))
print('\n')
# Estimate the price of a 1650 sq-ft, 3 br house
# Recall that the first column of X is all-ones. Thus, it does
# not need to be normalized.
price = np.dot(np.concatenate(([1], (np.array([1650, 3]) - mu) / sigma)), theta)[0]
print('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n ${0:f}\n'.format(price))
print('Program paused. Press enter to continue.\n')
input()
## ================ Part 3: Normal Equations ================
print('Solving with normal equations...\n')
## Load Data
data = np.loadtxt('textdata/ex1data2.txt', delimiter=',')
X = data[:, :2]
y = data[:, 2:3]
m = y.shape[0]
# Add intercept term to X
X = np.concatenate((np.ones((m, 1)), X), axis=1)
# Calculate the parameters from the normal equation
theta = normaleqn(X, y)
# Display normal equation's result
print('Theta computed from the normal equations: ')
print(np.array_str(theta).replace('[', ' ').replace(']', ' '))
print('\n')
# Estimate the price of a 1650 sq-ft, 3 br house
price = np.dot(np.array([1, 1650, 3]), theta)[0]
# ============================================================
print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations):\n ${0:f}\n'.format(price))
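
A minimal cross-check sketch, assuming textdata/ex1data2.txt is available next to the script: the normal-equation fit on the raw features and a gradient-descent fit on the normalized features should predict nearly the same price for a 1650 sq-ft, 3 br house.

import numpy as np

data = np.loadtxt('textdata/ex1data2.txt', delimiter=',')
X_raw, y = data[:, :2], data[:, 2:3]
m = y.shape[0]

# Closed-form fit on the raw features (intercept column prepended)
X_ne = np.concatenate((np.ones((m, 1)), X_raw), axis=1)
theta_ne = np.dot(np.dot(np.linalg.pinv(np.dot(X_ne.T, X_ne)), X_ne.T), y)
price_ne = np.dot(np.array([1, 1650, 3]), theta_ne)[0]

# Gradient-descent fit on normalized features (same update rule as the script,
# run for 400 iterations instead of 100 so the estimate settles)
mu, sigma = X_raw.mean(axis=0), X_raw.std(axis=0)
X_gd = np.concatenate((np.ones((m, 1)), (X_raw - mu) / sigma), axis=1)
theta_gd = np.zeros((3, 1))
for _ in range(400):
    theta_gd = theta_gd - 0.1 / m * np.dot(X_gd.T, np.dot(X_gd, theta_gd) - y)
price_gd = np.dot(np.concatenate(([1], (np.array([1650, 3]) - mu) / sigma)), theta_gd)[0]

print('normal equation: {0:.2f}   gradient descent: {1:.2f}'.format(price_ne, price_gd))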