Linear_regression与 Logistic_regression简单比较与python实现
Linear_regression与 Logistic_regression简单比较与python实现
好久没写博客了,在度厂实习期间更是天天累成了狗的节奏,最近有幸蹭到隔壁组老大小黑黑关于machine learning这块的培训(以下图片均摘自小黑黑的PPT),甚是感动,决定好好学习下这块的东西。
Linear_regression 和 Logistic_regression 其实是非常相似的两种算法。它们都属于监督学习,都可以用梯度下降等方法进行参数的迭代学习等等。
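它们最大的不同在于估价函数(假设函数)不同:Linear_regression 的估价函数为 $h_\theta(x) = \theta^T x$,而 Logistic_regression 在此基础上再套一层 sigmoid 函数,即 $h_\theta(x) = \dfrac{1}{1 + e^{-\theta^T x}}$。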
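此外,Linear_regression 的 cost function 为:$J(\theta) = \dfrac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$,其中 $m$ 为样本数,$h_\theta$ 为估价函数。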
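Logistic_regression 的 cost function 为:$J(\theta) = -\dfrac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right)\log\left(1 - h_\theta(x^{(i)})\right)\right]$,其中 $m$ 为样本数,$h_\theta$ 为估价函数。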
即我们的最终目标是要求出使得 J(theta)最小时theta的值。采取的方法均为类似梯度下降法的方法。
最后给出两种算法的python实现:
Linear_regression
import sys
# Upper bound on the number of theta coefficients (intercept included) that
# this script is sized for.
MAX_FEATURE_DIMENSION = 1024
# NOTE(review): the two limits below are not referenced anywhere in the
# visible code -- presumably reserved caps on sample count and iteration
# count; confirm before relying on them.
MAX_SAMPLE_NUMBER = 1024
MAX_ITERATE_NUMBER = 1024
##求导
def compute_gradient(x, y, theta, feature_number, feature_pos, sample_number):
    """Return the partial derivative of the squared-error cost w.r.t. one theta.

    x              -- list of sample rows; row[0..feature_number] are used
    y              -- list of target values, one per sample
    theta          -- coefficient list; only theta[0..feature_number] are read
    feature_number -- number of features excluding the leading bias column
    feature_pos    -- index of the coefficient being differentiated
    sample_number  -- how many leading rows of x / entries of y to use

    The result is mean((theta . x_i - y_i) * x_i[feature_pos]) over the
    first sample_number samples.
    """
    total = 0.0
    for i in range(sample_number):
        row = x[i]
        # Linear prediction for this sample.
        predicted = 0.0
        for j in range(feature_number + 1):
            predicted += row[j] * theta[j]
        total += (predicted - y[i]) * row[feature_pos]
    return total / sample_number
##估价函数
def compute_cost(x, y, theta, feature_number, sample_number):
    """Return the halved mean squared error of the linear model.

    x              -- list of sample rows; row[0..feature_number] are used
    y              -- list of target values, one per sample
    theta          -- coefficient list; only theta[0..feature_number] are read
    feature_number -- number of features excluding the leading bias column
    sample_number  -- how many leading rows of x / entries of y to use

    Computes sum((theta . x_i - y_i)^2) / (2 * sample_number).
    """
    total = 0.0
    for i in range(sample_number):
        row = x[i]
        predicted = 0.0
        for j in range(feature_number + 1):
            predicted += row[j] * theta[j]
        residual = predicted - y[i]
        total += residual * residual
    return total / (2 * sample_number)
##梯度下降法
def gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number):
    """Run batch gradient descent, updating theta in place.

    x, y           -- training samples and their targets
    theta          -- coefficient list, modified in place; only the first
                      feature_number + 1 entries are read and written
    feature_number -- number of features excluding the leading bias column
    sample_number  -- number of samples to use
    alpha          -- learning rate
    iterate_number -- number of full update passes

    Returns None.
    """
    dims = feature_number + 1
    for _ in range(iterate_number):
        # Compute every new coefficient from the *old* theta before writing
        # any of them back, so the update is simultaneous.
        updated = [
            theta[j] - alpha * compute_gradient(x, y, theta, feature_number, j, sample_number)
            for j in range(dims)
        ]
        theta[:dims] = updated
##测试
def predict(theta, x, feature_number):
    """Return the linear prediction theta . x for a single sample.

    theta          -- coefficient list; only theta[0..feature_number] are read
    x              -- one sample row whose entry 0 multiplies theta[0]
    feature_number -- number of features excluding the leading bias column
    """
    total = 0.0
    for i in range(feature_number + 1):
        total += theta[i] * x[i]
    return total
if __name__ == '__main__':
    # Training rows: column 0 is the constant 1 that multiplies theta[0];
    # the remaining four columns are the features.
    x = [
        [1, 96.79, 2, 1, 2],
        [1, 110.39, 3, 1, 0],
        [1, 70.25, 1, 0, 2],
        [1, 99.96, 2, 1, 1],
        [1, 118.15, 3, 1, 0],
        [1, 115.08, 3, 1, 2],
    ]
    y = [287, 343, 199, 298, 340, 350]
    sample_number = len(x)
    alpha = 0.0001
    iterate_number = 1500
    feature_number = 4

    # One coefficient per column of x, all starting at zero.
    theta = [0.0] * (feature_number + 1)
    gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number)

    # Parenthesised single-argument print works under both Python 2 and 3.
    print(compute_cost(x, y, theta, feature_number, sample_number))

    testx1 = [1, 112, 3, 1, 0]
    testx2 = [1, 110, 3, 1, 1]
    print(predict(theta, testx1, feature_number))
    print(predict(theta, testx2, feature_number))
Logistic_regression
import sys
import math
# Upper bound on the number of theta coefficients (intercept included) that
# this script is sized for.
MAX_FEATURE_DIMENSION = 1024
# NOTE(review): the two limits below are not referenced anywhere in the
# visible code -- presumably reserved caps on sample count and iteration
# count; confirm before relying on them.
MAX_SAMPLE_NUMBER = 1024
MAX_ITERATE_NUMBER = 1024
##估价函数
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z) for a real number z.

    The two branches are algebraically identical; splitting on the sign of
    z keeps the argument of math.exp non-positive, so very negative inputs
    underflow to 0.0 instead of raising OverflowError.
    """
    if z >= 0:
        return 1 / (1.0 + math.exp(-z))
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)
def hypothesis(x, theta, feature_number):
    """Return the logistic-regression prediction for a single sample.

    x              -- one sample row whose entry 0 multiplies theta[0]
    theta          -- coefficient list; only theta[0..feature_number] are read
    feature_number -- number of features excluding the leading bias column

    The value is sigmoid(theta . x) over the first feature_number + 1 entries.
    """
    weighted = 0.0
    for i in range(feature_number + 1):
        weighted += x[i] * theta[i]
    return sigmoid(weighted)
##计算偏导数
def compute_gradient(x, y, theta, feature_number, feature_pos, sample_number):
    """Return the partial derivative of the logistic cost w.r.t. one theta.

    x              -- list of sample rows
    y              -- list of labels, one per sample
    theta          -- coefficient list
    feature_number -- number of features excluding the leading bias column
    feature_pos    -- index of the coefficient being differentiated
    sample_number  -- how many leading rows of x / entries of y to use

    The result is mean((h(x_i) - y_i) * x_i[feature_pos]) over the first
    sample_number samples, where h is hypothesis().
    """
    total = 0.0
    for i in range(sample_number):
        row = x[i]
        predicted = hypothesis(row, theta, feature_number)
        total += (predicted - y[i]) * row[feature_pos]
    return total / sample_number
##代价
def compute_cost(x, y, theta, feature_number, sample_number):
    """Return the mean cross-entropy (log-loss) of the logistic model.

    x              -- list of sample rows
    y              -- list of labels, one per sample
    theta          -- coefficient list
    feature_number -- number of features excluding the leading bias column
    sample_number  -- how many leading rows of x / entries of y to use

    Computes mean(-y_i * log(h_i) - (1 - y_i) * log(1 - h_i)), where
    h_i = hypothesis(x_i, theta, feature_number).
    """
    # A saturated prediction of exactly 0.0 or 1.0 would make math.log
    # raise ValueError, so keep h strictly inside (0, 1).
    eps = 1e-15
    total = 0.0
    for i in range(sample_number):
        h = hypothesis(x[i], theta, feature_number)
        h = min(max(h, eps), 1.0 - eps)
        total += -y[i] * math.log(h) - (1 - y[i]) * math.log(1 - h)
    return total / sample_number
##梯度下降
def gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number):
    """Run batch gradient descent for logistic regression, updating theta in place.

    x, y           -- training samples and their labels
    theta          -- coefficient list, modified in place; only the first
                      feature_number + 1 entries are read and written
    feature_number -- number of features excluding the leading bias column
    sample_number  -- number of samples to use
    alpha          -- learning rate
    iterate_number -- number of full update passes

    Prints the cost after every pass. Returns None.
    """
    dims = feature_number + 1
    for _ in range(iterate_number):
        # Compute every new coefficient from the *old* theta before writing
        # any of them back, so the update is simultaneous.
        updated = [
            theta[j] - alpha * compute_gradient(x, y, theta, feature_number, j, sample_number)
            for j in range(dims)
        ]
        theta[:dims] = updated
        # NOTE(review): the pasted source lost its indentation; the cost
        # print is taken to be per-iteration progress output -- confirm.
        print(compute_cost(x, y, theta, feature_number, sample_number))
if __name__ == '__main__':
    feature_number = 2
    alpha = 0.001
    iterate_number = 10

    # Training rows: column 0 is the constant 1 that multiplies theta[0];
    # the remaining two columns are the features.
    x = [
        [1, 34.6, 78.0],
        [1, 30.2, 43.8],
        [1, 35.8, 72.9],
        [1, 60.1, 86.3],
        [1, 79.0, 75.3],
        [1, 45.0, 56.3],
        [1, 61.1, 96.5],
        [1, 75.0, 46.5],
        [1, 76.0, 87.4],
        [1, 84.4, 43.5],
        [1, 95.8, 38.2],
        [1, 75.0, 30.6],
    ]
    y = [0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0]
    sample_number = len(x)

    # One coefficient per column of x, all starting at zero.
    theta = [0.0] * (feature_number + 1)
    gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number)

    # Join all coefficients at once so they are actually tab-separated;
    # joining a one-element list per coefficient emitted no separator.
    outstr = "\t".join(str(theta[i]) for i in range(feature_number + 1))
    print(outstr)
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。