선형회귀식을 도출해 주는 다양한 라이브러리들이 있다.(numpy.polyfit, pygsl.fit, 온라인에서 수행 해 볼 수 있는 곳은 여기) 이 패키지들은 가중치선형회귀식 기능도 포함하고 있다. 얼마전 GPU(OpenCL) 에서 가중치선형회귀 기능을 수행할 필요가 있어 직접 구현하게 되었다. OpenCL 코드를 작성 하기전 python으로 작성하여 테스트를 해 보았다.
import matplotlib.pylab as plt
import numpy as np
def weighted_linear_regression(x_ary, y_ary, w_ary):
assert len(x_ary) == len(y_ary) == len(w_ary)
a_w = 0
a_wx = 0
a_wy = 0
a_wxx = 0
a_wxy = 0
for x, y, w in zip(x_ary, y_ary, w_ary):
a_w += w
a_wx += x * w
a_wy += y * w
a_wxx += x * x * w
a_wxy += x * y * w
v = (a_w * a_wxx - a_wx * a_wx)
slope = (a_w * a_wxy - a_wx * a_wy) / v
intercept = (a_wxx * a_wy - a_wx * a_wxy) / v
return slope, intercept
def weighted_linear_regression_v2(x_ary, y_ary, w_ary):
x_ary = np.array(x_ary)
y_ary = np.array(y_ary)
w_ary = np.array(w_ary)
s = sum(w_ary)
xa = sum(x_ary * w_ary) / s
ya = sum(y_ary * w_ary) / s
sxy = sum((x_ary - xa) * (y_ary - ya) * w_ary)
sxx = sum(np.power((x_ary - xa), 2) * w_ary)
slope = sxy / sxx
intercept = ya - slope * xa
return slope, intercept
def abline(slope, intercept, xrange, *args, **kwargs):
x_values = np.array(xrange)
y_values = intercept + slope * x_values
return plt.plot(x_values, y_values, *args, **kwargs)
def main():
x_ary = [0, 1, 2, 3, 4, 5, 6, 7, 8]
y_ary = [0, 20, 40, 80, 160, 320, 640, 1280, 2560]
w_ary = [1, 1, 1, 1, 1, 1, 2, 3, 10]
plt.plot(x_ary, y_ary, linestyle=':', label='original')
xrange = (x_ary[0], x_ary[-1])
none_w_ary = [1] * len(w_ary)
slope, intercept = weighted_linear_regression(x_ary, y_ary, none_w_ary)
abline(slope, intercept, xrange, label='none_weighted_linear_regression')
print(slope, intercept)
slope, intercept = weighted_linear_regression(x_ary, y_ary, w_ary)
abline(slope, intercept, xrange, linestyle='-', label='weighted_linear_regression')
print(slope, intercept)
slope, intercept = weighted_linear_regression_v2(x_ary, y_ary, w_ary)
abline(slope, intercept, xrange, linestyle='--', label='weighted_linear_regression_v2')
print(slope, intercept)
slope, intercept = np.polyfit(x_ary, y_ary, 1, w=w_ary)
abline(slope, intercept, xrange, label='numpy.polyfit')
print(slope, intercept)
plt.legend()
plt.show()
if __name__ == '__main__':
main()