본문 바로가기
카테고리 없음

Weighted Linear Regression

by DogBull 2018. 7. 20.

선형회귀식을 도출해 주는 다양한 라이브러리들이 있다.(numpy.polyfit, pygsl.fit, 온라인에서 수행 해 볼 수 있는 곳은 여기) 이 패키지들은 가중치선형회귀식 기능도 포함하고 있다. 얼마전 GPU(OpenCL) 에서 가중치선형회귀 기능을 수행할 필요가 있어 직접 구현하게 되었다. OpenCL 코드를 작성 하기전 python으로 작성하여 테스트를 해 보았다.


import matplotlib.pylab as plt
import numpy as np


def weighted_linear_regression(x_ary, y_ary, w_ary):
    assert len(x_ary) == len(y_ary) == len(w_ary)

    a_w = 0
    a_wx = 0
    a_wy = 0
    a_wxx = 0
    a_wxy = 0

    for x, y, w in zip(x_ary, y_ary, w_ary):
        a_w += w
        a_wx += x * w
        a_wy += y * w
        a_wxx += x * x * w
        a_wxy += x * y * w

    v = (a_w * a_wxx - a_wx * a_wx)
    slope = (a_w * a_wxy - a_wx * a_wy) / v
    intercept = (a_wxx * a_wy - a_wx * a_wxy) / v

    return slope, intercept


def weighted_linear_regression_v2(x_ary, y_ary, w_ary):
    x_ary = np.array(x_ary)
    y_ary = np.array(y_ary)
    w_ary = np.array(w_ary)

    s = sum(w_ary)
    xa = sum(x_ary * w_ary) / s
    ya = sum(y_ary * w_ary) / s
    sxy = sum((x_ary - xa) * (y_ary - ya) * w_ary)
    sxx = sum(np.power((x_ary - xa), 2) * w_ary)
    slope = sxy / sxx
    intercept = ya - slope * xa
    return slope, intercept


def abline(slope, intercept, xrange, *args, **kwargs):
    x_values = np.array(xrange)
    y_values = intercept + slope * x_values
    return plt.plot(x_values, y_values, *args, **kwargs)


def main():
    x_ary = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    y_ary = [0, 20, 40, 80, 160, 320, 640, 1280, 2560]
    w_ary = [1, 1, 1, 1, 1, 1, 2, 3, 10]

    plt.plot(x_ary, y_ary, linestyle=':', label='original')

    xrange = (x_ary[0], x_ary[-1])

    none_w_ary = [1] * len(w_ary)
    slope, intercept = weighted_linear_regression(x_ary, y_ary, none_w_ary)
    abline(slope, intercept, xrange, label='none_weighted_linear_regression')
    print(slope, intercept)

    slope, intercept = weighted_linear_regression(x_ary, y_ary, w_ary)
    abline(slope, intercept, xrange, linestyle='-', label='weighted_linear_regression')
    print(slope, intercept)

    slope, intercept = weighted_linear_regression_v2(x_ary, y_ary, w_ary)
    abline(slope, intercept, xrange, linestyle='--', label='weighted_linear_regression_v2')
    print(slope, intercept)

    slope, intercept = np.polyfit(x_ary, y_ary, 1, w=w_ary)
    abline(slope, intercept, xrange, label='numpy.polyfit')
    print(slope, intercept)

    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()