반응형
/*******************************************************************************************************************
-- Title : [Py3.5] Linear Regression Example w/ Scipy, Statsmodels
-- Reference : acorn, googling
-- Key word : linear regression lm 선형 회귀 선형회귀 회귀분석 회귀 분석 matplotlib numpy pandas scipy
statsmodel regression read_csv .csv rectangle plot graph 차트 플롯 ols OLS WLS wls
*******************************************************************************************************************/
■ Figures
■ Scripts
# -*- coding: utf-8 -*-
"""Linear regression example using SciPy and statsmodels.

Reads a CSV of rates, fits ``Both ~ DFE`` on the rows whose DFE exceeds a
threshold, and draws two figures:

1. scatter plot + SciPy ``linregress`` line + per-bin mean/std error bars;
2. scatter plot + statsmodels OLS fit with its prediction interval.

In both figures the x-range at or below the threshold (excluded from the
fit) is shaded yellow.
"""
import os
import urllib.request

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.api as sm
import statsmodels.formula.api as smf
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import linregress
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# ------------------------------
# -- Set Dataframe Option
# ------------------------------
# NOTE: 'display.height' was dropped here on purpose; the option no longer
# exists in pandas and setting it raises OptionError on current versions.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# ------------------------------
# -- Read data from .CSV
# ------------------------------
path_csv = "D:\\PyProject\\20170524_acorn1\\df_rates.csv"
df_rates = pd.read_csv(path_csv)

print(df_rates.head())
print("... read_csv", "." * 100, "\n")

# ------------------------------
# -- Extract linear regression stats. values
# ------------------------------
# Rows with DFE <= mindfe (or missing DFE) are excluded from every fit below.
mindfe = 30.

# Explicit boolean AND with parentheses: the comparison must be evaluated
# before the masks are combined ('*' and '&' both bind tighter than '>').
selection = df_rates.DFE.notnull() & (df_rates.DFE > mindfe)

# Drop rows with a missing value in either column so linregress never sees NaN
# (a single NaN would turn every returned statistic into NaN).
fit_rows = df_rates.loc[selection, ['DFE', 'Both']].dropna()

# Pass x and y separately instead of relying on the single 2xN-array form.
a, b, r, p, stderr = linregress(fit_rows['DFE'].values,
                                fit_rows['Both'].values)

print('slope:{0:.4f}\nintercept:{1:.4f}\nr-value:{2:.4f}\np-value:{3:.4f}\nstderr:{4:.4f}'.
      format(a, b, r, p, stderr))
print(",,, linear regression", "," * 100, "\n")

# ------------------------------
# -- Draw rectangle plot including errorbar and linspace
# ------------------------------
bins = np.linspace(23.5, 65, 11, dtype='float')

# Group the data into the bins. Only the two plotted columns are aggregated so
# that non-numeric columns in the CSV cannot break mean()/std().
groups_df_rates = df_rates.groupby(np.digitize(df_rates.DFE, bins))[['DFE', 'Both']]
group_mean = groups_df_rates.mean()
group_std = groups_df_rates.std()

fig = plt.figure()
ax = fig.add_subplot(111)

df_rates.plot(kind='scatter', x='DFE', y='Both', ax=ax)

xdata = df_rates['DFE']
# Series.min()/max() skip NaN; the builtin min()/max() give order-dependent
# results when the column contains missing values.
xmin, xmax = xdata.min(), xdata.max()
xvalues = np.linspace(mindfe, xmax, 200)
yvalues = a * xvalues + b

ax.plot(xvalues, yvalues, color='red', lw=1.5)
ax.grid(lw=1, ls='dashed')
ax.errorbar(group_mean.DFE,
            group_mean.Both,
            yerr=np.array(group_std.Both),
            marker='.',
            ls='None',
            lw=1.5,
            color='g',
            ms=1)

# x in data coordinates, y in axes coordinates: the rectangle spans the full
# height of the axes from x=0 to x=mindfe.
trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
rect = patches.Rectangle((0, 0), width=mindfe, height=1, transform=trans,
                         color='yellow', alpha=0.5)
ax.add_patch(rect)
ax.set_xlim((xmin, xmax + 3))

plt.show()

# --
# -- Get OLS Regression Results
# --
mod = smf.ols("Both ~ DFE", df_rates[selection]).fit()
print(mod.summary())
print(",,, OLS regression", "," * 100, "\n")

# --
# -- Draw WLS Plot
# --
prstd, iv_l, iv_u = wls_prediction_std(mod)

# Plot the fit and its prediction band against the x values actually used in
# the fit (column 1 of the design matrix [Intercept, DFE]), sorted so the
# lines run left to right. Pairing [mindfe, xmax] with the min/max of the
# fitted values would invert the line for a negative slope and flatten the
# curvature of the prediction band.
fit_x = np.asarray(mod.model.exog)[:, 1]
order = np.argsort(fit_x)
fit_x = fit_x[order]

fig = plt.figure()
ax = fig.add_subplot(111)
df_rates.plot(kind='scatter', x='DFE', y='Both', ax=ax)

ax.plot(fit_x, np.asarray(mod.fittedvalues)[order], 'IndianRed', lw=1.5)
ax.plot(fit_x, np.asarray(iv_u)[order], 'b--', lw=1.5)
ax.plot(fit_x, np.asarray(iv_l)[order], 'y--', lw=1.5)

ax.errorbar(group_mean.DFE,
            group_mean.Both,
            yerr=np.array(group_std.Both),
            ls='None',
            lw=1.5,
            color='Green')

trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
rect = patches.Rectangle((0, 0), width=mindfe, height=1,
                         transform=trans, color='Yellow', alpha=0.5)
ax.add_patch(rect)

ax.grid(lw=1, ls='dashed')
ax.set_xlim((xmin, xmax + 3))

plt.show()
■ Files
반응형