반응형
/*******************************************************************************************************************
-- Title : [Py3.5] 로지스틱 회귀분석(Logistic Regression)
-- Reference : acorn, googling
-- Key word : 로지스틱 회귀분석 로지스틱 회귀 분석 logistic regression
*******************************************************************************************************************/
■ Figures
■ Scripts
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | # -*- coding: utf-8 -*- import numpy as np import pandas as pd import statsmodels.api as sm import matplotlib.pyplot as plt # ------------------------------ # -- Set Dataframe Option # ------------------------------ pd.set_option('display.height', 1000) pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) # ------------------------------ # -- Draw linear and logistic # ------------------------------ k = 1. m = -5. # -- # -- Function for linear and logistic # -- y = lambda x: k*x + m # for linear p = lambda x: np.exp(k*x+m) / (1+np.exp(k*x+m)) # for logistic #p = lambda x: 1 / (1+np.exp(-1*(k*x+m))) # -- # -- Show figure # -- xx = np.linspace(0,10) plt.plot(xx, y(xx), label='linear') plt.plot(xx, p(xx), label='logistic') plt.plot([0,abs(m)], [0.5,0.5], dashes=(4,4), color='.7') plt.plot([abs(m),abs(m)], [-.1,.5], dashes=(4,4), color='.7') # limits, legends and labels plt.ylim((-.1,1.1)) plt.legend(loc=2) plt.ylabel('P') plt.xlabel('xx'); plt.show() # ------------------------------ # -- Study logistic case # ------------------------------ # -- # -- Draw hist # -- studytime = [0,0,1.5,2,2.5,3,3.5,4,4,4,5.5,6,6.5,7,7,8.5,9,9,9,10.5,10.5,12,12,12,12.5,13,14,15,16,18] passed = [0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1] df_data = pd.DataFrame(data=np.array([studytime,passed]).T, columns=['Time', 'Pass']) print (df_data.head()) print("... raw_data", "." * 100, "\n") df_data.Time.hist(bins=10) plt.xlabel('Time') plt.ylabel('No. students') plt.show() # -- # -- Draw plot # -- plt.plot(df_data.Time, df_data.Pass,'o', mew=0, ms=7,) plt.ylim(-.1,1.1) plt.xlim(-0.2,16.2) plt.xlabel('Time studied') plt.ylabel('Pass? (0=no, 1=yes)') plt.show() # ------------------------------ # -- Calculate logistic # ------------------------------ # -- # -- create probfit # -- probfit = sm.Logit(df_data.Pass, sm.add_constant(df_data.Time, prepend=True)) fit_results = probfit.fit() print(fit_results.summary()) print(",,, summary", "," * 100, "\n") # -- # -- draw logistics # -- logit_pars = fit_results.params intercept_err, slope_err = np.diag(fit_results.cov_params())**.5 fit_results.cov_params() slope = logit_pars['Time'] intercept = logit_pars['const'] plt.plot(df_data.Time, df_data.Pass,'o', mew=0, ms=7, label='Data') p = lambda x,k,m: 1 / (1+np.exp(-1*(k*x+m))) xx = np.linspace(0,df_data.Time.max()) l1 = plt.plot(xx, p(xx,slope,intercept), label='Fit') plt.fill_between(xx, p(xx,slope+slope_err**2, intercept+intercept_err), p(xx,slope-slope_err**2, intercept-intercept_err), alpha=0.15, color=l1[0].get_color()) plt.ylim(-.1,1.1) plt.xlim(-0.2,16.2) plt.xlabel('Time studied') plt.ylabel('Pass? (0=no, 1=yes)') plt.legend(loc=2, numpoints=1) plt.show() # ------------------------------ # -- rate logistic # ------------------------------ target=0.5 x_prob = lambda p,k,m: (np.log(p/(1-p))-m)/k T_max = x_prob(target, slope-slope_err, intercept-intercept_err) T_min = x_prob(target, slope+slope_err, intercept+intercept_err) T_best = x_prob(target, slope, intercept) print('{0}% success rate: {1:.1f} +{2:.1f}/-{3:.1f}'.format(int(target*100),T_best,T_max-T_best,T_best-T_min)) print(",,, success rate", "," * 100, "\n") |
반응형