반응형
/*******************************************************************************************************************
-- Title : [MSR] MSDN - Analyzing loan data with ScaleR(Logistic Regression)
-- Reference : microsoft.com
-- Key word : microsoft r revoscale r scale r rximport rxsummary rxgetinfo rxgetvarinfo rxlogit rxpredict
logistic regression 로지스틱 회귀분석 회귀 모델 summary
*******************************************************************************************************************/
-- Chart
-- MSDN
* https://msdn.microsoft.com/en-us/microsoft-r/scaler-getting-started-1-example-loan-data
-- 샘플 다운로드
* http://packages.revolutionanalytics.com/datasets/
-- Microsoft R
# Analyzing loan (mortgage default) data with RevoScaleR:
# import yearly CSV files into a single XDF file, inspect the data, fit
# logistic regression models with rxLogit, and score a small grid of
# hypothetical borrowers with rxPredict.
#
# NOTE(review): the rx* functions come from RevoScaleR, which is assumed to be
# attached already (as it is in Microsoft R Server/Client sessions) -- confirm.

# ------------------------------
# -- Sample data download
# ------------------------------
# http://packages.revolutionanalytics.com/datasets/

# ------------------------------
# -- Path setup
# ------------------------------
csvDataDir <- "C:\\RProject\\MRS\\Data"
# Common prefix of the yearly CSV files: mortDefault2000.csv, ...,
# mortDefault2009.csv (the year and extension are appended in the loop below).
mortCsvDataName <- file.path(csvDataDir, "mortDefault")
mortXdfFileName <- "C:\\RProject\\MRS\\Data\\mortDefaultSmall.xdf"

# ------------------------------
# -- Import several files (rxImport)
# ------------------------------
# The first file is imported with append = "none"; every later file is
# imported with append = "rows" so its rows are added to the same XDF file.
append <- "none"
for (i in 2000:2009) {
  importFile <- paste0(mortCsvDataName, i, ".csv")
  # cat(importFile)
  mortDS <- rxImport(importFile, mortXdfFileName, append = append)
  append <- "rows"
}
nrow(mortDS)

# ------------------------------
# -- Inspect the data source (rxSummary, rxGetInfo, rxGetVarInfo)
# ------------------------------
# -- Basic data source summary plus the first 5 rows
rxGetInfo(mortDS, numRows = 5)

# -- Variable (column) information
rxGetVarInfo(mortDS)

# -- Summary statistics for every variable
rxSummary(~ ., data = mortDS, blocksPerRead = 2)

# -- Summary of creditScore after cutting it into 20 intervals on the fly
rxSummary(
  ~ creditScore,
  data = mortDS,
  transforms = list(creditScore = cut(creditScore, breaks = 20))
)

# ------------------------------
# -- Logistic regression (rxLogit)
# ------------------------------
# -- Basic logistic regression; F(year) treats year as a categorical factor
logitObj <- rxLogit(
  default ~ F(year) + creditScore + yearsEmploy + ccDebt,
  data = mortDS,
  blocksPerRead = 2,
  reportProgress = 1
)
summary(logitObj)

# -- Logistic regression with many more parameters (houseAge as a factor);
# -- system.time() reports how long the fit takes.
system.time(
  logitObj <- rxLogit(
    default ~ F(houseAge) + F(year) + creditScore + yearsEmploy + ccDebt,
    data = mortDS,
    blocksPerRead = 2,
    reportProgress = 1
  )
)
summary(logitObj)

# -- Plot coefficients 2..41 against house ages 0..39
# NOTE(review): presumably coefficient 1 is the intercept and the next 40
# belong to the F(houseAge) levels 0..39 -- confirm against summary(logitObj).
cc <- coef(logitObj)
df <- data.frame(Coefficient = cc[2:41], HouseAge = 0:39)
rxLinePlot(Coefficient ~ HouseAge, data = df, type = "p")

# ------------------------------
# -- Predicted probabilities (rxPredict)
# ------------------------------
# -- Two values of interest for each predictor
creditScore <- c(300, 700)
yearsEmploy <- c(2, 8)
ccDebt <- c(5000, 10000)
year <- c(2008, 2009)
houseAge <- c(5, 20)

# -- Build a 32-row data frame holding every combination of the values above
# -- (each successive column varies half as fast as the previous one).
predictDF <- data.frame(
  creditScore = rep(creditScore, times = 16),
  yearsEmploy = rep(rep(yearsEmploy, each = 2), times = 8),
  ccDebt = rep(rep(ccDebt, each = 4), times = 4),
  year = rep(rep(year, each = 8), times = 2),
  houseAge = rep(houseAge, each = 16)
)
head(predictDF)

# -- Score the grid with the fitted model, then list rows from the highest
# -- to the lowest predicted default probability (column default_Pred).
predictDF2 <- rxPredict(
  modelObject = logitObj,
  data = predictDF,
  outData = predictDF
)
predictDF2[order(predictDF2$default_Pred, decreasing = TRUE), ]
반응형