반응형
/*******************************************************************************************************************
-- Title : [MSR] MSDN - ScaleR tutorial using airplane flight data
-- Reference : microsoft.com
-- Key word : microsoft r rxgetoption airline dataset file.path rximport .xdf xdf dataframe data.frame
데이터프레임 list nrow rxgetvarinfo levels rxsummary summary rxhistogram rxlinmod
linear model 선형모델 선형 모델
*******************************************************************************************************************/
-- MSDN
* https://msdn.microsoft.com/en-us/microsoft-r/scaler-getting-started-0-example-airline-data
-- Chart
-- Microsoft R
# ***********************************************
# -- Process Dataset
# ***********************************************
# Walk-through of the ScaleR getting-started example: import the airline
# sample CSV into .xdf files, inspect them, summarize and plot a few
# variables, and fit a simple linear model. Requires the RevoScaleR package
# (Microsoft R) to be loaded.

# ------------------------------
# -- Get the airline data set
# ------------------------------

# -- Look up the directory that holds the packaged sample data
sampleDataDir <- rxGetOption("sampleDataDir")
sampleDataDir  # check the path

# -- Check the current working directory
getwd()

# -- Full path + file name
inputFile <- file.path(sampleDataDir, "AirlineDemoSmall.csv")
inputFile  # path + file name

# -- Store the data set
# NOTE(review): rxImport appears to persist the imported data in the .xdf file
airDS <- rxImport(
  inData = inputFile,
  outFile = "ADS.xdf",  # relative path, so ADS.xdf is created under getwd()
  missingValueString = "M",
  stringsAsFactors = TRUE,
  overwrite = TRUE  # overwrite an existing .xdf file
)
head(airDS)

# -- Build a list describing the column
mycol <- list(
  DayOfWeek = list(type = "factor", levels = c("Tuesday", "Wednesday"))
)
mycol
class(mycol)

# -- Import again with colInfo restricting the DayOfWeek levels
# NOTE(review): days outside the listed levels presumably become <NA> - verify
airDS2 <- rxImport(
  inData = inputFile,
  outFile = "ADS2.xdf",
  missingValueString = "M",
  colInfo = mycol,
  overwrite = TRUE
)
nrow(airDS2)
head(airDS2)

# ------------------------------
# -- Examining .xdf files
# ------------------------------

# -- Inspect the data set object
head(airDS)
mode(airDS)   # S4
# NOTE(review): original note read "RxFdfData", "RevoScaleR"; the data source
# class is expected to be "RxXdfData" - verify
class(airDS)
str(airDS)
nrow(airDS)   # number of rows
ncol(airDS)   # number of columns

# -- Additional per-variable information
rxGetVarInfo(airDS)

# ------------------------------
# -- Generate Dataframe in specific range of dataset
# ------------------------------
myData <- rxReadXdf(airDS, numRows = 10, startRow = 100000)
myData
nrow(myData)              # 10
mode(myData)              # list
class(myData)             # data.frame
levels(myData$DayOfWeek)  # distinct DayOfWeek levels

# ***********************************************
# -- Summarizing Dataset
# ***********************************************

# ------------------------------
# -- Summarizing
# ------------------------------

# Summary of the three variables named in the formula [MSR]
adsSummary <- rxSummary(~ArrDelay + CRSDepTime + DayOfWeek, data = airDS)
adsSummary

# Summary via the generic summary() [R]
adsSummary2 <- summary(airDS2)
adsSummary2

# Summary of ArrDelay crossed with DayOfWeek [MSR]
addSummary3 <- rxSummary(~ArrDelay:DayOfWeek, data = airDS)
addSummary3

# ------------------------------
# -- Create Histogram
# ------------------------------

# Set the graphics-device "ask" default so plots are paged one at a time
options("device.ask.default" = TRUE)

rxHistogram(~ArrDelay, data = airDS)
rxHistogram(~CRSDepTime, data = airDS)
rxHistogram(~DayOfWeek, data = airDS)
# One ArrDelay panel per DayOfWeek level. The original passed the undefined
# object `airData`; the imported data source is `airDS`.
rxHistogram(~ArrDelay | DayOfWeek, data = airDS)

# ------------------------------
# -- Fitting a simple model
# ------------------------------

# Linear model of arrival delay on day of week
arrDelayLm1 <- rxLinMod(ArrDelay ~ DayOfWeek, data = airDS)
summary(arrDelayLm1)
반응형