반응형

/*******************************************************************************************************************
-- Title : [MSR] MSDN - ScaleR tutorial using airplane flight data
-- Reference : microsoft.com
-- Key word : microsoft r rxgetoption airline dataset file.path rximport .xdf xdf dataframe data.frame
                  데이터프레임 list nrow rxgetvarinfo levels rxsummary summary rxhistogram rxlinmod
                  linear model 선형모델 선형 모델
*******************************************************************************************************************/


-- MSDN
    * https://msdn.microsoft.com/en-us/microsoft-r/scaler-getting-started-0-example-airline-data


-- Chart



-- Microsoft R

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# ***********************************************
# Process Dataset
# ***********************************************

# ------------------------------
# Get the airline data set
# ------------------------------

# Look up the sample-data directory configured in the RevoScaleR options
sampleDataDir <- rxGetOption("sampleDataDir")
sampleDataDir                                                             # show the path

# Show the current working directory
getwd()

# Full path + file name of the sample CSV
inputFile <- file.path(sampleDataDir, "AirlineDemoSmall.csv")
inputFile                                                                 # path + file name

# Store the data set
#    (author's note: the data appears to be saved into an .xdf file)
airDS <- rxImport(inData = inputFile, outFile = "ADS.xdf",                # relative path, so ADS.xdf is created in getwd()
                  missingValueString = "M", stringsAsFactors = TRUE,
                  overwrite = TRUE)                                       # overwrite: replace an existing .xdf file

head(airDS)
 
# Build the column-info list:
# DayOfWeek is declared as a factor restricted to two levels.
mycol <- list(DayOfWeek = list(type = "factor",
                               levels = c("Tuesday", "Wednesday")))
mycol
class(mycol)
 
# Import the same CSV again, this time passing mycol as colInfo.
# Author's note (unconfirmed): only the weekdays listed in colInfo are shown;
# the remaining values seem to be turned into <NA>.
airDS2 <- rxImport(inData = inputFile, outFile = "ADS2.xdf",
                   missingValueString = "M", colInfo = mycol, overwrite = TRUE)

nrow(airDS2)
head(airDS2)
 
# ------------------------------
# Examining .xdf files
# ------------------------------

# Inspect the data set object
head(airDS)
mode(airDS)                                                               # S4
class(airDS)                                                              # author recorded "RxFdfData", "RevoScaleR" - NOTE(review): class name is probably "RxXdfData"; confirm
str(airDS)

nrow(airDS)                                                               # number of rows
ncol(airDS)                                                               # number of columns


# Show additional per-variable information
rxGetVarInfo(airDS)
 
# ------------------------------
# Generate Dataframe in specific range of dataset
# ------------------------------
# Read 10 rows starting at row 100000
myData <- rxReadXdf(airDS, numRows = 10, startRow = 100000)
myData

nrow(myData)                                                              # 10
mode(myData)                                                              # list
class(myData)                                                             # data.frame

levels(myData$DayOfWeek)                                                  # distinct DayOfWeek levels
 
 
# ***********************************************
# Summarizing Dataset
# ***********************************************

# ------------------------------
# Summarizing
# ------------------------------
adsSummary <- rxSummary(~ArrDelay+CRSDepTime+DayOfWeek, data = airDS)     # summary of the three listed variables [MSR]
adsSummary

adsSummary2 <- summary(airDS2)                                            # summary via the base R generic [R]
adsSummary2

# NOTE(review): the name "addSummary3" looks like a typo for "adsSummary3";
# kept unchanged so any later reference to it still works.
addSummary3 <- rxSummary(~ArrDelay:DayOfWeek, data = airDS)               # ArrDelay summarized by DayOfWeek [MSR]
addSummary3
 
# ------------------------------
# Create Histogram
# ------------------------------
options("device.ask.default" = TRUE)
rxHistogram(~ArrDelay, data = airDS)
rxHistogram(~CRSDepTime, data = airDS)
rxHistogram(~DayOfWeek, data = airDS)
# Conditioned on DayOfWeek. The data set imported earlier in this script is
# airDS; the previous "airData" was never defined.
rxHistogram(~ArrDelay|DayOfWeek, data = airDS)
 
# ------------------------------
# Fitting a simple model
# ------------------------------
arrDelayLm1 <- rxLinMod(ArrDelay ~ DayOfWeek, data = airDS)               # linear model of ArrDelay on DayOfWeek
summary(arrDelayLm1)

cs

반응형

+ Recent posts