반응형

/*********************************************************************************************************
-- Title : [R3.3] 추론통계분석 - 패키지(Hmisc, prettyR)를 통한 기술통계량 구현 및 보고서 작성
-- Reference : hrd-net
-- Key word : R 기술 통계량 패키지 hmisc prettyr describe 명목척도 비율척도 빈도수 결측치 백분율 freq
                  prettyR Hmisc 추론 통계 분석
*********************************************************************************************************/

-- R

# ************************************************
# -- 패키지 이용 기술통계량 구하기
#    (Hmisc, prettyR)
# ************************************************
 
# -- Load the practice data set
setwd("C:\\RProject\\Rwork\\Part-III")
data3 <- read.csv(file = "descriptive.csv", header = TRUE)
head(data3)               # preview the first rows of the data set

# -- Inspect the overall characteristics of the data
dim(data3)                # dimensions (rows, columns)
length(data3)             # number of columns
length(data3$survey)      # number of observations in the survey column
str(data3)                # structure of the data frame
summary(data3)            # per-column summary
 
# ------------------------------
# -- Descriptive statistics, package 1: Hmisc
# ------------------------------
# Install only when the package is missing, so that re-running the script
# does not download and reinstall it on every execution.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)                      # attach the package

# -- Descriptive summary of every variable in the data frame
describe(data3)

# -- Descriptive summary of individual variables
describe(data3$gender)              # nominal scale
describe(data3$age)                 # ratio scale
 
# ------------------------------
# -- Descriptive statistics, package 2: prettyR
# ------------------------------
# Install only when the package is missing, so that re-running the script
# does not download and reinstall it on every execution.
if (!requireNamespace("prettyR", quietly = TRUE)) {
  install.packages("prettyR")
}
library(prettyR)                    # attach the package

# -- All variables: frequency, missing values (NA) and percentage per variable
freq(data3)

# -- Single variable: frequency, missing values (NA) and percentage
freq(data3$gender)
 
 
# ************************************************
# -- 기술통계량 보고서 작성법
# ************************************************
 
# ------------------------------
# -- 1) Residence (add a derived label column)
# ------------------------------
# Map the numeric residence code to a label. The column is initialised to NA
# so that rows matching none of the conditions (including NA codes) stay NA
# and no stale labels survive a re-run.
data3$resident2 <- rep(NA_character_, nrow(data3))
data3$resident2[data3$resident == 1] <- "특별시"
data3$resident2[data3$resident >= 2 & data3$resident <= 4] <- "광역시"
data3$resident2[data3$resident == 5] <- "시구군"

data3

x <- table(data3$resident2)         # frequency per label
prop.table(x)                       # proportions
y <- prop.table(x)
round(y * 100, 2)                   # percentages, rounded to 2 decimal places
 
# ------------------------------
# -- 2) Gender
# ------------------------------
# Map the numeric gender code to a label. The column is initialised to NA so
# that rows matching neither code (including NA codes) stay NA.
data3$gender2 <- rep(NA_character_, nrow(data3))
data3$gender2[data3$gender == 1] <- "남자"
data3$gender2[data3$gender == 2] <- "여자"
x <- table(data3$gender2)           # frequency per label

prop.table(x)                       # proportions
y <- prop.table(x)
round(y * 100, 2)                   # percentages, rounded to 2 decimal places
 
# ------------------------------
# -- 3) Age
# ------------------------------
summary(data3$age)                  # original note: values range 40 ~ 69
# Bin age into three labelled groups. The column is initialised to NA so that
# rows matching none of the conditions (including NA ages) stay NA.
# NOTE(review): the bins are <= 45, 46..59 and >= 60, so a non-integer age
# strictly between 45 and 46 would stay NA - confirm ages are whole numbers.
data3$age2 <- rep(NA_character_, nrow(data3))
data3$age2[data3$age <= 45] <- "중년층"
data3$age2[data3$age >= 46 & data3$age <= 59] <- "장년층"
data3$age2[data3$age >= 60] <- "노년층"
x <- table(data3$age2)              # frequency per age group
x
prop.table(x)                       # proportions
y <- prop.table(x)
round(y * 100, 2)                   # percentages, rounded to 2 decimal places
 
# ------------------------------
# -- 4) Education level
# ------------------------------
# Map the numeric education code to a label. The column is initialised to NA
# so that rows matching none of the codes (including NA codes) stay NA.
data3$level2 <- rep(NA_character_, nrow(data3))
data3$level2[data3$level == 1] <- "고졸"
data3$level2[data3$level == 2] <- "대졸"
data3$level2[data3$level == 3] <- "대학원졸"

x <- table(data3$level2)            # frequency per label
prop.table(x)                       # proportions
y <- prop.table(x)
round(y * 100, 2)                   # percentages, rounded to 2 decimal places
 
# ------------------------------
# -- 5) Pass / fail
# ------------------------------
# Map the numeric pass code to a label. The column is initialised to NA so
# that rows matching neither code (including NA codes) stay NA.
data3$pass2 <- rep(NA_character_, nrow(data3))
data3$pass2[data3$pass == 1] <- "합격"
data3$pass2[data3$pass == 2] <- "실패"
# Tabulate into x so that the proportions below are computed from the
# pass/fail counts and not from a table left over from an earlier section.
x <- table(data3$pass2)             # frequency per label
x
prop.table(x)                       # proportions
y <- prop.table(x)
round(y * 100, 2)                   # percentages, rounded to 2 decimal places
 
head(data3)                         # preview the data with the derived columns

# NOTE(review): Hmisc and prettyR each provide a describe(); this script
# attaches prettyR after Hmisc, so the unqualified call presumably resolves to
# prettyR's version. Write Hmisc::describe() / prettyR::describe() explicitly
# if a specific one is intended - confirm.
describe(data3)
summary(data3$cost)
sum(data3$cost)

describe(data3)
summary(data3$survey)
sum(data3$survey, na.rm = TRUE)     # ignore missing values when summing
 
# ------------------------------
# -- Save the cleaned data set with the derived columns
# ------------------------------
setwd("C:\\RProject\\output")
# quote = FALSE writes character values without surrounding quotes;
# row.names = FALSE omits the row-name column from the file.
write.csv(data3, "cleanDescriptive.csv", quote = FALSE, row.names = FALSE)
 

cs


-- Files

descriptive.csv



반응형

+ Recent posts