반응형
/*******************************************************************************************************************
-- Title : [MSR] RTVS - A First Look at R
-- Reference : microsoft.com
-- Key word : microsoft r installed.packages library search ggplot2 data head tail class str nrow ggplot
scale_x_log lm linear model regression model summary coef exp r-squared 회귀분석 회귀 분석
선형모델 선형 모델 rtvs ls
*******************************************************************************************************************/
-- Chart
-- Microsoft R
# **************************************************
# -- Packages used in this set of examples
# **************************************************
# Package    | Use
# ---------- | ----------
# ggplot2    | Plots

# ------------------------------
# -- Looking at Packages
# ------------------------------
# You can extend the functionality of R by installing and loading packages.
# A package is simply a set of functions, and sometimes data.
# Package authors can distribute their work on CRAN,
# https://cran.r-project.org/, in addition to other repositories
# (e.g. Bioconductor) and GitHub.
# For a list of contributed packages on CRAN, see https://cran.r-project.org/

# -- List all available installed packages on your machine.
installed.packages()

# -- List all "attached" or loaded packages.
search()

# -- You "attach" a package to make its functions available,
#    using the library() function.
library(foreign)

# -- You can get help on a package using:
library(help = foreign)

# -- To install a new package, use install.packages().
#    Install the ggplot2 package for its plotting capability.
#    requireNamespace() only checks that the package can be loaded; it does
#    not attach it, so the check has no side effect on the search path.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# -- Then load the package.
#    library() stops with an error if the package is still unavailable.
library("ggplot2")

# -- Notice that package:ggplot2 is now added to the search list.
search()

# **************************************************
# -- A Simple Regression Example
# **************************************************

# -- Look at the data sets that come with the package.
data(package = "ggplot2")$results

# -- ggplot2 contains a dataset called diamonds.
# Make this dataset available using the data() function.
data(diamonds, package = "ggplot2")

# -- Create a listing of all objects in the "global environment".
#    Look for "diamonds" in the results.
ls()

# -- Now investigate the structure of diamonds, a data frame with
#    53,940 observations.
str(diamonds)

# -- Print the first few rows.
head(diamonds)

# -- Print the last 6 rows.
tail(diamonds)

# -- Find out what kind of object it is.
class(diamonds)

# -- Look at the dimensions of the data frame.
dim(diamonds)

# ------------------------------
# -- Plots in R
#    R has three systems for static graphics:
#    base graphics, lattice and ggplot2.
# ------------------------------

# -- Create a random sample of 5000 rows of the diamonds data.
#    The sample is not seeded, so it differs from run to run.
diamondSample <- diamonds[sample(nrow(diamonds), 5000), ]
dim(diamondSample)
nrow(diamondSample)
head(diamondSample)

# -- Set the font size so that it will be clearly legible.
theme_set(theme_gray(base_size = 18))

# -- In this sample you use ggplot2.
ggplot(diamondSample, aes(x = carat, y = price)) +
  geom_point(colour = "blue")

ggplot(diamondSample, aes(x = color, y = depth)) +
  geom_point(colour = "red")

# -- Add a log scale.
#    When the points are bunched together and hard to read, a log
#    transformation spreads them out.
ggplot(diamondSample, aes(x = carat, y = price)) +
  geom_point(colour = "blue") +
  scale_x_log10()

# -- Add a log scale for both scales.
ggplot(diamondSample, aes(x = carat, y = price)) +
  geom_point(colour = "blue") +
  scale_x_log10() +
  scale_y_log10()

# **************************************************
# -- Linear Regression in R
# **************************************************

# -- Build a simple regression model: log(price) explained by log(carat).
model <- lm(log(price) ~ log(carat), data = diamondSample)

# -- Look at the results.
#    The original run reported R-squared = 0.9334, i.e. the model explained
#    93.3% of the variance; the exact figure varies with the random sample.
summary(model)

# -- Extract model coefficients.
coef(model)
coef(model)[1]
# Exponentiate the intercept (on the log-price scale) to convert it back
# to the original price units.
exp(coef(model)[1])

# -- Show the model in a plot.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line thickness; `size` is kept here so older installations still work.
ggplot(diamondSample, aes(x = carat, y = price)) +
  geom_point(colour = "blue") +
  geom_smooth(method = "lm", colour = "red", size = 2) +
  scale_x_log10() +
  scale_y_log10()

# ------------------------------
# -- Regression Diagnostics
# ------------------------------

# -- Look at some model diagnostics.
#    Use the Q-Q plot to check whether the residuals are approximately
#    normally distributed.
# Set up a 2x2 grid for multiple plots on the same figure. par() returns the
# previous settings, which are kept so the layout can be restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(model, col = "blue")
par(old_par) # Reset the plot layout to what it was before.

# ------------------------------
# -- The Model Object
# ------------------------------

# -- Look at the model object.
str(model)
model$coefficients # note this is the same as coef(model)

# -- Now fit a new model including more columns:
#    log of price against log(carat) plus all remaining columns.
model2 <- lm(log(price) ~ log(carat) + ., data = diamondSample)

# -- The original run reported R-squared = 0.9824, i.e. the model explained
#    98.2% of the variance, a better model than previously. The exact figure
#    varies with the random sample.
summary(model2)

# -- Create data frame of actual and predicted price.
#    Predictions come from model2, the fuller model fitted just above; the
#    model works on log(price), so exp() converts back to price units.
predicted_values <- data.frame(
  actual = diamonds$price,
  predicted = exp(predict(model2, diamonds)) # anti-log of predictions
)
head(predicted_values)

# -- Create plot of actuals vs predictions.
ggplot(predicted_values, aes(x = actual, y = predicted)) +
  geom_point(colour = "blue", alpha = 0.01) +
  geom_smooth(colour = "red") +
  coord_equal(ylim = c(0, 20000)) + # force equal scale
  ggtitle("Linear model of diamonds data")
반응형