## R uge 9 ## Læs data. Air <- read.table("air.txt", header = T, sep = ",") ## Tjek efter at det ser rigtigt ud inden at du fortsætter! Air$logOzone <- log(Air$ozone) ## See the relation between ozone and temperature plot(Air$temperature, Air$logOzone, xlab="Temperature", ylab="Ozon") ## Correlation cor(Air$logOzone, Air$temperature) ## Fit a simple linear regression model Fit <- lm(logOzone ~ temperature, data=Air) summary(Fit) ## Add a vector with random values, is there a significant linear relation? ## ONLY for ILLUSTRATION purposes Air$noise <- rnorm(nrow(Air)) plot(Air$logOzone, Air$noise, xlab="Noise", ylab="Ozon") cor(Air$logOzone, Air$noise) summary(lm(logOzone ~ noise, data=Air)) ################################ ## With each of the other two independent variables ## Simple linear regression model with the wind speed plot(Air$wind, Air$logOzone, ylab="logOzone", xlab="Wind speed") cor(Air$logOzone, Air$wind) summary(lm(logOzone ~ wind, data=Air)) ## Simple linear regression model with the radiation plot( Air$radiation, Air$logOzone, ylab="logOzone", xlab="Radiation") cor(Air$logOzone, Air$radiation) summary(lm(logOzone ~ radiation, data=Air)) ### L <- lm(logOzone ~ temperature + wind + radiation , data = Air) summary(L) ##### Forward selection L <- lm(logOzone ~ temperature , data = Air) summary(L) L <- lm(logOzone ~ wind , data = Air) summary(L) L <- lm(logOzone ~ radiation , data = Air) summary(L) ## temp. mest sig. ## Næste skridt L <- lm(logOzone ~ temperature + wind, data = Air) summary(L) L <- lm(logOzone ~ temperature + radiation, data = Air) summary(L) ## stråling mest sig. ## Næste skridt L <- lm(logOzone ~ temperature + radiation + wind, data = Air) summary(L) ## Alle tre variable meget signifikante. ### Backwards selection L <- lm(logOzone ~ temperature + radiation + wind, data = Air) summary(L) ## Alle tre signifikante. L$residuals residuals(L) ## Giver det samme qqnorm(residuals(L)) qqline(residuals(L)) L$fitted.values fitted(L) ## giver det samme plot(fitted(L), residuals(L)) plot(Air$temperature, residuals(L)) plot(Air$wind, residuals(L)) plot(Air$radiation, residuals(L)) #### ################################ ## Extend the ozone model with appropriate curvilinear regression ## Make the squared wind speed Air$windSq <- Air$wind^2 ## Add it to the model fitWindSq <- lm(logOzone ~ temperature + wind + windSq + radiation, data=Air) summary(fitWindSq) ## Equivalently for the temperature Air$temperature2 <- Air$temperature^2 ## Add it fitTemperatureSq <- lm(logOzone ~ temperature + temperature2 + wind + radiation, data=Air) summary(fitTemperatureSq) fitTemperature <- lm(logOzone ~ temperature + wind + radiation, data=Air) summary(fitTemperature) ## Equivalently for the radiation Air$radiation2 <- Air$radiation^2 ## Add it fitRadiationSq <- lm(logOzone ~ temperature + wind + radiation + radiation2, data=Air) summary(fitRadiationSq) ## Which one was best? ## One could try to extend the model further fitWindSqTemperaturSq <- lm(logOzone ~ temperature + temperature2 + wind + windSq + radiation, data=Air) summary(fitWindSqTemperaturSq) ## Model validation qqnorm(fitWindSq$residuals) qqline(fitWindSq$residuals) plot(fitWindSq$residuals, fitWindSq$fitted.values, pch=19) ###### L <- lm(logOzone ~ temperature + radiation + wind, data = Air) summary(L) confint(L, level = 0.99) ## Konfidensintervaller for de enkelte parametre. ## Generate a new data.frame with constant temperature and radiation, but with varying wind speed wind<-seq(1,20.3,by=0.1) ## VindVærdier som søges undersøgt AirForPred <- data.frame(temperature=mean(Air$temperature), ## gns temp. wind=wind, ## givne vindværdier windSq=wind^2, radiation=mean(Air$radiation))## gns. stråling ## Calculate confidence and prediction intervals (actually bands) CI <- predict(fitWindSq, newdata=AirForPred, interval="confidence", level=0.95) PI <- predict(fitWindSq, newdata=AirForPred, interval="prediction", level=0.95) plot(wind, CI[,"fit"], ylim=range(CI), type="l", main=paste("At temperature =",format(mean(Air$temperature),digits=3), "and radiation =", format(mean(Air$radiation),digits=3))) lines(wind, CI[,"lwr"], lty=2, col=2) lines(wind, CI[,"upr"], lty=2, col=2) plot(wind, PI[,"fit"], ylim=range(PI), type="l", main=paste("At temperature =",format(mean(Air$temperature),digits=3), "and radiation =", format(mean(Air$radiation),digits=3))) lines(wind, PI[,"lwr"], lty=2, col=2) lines(wind, PI[,"upr"], lty=2, col=2) plot(wind, CI[,"fit"], ylim=range(CI,PI), type="l", main=paste("At temperature =",format(mean(Air$temperature),digits=3), "and radiation =", format(mean(Air$radiation),digits=3))) lines(wind, CI[,"lwr"], lty=2, col=2) lines(wind, CI[,"upr"], lty=2, col=2) lines(wind, PI[,"lwr"], lty=2, col=3) lines(wind, PI[,"upr"], lty=2, col=3)