## ----setup, include=FALSE, cache=FALSE----------------------------------- # set global chunk options ## this code is only required to set up knitr for reproducible documentation ## it has nothing to do with the exercise itself, please ignore! opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width'), out.format='latex', use.highlight=TRUE) opts_chunk$set(fig.path='kgraph/exRKGLS', fig.align='center', fig.show='hold', prompt=FALSE, fig.width=6, fig.height=6, out.width='.7\\linewidth', size='scriptsize') ## comment=NA, ## this does not comment out the output # options to be read by formatR options(replace.assign=TRUE,width=70) ## ----echo=FALSE, results='hide'------------------------------------------ rm(list=ls()) ## ----child-data, child='exRKGLS_data.Rnw'-------------------------------- ## ----data-set-parent, echo=FALSE, cache=FALSE---------------------------- set_parent('exRKGLS.Rnw') ## ----setwd-mac-no, eval=FALSE-------------------------------------------- ## setwd("~/ds/NEweather") ## ----setwd-win, eval=FALSE----------------------------------------------- ## setwd("M://css6200/dgr2/datasets/NEweather") ## ----list-files-no, eval=F----------------------------------------------- ## list.files(".", pattern=".shp$") ## ----list-files-yes, echo=F, eval=T-------------------------------------- ## this is the file specification on the author's system; he organizes datasets into a subdirectory named `ds', and then ## each data source into its own sub-subdirectory list.files("./ds/NEweather", pattern=".shp$") ## ----load-packages------------------------------------------------------- library(sp) library(rgdal) ## ----load-all-usa-no, eval=FALSE----------------------------------------- ## usa <- readOGR(dsn=".", layer="gdd50_7100j", ## integer64="allow.loss") ## ----load-all-usa-yes, echo=FALSE---------------------------------------- ## this is for the author's system usa <- readOGR(dsn="./ds/NEweather", layer="gdd50_7100j", integer64="allow.loss") ## ----str-all-use--------------------------------------------------------- bbox(usa) names(usa) strwrap(proj4string(usa)) ## ----num-to-char-ids----------------------------------------------------- sapply(usa@data, "class") usa$STATION_ID <- as.character(usa$STATION_ID) usa$STATION_NA <- as.character(usa$STATION_NA) usa$STN_NAME <- as.character(usa$STN_NAME) usa$OID_<- as.character(usa$OID_) usa$COOP_ID <- as.character(usa$COOP_ID) summary(usa) ## ----list-states--------------------------------------------------------- length(levels(usa$STATE)) levels(usa$STATE) ## ----select-ne----------------------------------------------------------- dim(usa)[1] # number of rows in the full dataset ix <- (usa$STATE %in% c("NY","VT","NJ","PA")) # logical expression str(ix) # each row is either TRUE or FALSE sum(ix) # number of rows selected head(which(ix)) # first six row numbers of stations in these states ne <- usa[ix,] # select only these states' records dim(ne)[1] # number of rows in the restricted dataset ## ----adjust-levels------------------------------------------------------- class(ne$STATE) levels(ne$STATE) ne$STATE <- factor(ne$STATE) levels(ne$STATE) ## ----new-bbox------------------------------------------------------------ bbox(ne) ## ----check-zerodist------------------------------------------------------ length(ix <- zerodist(ne)) ## ----check-zerodist-usa-------------------------------------------------- length(ix <- zerodist(usa)) usa@data[ix,1:5] ## ----adjust-coords------------------------------------------------------- coordinates(usa)[ix[1],]; coordinates(usa)[ix[4],] usa@data[ix[1],4:5]; usa@data[ix[4],4:5] usa@coords[ix[4],] <- c(-84.99, 40.42) usa@data[ix[4],4:5] <- c(40.42, -84.99) # this is the order in the dataframe ## ----eval=FALSE---------------------------------------------------------- ## usa <- (usa[-ix[4],]) ## ----show-proj4---------------------------------------------------------- strwrap(proj4string(ne)) ## ----list-projs---------------------------------------------------------- head(projInfo(type="proj")) ## ----find-albers-parameters---------------------------------------------- round(median(bbox(ne)[1,])) floor(bbox(ne)[2,1])+1 ceiling(bbox(ne)[2,2])-1 ## ----make-albers--------------------------------------------------------- ne.crs <- CRS("+proj=aea +lat_0=42.5 +lat_1=39 +lat_2=44 +lon_0=-76 +ellps=WGS84 +units=m") ne.m <- spTransform(ne, ne.crs) ## ----change-coordnames--------------------------------------------------- coordnames(ne.m) <- c("E","N") bbox(ne.m) ## ----convert-to-df------------------------------------------------------- ne.df <- as(ne.m, "data.frame") names(ne.df) ## ----get-state-boundaries-no, eval=FALSE--------------------------------- ## state <- readOGR(dsn=".", "cb_2014_us_state_500k") ## summary(state) ## ----get-state-boundaries-yes, eval=T, echo=F---------------------------- ## for the author's system state <- readOGR(dsn="./ds/NEweather", "cb_2014_us_state_500k") summary(state) ## ----limit-to-ne--------------------------------------------------------- state.ne <- state[state$STUSPS %in% c('NY','NJ', 'PA', 'VT'),] summary(state.ne) ## ----state-CRS----------------------------------------------------------- proj4string(ne.m) state.ne.m <- spTransform(state.ne, proj4string(ne.m)) proj4string(state.ne.m) coordnames(state.ne.m) <- c("E","N") bbox(state.ne.m) ## ----save-projected-stations-no, eval=FALSE------------------------------ ## save(ne, ne.m, ne.df, state.ne, state.ne.m, ne.crs, file="./ne_stations.RData") ## ----save-projected-stations-yes, eval=T, echo=F------------------------- ## this is for the author's system save(ne, ne.m, ne.df, state.ne, state.ne.m, ne.crs, file="./ds/NEweather/ne_stations.RData") ## ----child-data, child='exRKGLS_explore.Rnw'----------------------------- ## ----explore-set-parent, echo=FALSE, cache=FALSE------------------------- set_parent('exRKGLS.Rnw') ## ----load-packages-explore----------------------------------------------- library(sp) library(rgdal) ## ----load-saved-dataset, eval=FALSE-------------------------------------- ## load(file="./ne_stations.RData", verbose=TRUE) ## ----load-saved-dataset-yes, eval=TRUE, echo=FALSE----------------------- rm(list=ls()) # make sure this is all the data we need to go forward load(file="./ds/NEweather/ne_stations.RData", verbose=TRUE) ## ----summarize-ds-------------------------------------------------------- summary(ne.df[,c("ANN_GDD50","ELEVATION_")]) ## ----show-stations, out.width='\\linewidth', w=8, h=8, fig.cap="Location of weather stations"---- plot(coordinates(ne.m), asp=1, xlab="E", ylab="N", pch=20) text(coordinates(ne.m), labels=substr(ne.m$STATION_NA, 1, 4), cex=0.5, pos=2, col=as.numeric(ne.m$STATE)) ## add state boundaries if available ## otherwise omit the next command plot(state.ne.m, add=TRUE, border="darkgray", lwd=2) grid() ## ----load-ggplot2-------------------------------------------------------- library(ggplot2) ## ----display-annual, out.width='\\textwidth', w=11, h=10, fig.cap="Annual Growing Degree days, 50F"---- ggplot(data=ne.df) + aes(x=E, y=N) + geom_point(aes(size=ANN_GDD50, colour=ELEVATION_), shape=20) + xlab("E") + ylab("N") + coord_fixed() ## ----export-to-kml------------------------------------------------------- ne.wgs84 <- spTransform(ne.m, CRS("+init=epsg:4326")) # this is the EPSG code for global WGS84 long/lat library(plotKML) shape = "http://maps.google.com/mapfiles/kml/pal2/icon18.png" station.names = substr(ne.wgs84$STATION_NA,1,12) kml_open(file.name='ne.stations.kml') kml_layer(ne.wgs84, colour=ANN_GDD50, colour_scale=SAGA_pal[[1]], shape=shape, points_names=station.names) kml_close('ne.stations.kml') ## ----child-data, child='exRKGLS_OLS.Rnw'--------------------------------- ## ----OLS-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----scatter-covars------------------------------------------------------ p1 <- ggplot() + geom_point( aes(x=ELEVATION_, y=ANN_GDD50, colour=STATE), data=ne.df ) p2 <- ggplot() + geom_point( aes(x=E, y=ANN_GDD50, colour=STATE), data=ne.df ) + xlab("Easting") p3 <- ggplot() + geom_point( aes(x=N, y=ANN_GDD50, colour=STATE), data=ne.df ) + xlab("Northing") ## ----print-scatter-covars, out.width='0.33\\linewidth'------------------- print(p3) print(p2) print(p1) ## ----scatter-xform, out.width='0.48\\linewidth'-------------------------- ggplot() + geom_point( aes(x=sqrt(ELEVATION_), y=ANN_GDD50, colour=STATE), data=ne.df ) ## ----cor-preds----------------------------------------------------------- with(ne.df, cor(ANN_GDD50, N)) with(ne.df, cor(ANN_GDD50, E)) with(ne.df, cor(ANN_GDD50, sqrt(ELEVATION_))) ## ----build-naive-elev---------------------------------------------------- m.ols.elev <- lm(ANN_GDD50 ~ sqrt(ELEVATION_), data=ne.df) summary(m.ols.elev) ## ----build-naive-two----------------------------------------------------- m.ols <- lm(ANN_GDD50 ~ sqrt(ELEVATION_) + N, data=ne.df) summary(m.ols) ## ----plot-lm-fits-------------------------------------------------------- plot(ne.m$ANN_GDD50 ~ fitted(m.ols), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by OLS", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----dx-naive, out.width='\\textwidth', w=12, h=4------------------------ par(mfrow=c(1,3)) plot(m.ols, which=c(1,2,5)) par(mfrow=c(1,1)) ## ----find-min-max-ols-resid---------------------------------------------- ne.df$ols.resid <- residuals(m.ols) (ix <- c(which.max(ne.df$ols.resid), which.min(ne.df$ols.resid))) ne.df[ix,] ne.df[ix,c("LATITUDE_D", "LONGITUDE_")] ## ----correct-it---------------------------------------------------------- ne.m[ix[1],"ELEVATION_"] <- ne.m[ix[1],"ELEV_FT"] <- 1192 ne.df[ix[1],"ELEVATION_"] <- ne.df[ix[1],"ELEV_FT"] <- 1192 ne.m[ix[2],"ELEVATION_"] <- ne.m[ix[2],"ELEV_FT"] <- 1141 ne.df[ix[2],"ELEVATION_"] <- ne.df[ix[2],"ELEV_FT"] <- 1141 ## ----re-run-ols---------------------------------------------------------- m.ols <- lm(ANN_GDD50 ~ sqrt(ELEVATION_) + N, data=ne.df) summary(m.ols) ## ----ols-model-dx2, out.width='\\textwidth', w=12, h=4------------------- par(mfrow=c(1,3)) plot(m.ols, which=c(1,2,5)) par(mfrow=c(1,1)) ## ----plot-lm-fits-2------------------------------------------------------ plot(ne.m$ANN_GDD50 ~ fitted(m.ols), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by OLS (corrected data)", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----ols-resid-summary--------------------------------------------------- summary(residuals(m.ols)) ## ----add-east, eval=FALSE, echo=FALSE, results="hide", fig.keep='none'---- ## ## this code would be useful to answer the challenge ## m.ols.ts1 <- lm(ANN_GDD50 ~ N + E, data=ne.df) ## m.ols.ts1.z <- lm(ANN_GDD50 ~ N + E + sqrt(ELEVATION_), data=ne.df) ## m.ols.ts1.z.i <- lm(ANN_GDD50 ~ (N + E)*sqrt(ELEVATION_), data=ne.df) ## summary(m.ols.ts1) ## summary(m.ols) ## summary(m.ols.ts1.z) ## summary(m.ols.ts1.z.i) ## anova(m.ols.ts1.z.i, m.ols.ts1.z, m.ols.ts1) ## anova(m.ols.ts1.z, m.ols) ## AIC(m.ols.ts1.z.i, m.ols.ts1.z, m.ols.ts1, m.ols) ## par(mfrow=c(1,3)) ## plot(m.ols.ts1.z.i, which=c(1,2,5)) ## par(mfrow=c(1,1)) ## ix <- which(abs(residuals(m.ols.ts1.z)) > 400) ## ne.m$ols.ts1.z.resid <- ne.df$ols.ts1.z.resid <- residuals(m.ols.ts1.z) ## ne.df[ix,c("STATION_NA","STATE","ELEVATION_","N","E", "ols.ts1.z.resid")] ## bubble(ne.m, zcol="ols.ts1.z.resid", pch=1, ## main="Residuals from trend surface/elevation model") ## bubble(ne.m, zcol="ols.resid", pch=1, ## main="Residuals from N/elevation model") ## ne.m$diff.resid.ols.ts1.z <- ne.df$diff.resid.ols.ts1.z <- (ne.m$ols.ts1.z.resid - ne.m$ols.resid) ## bubble(ne.m, zcol="diff.resid.ols.ts1.z", pch=1, ## main="Difference of residuals: trend surface/elevation - Northing/elevation ") ## ----add-OLS-resid-df---------------------------------------------------- ne.m$ols.resid <- residuals(m.ols) ## ----bubble-naive-------------------------------------------------------- bubble(ne.m, zcol="ols.resid", pch=1, main="Residuals from OLS fit, actual - predicted") ## ----load-gstat---------------------------------------------------------- library(gstat) ## ----vgm-ols-resid, out.width='0.5\\textwidth'--------------------------- v.r.ols <- variogram(ols.resid ~ 1, locations=ne.m, cutoff=100000, width=16000) plot(v.r.ols, pl=T) ## ----vgm-ols-resid-model, out.width='0.5\\textwidth'--------------------- (vmf.r.ols <- fit.variogram(v.r.ols, vgm(15000, "Exp", 20000, 20000))) plot(v.r.ols, pl=T, model=vmf.r.ols) ## ----vgm-ols-cloud, out.width='0.5\\textwidth'--------------------------- vc <- variogram(ols.resid ~ 1, locations=ne.m, cutoff=12000, cloud=T) plot(vc, pch=20, cex=2) ## ----why-ols-resid-diff-------------------------------------------------- vc.df <- as.data.frame(vc) vc.close <- subset(vc.df, vc.df$dist < 8000) ## sort by separation, look for anomalies. vc.close[order(vc.close$dist),c("dist","gamma","left","right")] ## ------------------------------------------------------------------------ print(ne.m@data[c(106,107),c("STATE","STATION_NA","LATITUDE_D","LONGITUDE_", "ELEVATION_", "ANN_GDD50","ols.resid")]) ## ----child-data, child='exRKGLS_GLS.Rnw'--------------------------------- ## ----GLS-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----fit-reml------------------------------------------------------------ library(nlme) vmf.r.ols[1:2,] m.gls <- gls(model=ANN_GDD50 ~ sqrt(ELEVATION_) + N, data=ne.df, correlation=corExp( value=c(vmf.r.ols[2,"range"]), form=~E + N, nugget=FALSE)) ## ----summary-reml-------------------------------------------------------- summary(m.gls) ## ----intervals-reml------------------------------------------------------ intervals(m.gls, level=0.95)$coef ## ----summary-intervals-coef---------------------------------------------- intervals(m.gls, level=0.95)$corStruct ## ----compare-gls-ols-corr------------------------------------------------ intervals(m.gls, level=0.95)$corStruct vmf.r.ols[2,"range"] ## ----plot-gls-fits------------------------------------------------------- plot(ne.m$ANN_GDD50 ~ predict(m.gls), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by GLS", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----fit-reml-resid------------------------------------------------------ ne.m$gls.resid <- residuals(m.gls) summary(ne.m$gls.resid) bubble(ne.m, zcol="gls.resid", pch=1, main="Residuals from GLS fit, actual - predicted") ## ----gls-resid-vgm, out.width='0.5\\textwidth'--------------------------- v.r.gls <- variogram(gls.resid ~ 1, ne.m, cutoff=120000, width=12000) (vmf.r.gls <- vgm(psill=var(ne.m$gls.resid), model="Exp", range=intervals(m.gls)$corStruct["range","est."], nugget=TRUE)) plot(v.r.gls, pl=T, model=vmf.r.gls) ## ----show-spherical------------------------------------------------------ plot(v.r.ols, pl=T) ## ----fit-spherical------------------------------------------------------- (vmf.r.ols.sph <- fit.variogram(v.r.ols, vgm(psill=20000, model="Sph", range=40000, nugget=15000))) plot(v.r.ols, pl=T, model=vmf.r.ols.sph) ## ----compute-prop-nugget------------------------------------------------- (prop.nugget <- vmf.r.ols.sph[1,"psill"]/sum(vmf.r.ols.sph[,"psill"])) ## ----fit-reml-with-nugget------------------------------------------------ m.gls.2 <- gls(model=ANN_GDD50 ~ sqrt(ELEVATION_) + N, data=ne.df, correlation=corSpher( value=c(vmf.r.ols.sph[2,"range"], prop.nugget), form=~E + N, nugget=TRUE)) ## ----compare-corr-structs------------------------------------------------ intervals(m.gls, level=0.95)$corStruct intervals(m.gls.2, level=0.95)$corStruct ## ----compare-ranges------------------------------------------------------ intervals(m.gls)$corStruct["range", 2]*3 intervals(m.gls.2)$corStruct["range", 2] ## ----compare-regr-coeff-------------------------------------------------- intervals(m.gls, level=0.95)$coef[,2] intervals(m.gls.2, level=0.95)$coef[,2] ## ----compare-coeff------------------------------------------------------- coefficients(m.gls) coefficients(m.ols) round(100*(coefficients(m.gls) - coefficients(m.ols))/coefficients(m.ols),2) ## ----bubble-diffs-------------------------------------------------------- ne.m$diff.gls.ols.resid <- (ne.m$gls.resid - ne.m$ols.resid) summary(ne.m$diff.gls.ols.resid) bubble(ne.m, zcol="diff.gls.ols.resid", pch=1, main="GLS residual - OLS residual", col=c("#EF8A62","#67A9CF")) ## ----scatter-diff, out.width='\\linewidth', w=10, h=6-------------------- ggplot() + geom_point(aes(x=sqrt(ne.m$ELEVATION_), y=ne.m$diff.gls.ols.resid, size=ne.m$ANN_GDD50, colour=ne.m$STATE)) + xlab("Sqrt(Elevation)") + ylab("GLS - OLS residual") + geom_hline(yintercept=0, size=1.2) ## ----big-diff-resid------------------------------------------------------ ix <- which.max(ne.m$diff.gls.ols.resid) ne.df[ix,c("STATION_NA","STATE","ELEVATION_","N","E")] ix <- which.min(ne.m$diff.gls.ols.resid) ne.df[ix,c("STATION_NA","STATE","ELEVATION_","N","E")] ## ----child-data, child='exRKGLS_grid.Rnw'-------------------------------- ## ----grid-set-parent, echo=FALSE, cache=FALSE---------------------------- set_parent('exRKGLS.Rnw') ## ----load-1km-dem-no, eval=FALSE, echo=TRUE------------------------------ ## library(raster) ## dem.1km <- raster("./srtm_1km_48.asc") ## ----load-1km-dem-yes, eval=TRUE, echo=FALSE----------------------------- ## this is for the author's system library(raster) dem.1km <- raster("./ds/SRTM_1km_ASC/srtm_1km_48.asc") ## ----show-1km-dem-------------------------------------------------------- class(dem.1km) extent(dem.1km) projection(dem.1km) proj4string(dem.1km) res(dem.1km) nrow(dem.1km) ncol(dem.1km) ncell(dem.1km) ## ----compute-grid-cells-size--------------------------------------------- round(res(dem.1km)[1]*(10000/90),3) round(res(dem.1km)[1]*(10000/90)*cos(42*pi/180),3) ## ----crop-1km------------------------------------------------------------ bbox(state.ne) dem.ne <- crop(dem.1km, state.ne) bbox(dem.ne) nrow(dem.ne) ncol(dem.ne) ncell(dem.ne) ## ----project-1km--------------------------------------------------------- dem.ne.m <- projectRaster(dem.ne, crs=proj4string(ne.m), method="bilinear") nrow(dem.ne.m) ncol(dem.ne.m) ncell(dem.ne.m) res(dem.ne.m) summary(dem.ne.m) ## ----convert-elevations-to-feet------------------------------------------ ft.to.m <- 0.3048 dem.ne.m <- dem.ne.m/ft.to.m summary(dem.ne.m) ## ----view-1km, out.width='.8\\textwidth'--------------------------------- image(dem.ne.m, col=topo.colors(24)) ## ----aggregate-to-4km---------------------------------------------------- out.grid.res <- 4000 dem.ne.m <- aggregate(dem.ne.m, fact=c(floor(out.grid.res/res(dem.ne.m)[1]), floor(out.grid.res/res(dem.ne.m)[2])), fun=mean) ncell(dem.ne.m) ## ----save-4km-grid-no, eval=FALSE---------------------------------------- ## save(dem.ne.m, file="./dem_ne_4km.RData") ## ----save-4km-grid-yes, eval=T, echo=FALSE------------------------------- ## for the author's system save(dem.ne.m, file="./ds/NEweather/dem_ne_4km.RData") ## ----load-4km-grid-a-no, eval=FALSE-------------------------------------- ## load(file="./dem_ne_4km.RData", verbose=TRUE) ## ----load-4km-grid-a-yes, eval=T, echo=F--------------------------------- ## for the author's system load(file="./ds/NEweather/dem_ne_4km.RData", verbose=TRUE) ## ----load-4km-grid, out.width='.8\\textwidth'---------------------------- image(dem.ne.m, col=topo.colors(24)) ## ----convert-4km-df------------------------------------------------------ class(dem.ne.m) dem.ne.m.sp <- as(dem.ne.m, "SpatialPixelsDataFrame") coordnames(dem.ne.m.sp) <- c("E", "N") names(dem.ne.m.sp) <- "ELEVATION_" dem.ne.m.df <- as.data.frame(dem.ne.m.sp) class(dem.ne.m.df) dim(dem.ne.m.df) names(dem.ne.m.df) ## ----adjust-negative-to-zero--------------------------------------------- dem.ne.m.sp$ELEVATION_ <- pmax(0, dem.ne.m.sp$ELEVATION_) dem.ne.m.df$ELEVATION_ <- pmax(0, dem.ne.m.df$ELEVATION_) ## ----child-data, child='exRKGLS_pred_OLS_GLS.Rnw'------------------------ ## ----pred-OLS-GLS-set-parent, echo=FALSE, cache=FALSE-------------------- set_parent('exRKGLS.Rnw') ## ----predict-4km--------------------------------------------------------- dem.ne.m.df$pred.ols <- predict(m.ols, newdata=dem.ne.m.df) dem.ne.m.df$pred.gls <- predict(m.gls, newdata=dem.ne.m.df) summary(dem.ne.m.df[,-(1:3)]) ## ----define-display-map-function----------------------------------------- display.prediction.map <- function(.prediction.map.name, .plot.title, .legend.title, .plot.limits=NULL, .palette="YlGnBu") { ggplot(data=dem.ne.m.df) + geom_point(aes_(x=quote(E), y=quote(N), color=as.name(.prediction.map.name))) + xlab("E") + ylab("N") + coord_fixed() + geom_point(aes(x=E, y=N, color=ANN_GDD50), data=ne.df, shape=I(20)) + ggtitle(.plot.title) + scale_colour_distiller(name=.legend.title, space="Lab", palette=.palette, limits=.plot.limits) } ## ----plot-4km, out.width='.8\\textwidth'--------------------------------- (gdd.pred.lim <- round( c(min(dem.ne.m.df[,4:5])-10, max(dem.ne.m.df[,4:5])+10))) display.prediction.map("pred.ols", "Annual GDD, base 50F, OLS prediction", "GDD50", gdd.pred.lim) display.prediction.map("pred.gls", "Annual GDD, base 50F, GLS prediction", "GDD50", gdd.pred.lim) ## ----diffs-4km-gls-ols--------------------------------------------------- summary(dem.ne.m.df$diff.gls.ols <- dem.ne.m.df$pred.gls - dem.ne.m.df$pred.ols) ## ----define-difference-map-function-------------------------------------- display.difference.map <- function(.diff.map.name, .diff.map.title, .legend.name, .palette="Spectral") { ggplot(data=dem.ne.m.df) + geom_point(aes_(x=quote(E), y=quote(N), color=as.name(.diff.map.name))) + xlab("E") + ylab("N") + coord_fixed() + ggtitle(.diff.map.title) + scale_colour_distiller(name=.legend.name, space="Lab", palette=.palette) } ## ----display-diffs-4km-gls-ols, out.width='.8\\textwidth'---------------- display.difference.map("diff.gls.ols", "Annual GDD, base 50F, GLS - OLS predictions", "+/- GDD50") ## ----child-data, child='exRKGLS_RK.Rnw'---------------------------------- ## ----RKGLS-set-parent, echo=FALSE, cache=FALSE--------------------------- set_parent('exRKGLS.Rnw') ## ----print-gls-vmf------------------------------------------------------- print(vmf.r.gls) ## ----gls-resid-ok-grid, out.width='.6\\textwidth'------------------------ summary(ne.m$gls.resid) hist(ne.m$gls.resid, freq=F, xlab="GDD50", main="GLS residuals") rug(ne.m$gls.resid) ## ----krige-ok-gls-resid-1------------------------------------------------ ok.gls.resid <- krige(gls.resid ~ 1, loc=ne.m, newdata=dem.ne.m.sp, model=vmf.r.gls) summary(ok.gls.resid) ## ----krige-ok-gls-resid-2------------------------------------------------ dem.ne.m.df$ok.gls.resid <- ok.gls.resid$var1.pred dem.ne.m.df$ok.gls.resid.var <- ok.gls.resid$var1.var ## ----krige-ok-gls-resid, out.width='.9\\textwidth'----------------------- ggplot() + geom_point(aes(x=E, y=N, colour=ok.gls.resid), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Residuals from GLS trend surface, GDD base 50F") + scale_colour_distiller(name="GDD50", space="Lab", palette="RdBu") ## ------------------------------------------------------------------------ mean(dem.ne.m.df$ok.gls.resid) # spatial mean over the grid mean(ne.m$gls.resid) # arithmetic mean at observation points ## ----show-ok-gls-resid-var, out.width='.9\\textwidth'-------------------- ggplot() + geom_point(aes(x=E, y=N, colour=sqrt(ok.gls.resid.var)), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + geom_point(aes(x=E, y=N), data=ne.df, size=0.5, colour="black", shape=I(20)) + ggtitle("Residuals from GLS trend, kriging prediction standard deviation") + scale_colour_distiller(name="GDD50 base 50F", space="Lab", palette="BuPu", trans="reverse") ## ----rk-gls, out.width='.9\\textwidth'----------------------------------- summary(dem.ne.m.df$pred.rkgls <- dem.ne.m.df$pred.gls + dem.ne.m.df$ok.gls.resid) display.prediction.map("pred.rkgls", "Annual GDD, base 50F, GLS-RK prediction", "GDD50") ## ----child-data, child='exRKGLS_KED.Rnw'--------------------------------- ## ----KED-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----ked-vgm, out.width='0.6\\textwidth', w=8, h=6----------------------- v.ked <- variogram(ANN_GDD50 ~ sqrt(ELEVATION_) + N, locations=ne.m, cutoff=100000, width=16000) ## ----ked-vgm-fit, out.width='0.6\\textwidth', w=8, h=6------------------- (vmf.ked <- fit.variogram(v.ked, vgm(15000, "Exp", 20000, 20000))) plot(v.ked, plot.numbers=TRUE, model=vmf.ked) ## ----ked-pred-grid------------------------------------------------------- k.ked <- krige(ANN_GDD50 ~ sqrt(ELEVATION_)+ N, locations=ne.m, newdata=dem.ne.m.sp, model=vmf.ked) summary(k.ked) ## ----ked-show, out.width='.9\\textwidth'--------------------------------- dem.ne.m.df$pred.ked <- k.ked$var1.pred dem.ne.m.df$pred.ked.sd <- sqrt(k.ked$var1.var) display.prediction.map("pred.ked", "Annual GDD, base 50F, KED prediction", "GDD50") # ggplot() + geom_point(aes(x=E, y=N, colour=pred.ked.sd), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + geom_point(aes(x=E, y=N), data=ne.df, size=0.5, colour="black", shape=I(20)) + ggtitle("KED standard deviation, GDD base 50F") + scale_colour_distiller(name="GDD50", space="Lab", palette="BuPu", trans="reverse") ## ----diffs-4km-rkgls-ked------------------------------------------------- summary(dem.ne.m.df$diff.rkgls.ked <- dem.ne.m.df$pred.rkgls - dem.ne.m.df$pred.ked) ## ----display-diffs-4km-rkgls-ked, out.width='.8\\textwidth'-------------- display.difference.map("diff.rkgls.ked", "Difference annual GDD base 50F, RK-GLS - KED", "+/- GDD50") ## ----krige-loocv-ked, results='hide'------------------------------------- kcv.ked <- krige.cv(ANN_GDD50 ~ sqrt(ELEVATION_)+ N, locations=ne.m, model=vmf.ked) ## ----krige-loocv-ked-2--------------------------------------------------- summary(kcv.ked$residual) ## ------------------------------------------------------------------------ (loocv.ked.rmse <- sqrt(sum(kcv.ked$residual^2)/length(kcv.ked$residual))) ## ----ok-loocv-bubble-ked------------------------------------------------- bubble(kcv.ked, zcol="residual", pch=1, main="LOOCV KED residuals") ## ----ked-local----------------------------------------------------------- ked.n.max <- 61 # change this to try other numbers of neighbours k.ked.nn <- krige(ANN_GDD50 ~ sqrt(ELEVATION_)+ N, locations=ne.m, newdata=dem.ne.m.sp, model=vmf.ked, nmax=ked.n.max) summary(k.ked.nn@data) ## ----ked-local-display-map----------------------------------------------- dem.ne.m.df$pred.ked.nn <- k.ked.nn$var1.pred dem.ne.m.df$pred.ked.nn.sd <- sqrt(k.ked.nn$var1.var) display.prediction.map("pred.ked.nn", paste("Annual GDD, base 50F, KED prediction,", ked.n.max, "nearest neighbours"), "GDD50") ## ----ked-compare-global-local-------------------------------------------- summary(dem.ne.m.df$diff.ked <- dem.ne.m.df$pred.ked - dem.ne.m.df$pred.ked.nn) display.difference.map("diff.ked", paste("Difference annual GDD base 50F, KED global - KED", ked.n.max,"nearest neighbours"), "+/- GDD50") ## ----ked-local-cv, results='hide'---------------------------------------- kcv.ked.nn <- krige.cv(ANN_GDD50 ~ sqrt(ELEVATION_)+ N, locations=ne.m, model=vmf.ked, nmax=ked.n.max) ## ----ked-local-cv-2------------------------------------------------------ summary(kcv.ked.nn$residual) summary(kcv.ked$residual) (loocv.ked.nn.rmse <- sqrt(sum(kcv.ked.nn$residual^2)/length(kcv.ked.nn$residual))) (loocv.ked.rmse <- sqrt(sum(kcv.ked$residual^2)/length(kcv.ked$residual))) ## ----ked-global-local-cv-bubble------------------------------------------ summary(kcv.ked$diff <- kcv.ked$residual - kcv.ked.nn$residual) bubble(kcv.ked, zcol="diff", pch=1, main=paste("KED - KED", ked.n.max, "nearest neighbour residuals"), col=c("#EF8A62","#67A9CF")) ## ------------------------------------------------------------------------ k.ok <- krige(ANN_GDD50 ~ sqrt(ELEVATION_)+ N, locations=ne.m, newdata=dem.ne.m.sp, model=NULL) ## ------------------------------------------------------------------------ k.okr <- krige(ols.resid ~ 1, locations=ne.m, newdata=dem.ne.m.sp, model=vmf.r.ols) ## ------------------------------------------------------------------------ k.ok$rk.pred <- k.ok$var1.pred + k.okr$var1.pred ## ------------------------------------------------------------------------ k.ok$diff.pred <- k.ok$rk.pred - k.ked$var1.pred summary(k.ok$rk.pred); summary(k.ked$var1.pred) summary(k.ok$diff.pred) ## ----display-okrk-ked-diff, out.width='.8\\textwidth'-------------------- dem.ne.m.df$diff.okrk.ked.pred <- k.ok$diff.pred display.difference.map("diff.okrk.ked.pred", "Naive RK vs.\ KED surface, GDD base 50F", "+/- GDD50") ## ----child-data, child='exRKGLS_GAM.Rnw'--------------------------------- ## ----GAM-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----plot-marginal-loess, out.width='\\textwidth'------------------------ g1 <- ggplot(ne.df, aes(x=E, y=ANN_GDD50)) + geom_point() + geom_smooth(method="loess") g2 <- ggplot(ne.df, aes(x=N, y=ANN_GDD50)) + geom_point() + geom_smooth(method="loess") g3 <- ggplot(ne.df, aes(x=ELEVATION_, y=ANN_GDD50)) + geom_point() + geom_smooth(method="loess") g4 <- ggplot(ne.df, aes(x=sqrt(ELEVATION_), y=ANN_GDD50)) + geom_point() + geom_smooth(method="loess") require(gridExtra) grid.arrange(g1, g2, g3, g4, ncol = 2) ## ----load-mgcv----------------------------------------------------------- require(mgcv) ## ----fit-gam------------------------------------------------------------- m.g.xy <- gam(ANN_GDD50 ~ s(E, N) + s(ELEVATION_), data=ne.df) summary(m.g.xy) summary(residuals(m.g.xy)) ## ----gam-resid-bubble---------------------------------------------------- ne.m@data$resid.m.g.xy <- residuals(m.g.xy) bubble(ne.m, zcol="resid.m.g.xy", pch=1, main="Residuals from GAM") ## ----gam-resid-vgm, out.width='.55\\linewidth'--------------------------- vr <- variogram(resid.m.g.xy ~ 1, loc=ne.m, cutoff=50000, width=5000) plot(vr, pl=T) ## ----plot-gam-2D, out.width='\\linewidth'-------------------------------- plot.gam(m.g.xy, rug=T, se=T, select=1, scheme=1, theta=30+130, phi=30) ## ----plot-gam-2D-se, out.width='\\linewidth'----------------------------- vis.gam(m.g.xy, plot.type="persp", color="terrain", theta=160, zlab="Annual GDD50", se=1.96) ## ----plot-gam-1D-elev---------------------------------------------------- plot.gam(m.g.xy, select=2, rug=T, se=T, residuals=T, pch=20, shade=T, seWithMean=T, shade.col="lightblue") ## ----plot-gam-fits------------------------------------------------------- (rmse.gam <- sqrt(sum(residuals(m.g.xy)^2)/length(residuals(m.g.xy)))) plot(ne.m$ANN_GDD50 ~ predict(m.g.xy, newdata=ne.df), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.df$STATE), pch=20, col=1:4) grid(); abline(0,1) ## ----predict-gam, out.width='0.8\\textwidth'----------------------------- tmp <- predict.gam(object=m.g.xy, newdata=dem.ne.m.df, se.fit=TRUE) summary(tmp$fit) summary(tmp$se.fit) dem.ne.m.df$pred.gam <- tmp$fit dem.ne.m.df$pred.gam.se <- tmp$se.fit display.prediction.map("pred.gam", "Annual GDD, base 50F, GAM prediction", "GDD50") ## ----predict-gam-se, out.width='0.8\\textwidth'-------------------------- ggplot() + geom_point(aes(x=E, y=N, colour=pred.gam.se), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Annual GDD base 50F, Standard error of GAM prediction") + scale_colour_distiller(name="GDD50 s.e.", space="Lab", palette="RdYlGn", direction=-1) ## ----diffs-4km-gls-gam--------------------------------------------------- summary(dem.ne.m.df$diff.gls.gam <- dem.ne.m.df$pred.gls - dem.ne.m.df$pred.gam) ## ----display-diffs-4km-gls-gam, out.width='.8\\textwidth'---------------- display.difference.map("diff.gls.gam", "Difference annual GDD base 50F, GLS - GAM", "+/- GDD50") ## ----diffs-4km-rkgls-gam------------------------------------------------- dem.ne.m.df$diff.rkgls.gam <- dem.ne.m.df$pred.rkgls - dem.ne.m.df$pred.gam summary(dem.ne.m.df$diff.rkgls.gam) ## ----display-diffs-4km-rkgls-gam, out.width='.8\\textwidth'-------------- ggplot() + geom_point(aes(x=E, y=N, colour=diff.rkgls.gam), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Difference annual GDD base 50F, GLS/RK - GAM") + scale_colour_distiller(name="GDD50", space="Lab", palette="Spectral") ## ----child-data, child='exRKGLS_GWR.Rnw'--------------------------------- ## ----GWR-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----plot.fixed.kernel, out.width='0.6\\textwidth', w=6, h=4, fig.cap="Gaussian kernel"---- require(ggplot2) h <- 16 ggplot(data=data.frame(x=c(-64, 64)), aes(x)) + stat_function(fun=function(x) { exp(-1/2 * (x/h)^2) }) + labs(title = paste("Gaussian kernel, bandwidth parameter",h)) g.kernel <- function(d, h) { exp(-1/2 * (d/h)^2)} ## ---- 3) | (abs(t.e) > 3) | (abs(t.h) > 3)) ne.m@data[which(!is.sig),"STATION_NA"] ## ----gwr-compare-rmse---------------------------------------------------- n <- length(residuals(m.ols)) tmp.rmse <- data.frame(cbind(sqrt(sum(residuals(m.ols)^2)/n), sqrt(sum(residuals(m.gls)^2)/n), sqrt(sum((gwr.Gauss.f$SDF$pred - ne.m$ANN_GDD50)^2)/n), sqrt(sum((gwr.Gauss.a$SDF$pred - ne.m$ANN_GDD50)^2)/n))) row.names(tmp.rmse) <- "RMSE" ## ----gwr-pred-points----------------------------------------------------- tmp <- data.frame(round( cbind(summary(residuals(m.ols)), summary(residuals(m.gls)), summary(gwr.f$gwr.e), summary(gwr.a$gwr.e)),1)) tmp <- rbind(tmp.rmse, tmp) names(tmp) <- c("OLS", "GLS", "GWR-fixed", "GWR-adaptive") print(tmp) rm(tmp, tmp.rmse) ## ----manipulate-states--------------------------------------------------- require(raster) dem.ne4.m <- raster::mask(dem.ne.m, state.ne.m) ## ----convert-dem-transform----------------------------------------------- dem.ne4.m.spdf <- as(dem.ne4.m,"SpatialPointsDataFrame") ## match the coordinate names with the calibration points coordnames(dem.ne4.m.spdf) dem.ne4.km.spdf <- spTransform(dem.ne4.m.spdf, p4str.km) ## remove negative elevations so sqrt() is real names(dem.ne4.km.spdf) <- "ELEVATION_" dem.ne4.km.spdf[(dem.ne4.km.spdf$ELEVATION_ < 0), "ELEVATION_"] <- 0 ## add the coordinates to the data frame dem.ne4.km.spdf$E <- coordinates(dem.ne4.km.spdf)[,1] dem.ne4.km.spdf$N <- coordinates(dem.ne4.km.spdf)[,2] ## match the names with the calibration points summary(dem.ne4.km.spdf) ## ----gwr-predict--------------------------------------------------------- gwr.Gauss.f.pred <- gwr(ANN_GDD50 ~ E + N + sqrt(ELEVATION_), data=ne.km, predictions=TRUE, # for prediction fit.points=dem.ne4.km.spdf, bandwidth=bw.Gauss.f, gweight=gwr.Gauss) gwr.Gauss.a.pred <- gwr(ANN_GDD50 ~ E + N + sqrt(ELEVATION_), data=ne.km, predictions=TRUE, # for prediction fit.points=dem.ne4.km.spdf, adapt=bw.Gauss.a, gweight=gwr.Gauss) summary(gwr.Gauss.f.pred$SDF$pred) summary(gwr.Gauss.a.pred$SDF$pred) dem.ne4.km.spdf$pred.f <- gwr.Gauss.f.pred$SDF$pred dem.ne4.km.spdf$pred.a <- gwr.Gauss.a.pred$SDF$pred ## ----gwr-pred-coefficients----------------------------------------------- dem.ne4.km.spdf$b0.f <- gwr.Gauss.f.pred$SDF$"(Intercept)" dem.ne4.km.spdf$b0.a <- gwr.Gauss.a.pred$SDF$"(Intercept)" dem.ne4.km.spdf$N.f <- gwr.Gauss.f.pred$SDF$N dem.ne4.km.spdf$N.a <- gwr.Gauss.a.pred$SDF$N dem.ne4.km.spdf$E.f <- gwr.Gauss.f.pred$SDF$E dem.ne4.km.spdf$E.a <- gwr.Gauss.a.pred$SDF$E dem.ne4.km.spdf$H.f <- gwr.Gauss.f.pred$SDF$"sqrt(ELEVATION_)" dem.ne4.km.spdf$H.a <- gwr.Gauss.a.pred$SDF$"sqrt(ELEVATION_)" ## ----make-km-grid-------------------------------------------------------- dem.ne4.km.grid <- dem.ne4.km.spdf # convert to a gridded object gridded(dem.ne4.km.grid) <- TRUE; fullgrid(dem.ne4.km.grid) <- TRUE ## ----map-gwr-setup------------------------------------------------------- pred.range <- range(na.omit(dem.ne4.km.grid$pred.f), na.omit(dem.ne4.km.grid$pred.a)) cuts <- seq(pred.range[1], pred.range[2], length=24) ## ----map-gwr-pred, fig.width=14, fig.height=7, out.width='\\textwidth'---- p1 <- spplot(dem.ne4.km.grid, zcol="pred.f", main="Annual GDD50, GWR fixed Gaussian kernel", at=cuts, key.space="right", col.regions=bpy.colors(24)) p2 <- spplot(dem.ne4.km.grid, zcol="pred.a", main="Annual GDD50, GWR adaptive Gaussian kernel", at=cuts, key.space="right") print(p1, split=c(1,1,2,1), more=T); print(p2, split=c(2,1,2,1), more=F) ## ----map-gwr-setup-bo---------------------------------------------------- pred.range <- range(na.omit(dem.ne4.km.grid$b0.f), na.omit(dem.ne4.km.grid$b0.a)) cuts <- seq(pred.range[1], pred.range[2], length=24) ## ----map-gwr-pred-b0, fig.width=14, fig.height=7, out.width='\\textwidth'---- p1 <- spplot(dem.ne4.km.grid, zcol="b0.f", main="Intercept, fixed kernel", at=cuts, key.space="right", col.regions=topo.colors(24)) p2 <- spplot(dem.ne4.km.grid, zcol="b0.a", main="Intercept, adaptive kernel", at=cuts, key.space="right", col.regions=topo.colors(24)) print(p1, split=c(1,1,2,1), more=T); print(p2, split=c(2,1,2,1), more=F) ## ----map-gwr-setup-N----------------------------------------------------- pred.range <- range(na.omit(dem.ne4.km.grid$N.f), na.omit(dem.ne4.km.grid$N.a)) cuts <- seq(pred.range[1], pred.range[2], length=24) ## ----map-gwr-pred-N, fig.width=14, fig.height=7, out.width='\\textwidth'---- p1 <- spplot(dem.ne4.km.grid, zcol="N.f", main="Northing coefficient, fixed kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) p2 <- spplot(dem.ne4.km.grid, zcol="N.a", main="Northing coefficient, adaptive kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) print(p1, split=c(1,1,2,1), more=T); print(p2, split=c(2,1,2,1), more=F) ## ----map-gwr-setup-E----------------------------------------------------- pred.range <- range(na.omit(dem.ne4.km.grid$E.f), na.omit(dem.ne4.km.grid$E.a)) cuts <- seq(pred.range[1], pred.range[2], length=24) ## ----map-gwr-pred-E, fig.width=14, fig.height=7, out.width='\\textwidth'---- p1 <- spplot(dem.ne4.km.grid, zcol="E.f", main="Easting coefficient, fixed kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) p2 <- spplot(dem.ne4.km.grid, zcol="E.a", main="Easting coefficient, adaptive kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) print(p1, split=c(1,1,2,1), more=T); print(p2, split=c(2,1,2,1), more=F) ## ----map-gwr-setup-H----------------------------------------------------- pred.range <- range(na.omit(dem.ne4.km.grid$H.f), na.omit(dem.ne4.km.grid$H.a)) cuts <- seq(pred.range[1], pred.range[2], length=24) ## ----map-gwr-pred-H, fig.width=14, fig.height=7, out.width='\\textwidth'---- p1 <- spplot(dem.ne4.km.grid, zcol="H.f", main="elevation coefficient, fixed kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) p2 <- spplot(dem.ne4.km.grid, zcol="H.a", main="elevation coefficient, adaptive kernel", at=cuts, key.space="right", col.regions=terrain.colors(24)) print(p1, split=c(1,1,2,1), more=T); print(p2, split=c(2,1,2,1), more=F) ## ----child-data, child='exRKGLS_RF.Rnw'---------------------------------- ## ----RF-set-parent, echo=FALSE, cache=FALSE------------------------------ set_parent('exRKGLS.Rnw') ## ----load-rpart---------------------------------------------------------- library(rpart) ## ----set-seed-rt, echo=F, results='hide'--------------------------------- # set seed for the random number generator to get a consistent result for the document # this should *not* be done when following the exercise set.seed(318316) ## ----trees-comp-rpart---------------------------------------------------- m.rt <- rpart(ANN_GDD50 ~ N + E + ELEVATION_, data=ne.df, minsplit=2, cp=0.003) ## ----summary-rt---------------------------------------------------------- print(m.rt) ## ----trees-plot-unpruned, out.width='\\textwidth'------------------------ library(rpart.plot) rpart.plot(m.rt, digits=3, type=4, extra=1) ## ----trees-import-rpart-------------------------------------------------- x <- m.rt$variable.importance data.frame(variableImportance = 100 * x / sum(x)) ## ----trees-plotcp-rp, out.width='\\textwidth'---------------------------- printcp(m.rt) plotcp(m.rt) ## ----trees-prune-rpart--------------------------------------------------- (m.rt.p <- prune(m.rt, cp=0.0045)) ## ----trees-plot-pruned, out.width='\\textwidth'-------------------------- rpart.plot(m.rt.p, digits=3, type=4, extra=1) ## ----trees-predict-cal-pts----------------------------------------------- summary(p.rt.p <- predict(m.rt.p, newdata=ne.df)) ## ----trees-count-unique-pred--------------------------------------------- length(unique(p.rt.p)) ## ----trees-residuals-histo----------------------------------------------- summary(residuals.rt.p <- ne.df$ANN_GDD50 - p.rt.p) hist(residuals.rt.p, main="Residuals from regression tree fit", xlab="ANN_GDD50") rug(residuals.rt.p) sqrt(mean(residuals.rt.p^2)/length(residuals.rt.p)) ## ----trees-predict-rpart, out.width='0.75\\textwidth', keep.source=TRUE---- plot(ne.df$ANN_GDD50 ~ p.rt.p, asp=1, pch=20, xlab="fitted by regression tree", ylab="actual", xlim=c(500,4200), ylim=c(500,4200), col=ne.df$STATE, main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----pred-rt-area-------------------------------------------------------- dem.ne.m.df$pred.rt <- predict(m.rt.p, newdata=dem.ne.m.df) summary(dem.ne.m.df$pred.rt) ## ----plot-rt-area-------------------------------------------------------- display.prediction.map("pred.rt", "Annual GDD, base 50F, regression tree prediction", "GDD50") ## ----set-seed-rf, echo=F, results='hide'--------------------------------- # set seed for the random number generator to get a consistent result for the document # this should *not* be done when following the exercise set.seed(318316) ## ----random-forest------------------------------------------------------- library(randomForest) m.rf <- randomForest(ANN_GDD50 ~ ELEVATION_ + N + E, data=ne.df, ntree=1200, importance=TRUE) randomForest::importance(m.rf)[,1] randomForest::importance(m.rf)[,2]/max(randomForest::importance(m.rf)[,2]) ## ----plot-rf-fits-------------------------------------------------------- summary(predict(m.rf, newdata=ne.m)) plot(ne.m$ANN_GDD50 ~ predict(m.rf, newdata=ne.m), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----rf-fits-errors------------------------------------------------------ summary(rf.resid <- ne.m$ANN_GDD50 - predict(m.rf, newdata=ne.m)) (rf.me <- mean(rf.resid)) (rf.rmse <- sqrt(mean(rf.resid^2)/length(rf.resid))) ## ----plot-rf-oob--------------------------------------------------------- summary(predict(m.rf)) plot(ne.m$ANN_GDD50 ~ predict(m.rf), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest (OOB)", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid() abline(0,1) ## ----rf-oob-errors------------------------------------------------------- summary(rf.oob.resid <- ne.m$ANN_GDD50 - predict(m.rf)) (rf.oob.me <- mean(rf.oob.resid)) (rf.oob.rmse <- sqrt(mean(rf.oob.resid^2)/length(rf.oob.resid))) ## ----rf-compare-errors--------------------------------------------------- (rf.oob.me/rf.me) (rf.oob.rmse/rf.rmse) ## ----rf-resid------------------------------------------------------------ ne.m$rf.resid <- (ne.df$ANN_GDD50 - predict(m.rf, newdata=ne.m)) summary(ne.m$rf.resid) bubble(ne.m, zcol="rf.resid", pch=1, main="Random Forest residuals, actual-predicted") ## ----rf-resid-oob-------------------------------------------------------- ne.m$rf.resid.oob <- (ne.df$ANN_GDD50 - predict(m.rf)) summary(ne.m$rf.resid.oob) summary(ne.m$rf.resid) summary(ne.m$rf.resid.oob/ne.m$rf.resid) ## ----which-rf-worst------------------------------------------------------ (ix <- order(abs(ne.m$rf.resid), decreasing=TRUE)[1:8]) ne.m@data[ix,c("STATE","STATION_NA","ELEVATION_", "gls.resid", "rf.resid")] ## ----vgm-rf-resid, out.width='0.5\\textwidth'---------------------------- v.rf <- variogram(rf.resid ~ 1, locations=ne.m, cutoff=100000, width=16000) plot(v.rf, pl=T) ## ------------------------------------------------------------------------ summary(ne.m$ols.resid) summary(ne.m$rf.resid) summary(ne.m$gls.resid) sd(ne.m$ols.resid) sd(ne.m$rf.resid) sd(ne.m$gls.resid) ## ----predict-4km-rf------------------------------------------------------ dem.ne.m.df$pred.rf <- predict(m.rf, newdata=dem.ne.m.df) summary(dem.ne.m.df$pred.rf) ## ----display-4km-rf, out.width='.8\\textwidth'--------------------------- display.prediction.map("pred.rf", "Annual GDD, base 50F, random forest prediction", "GDD50") ## ----diffs-4km-gls-rf---------------------------------------------------- summary(dem.ne.m.df$diff.gls.rf <- dem.ne.m.df$pred.gls - dem.ne.m.df$pred.rf) ## ----display-diffs-4km-gls-rf, out.width='.8\\textwidth'----------------- display.difference.map("diff.gls.rf", "Annual GDD, base 50F, GLS - RF predictions", "+/- GDD50") ## ----load-caret---------------------------------------------------------- library(caret) ## ----list-models--------------------------------------------------------- length(names(getModelInfo())) head(names(getModelInfo()),24) ## ----caret-model-info---------------------------------------------------- getModelInfo("ranger")$ranger$parameters ## ----set-seed-caret, echo=F, results='hide'------------------------------ # set seed for the random number generator to get a consistent result for the document # this should *not* be done when following the exercise set.seed(318316) ## ----caret--------------------------------------------------------------- library(ranger) dim(preds <- ne.df[, c("E", "N", "ELEVATION_")]) length(response <- ne.df[, "ANN_GDD50"]) system.time( ranger.tune <- train(x = preds, y = response, method="ranger", tuneGrid = expand.grid(.mtry = 1:3, .splitrule = "variance", .min.node.size = 1:10), trControl = trainControl(method = 'cv')) ) ## ----caret-show-results, out.width='.65\\textwidth', fig.width=8, fig.height=5---- print(ranger.tune) names(ranger.tune$result) ix <- which.min(ranger.tune$result$RMSE) ranger.tune$result[ix, c(1,3,4)] ix <- which.max(ranger.tune$result$Rsquared) ranger.tune$result[ix, c(1,3,5)] ix <- which.min(ranger.tune$result$MAE) ranger.tune$result[ix, c(1,3,6)] plot.train(ranger.tune, metric="RMSE") plot.train(ranger.tune, metric="Rsquared") plot.train(ranger.tune, metric="MAE") ## ----fit-ranger---------------------------------------------------------- (ranger.rf <- ranger(ANN_GDD50 ~ N + E + ELEVATION_, data=ne.df, mtry=2, min.node.size=9)) ## ----plot-ranger-fits---------------------------------------------------- summary(ranger.fits <- predict(ranger.rf, data=ne.df)$predictions) plot(ne.df$ANN_GDD50 ~ ranger.fits, col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by ranger", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) ## ----predict-4km-ranger-------------------------------------------------- summary(dem.ne.m.df$pred.ranger <- predict(ranger.rf, data=dem.ne.m.df)$predictions) ## ----display-4km-ranger, out.width='.8\\textwidth'----------------------- display.prediction.map("pred.ranger", "Annual GDD, base 50F, ranger RF prediction", "GDD50") ## ----diffs-4km-ranger-rf------------------------------------------------- summary(dem.ne.m.df$diff.ranger.rf <- dem.ne.m.df$pred.ranger - dem.ne.m.df$pred.rf) ## ----display-diffs-4km-ranger-rf, out.width='.8\\textwidth'-------------- display.difference.map("diff.ranger.rf", "Annual GDD, base 50F, ranger - randomForest predictions", "+/- GDD50") ## ----load-cubist--------------------------------------------------------- library(Cubist) ## ----vignette-cubist, eval=FALSE----------------------------------------- ## vignette("cubist") ## ----set-seed-cubist, echo=F, results='hide'----------------------------- # set seed for the random number generator to get a consistent result for the document # this should *not* be done when following the exercise set.seed(316318) ## ----cubist-train-------------------------------------------------------- require(caret) all.preds <- ne.df[, c("N", "E", "ELEVATION_")] all.resp <- ne.df[ , "ANN_GDD50"] system.time( cubist.tune <- train(x = all.preds, y = all.resp, "cubist", tuneGrid = expand.grid(.committees = 1:12, .neighbors = 0:8), trControl = trainControl(method = 'cv')) ) ## ----cubist-train-results, fig.width=8, fig.height=5--------------------- print(cubist.tune) plot(cubist.tune, metric="RMSE") plot(cubist.tune, metric="Rsquared") plot(cubist.tune, metric="MAE") ## ----cubist-fit-best----------------------------------------------------- c.model <- cubist(x = all.preds, y = all.resp, committees=6) summary(c.model) ## ----cubist-model-eval-fit----------------------------------------------- # predictive accuracy cubist.fits <- predict(c.model, newdata=all.preds, neighbors=cubist.tune$bestTune$neighbors) ## Test set RMSE sqrt(mean((cubist.fits - all.resp)^2)) cor(cubist.fits, all.resp)^2 # R^2 plot(ne.df$ANN_GDD50 ~ cubist.fits, col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by cubist", ylab="Actual", main="Annual GDD50") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) ## ----cubist-model-predict------------------------------------------------ summary(dem.ne.m.df$pred.cubist <- predict(c.model, newdata=dem.ne.m.df, neighbours=cubist.tune$bestTune$neighbors)) ## ----display-4km-cubist, out.width='.8\\textwidth'----------------------- display.prediction.map("pred.cubist", "Annual GDD, base 50F, Cubist prediction", "GDD50") ## ----child-data, child='exRKGLS_TPS.Rnw'--------------------------------- ## ----TPS-set-parent, echo=FALSE, cache=FALSE----------------------------- set_parent('exRKGLS.Rnw') ## ----TPS----------------------------------------------------------------- require(fields) ne.tps <- ne.df ne.tps$coords <- matrix(c(ne.tps$E, ne.tps$N), byrow=F, ncol=2) str(ne.tps$coords) surf.1 <-Tps(ne.tps$coords, ne.tps$ANN_GDD50) summary(surf.1) ## ----TPS-grid------------------------------------------------------------ resolution <- 9 bbox(state.ne.m) (ne.sq.km <- diff(bbox(state.ne.m)[1,]) * diff(bbox(state.ne.m)[2,])/10^6) (approx.n.grid.cells <- ne.sq.km/(resolution^2)) states.grid <- spsample(state.ne.m, n=approx.n.grid.cells, type="regular") gridded(states.grid) <- TRUE states.grid.df <- as.data.frame(states.grid) states.grid.df$coords <- matrix(c(states.grid.df$x1, states.grid.df$x2), byrow=F, ncol=2) str(states.grid.df) ## ----TPS-predict--------------------------------------------------------- surf.1.pred <- predict.Krig(surf.1, states.grid.df$coords) summary(as.vector(surf.1.pred)) ## ----TPS-map, out.width='\\textwidth'------------------------------------ plot(states.grid.df$coords, pch=20, asp=1, cex=.6, col=bpy.colors(256)[cut(surf.1.pred, 256)], xlab="E", ylab="N", main="Annual GDD50") lines(state.ne.m, col="white") grid(col="gray") ## ----states-grid-spdf---------------------------------------------------- states.grid.spdf <- SpatialPixelsDataFrame( states.grid, data=data.frame(tps.pred=as.vector(surf.1.pred)) ) ## ----get-value-tps------------------------------------------------------- pt <- SpatialPoints(coords=cbind(-76.402175, 42.453271)) proj4string(pt) <- CRS("+init=EPSG:4326") pt <- spTransform(pt, CRS(proj4string(states.grid.spdf))) coordinates(pt) pt.sp <- SpatialPointsDataFrame(pt, over(pt, states.grid.spdf)) pt.sp$tps.pred ## ----child-data, child='exRKGLS_OK.Rnw'---------------------------------- ## ----OK-set-parent, echo=FALSE, cache=FALSE------------------------------ set_parent('exRKGLS.Rnw') ## ----display-ordinary-v, out.width='0.6\\textwidth', w=8, h=6------------ library(gstat) bbox(ne.m) (cutoff.default <- (sqrt(diff(bbox(ne.m)["E",])^2 + diff(bbox(ne.m)["N",])^2))/3) (binwidth.default <- cutoff.default/15) plot(v.o <- variogram(ANN_GDD50 ~ 1, locations=ne.m), plot.numbers=TRUE) ## ----display-ordinary-v-long, out.width='0.6\\textwidth', w=8, h=6------- plot(v.o <- variogram(ANN_GDD50 ~ 1, locations=ne.m, cutoff=500000, width=20000), plot.numbers=TRUE) ## ----show-ordinary-v-short, out.width='0.6\\textwidth', w=8, h=6--------- v.o <- variogram(ANN_GDD50 ~ 1, locations=ne.m, cutoff=220000) plot(v.o, plot.numbers=TRUE) ## ----model-ordinary-2, out.width='0.6\\textwidth', w=8, h=6-------------- (vmf.o <- fit.variogram(v.o, vgm(psill=210000, model="Sph", range=220000, nugget=40000))) plot(v.o, plot.numbers=TRUE, model=vmf.o) ## ----krige-ok------------------------------------------------------------ # dem.ne.m.sp was set up in \S6.2 "Adjusting the grid for prediction" k.ok <- krige(ANN_GDD50 ~ 1, locations=ne.m, newdata=dem.ne.m.sp, model=vmf.o, nmax=24, block=c(1000,1000)) summary(k.ok) ## ----close-sta-eith------------------------------------------------------ e.ith.pt <- subset(ne.m, (ne.m$STATION_NA=="ITHACA CORNELL UNIV")) dist.pt <- spDists(ne.m, e.ith.pt) (ix <- order(dist.pt)[1:24]) print(cbind(ne.df[ix,c('STN_NAME','STATE','ELEVATION_','ANN_GDD50')], dist=round(dist.pt[ix,]/1000,1))) ## ----krige-plot-ok-pred, out.width='0.65\\textwidth'--------------------- spplot(k.ok, zcol="var1.pred") ## ----krige-plot-ok-pred-df, out.width='0.8\\textwidth'------------------- dim(dem.ne.m.df) dem.ne.m.df$pred.ok <- k.ok$var1.pred display.prediction.map("pred.ok", "Annual GDD base 50F, OK prediction", "GDD50") ## ----krige-plot-ok-pred-sd-df, out.width='0.8\\textwidth'---------------- dem.ne.m.df$sd.ok <- sqrt(k.ok$var1.var) ggplot() + geom_point(aes(x=E, y=N, colour=sd.ok), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Annual GDD base 50F, Standard error of OK prediction") + scale_colour_distiller(name="GDD50 s.d.", space="Lab", palette="RdYlGn", direction=-1) ## ----krige-min-sd-------------------------------------------------------- summary(dem.ne.m.df$ok.sd) ix <- which.min(dem.ne.m.df$ok.sd) k.ok@data[ix,"var1.pred"] round(100*dem.ne.m.df[ix,"ok.sd"]/k.ok$var1.pred[ix],1) ## ----krige-loocv-ok, results='hide'-------------------------------------- kcv.ok <- krige.cv(ANN_GDD50 ~ 1, locations=ne.m, model=vmf.o) summary(kcv.ok$residual) ## ------------------------------------------------------------------------ (loocv.ok.rmse <- sqrt(sum(kcv.ok$residual^2)/length(kcv.ok$residual))) ## ----ok-loocv-bubble-ok-------------------------------------------------- bubble(kcv.ok, zcol="residual", pch=1, main="LOOCV OK residuals") ## ------------------------------------------------------------------------ ne.m@data[which.min(kcv.ok$residual),2:6] ne.m@data[which.max(kcv.ok$residual),2:6] ## ----idw----------------------------------------------------------------- k.idw <- idw(ANN_GDD50 ~ 1, locations=ne.m, newdata=dem.ne.m.sp, idp=2, nmax=24) summary(k.idw$var1.pred) summary(k.ok$var1.pred) ## ----idw-plot, out.width='.8\\textwidth'--------------------------------- dem.ne.m.df$pred.idw <- k.idw$var1.pred display.prediction.map("pred.idw", "Annual GDD base 50F, IDW2 prediction", "GDD50") ## ----idw-loocv-1, results='hide'----------------------------------------- kcv.idw <- krige.cv(ANN_GDD50 ~ 1, locations=ne.m, model=NULL) ## ----idw-loocv-2--------------------------------------------------------- summary(kcv.idw$residual) summary(kcv.ok$residual) (loocv.idw.rmse <- sqrt(sum(kcv.idw$residual^2)/length(kcv.idw$residual))) (loocv.ok.rmse) ## ----idw-loocv-bubble---------------------------------------------------- bubble(kcv.idw, zcol="residual", pch=1, main="IDW^2 LOOCV residuals") ## ----child-data, child='exRKGLS_Thiessen.Rnw'---------------------------- ## ----Thiessen-set-parent, echo=FALSE, cache=FALSE------------------------ set_parent('exRKGLS.Rnw') ## ----voronoi, out.width='0.9\\linewidth'--------------------------------- library(dismo) v <- voronoi(ne.m, ext=t(bbox(state.ne.m))) class(v) plot(v) ## ----union-state--------------------------------------------------------- require(rgeos) area.ne.m <- gUnaryUnion(state.ne.m) class(area.ne.m) plot(area.ne.m) ## ----clip-voronoi, out.width='\\linewidth'------------------------------- v2 <- gIntersection(v, area.ne.m, byid=TRUE) class(v2) plot(v2) ## ----predict-voroni-over------------------------------------------------- v3 <- over(v2, ne.m) class(v3) ## ----predict-overlay-spdf------------------------------------------------ v4 <- SpatialPolygonsDataFrame(v2, data=v3) names(v4) ## ----plot-pred-voronoi, out.width='\\linewidth'-------------------------- spplot(v4, zcol="ANN_GDD50", col.regions=topo.colors(64), main="Annual GDD base 50F") ## ----find-point---------------------------------------------------------- pt <- SpatialPoints(coords=cbind(-76.402175, 42.453271)) proj4string(pt) <- CRS("+init=EPSG:4326") pt <- spTransform(pt, CRS(proj4string(ne.m))) coordinates(pt) ## ----predict-pt-voronoi-------------------------------------------------- pt.sp <- SpatialPointsDataFrame(pt, over(pt, v4)) pt.sp$ANN_GDD50 ## ----compare-voronoi-rkgls----------------------------------------------- dem.ne.m.spdf <- SpatialPixelsDataFrame(coordinates(dem.ne.m.sp), data=dem.ne.m.df) proj4string(dem.ne.m.spdf) <- proj4string(ne.m) pt.pred <-over(pt, dem.ne.m.spdf) names(pt.pred) pt.pred[,c("pred.ols", "pred.gls", "pred.rkgls", "pred.ked", "pred.rf")] ## ----compare-elev-------------------------------------------------------- pt.sp$ELEVATION_ pt.pred$ELEVATION_ ## ----inter-station-dist-------------------------------------------------- dm <- spDists(ne.m) str(dm) ## ----find-nn------------------------------------------------------------- diag(dm) <- max(dm)*1.1 nn <- apply(dm, 1, which.min) str(nn) ## ----predict-nn---------------------------------------------------------- nn.gdd <- ne.m@data[nn,"ANN_GDD50"] str(nn.gdd) ## ----compare-nn-gdd------------------------------------------------------ obs <- ne.m@data[,"ANN_GDD50"] summary(obs) summary(nn.gdd) summary(diff <- (obs - nn.gdd)) hist(diff) rug(diff) (me <- mean(diff)) (rmse <- sqrt(sum(diff^2)/length(diff))) ## ------------------------------------------------------------------------ rf.oob.me; rf.oob.rmse ## ----plot-loocv-voronoi, out.width='\\linewidth'------------------------- v4$resid <- diff spplot(v4, zcol="resid", col.regions=bpy.colors(32), main="Cross-validation error, Thiessen polygon prediction") ## ----child-data, child='exRKGLS_compare.Rnw'----------------------------- ## ----compare-set-parent, echo=FALSE, cache=FALSE------------------------- set_parent('exRKGLS.Rnw') ## ----read-july-no, eval=FALSE-------------------------------------------- ## tmp <- readOGR(dsn=".", layer="maat7100", ## integer64="allow.loss") ## ----read-july-usa-yes, echo=FALSE--------------------------------------- tmp <- readOGR(dsn="./ds/NEweather", layer="maat7100", integer64="allow.loss") ## ----july-4states-------------------------------------------------------- ix <- (tmp$STATE %in% c("NY","NJ","PA","VT")) tmp <- tmp[ix,] names(tmp) ## ----july-crs------------------------------------------------------------ strwrap(proj4string(dem.ne.m)) tmp.m <- spTransform(tmp, ne.crs) proj4string(tmp.m) ## ----add-july------------------------------------------------------------ names(ne.m) ne.m <- cbind(ne.m, tmp.m[,7:19]) names(ne.m) rm(tmp, tmp.m) ## ----add-july-df--------------------------------------------------------- ne.df <- as(ne.m, "data.frame") ## ----standardized-------------------------------------------------------- summary(ne.m$ANN_GDD50) summary(ne.m$ANN_TNORM_) gdd50.std <- (ne.m$ANN_GDD50 - mean(ne.m$ANN_GDD50))/sd(ne.m$ANN_GDD50) t.ann.std <- (ne.m$ANN_TNORM_ - mean(ne.m$ANN_TNORM_))/sd(ne.m$ANN_TNORM_) summary(gdd50.std) summary(t.ann.std) ## ----check-corr-t-gdd---------------------------------------------------- cor(ne.m$ANN_GDD50, ne.m$ANN_TNORM_) cor(gdd50.std, t.ann.std) plot(gdd50.std, t.ann.std, asp=1, xlab="Standardized annual GDD50", ylab="Standardized mean annual T") abline(0,1) ## ----build-two-models---------------------------------------------------- summary(m.ols.t <- lm(t.ann.std~ sqrt(ELEVATION_)+N+E, data=ne.df)) summary(m.ols.gdd <- lm(gdd50.std ~ sqrt(ELEVATION_)+N+E, data=ne.df)) coef(m.ols.t) coef(m.ols.gdd) coef(m.ols.t)/coef(m.ols.gdd) ## ----show-ols-fits-two, out.width='\\textwidth', w=12, h=6--------------- summary(ne.m$ols.resid.t <- residuals(m.ols.t)) summary(ne.m$ols.resid.gdd <- residuals(m.ols.gdd)) p1 <- bubble(ne.m, zcol="ols.resid.t", pch=1, main="OLS residuals, std. annual T") p2 <- bubble(ne.m, zcol="ols.resid.gdd", pch=1, main="OLS residuals, std. GDD50") print(p1, split=c(1,1,2,1), more=T) print(p2, split=c(2,1,2,1), more=F) ## ----compare-resids-two-------------------------------------------------- summary(ne.m$ols.resid.diff <- ne.m$ols.resid.t - ne.m$ols.resid.gdd) bubble(ne.m, zcol="ols.resid.diff", pch=1, main="Difference of residuals, T - GDD", col=c("#EF8A62","#67A9CF")) ## ----model-resid-vgms---------------------------------------------------- v.r.ols.t <- variogram(ols.resid.t ~ 1, locations=ne.m, cutoff=120000, width=12000) (vmf.r.ols.t <- fit.variogram(v.r.ols.t, vgm(psill=0.1, model="Exp", range=20000, nugget=0.02))) p1 <- plot(v.r.ols.t, pl=T, model=vmf.r.ols.t) # v.r.ols.gdd <- variogram(ols.resid.gdd ~ 1, locations=ne.m, cutoff=120000, width=12000) (vmf.r.ols.gdd <- fit.variogram(v.r.ols.gdd, vgm(psill=0.12, model="Exp", range=20000, nugget=0.02))) p2 <- plot(v.r.ols.gdd, pl=T, model=vmf.r.ols.gdd) print(p1, split=c(1,1,1,2), more=T) print(p2, split=c(1,2,1,2), more=F) ## ----fit-gls-two--------------------------------------------------------- # require(nlme) (p.nugget <- vmf.r.ols.t[1,"psill"]/sum(vmf.r.ols.t[,"psill"])) m.gls.t <- gls(model=t.ann.std ~ sqrt(ELEVATION_) + N + E, data=ne.df, correlation=corExp( value=c(vmf.r.ols.t[2,"range"], p.nugget), form=~E + N, nugget=TRUE)) # (p.nugget <- vmf.r.ols.gdd[1,"psill"]/sum(vmf.r.ols.gdd[,"psill"])) m.gls.gdd <- gls(model=gdd50.std ~ sqrt(ELEVATION_) + N + E, data=ne.df, correlation=corExp( value=c(vmf.r.ols.gdd[2,"range"], p.nugget), form=~E + N, nugget=TRUE)) summary(m.gls.t) summary(m.gls.gdd) ## ----compare-coeff-gls-two----------------------------------------------- coefficients(m.gls.t) coefficients(m.gls.gdd) coefficients(m.gls.t)/coefficients(m.gls.gdd) ## ----compare-corr-struct-gls-two----------------------------------------- intervals(m.gls.t)$corStruct intervals(m.gls.gdd)$corStruct intervals(m.gls.t)$corStruct/intervals(m.gls.gdd)$corStruct ## ----gls-resid-two------------------------------------------------------- ne.m$gls.resid.t <- residuals(m.gls.t) ne.m$gls.resid.gdd <- residuals(m.gls.gdd) ## ----show-vgm-gls-corr-std----------------------------------------------- (p.nugget <- intervals(m.gls.t)$corStruct["nugget","est."]) (t.sill <- var(ne.m$gls.resid.t)) (vmf.r.gls.t <- vgm(psill=t.sill*(1-p.nugget), model="Exp", range=intervals(m.gls.t)$corStruct["range","est."], nugget=t.sill*p.nugget)) v.r.gls.t <- variogram(gls.resid.t ~ 1, locations=ne.m, cutoff=120000, width=12000) p1 <- plot(v.r.gls.t, model=vmf.r.gls.t, pl=T) # (p.nugget <- intervals(m.gls.gdd)$corStruct["nugget","est."]) (t.sill <- var(ne.m$gls.resid.gdd)) (vmf.r.gls.gdd <- vgm(psill=t.sill*(1-p.nugget), model="Exp", range=intervals(m.gls.gdd)$corStruct["range","est."], nugget=t.sill*p.nugget)) v.r.gls.gdd <- variogram(gls.resid.gdd ~ 1, locations=ne.m, cutoff=120000, width=12000) p2 <- plot(v.r.gls.gdd, model=vmf.r.gls.gdd, pl=T) print(p1, split=c(1,1,1,2), more=T) print(p2, split=c(1,2,1,2), more=F) ## ----gls-predict-two----------------------------------------------------- dem.ne.m.df$pred.gls.t <- predict(m.gls.t, newdata=dem.ne.m.df) dem.ne.m.df$pred.gls.gdd <- predict(m.gls.gdd, newdata=dem.ne.m.df) dem.ne.m.df$diff.gls.t.gdd <- dem.ne.m.df$pred.gls.t - dem.ne.m.df$pred.gls.gdd ## ----plot-4km-gdd-t, out.width='.8\\textwidth'--------------------------- (std.pred.lim <- c(min(dem.ne.m.df[,c("pred.gls.t","pred.gls.gdd")]), max(dem.ne.m.df[,c("pred.gls.t","pred.gls.gdd")]))) display.prediction.map("pred.gls.t", "Mean Annual Temperature, standardized, GLS prediction", "GDD50", std.pred.lim, .palette="YlOrBr") display.prediction.map("pred.gls.gdd", "Annual GDD, base 50F, standardized, GLS prediction", "GDD50", std.pred.lim, .palette="YlOrBr") ## ----diffs-4km-gls-t-gdd------------------------------------------------- summary(dem.ne.m.df$diff.gls.t.gdd <- dem.ne.m.df$pred.gls.t - dem.ne.m.df$pred.gls.gdd) ## ----plot-4km-gdd-t-diff, out.width='.8\\textwidth'---------------------- display.difference.map("diff.gls.t.gdd", "Difference, MAT - GDD50, standardized", "+/- s.d.", .palette="BrBG") ## ----ok-gls-resid-t-gdd-std---------------------------------------------- ok.gls.resid.t <- krige(gls.resid.t ~ 1, loc=ne.m, newdata=dem.ne.m.sp, model=vmf.r.gls.t) ok.gls.resid.gdd <- krige(gls.resid.gdd ~ 1, loc=ne.m, newdata=dem.ne.m.sp, model=vmf.r.gls.gdd) dem.ne.m.df$ok.gls.resid.t <- ok.gls.resid.t$var1.pred dem.ne.m.df$ok.gls.resid.gdd <- ok.gls.resid.gdd$var1.pred ## ----ok-gls-resid-t-gdd-std-maps, out.width='.8\\textwidth'-------------- ggplot() + geom_point(aes(x=E, y=N, colour=ok.gls.resid.t), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Residuals from GLS trend surface, MAT (std)") + scale_colour_distiller(name="T (std)", space="Lab", palette="RdBu") ggplot() + geom_point(aes(x=E, y=N, colour=ok.gls.resid.gdd), data=dem.ne.m.df) + xlab("E") + ylab("N") + coord_fixed() + ggtitle("Residuals from GLS trend surface, GDD base 50F (std)") + scale_colour_distiller(name="GDD50 (std)", space="Lab", palette="RdBu") ## ----ok-gls-resid-t-gdd-diff, out.width='.8\\textwidth'------------------ summary(dem.ne.m.df$ok.gls.resid.diff.t.gdd <- dem.ne.m.df$ok.gls.resid.t - dem.ne.m.df$ok.gls.resid.gdd) display.difference.map("ok.gls.resid.diff.t.gdd", "Difference: kriged residuals from GLS trend surface", "+/- s.d.", .palette="RdGy") ## ----final-gls-rk-std, out.width='.8\\textwidth'------------------------- summary(dem.ne.m.df$pred.rkgls.std.t <- dem.ne.m.df$pred.gls.t + dem.ne.m.df$ok.gls.resid.t) summary(dem.ne.m.df$pred.rkgls.std.gdd <- dem.ne.m.df$pred.gls.gdd + dem.ne.m.df$ok.gls.resid.gdd) display.prediction.map("pred.rkgls.std.t", "GLS-RK prediction, Mean Annual Temperature, standardized", "MAAT (std)", std.pred.lim, .palette="YlOrBr") display.prediction.map("pred.rkgls.std.gdd", "GLS-RK prediction, Annual GDD, base 50F, standardized", "GDD50 (std)", std.pred.lim, .palette="YlOrBr") ## ----final-gls-rk-std-diff, out.width='.8\\textwidth'-------------------- # summary(dem.ne.m.df$diff.rkgls.std.t.gdd <- (dem.ne.m.df$pred.rkgls.std.t - dem.ne.m.df$pred.rkgls.std.gdd)) display.difference.map("diff.rkgls.std.t.gdd", "GLS-RK predictions, difference, MAT - GDD50, standardized", "+/- s.d.", .palette="BrBG") ## ----build-rf-two-------------------------------------------------------- m.rf.std.t <- randomForest(t.ann.std ~ ELEVATION_ + N + E, data=ne.df, ntree=1200, importance=TRUE) m.rf.std.gdd <- randomForest(gdd50.std ~ ELEVATION_ + N + E, data=ne.df, ntree=1200, importance=TRUE) ## ----compare-rf-importance----------------------------------------------- randomForest::importance(m.rf.std.t) randomForest::importance(m.rf.std.gdd) ## ----compare-rf-11-oob, out.width='0.45\\linewidth'---------------------- plot(t.ann.std ~ predict(m.rf.std.t, newdata=ne.m), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest", ylab="Actual", main="Mean Annual T (std)") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) plot(gdd50.std ~ predict(m.rf.std.t, newdata=ne.m), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest", ylab="Actual", main="Annual GDD50 (std)") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) plot(t.ann.std ~ predict(m.rf.std.t), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest (OOB)", ylab="Actual", main="Mean Annual T (std)") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) plot(gdd50.std ~ predict(m.rf.std.t), col=ne.m$STATE, pch=20, asp=1, xlab="Fitted by random forest (OOB)", ylab="Actual", main="Annual GDD50 (std)") legend("topleft", levels(ne.m$STATE), pch=20, col=1:4) grid(); abline(0,1) ## ----predict-rf-two------------------------------------------------------ dem.ne.m.df$pred.rf.std.t <- predict(m.rf.std.t, newdata=dem.ne.m.df) dem.ne.m.df$pred.rf.std.gdd <- predict(m.rf.std.gdd, newdata=dem.ne.m.df) (std.pred.lim <- c(min(dem.ne.m.df[,c("pred.rf.std.t","pred.rf.std.gdd")]), max(dem.ne.m.df[,c("pred.rf.std.t","pred.rf.std.gdd")]))) display.prediction.map("pred.rf.std.t", "Mean Annual Temperature, standardized, RF prediction", "GDD50", std.pred.lim, .palette="YlOrBr") display.prediction.map("pred.rf.std.gdd", "Annual GDD, base 50F, standardized, RF prediction", "GDD50", std.pred.lim, .palette="YlOrBr") ## ----predict-rf-two-diff------------------------------------------------- summary(dem.ne.m.df$diff.rf.std.t.gdd <- dem.ne.m.df$pred.rf.std.t - dem.ne.m.df$pred.rf.std.gdd) display.difference.map("diff.rf.std.t.gdd", "RF predictions, difference, MAT - GDD50, standardized", "+/- s.d.", .palette="BrBG") ## ----child-data, child='exRKGLS_answers.Rnw'----------------------------- ## ----answers-set-parent, echo=FALSE, cache=FALSE------------------------- set_parent('exRKGLS.Rnw') ## ----get-rt-row, echo=FALSE---------------------------------------------- ix.rt.l <- which(row.names(m.rt$frame)=="2") ix.rt.r <- which(row.names(m.rt$frame)=="3") ## ----child-data, child='exRKGLS_challenge.Rnw'--------------------------- ## ----challenge-set-parent, echo=FALSE, cache=FALSE----------------------- set_parent('exRKGLS.Rnw') ## ----child-data, child='exRKGLS_colours.Rnw'----------------------------- ## ----colours-set-parent, echo=FALSE, cache=FALSE------------------------- set_parent('exRKGLS.Rnw') ## ----seq-brewers, out.width='\\linewidth', h=6, w=8---------------------- library(RColorBrewer) display.brewer.all(type="seq") ## ----display-annual-colored, out.width='\\textwidth'--------------------- ggplot(data=ne.df) + aes(x=E, y=N) + geom_point(aes(size=ANN_GDD50, colour=ANN_GDD50), shape=20) + scale_colour_distiller(space="Lab", palette="Greens") + xlab("E") + ylab("N") + coord_fixed() ## ----seq-brewers-div, out.width='\\linewidth', h=6, w=8------------------ display.brewer.all(type="div")