## ----setup, include=FALSE, cache=FALSE------------------------------ # set global chunk options ## this code is only required to set up knitr for reproducible documentation ## it has nothing to do with the exercise itself, please ignore! opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width'), out.format='latex', use.highlight=TRUE) opts_chunk$set(fig.path='kgraph/exC-', fig.align='center', fig.show='hold', # do not show R prompts, makes it easier to cut-paste prompt=FALSE, comment=NA, fig.width=6, fig.height=6, out.width='.65\\linewidth', size='scriptsize') # options to be read by formatR options(replace.assign=TRUE,width=70) ## ----echo=FALSE, results='hide'------------------------------------- rm(list=ls()) ## ----load-pkgs------------------------------------------------------ library(sp) library(xts) library(spacetime) library(gstat) ## ----load-air------------------------------------------------------- data("air") ls() class(air) str(air) dim(air)["space"] rownames(air) dim(air)["time"] str(dates) summary(stations) summary(DE_NUTS1) ## ----show.proj------------------------------------------------------ proj4string(stations) proj4string(DE_NUTS1) proj4string(stations) <- CRS(proj4string(stations)) proj4string(DE_NUTS1) <- CRS(proj4string(DE_NUTS1)) proj4string(stations) proj4string(DE_NUTS1) ## ----build-st------------------------------------------------------- rural <- STFDF(stations, dates, data.frame(PM10 = as.vector(air))) ## ----show-classes--------------------------------------------------- class(DE_NUTS1) class(rural) ## ------------------------------------------------------------------- str(rural) slotNames(rural) class(rural@data) class(rural@sp) class(rural@time) class(rural@endTime) ## ----echo=FALSE, results='hide'------------------------------------- Sys.setenv(TZ="Asia/Shanghai") ## ----show.posixct--------------------------------------------------- Sys.time() as.POSIXlt(Sys.time(), "UTC") Sys.timezone() Sys.getenv(x="TZ", unset=NA) ## ----show.rural.tz-------------------------------------------------- rural@endTime[1] as.POSIXlt(rural@endTime[1], tz="CET") as.POSIXlt(rural@endTime[1], tz="EST") as.POSIXlt(rural@endTime[1], tz="America/Chicago") as.POSIXlt(rural@endTime[1], tz="Asia/Shanghai") ## ----set.posixtz---------------------------------------------------- Sys.setenv(TZ="UTC") Sys.timezone() Sys.getenv(x="TZ", unset=NA) Sys.time() rural@endTime[1] ## ----find-longest-ts------------------------------------------------ max.not.na <- 0; longest.station <- 1 for (station in 1:dim(rural)["space"]) { ts <- rural[station,][,"PM10"] (ix <- sum(!is.na(ts))) if (ix > max.not.na) { max.not.na <- ix; longest.station <- station } } longest.station.name <- row.names(rural@sp@coords)[longest.station] print(paste0("Station ", longest.station, " (", longest.station.name, ") has ", max.not.na, " PM10 readings")) long.ts <- rural[longest.station,] rm(max.not.na, longest.station, station) ## ----station-1-ts-display,out.width='\\linewidth', h=5, w=12-------- spacetime::plot(long.ts$PM10, lwd=0.5, main=paste("PM10 at",longest.station.name)) ## ------------------------------------------------------------------- class(rural[1,]) class(rural[,1]) ## ----subset-to-20052010--------------------------------------------- rr <- rural[, "2005::2010"] class(rr) length(rural@data$PM10) length(rr@data$PM10) ## ----remove-missing------------------------------------------------- dim(rr) (na.stations <- which(apply(as(rr, "xts"), 2, function(x) all(is.na(x))))) r5to10 <- rr[-na.stations,] rm(na.stations) dim(r5to10) ## ------------------------------------------------------------------- summary(r5to10) ## ----show.station.codes--------------------------------------------- str(attributes(r5to10@sp@coords)) (station.ids <- attributes(r5to10@sp@coords)$dimnames[[1]]) ## ----remove.de------------------------------------------------------ (station.ids <- substr(station.ids, start=3, stop=7)) ## ----plot-station-names, fig.width=6, fig.height=8, out.width='.8\\linewidth', tidy=FALSE---- (aspect.ratio <- cos(median(coordinates(r5to10@sp)[,2]))) plot(coordinates(r5to10@sp), pch=3, col="red", cex=0.5, asp=1/aspect.ratio, xlab="E", ylab="N") grid() text(coordinates(r5to10@sp), labels=station.ids, pos=4, cex=0.8) ## ------------------------------------------------------------------- unique(substr(station.ids, start=1, stop=2)) ## ----load.de.map, tidy=FALSE, out.width='\\linewidth'--------------- library(mapdata) tmp <- map('worldHires', 'Germany', fill=TRUE, plot=FALSE) library(maptools) de.boundary <- map2SpatialPolygons(tmp, IDs=tmp$names, proj4string=CRS(proj4string(rr@sp))) summary(de.boundary) plot(de.boundary, axes=T) points(coordinates(r5to10@sp), pch=3, cex=0.5, col="red") grid() text(coordinates(r5to10@sp), labels=substr(station.ids,1,2), pos=4, cex=0.5) ## ----setup.kml------------------------------------------------------ library(rgdal) r5to10sp <- SpatialPointsDataFrame(r5to10@sp,data=data.frame(id=station.ids)) str(r5to10sp) ## ----write.kml------------------------------------------------------ writeOGR(r5to10sp, "PM10points.kml", "id", driver="KML", overwrite_layer=TRUE) ## ----station-1-extract, tidy=FALSE---------------------------------- (ix <- (substr(station.ids, start=1, stop=2)=="NW")) dim(r5to10) r5to10nrw <- r5to10[ix,] dim(r5to10nrw) (station.ids.nrw <- substr(station.ids[ix], 3, 5)) attributes(r5to10nrw@sp@coords)$dimnames[[1]] <- station.ids.nrw ## ----station-1-acf, tidy=FALSE-------------------------------------- class(tmp <- na.omit(r5to10nrw[1,])) str(tmp) r5to10nrw.1.ts <- as.ts(na.omit(r5to10nrw[1,]))[,"PM10"] acf(r5to10nrw.1.ts, main=paste("Autocorrelation, station", station.ids.nrw[1])) acf(r5to10nrw.1.ts, plot=FALSE) ## ----station-1-pacf, tidy=FALSE------------------------------------- pacf(r5to10nrw.1.ts, main=paste("Partial autocorrelation, station", station.ids.nrw[1])) pacf(r5to10nrw.1.ts, plot=FALSE) ## ----acf-ccf-nrw, out.width="\\linewidth"--------------------------- acf(na.omit(as(r5to10nrw, "xts")), xlab="", ylab="") ## ----show.nrw.stations, out.width="0.8\\linewidth"------------------ plot(de.boundary, axes=T, main="DE NRW stations") points(coordinates(r5to10nrw@sp), pch=20, col="blue") grid() text(coordinates(r5to10nrw@sp), labels=station.ids.nrw, pos=4) ## ------------------------------------------------------------------- print(spDists(r5to10nrw@sp, longlat=TRUE), digits=3) row.names(r5to10nrw@sp) ## ------------------------------------------------------------------- (ix <- which.max(r5to10$PM10)) (pm.max <- r5to10$PM10[ix]) ## ----find.station.id.max.pm10--------------------------------------- dim(r5to10) (station.id <- ix%%dim(r5to10)[1]) (station.name <- attributes(r5to10@sp@coords)$dimnames[[1]][station.id]) coordinates(r5to10)[station.id,] station.xts <- r5to10[station.id,] str(station.xts) summary(station.xts) ## ----hist.DEMV017, fig.width=10, fig.height=5,out.width='\\linewidth'---- hist(station.xts$PM10, breaks=20) rug(station.xts$PM10) ## ----extract.date.id.max.pm10--------------------------------------- (date.xts <- station.xts[which.max(station.xts$PM10)]) class(date.xts) ## ----extract.date.id.max.pm10.index--------------------------------- (date.ix <- index(date.xts)) class(date.ix) ## ----plot.DEMV017, fig.width=10, fig.height=5,out.width='\\linewidth', tidy=FALSE---- plot(station.xts$PM10, type='h', main="Station DEMV017", ylab="PM10 concentration") plot(station.xts[seq(date.ix-20, date.ix+20, by=1)]$PM10, type="b", main="Station DEMV017", ylab="PM10 concentration") ## ----select.one.date------------------------------------------------ class(date.ix) date.ix <- as.POSIXct(date.ix) class(date.ix) date.ix <- as(date.ix,"character") class(date.ix) dim(r5to10) r.date <- r5to10[,date.ix] dim(r.date) summary(r.date) ## ----one_date_postplot, fig.width=6, fig.height=8------------------- plot(coordinates(r.date), cex=4*r.date$PM10/max(na.omit(r.date$PM10)), col="red", asp=1/aspect.ratio, xlab="Longitude", ylab="Latitude", main=paste("PM10 on",date.ix)) text(coordinates(r.date), labels=round(r.date$PM10,1), pos=2) grid() ## ----remove-highest, fig.width=6, fig.height=8---------------------- summary(r.date@data) r.date <- r.date[-station.id,] summary(r.date@data) plot(coordinates(r.date), cex=3*r.date$PM10/max(na.omit(r.date$PM10)), col="red", asp=1/aspect.ratio, xlab="Longitude", ylab="Latitude", main=paste("PM10 on",date.ix)) text(coordinates(r.date), labels=round(r.date$PM10,1), pos=2) grid() ## ----one_date_vgm, out.width="0.48\\linewidth"---------------------- str(r.date) sum(is.na(r.date$PM10)) sum(!is.na(r.date$PM10)) vo <- variogram(PM10 ~ 1, r.date[!is.na(r.date$PM10),]) plot(vo, plot.numbers=T, main=paste("PM10 on",date.ix)) (vom <- fit.variogram(vo, model=vgm(50, "Exp", 300/3, 0))) plot(vo, plot.numbers=T, model=vom, main=paste("PM10 on",date.ix)) ## ----make-space-grid-for-interpolation, out.width="\\linewidth", tidy=FALSE---- bbox(de.boundary) x1 <- seq(from=5.75,to=15.25,by=0.25) x2 <- seq(from=47.25,to=55.25,by=0.25) de.bbox.grid <- SpatialPoints(cbind(rep(x1,length(x2)), rep(x2,each=length(x1))), proj4string=CRS(proj4string(r5to10@sp))) gridded(de.bbox.grid) <- TRUE summary(de.bbox.grid) de.grid <- de.bbox.grid[!is.na(over(de.bbox.grid, de.boundary)),] summary(de.grid) plot(de.boundary, axes=T) points(coordinates(de.bbox.grid), pch=3) points(coordinates(de.grid), pch=1, col="red") points(coordinates(r5to10@sp), pch=21, bg="blue") grid(lty=1, col="blue") ## ----krige-one-day, tidy=FALSE-------------------------------------- proj4string(r.date) <- CRS(proj4string(r.date)) k.one <- krige(PM10 ~ 1, loc=r.date[!is.na(r.date$PM10),], newdata=de.grid, model=vom) spplot(k.one, zcol="var1.pred", col.regions=bpy.colors(64), main=paste("PM10 on",date.ix), sub="single day variogram model") ## ----krige-one-day-predvar, tidy=FALSE, out.width='0.45\\linewidth'---- k.one$var1.sd <- sqrt(k.one$var1.var) k.one$var1.cv <- 100*k.one$var1.sd/k.one$var1.pred spplot(k.one, zcol="var1.sd", col.regions=terrain.colors(64), main=paste("PM10 on",date.ix, "Prediction s.d."), sub="single day variogram model") spplot(k.one, zcol="var1.cv", col.regions=cm.colors(64), main=paste("PM10 on",date.ix, "Coefficient of variation"), sub="single day variogram model") ## ----extract-times, tidy=FALSE-------------------------------------- set.seed(6345789) sort(sample.index <- sample(dim(r5to10)[2], 100)) spdf.lst <- lapply(sample.index, function(i) { x = r5to10[,i] x$ti = i return(x)} ) spdf <- do.call(rbind, spdf.lst) summary(spdf) ## ----lumped_vgm, out.width="0.45\\linewidth"------------------------ vl <- variogram(PM10 ~ ti, spdf[!is.na(spdf$PM10),], dX=0) plot(vl, plot.numbers=T, main=paste("PM10, 200 random days lumped")) (vlm <- fit.variogram(vl, model=vgm(60, "Exp", 250/3, 0))) plot(vl, plot.numbers=T, model=vlm, main="PM10, 200 random days lumped") ## ----krige-one-day-lumped, out.width="0.48\\linewidth", tidy=FALSE---- k.one.lumped <- krige(PM10 ~ 1, loc=r.date[!is.na(r.date$PM10),], newdata=de.grid, model=vlm) legend.breaks <- seq(floor(min(k.one$var1.pred, k.one.lumped$var1.pred)), ceiling(max(k.one$var1.pred, k.one.lumped$var1.pred)), by=0.5) spplot(k.one.lumped, zcol="var1.pred", col.regions=bpy.colors(64), main=paste("PM10 on",date.ix), sub="lumped variogram model", at=legend.breaks) spplot(k.one, zcol="var1.pred", col.regions=bpy.colors(64), main=paste("PM10 on",date.ix), sub="single day variogram model", at=legend.breaks) ## ----plot-krige-one-day-diffs, out.width="0.48\\linewidth", tidy=FALSE---- summary(k.one$diff <- k.one$var1.pred - k.one.lumped$var1.pred) spplot(k.one, zcol="diff", col.regions=topo.colors(64), main=paste("PM10 prediction difference,",date.ix), sub="single day minus lumped variogram models") ## ----krige-one-day-lumped-var, out.width="0.48\\linewidth", tidy=FALSE---- k.one.lumped$var1.sd <- sqrt(k.one.lumped$var1.var) legend.breaks <- seq(floor(min(k.one$var1.sd, k.one.lumped$var1.sd)), ceiling(max(k.one$var1.sd, k.one.lumped$var1.sd)), by=0.5) spplot(k.one.lumped, zcol="var1.sd", col.regions=terrain.colors(64), main=paste("PM10 on",date.ix,"prediction s.d."), sub="lumped variogram model", at=legend.breaks) spplot(k.one, zcol="var1.sd", col.regions=terrain.colors(64), main=paste("PM10 on",date.ix,"prediction s.d."), sub="single day variogram model", at=legend.breaks) ## ----st_variogram_show, eval=FALSE---------------------------------- system.time(vst <- variogramST(PM10 ~ 1, r5to10)) ## ----st_variogram_save, echo=FALSE, results='hide'------------------ # load saved ST-variogram if it's been computed, otherwise compute and save if (file.exists("./ds/air/st_vgm.Rdata")) { load("./ds/air/st_vgm.Rdata") } else { system.time(vst <- variogramST(PM10 ~ 1, r5to10)) save(vst, file="./ds/air/st_vgm.Rdata") } ## ----eval=FALSE----------------------------------------------------- system.time(vst <- variogramST(PM10 ~ 1, r5to10[,1:200])) ## ----str.vst-------------------------------------------------------- summary(vst) ## ----st_vplot_25D,fig.width=10,fig.height=5,out.width="\\linewidth",tidy=FALSE---- plot(vst, xlab="separation (km)", ylab="separation (+days)", main="Semivariance, PM10") ## ----st_variogram_plot_parallel, fig.width=10, fig.height=5, out.width="\\linewidth", tidy=FALSE---- plot(vst, map = FALSE, xlab="separation (km)", ylab = "Semivariance, PM10") ## ----wireframe-vgm-plot, out.width='0.9\\linewidth'----------------- library(lattice) plot(vst, wireframe=TRUE) ## ------------------------------------------------------------------- str(vst) vst[(vst$timelag==5) & (vst$spacelag > 140) & (vst$spacelag < 160),] ## ----estimate.anisotropy, tidy=FALSE-------------------------------- dim(tmp <- vst[(vst$gamma > 70) & (vst$gamma < 80) & (vst$timelag !=0),]) summary(metric.aniso <- tmp$spacelag/tmp$timelag) ## ----metric-vgm, tidy=FALSE----------------------------------------- (vgm.metric <- vgmST(stModel="metric", joint=vgm(50,"Exp",100,0), nugget=10, stAni=mean(metric.aniso))) ## ----fit-metric-vgm, tidy=FALSE------------------------------------- vgmf.metric <- fit.StVariogram(vst, vgm.metric, method="L-BFGS-B", control=list(maxit=1024)) print(vgmf.metric) ## ----plot-metric-vgmfit, fig.width=10, fig.height=5, out.width="\\linewidth"---- plot(vst, vgmf.metric) plot(vst, vgmf.metric, map=FALSE) ## ----build-separable, tidy=FALSE------------------------------------ (estimated.sill <- quantile(na.omit(vst$gamma), .8)) (vgm.sep <- vgmST(stModel="separable", space=vgm(0.9,"Exp", 300,0.1), time=vgm(0.95,"Exp", 2, 0.05), sill=estimated.sill)) ## ----fit-separable, tidy=FALSE-------------------------------------- vgmf.sep <- fit.StVariogram(vst, vgm.sep, method="L-BFGS-B", lower=c(100,0.001,1,0.001,40), control=list(maxit=500)) class(vgmf.sep) attr(vgmf.sep, "optim.output")$par attr(vgmf.sep, "optim.output")$value ## ----plot-sep-vgmfit, fig.width=10, fig.height=5, out.width="\\linewidth"---- plot(vst, vgmf.sep) plot(vst, vgmf.sep, map=FALSE) ## ----estimate-sills------------------------------------------------- (v.sp <- vst[vst$timelag==0,c("spacelag","gamma")]) (v.t <- vst[vst$spacelag==0,c("timelag","gamma")]) ## ----estimate-joint, fig.width=12, fig.height=5, out.width="\\linewidth"---- p1 <- plot(vst) p2 <- plot(vst, map=F) plot(p1, split=c(1,1,2,1), more=T) plot(p2, split=c(2,1,2,1), more=F) ## ----build-sum-metric, tidy=FALSE----------------------------------- vgm.sum.metric <- vgmST(stModel="sumMetric", space=vgm(0.9*max(v.sp$gamma, na.rm=TRUE), "Exp", 250/3,0), time=vgm(0.9*max(v.t$gamma, na.rm=TRUE), "Exp", 6/3, 0), joint=vgm(30, "Exp",50/3, 0), stAni=240/6) ## ----estimate-sill-bounds------------------------------------------- (sp.sill.lb <- 0.7 * max(v.sp$gamma, na.rm=TRUE)) (sp.range.lb <- v.sp[which(v.sp > sp.sill.lb)[1]-1, "spacelag"]) (t.sill.lb <- 0.7 * max(v.t$gamma, na.rm=TRUE)) (t.range.lb <- v.t[which(v.t$gamma > t.sill.lb)[1]-1, "timelag"]) ## ----fit-sum-metric, tidy=FALSE------------------------------------- system.time(vgmf.sum.metric <- fit.StVariogram(vst, vgm.sum.metric, method="L-BFGS-B", control=list(maxit=500), lower=c(sp.sill.lb, sp.range.lb/3,0, t.sill.lb, t.range.lb/3,0, 0,1,0, 1))) attr(vgmf.sum.metric, "optim.output")$par attr(vgmf.sum.metric, "optim.output")$value ## ----plot-sum-metric-vgmfit, fig.width=10, fig.height=5, out.width="\\linewidth"---- plot(vst, vgmf.sum.metric) plot(vst, vgmf.sum.metric, map=FALSE) ## ----cf-vgm-fits---------------------------------------------------- attr(vgmf.metric, "optim.output")$value attr(vgmf.sep, "optim.output")$value attr(vgmf.sum.metric, "optim.output")$value ## ----add-time-ref-de------------------------------------------------ rr.t <- r5to10[,"2006-06-21/2006-06-26"] class(rr.t) class(rr.t@time) ## ----make-st-grid-for-interpolation, tidy=FALSE, out.width="\\linewidth"---- stplot(rr.t, col.regions=bpy.colors(64), sp.layout=list(list("sp.polygons",de.boundary))) ## ----make-st-grid-for-interpolation-2------------------------------- rr.t <- as(rr.t,"STSDF") de.bbox.grid <- STF(sp=as(de.bbox.grid,"SpatialPoints"), time=rr.t@time) de.grid <- STF(sp=as(de.grid,"SpatialPoints"), time=rr.t@time) ## ----krigeST-metric, tidy=FALSE, warning=FALSE---------------------- k.de.met <- krigeST(PM10~1, data=rr.t, newdata=de.grid, modelList=vgmf.metric) gridded(k.de.met@sp) <- TRUE ## ----krigeST-sep, tidy=FALSE, warning=FALSE------------------------- k.de.sep <- krigeST(PM10~1, data=rr.t, newdata=de.grid, modelList=vgmf.sep) gridded(k.de.sep@sp) <- TRUE ## ----krigeST-sum-met, tidy=FALSE, warning=FALSE--------------------- k.de.sum.metric <- krigeST(PM10~1, data=rr.t, newdata=de.grid, modelList=vgmf.sum.metric) gridded(k.de.sum.metric@sp) <- TRUE ## ----compute-range-plot-vgms, out.width="\\linewidth", tidy=FALSE---- plot.zlim <- seq(floor(min(k.de.met$var1.pred, k.de.sep$var1.pred, k.de.sum.metric$var1.pred)), ceiling(max(k.de.met$var1.pred, k.de.sep$var1.pred, k.de.sum.metric$var1.pred)), by = 0.5) ## ----plotST-predictions, out.width="0.9\\linewidth", tidy=FALSE----- stplot(k.de.met, main="PM10, Germany", sub="Metric model", col.regions=bpy.colors(length(plot.zlim)), at=plot.zlim) stplot(k.de.sep, main="PM10, Germany", sub="Separable space-time model", col.regions=bpy.colors(length(plot.zlim)), at=plot.zlim) stplot(k.de.sum.metric, main="PM10, Germany", sub="Sum-metric space-time model", col.regions=bpy.colors(length(plot.zlim)), at=plot.zlim) ## ----compare-metric-separable, out.width="0.9\\linewidth", tidy=FALSE---- k.de.diff <- k.de.sep k.de.diff$var1.pred <- (k.de.sep$var1.pred - k.de.met$var1.pred) summary(k.de.diff$var1.pred) stplot(k.de.diff, main="PM10, Germany", sub="Difference, Separable less metric model predictions", col.regions=topo.colors(64)) ## ----compare-metric-ps, out.width="0.9\\linewidth", tidy=FALSE------ k.de.diff <- k.de.sep k.de.diff$var1.pred <- (k.de.sum.metric$var1.pred - k.de.met$var1.pred) summary(k.de.diff$var1.pred) stplot(k.de.diff, main="PM10, Germany", sub="Difference, sum-metric less metric model predictions", col.regions=topo.colors(64)) ## ----compare-ps-separable, out.width="0.9\\linewidth", tidy=FALSE---- k.de.diff <- k.de.sep k.de.diff$var1.pred <- (k.de.sum.metric$var1.pred - k.de.sep$var1.pred) summary(k.de.diff$var1.pred) stplot(k.de.diff, main="PM10, Germany", sub="Difference, sum-metric less separable model predictions", col.regions=topo.colors(64)) ## ----compare-one-day------------------------------------------------ class(k.de.met) k.de.day <- k.de.met[,"2006-06-25"] k.de.day@data <- cbind(k.de.day@data, k.de.sep[,"2006-06-25"]@data ) k.de.day@data <- cbind(k.de.day@data, k.de.sum.metric[,"2006-06-25"]@data ) names(k.de.day@data) <- c("metric","separable","sum.metric") summary(k.de.day) ## ----plot-one-day-3-pred, warning=FALSE, out.width='\\linewidth', tidy=FALSE---- rr.day <- rr.t[,"2006-06-25"] spplot(k.de.day, col.regions=bpy.colors(64), sp.layout=list( list("sp.points", rr.day, pch=1, col="black", cex=2*rr.day$PM10/max(rr.day$PM10)), list("sp.polygons",de.boundary, col="black"), list("sp.text",coordinates(rr.day), round(rr.day$PM10), pos=4, col="black", cex=0.6) ) ) ## ----wind.load------------------------------------------------------ tmp1 <- ls() data("wind", package="gstat") tmp2 <- ls() setdiff(tmp2,tmp1) rm(tmp1, tmp2) str(wind) wind[1:6, 1:12] ## ----sum.wind.sp1--------------------------------------------------- summary(wind[,"RPT"]) ## ----sum.wind.t1---------------------------------------------------- wind[1,1:3] summary(t(wind[1,4:15])) ## ----wind.convert.sp------------------------------------------------ str(wind.loc) wind.loc$y <- as.numeric(char2dms(as.character(wind.loc[["Latitude"]]))) wind.loc$y[1] wind.loc$x <- as.numeric(char2dms(as.character(wind.loc[["Longitude"]]))) coordinates(wind.loc) <- ~x+y proj4string(wind.loc) <- "+proj=longlat +datum=WGS84" ## ----plot.wind.stations, out.width="0.8\\linewidth"----------------- plot(wind.loc, xlim = c(-11,-5.4), ylim = c(51,55.5), axes=T, col="red", cex.axis =.7) map("worldHires", add=TRUE, col = grey(.5)) grid() text(coordinates(wind.loc), pos=1, label=wind.loc$Station, cex=.7) ## ----wind.merge, tidy=FALSE----------------------------------------- stations <- 4:15 # columns 4..15 are the station fields names(wind[stations]) as.character(wind.loc$Code) wind.loc <- wind.loc[match(names(wind[stations]), wind.loc$Code),] row.names(wind.loc) <- wind.loc$Station wind$time <- ISOdate(wind$year+1900, wind$month, wind$day, 0) st.space <- list(values <- names(wind)[stations]) wind.st <- stConstruct(wind[stations], space=st.space, time=wind$time, SpatialObj=wind.loc) ## ----wind.shorten--------------------------------------------------- wind.st <- wind.st[,1:730] summary(wind.st) dim(wind.st) ## ----wind.show.25D, out.width="\\linewidth", tidy=FALSE------------- stplot(wind.st, mode = "xt", scales = list(x=list(rot = 45)), xlab = NULL, col.regions=bpy.colors(64), main="Average wind speed (knots)") ## ----wind.eof.pc.sp------------------------------------------------- wind.eof.1 <- eof(wind.st, how="spatial") str(wind.eof.1) # returns SpatialPointsDataFrame ## ----wind.eof.pc.t-------------------------------------------------- wind.eof.2 <- eof(wind.st, how="temporal") str(wind.eof.2) # returns xts summary(wind.eof.2[,1:2]) ## ----wind.eof.pca--------------------------------------------------- eof.sp <- eof(wind.st, how="spatial", returnEOFs=FALSE) eof.t <- eof(wind.st, how="temporal", returnEOFs=FALSE) ## ----eof.scree, out.width='0.45\\linewidth'------------------------- summary(eof.sp) summary(eof.t) plot(eof.sp, main="EOF, space") plot(eof.t, main="EOF, time") ## ----wind.eof.biplots.sp, out.width="\\linewidth"------------------- biplot(eof.sp, pc.biplot=T, main="EOF, space", cex=c(0.3, 0.7)) ## ----plot.PCt.outlier.day1, out.width="0.8\\linewidth"-------------- tmp <- wind.st[,"1962-03-7"] plot(wind.loc, xlim = c(-11, -5.4), ylim = c(51, 55.5), axes = T, col = "red", main="Wind speed, 07-March-1962", sub="knots") map("worldHires", add = TRUE, col = grey(0.5)) text(coordinates(wind.loc), pos = 1, label = wind.loc$Station, cex = 0.7) grid() points(coordinates(wind.loc), cex = 3*tmp$values/max(tmp$values)) text(coordinates(wind.loc), pos = 4, label=round(tmp$values), cex = 0.7) ## ----plot.PCt.outlier.day2, out.width="0.8\\linewidth"-------------- tmp <- wind.st[,"1962-10-17"] plot(wind.loc, xlim = c(-11, -5.4), ylim = c(51, 55.5), axes = T, col = "red", main="Wind speed, 17-November-1962", sub="knots") map("worldHires", add = TRUE, col = grey(0.5)) text(coordinates(wind.loc), pos = 1, label = wind.loc$Station, cex = 0.7) grid() points(coordinates(wind.loc), cex = 3*tmp$values/max(tmp$values)) text(coordinates(wind.loc), pos = 4, label=round(tmp$values), cex = 0.7) ## ----wind.eof.biplots.t, out.width="\\linewidth"-------------------- biplot(eof.t, pc.biplot=T, main="EOF, time", cex=c(0.8, 0.2), arrow.len=0.05, col=c("blue","red3")) ## ----loadNCwater---------------------------------------------------- load("./ds/NCwater/example.RData") str(ds.nc) ## ----make-spacetime, tidy=FALSE------------------------------------- stds <- stConstruct(ds.nc, space=c("long","lat"), time="date", interval=FALSE) ## this CRS was found by searching the EPSG database ## http://www.epsg-registry.org/ proj4string(stds) <- CRS("+init=EPSG:4269") str(stds) ## ----make-stfdf, tidy=FALSE----------------------------------------- stds <- as(stds, "STFDF") class(stds) str(stds) summary(stds) ## ----export-R-file, tidy=FALSE-------------------------------------- save(stds, file="./ds/NCwater/NCwater.RData") ## ----import-and-reformat, tidy=FALSE-------------------------------- ## comment lines begin with `#' ## some stations have fewer fields (only mean, not min and max) ## so fill incomplete records ds.nc <- read.table("./ds/NCwater/dv.txt", comment.char="#", fill=T, header=T) ## easier field names names(ds.nc) <- c("agency","site","date","min","minA","max","maxA","mean","meanA") ## remove header lines for all stations length(ix <- which(ds.nc$agency == "agency_cd")) ds.nc <- ds.nc[-ix,] length(ix <- which(ds.nc$agency == "5s")) ds.nc <- ds.nc[-ix,] ## type conversions ds.nc$site <- as.character(ds.nc$site) ds.nc$min <- as.numeric(as.character(ds.nc$min)) ds.nc$max <- as.numeric(as.character(ds.nc$max)) ds.nc$mean <- as.numeric(as.character(ds.nc$mean)) ## if mean is missing, copy the first field "min" to "mean" ## because in this case the first data field is indeed the mean length(ix <- which(is.na(ds.nc$mean))) ds.nc[ix, "mean"] <- ds.nc[ix, "min"] ## how many records still have no mean water level? ## leave these as NA length(ix <- which(is.na(ds.nc$mean))) ## keep only fields we need ds.nc <- ds.nc[,c("site","date","mean")] str(ds.nc) length(unique(ds.nc$site)) ## write the sites to a text file write.table(unique(ds.nc$site), file="./ds/NCwater/sites.txt", quote=F, col.names=F, row.names=F) ## ----import-2, tidy=FALSE------------------------------------------- sites <- read.table("./ds/NCwater/inventory.txt", sep="\t", comment.char="#", header=T) names(sites) ## remove the header line with format specs sites[1,] sites <- sites[-1,] ## view the first two sites sites[1:2,] ## convert site numbers to strings to match data sites$site_no <- as.character(sites$site_no) unique(sites$dec_coord_datum_cd) unique(sites$alt_datum_cd) ## ----convert-nc-spdf, fig.width=10, fig.height=5, out.width="\\linewidth", tidy=FALSE---- sites <- sites[,c("dec_long_va","dec_lat_va","site_no","alt_va")] names(sites) <- c("long","lat","site","alt") sites$long <- as.numeric(as.character(sites$long)) sites$lat <- as.numeric(as.character(sites$lat)) sites$alt <- as.numeric(as.character(sites$alt)) coordinates(sites) <- ~long + lat proj4string(sites) <- CRS("+init=EPSG:4269") plot(coordinates(sites), asp=1, xlab="E longitude", ylab="N latitude", ylim=c(33,37)) grid() str(sites) save(sites, file="./ds/NCwater/sites.RData") ## ----import-3, tidy=FALSE------------------------------------------- unique(ix <- match(ds.nc$site, sites$site)) ds.nc$long <- coordinates(sites)[ix,1] ds.nc$lat <- coordinates(sites)[ix,2] str(sites) ds.nc$alt <- sites@data[ix, "alt"] ds.nc$z <- ds.nc$alt - ds.nc$mean ## the site is a factor ds.nc$site <- as.factor(ds.nc$site) str(ds.nc) ## ----import-4, tidy=FALSE------------------------------------------- ds.nc$date <- as.POSIXlt(ds.nc$date) class(ds.nc$date) ds.nc$date[2] - ds.nc$date[1] str(ds.nc) ## ----import-5, tidy=FALSE------------------------------------------- save(ds.nc, file="./ds/NCwater/example.RData")