In this lab, you will fit a series of candidate models and prepare a table for their performance.
We will define the training data as 1964 to 1987 and the test data as 1988 and 1989.
load("../landings.RData")
spp <- "Anchovy"
landings$log.metric.tons <- log(landings$metric.tons)
landings <- subset(landings, Species==spp)
traindat <- subset(landings, Year <= 1987)$log.metric.tons
testdat <- subset(landings, Year == 1988 | Year==1989)$log.metric.tons
n.fr <- length(testdat)
Load the necessary packages.
library(forecast)
library(stringr)
We will store these in a list for easy access. fun.list
is the function to pass to tsCV()
.
fit.list <- list()
fr.list <- list()
fun.list <- list()
modelname <- "ETSwtrend"
fit <- ets(traindat, model="AAN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="AAN"), h=h) }
modelname <- "ETSnotrend"
fit <- ets(traindat, model="ANN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="ANN"), h=h) }
modelname <- "ARIMA011wdrift"
fit <- Arima(traindat, order=c(0,1,1), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,1,1), include.drift=TRUE),h=h) }
modelname <- "ARIMA210wdrift"
fit <- Arima(traindat, order=c(2,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(2,1,0), include.drift=TRUE),h=h) }
TT <- length(traindat)
#make a data.frame for lm
dat <- data.frame(log.metric.tons=traindat, t=1:TT)
modelname <- "TVreglinear"
fit <- lm(log.metric.tons ~ t, data=dat)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, newdata=data.frame(t=TT+1:n.fr))
fun.list[[modelname]] <- function(x, h){
TT <- length(x)
dat <- data.frame(log.metric.tons=x, t=1:TT)
ft <- lm(log.metric.tons ~ t, data=dat)
forecast(ft, newdata=data.frame(t=TT+h)) }
modelname <- "Naive"
fit <- Arima(traindat, order=c(0,1,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x,h=h) }
modelname <- "Naivewtrend"
fit <- Arima(traindat, order=c(0,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x, drift=TRUE, h=h) }
modelname <- "Average"
fit <- Arima(traindat, order=c(0,0,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,0,0)),h=h) }
Now we can use names()
to see the models that we have fit. If we want to add more, we use the code above as a template.
modelnames <- names(fit.list)
modelnames
## [1] "ETSwtrend" "ETSnotrend" "ARIMA011wdrift" "ARIMA210wdrift"
## [5] "TVreglinear" "Naive" "Naivewtrend" "Average"
We will run the models and compute the forecast metrics for each and put in a table.
restab <- data.frame(model=modelnames, RMSE=NA, ME=NA, tsCV.RMSE=NA, AIC=NA, BIC=NA, stringsAsFactors = FALSE)
for(i in modelnames){
fit <- fit.list[[i]]
fr <- fr.list[[i]]
restab$RMSE[restab$model==i] <- accuracy(fr, testdat)["Test set","RMSE"]
restab$ME[restab$model==i] <- accuracy(fr, testdat)["Test set","ME"]
e <- tsCV(traindat, fun.list[[i]], h=1)
restab$tsCV.RMSE[restab$model==i] <- sqrt(mean(e^2, na.rm=TRUE))
restab$AIC[restab$model==i] <- AIC(fit)
restab$BIC[restab$model==i] <- BIC(fit)
}
Add on \(\Delta\)AIC and \(\Delta\)BIC. Sort by \(\Delta\)AIC and format to have 3 digits.
restab$DeltaAIC <- restab$AIC-min(restab$AIC)
restab$DeltaBIC <- restab$BIC-min(restab$BIC)
restab <- restab[order(restab$DeltaAIC),]
resfor <- format(restab, digits=3, trim=TRUE)
Bold the minimum values in each column so they are easy to spot.
for(i in colnames(resfor)){
if(class(restab[,i])=="character") next
if(i!="ME") testval <- restab[,i] else testval <- abs(restab[,i])
theminrow <- which(testval==min(testval))
resfor[theminrow, i] <- paste0("**",resfor[theminrow,i],"**")
}
This is the table of FORECAST performance metrics. Not how well it fits the data, but how well it forecasts out of the data. RSME and ME are for the 2 data points in 1988 and 1989 that were held out for testing. tsCV.RMSE is the RSME for the time-series crossvalidation that makes a series of forecasts for each point in the data. AIC and BIC are information criteria, which are a measure of data support for each model.
knitr::kable(resfor)
model | RMSE | ME | tsCV.RMSE | AIC | BIC | DeltaAIC | DeltaBIC | |
---|---|---|---|---|---|---|---|---|
5 | TVreglinear | 0.195 | -0.114 | 0.247 | -7.00 | -3.4611 | 0.00000 | 0.123 |
3 | ARIMA011wdrift | 0.370 | -0.332 | 0.231 | -6.99 | -3.5844 | 0.00443 | 0.000 |
4 | ARIMA210wdrift | 0.381 | -0.347 | 0.224 | -6.08 | -1.5399 | 0.91340 | 2.044 |
7 | Naivewtrend | 0.510 | -0.483 | 0.239 | -2.18 | 0.0883 | 4.81255 | 3.673 |
6 | Naive | 0.406 | -0.384 | 0.222 | -2.06 | -0.9247 | 4.93505 | 2.660 |
1 | ETSwtrend | 0.538 | -0.500 | 0.251 | 3.67 | 9.5587 | 10.66374 | 13.143 |
2 | ETSnotrend | 0.317 | -0.289 | 0.222 | 6.76 | 10.2988 | 13.75990 | 13.883 |
8 | Average | 0.656 | 0.643 | 0.476 | 33.04 | 35.3924 | 40.03162 | 38.977 |