In this lab, you will fit a series of candidate models and prepare a table for their performance.

Set-up

We will define the training data as 1964 to 1987 and the test data as 1988 and 1989.

load("../landings.RData")
spp <- "Anchovy"
landings$log.metric.tons <- log(landings$metric.tons)
landings <- subset(landings, Species==spp)
traindat <- subset(landings, Year <= 1987)$log.metric.tons
testdat <- subset(landings, Year == 1988 | Year==1989)$log.metric.tons
n.fr <- length(testdat)

Load the necessary packages.

library(forecast)
library(stringr)

Fit each of our candidate models

We will store these in a list for easy access. fun.list is the function to pass to tsCV().

fit.list <- list()
fr.list <- list()
fun.list <- list()
  • Exponential smoothing model with trend
modelname <- "ETSwtrend"
fit <- ets(traindat, model="AAN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="AAN"), h=h) }
  • Exponential smoothing model no trend
modelname <- "ETSnotrend"
fit <- ets(traindat, model="ANN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="ANN"), h=h) }
  • ARIMA(0,1,1) with drift (best)
modelname <- "ARIMA011wdrift"
fit <- Arima(traindat, order=c(0,1,1), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,1,1), include.drift=TRUE),h=h) }
  • ARIMA(2,1,0) with drift (within 2 AIC of best)
modelname <- "ARIMA210wdrift"
fit <- Arima(traindat, order=c(2,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(2,1,0), include.drift=TRUE),h=h) }
  • Time-varying regression with linear time
TT <- length(traindat)
#make a data.frame for lm
dat <- data.frame(log.metric.tons=traindat, t=1:TT)
modelname <- "TVreglinear"
fit <- lm(log.metric.tons ~ t, data=dat)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, newdata=data.frame(t=TT+1:n.fr))
fun.list[[modelname]] <- function(x, h){ 
  TT <- length(x)
  dat <- data.frame(log.metric.tons=x, t=1:TT)
  ft <- lm(log.metric.tons ~ t, data=dat)
  forecast(ft, newdata=data.frame(t=TT+h)) }
  • Naive no trend
modelname <- "Naive"
fit <- Arima(traindat, order=c(0,1,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x,h=h) }
  • Naive with trend
modelname <- "Naivewtrend"
fit <- Arima(traindat, order=c(0,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x, drift=TRUE, h=h) }
  • Average or mean
modelname <- "Average"
fit <- Arima(traindat, order=c(0,0,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,0,0)),h=h) }

Models fit

Now we can use names() to see the models that we have fit. If we want to add more, we use the code above as a template.

modelnames <- names(fit.list)
modelnames
## [1] "ETSwtrend"      "ETSnotrend"     "ARIMA011wdrift" "ARIMA210wdrift"
## [5] "TVreglinear"    "Naive"          "Naivewtrend"    "Average"

Now we can go through each model

We will run the models and compute the forecast metrics for each and put in a table.

restab <- data.frame(model=modelnames, RMSE=NA, ME=NA, tsCV.RMSE=NA, AIC=NA, BIC=NA, stringsAsFactors = FALSE)
for(i in modelnames){
  fit <- fit.list[[i]]
  fr <- fr.list[[i]]
  restab$RMSE[restab$model==i] <- accuracy(fr, testdat)["Test set","RMSE"]
  restab$ME[restab$model==i] <- accuracy(fr, testdat)["Test set","ME"]
  e <- tsCV(traindat, fun.list[[i]], h=1)
  restab$tsCV.RMSE[restab$model==i] <- sqrt(mean(e^2, na.rm=TRUE))
  restab$AIC[restab$model==i] <- AIC(fit)
  restab$BIC[restab$model==i] <- BIC(fit)
}

Add on \(\Delta\)AIC and \(\Delta\)BIC. Sort by \(\Delta\)AIC and format to have 3 digits.

restab$DeltaAIC <- restab$AIC-min(restab$AIC)
restab$DeltaBIC <- restab$BIC-min(restab$BIC)
restab <- restab[order(restab$DeltaAIC),]
resfor <- format(restab, digits=3, trim=TRUE)

Bold the minimum values in each column so they are easy to spot.

for(i in colnames(resfor)){
  if(class(restab[,i])=="character") next
  if(i!="ME") testval <- restab[,i] else testval <- abs(restab[,i])
  theminrow <- which(testval==min(testval))
  resfor[theminrow, i] <- paste0("**",resfor[theminrow,i],"**")
}

This is the table of FORECAST performance metrics. Not how well it fits the data, but how well it forecasts out of the data. RSME and ME are for the 2 data points in 1988 and 1989 that were held out for testing. tsCV.RMSE is the RSME for the time-series crossvalidation that makes a series of forecasts for each point in the data. AIC and BIC are information criteria, which are a measure of data support for each model.

knitr::kable(resfor)
model RMSE ME tsCV.RMSE AIC BIC DeltaAIC DeltaBIC
5 TVreglinear 0.195 -0.114 0.247 -7.00 -3.4611 0.00000 0.123
3 ARIMA011wdrift 0.370 -0.332 0.231 -6.99 -3.5844 0.00443 0.000
4 ARIMA210wdrift 0.381 -0.347 0.224 -6.08 -1.5399 0.91340 2.044
7 Naivewtrend 0.510 -0.483 0.239 -2.18 0.0883 4.81255 3.673
6 Naive 0.406 -0.384 0.222 -2.06 -0.9247 4.93505 2.660
1 ETSwtrend 0.538 -0.500 0.251 3.67 9.5587 10.66374 13.143
2 ETSnotrend 0.317 -0.289 0.222 6.76 10.2988 13.75990 13.883
8 Average 0.656 0.643 0.476 33.04 35.3924 40.03162 38.977