In this lab, you will fit a series of candidate models and prepare a table for their performance.

Set-up

We will define the training data as 1964 to 1987 and the test data as 1988 and 1989.

load("../landings.RData")
spp <- "Anchovy"
landings$log.metric.tons <- log(landings$metric.tons)
landings <- subset(landings, Species==spp)
traindat <- subset(landings, Year <= 1987)$log.metric.tons
testdat <- subset(landings, Year == 1988 | Year==1989)$log.metric.tons
n.fr <- length(testdat)

Load the necessary packages.

library(forecast)
library(stringr)

Fit each of our candidate models

We will store these in a list for easy access. fun.list is the function to pass to tsCV().

fit.list <- list()
fr.list <- list()
fun.list <- list()

Exponential smoothing model with trend

modelname <- "ETSwtrend"
fit <- ets(traindat, model="AAN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="AAN"), h=h) }

Exponential smoothing model no trend

modelname <- "ETSnotrend"
fit <- ets(traindat, model="ANN")
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(ets(x, model="ANN"), h=h) }

ARIMA(0,1,1) with drift (best)

modelname <- "ARIMA011wdrift"
fit <- Arima(traindat, order=c(0,1,1), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,1,1), include.drift=TRUE),h=h) }

ARIMA(2,1,0) with drift (within 2 AIC of best)

modelname <- "ARIMA210wdrift"
fit <- Arima(traindat, order=c(2,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(2,1,0), include.drift=TRUE),h=h) }

Time-varying regression with linear time

TT <- length(traindat)
#make a data.frame for lm
dat <- data.frame(log.metric.tons=traindat, t=1:TT)
modelname <- "TVreglinear"
fit <- lm(log.metric.tons ~ t, data=dat)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, newdata=data.frame(t=TT+1:n.fr))
fun.list[[modelname]] <- function(x, h){ 
  TT <- length(x)
  dat <- data.frame(log.metric.tons=x, t=1:TT)
  ft <- lm(log.metric.tons ~ t, data=dat)
  forecast(ft, newdata=data.frame(t=TT+h)) }

Naive no trend

modelname <- "Naive"
fit <- Arima(traindat, order=c(0,1,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x,h=h) }

Naive with trend

modelname <- "Naivewtrend"
fit <- Arima(traindat, order=c(0,1,0), include.drift=TRUE)
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ rwf(x, drift=TRUE, h=h) }

Average or mean

modelname <- "Average"
fit <- Arima(traindat, order=c(0,0,0))
fit.list[[modelname]] <- fit
fr.list[[modelname]] <- forecast(fit, h=n.fr)
fun.list[[modelname]] <- function(x, h){ forecast(Arima(x, order=c(0,0,0)),h=h) }

Models fit

Now we can use names() to see the models that we have fit. If we want to add more, we use the code above as a template.

modelnames <- names(fit.list)
modelnames

## [1] "ETSwtrend"      "ETSnotrend"     "ARIMA011wdrift" "ARIMA210wdrift"
## [5] "TVreglinear"    "Naive"          "Naivewtrend"    "Average"

Now we can go through each model

We will run the models and compute the forecast metrics for each and put in a table.

restab <- data.frame(model=modelnames, RMSE=NA, ME=NA, tsCV.RMSE=NA, AIC=NA, BIC=NA, stringsAsFactors = FALSE)
for(i in modelnames){
  fit <- fit.list[[i]]
  fr <- fr.list[[i]]
  restab$RMSE[restab$model==i] <- accuracy(fr, testdat)["Test set","RMSE"]
  restab$ME[restab$model==i] <- accuracy(fr, testdat)["Test set","ME"]
  e <- tsCV(traindat, fun.list[[i]], h=1)
  restab$tsCV.RMSE[restab$model==i] <- sqrt(mean(e^2, na.rm=TRUE))
  restab$AIC[restab$model==i] <- AIC(fit)
  restab$BIC[restab$model==i] <- BIC(fit)
}

Add on \(\Delta\)AIC and \(\Delta\)BIC. Sort by \(\Delta\)AIC and format to have 3 digits.

restab$DeltaAIC <- restab$AIC-min(restab$AIC)
restab$DeltaBIC <- restab$BIC-min(restab$BIC)
restab <- restab[order(restab$DeltaAIC),]
resfor <- format(restab, digits=3, trim=TRUE)

Bold the minimum values in each column so they are easy to spot.

for(i in colnames(resfor)){
  if(class(restab[,i])=="character") next
  if(i!="ME") testval <- restab[,i] else testval <- abs(restab[,i])
  theminrow <- which(testval==min(testval))
  resfor[theminrow, i] <- paste0("**",resfor[theminrow,i],"**")
}

This is the table of FORECAST performance metrics. Not how well it fits the data, but how well it forecasts out of the data. RSME and ME are for the 2 data points in 1988 and 1989 that were held out for testing. tsCV.RMSE is the RSME for the time-series crossvalidation that makes a series of forecasts for each point in the data. AIC and BIC are information criteria, which are a measure of data support for each model.

knitr::kable(resfor)

	model	RMSE	ME	tsCV.RMSE	AIC	BIC	DeltaAIC	DeltaBIC
5	TVreglinear	0.195	-0.114	0.247	-7.00	-3.4611	0.00000	0.123
3	ARIMA011wdrift	0.370	-0.332	0.231	-6.99	-3.5844	0.00443	0.000
4	ARIMA210wdrift	0.381	-0.347	0.224	-6.08	-1.5399	0.91340	2.044
7	Naivewtrend	0.510	-0.483	0.239	-2.18	0.0883	4.81255	3.673
6	Naive	0.406	-0.384	0.222	-2.06	-0.9247	4.93505	2.660
1	ETSwtrend	0.538	-0.500	0.251	3.67	9.5587	10.66374	13.143
2	ETSnotrend	0.317	-0.289	0.222	6.76	10.2988	13.75990	13.883
8	Average	0.656	0.643	0.476	33.04	35.3924	40.03162	38.977

ETS Models Lab 1

Set-up

Fit each of our candidate models

Models fit

Now we can go through each model