Disclaimer

General disclaimer

This notebook was created for the course “Deep Learning with Actuarial Applications in R” of the Swiss Association of Actuaries (https://www.actuaries.ch/), held on 15 and 16 October in Zurich.

The course is based on the publications (articles, code and data) available on the following website: https://www.actuarialdatascience.org/.

Authors are Daniel Meier and Jürg Schelldorfer, with support from Mirai Solutions GmbH (https://mirai-solutions.ch/).

Notebook-specific disclaimer

This notebook closely follows the article “Lee and Carter go Machine Learning: Recurrent Neural Networks” by Ronald Richman and Mario V. Wüthrich, together with its accompanying code, available on the website above.

options(encoding = 'UTF-8')

options(keras.view_metrics = FALSE)

# Loading all the necessary packages
# if (!require(data.table))
#   install.packages("data.table")
# if (!require(dplyr))
#   install.packages("dplyr")
# if (!require(ggplot2))
#   install.packages("ggplot2")
# if (!require(reshape2))
#   install.packages("reshape2")
# if (!require(stringr))
#   install.packages("stringr")
# if (!require(keras))
#   install.packages("keras")
# if (!require(forecast))
#   install.packages("forecast")

require(data.table)
## Loading required package: data.table
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
require(stringr)
## Loading required package: stringr
require(keras)
## Loading required package: keras
require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Checking package versions
packageVersion('keras')               # version keras 2.3.0.0
## [1] '2.3.0.0'
packageVersion('tensorflow')          # version tensorflow 2.2.0 (TensorFlow backend > 2.0 and Python 3.7)
## [1] '2.2.0'
# Set seed to obtain best reproducibility. Note that the underlying architecture may nonetheless affect results, so full reproducibility cannot be guaranteed across platforms.
seed <- 100
Sys.setenv(PYTHONHASHSEED = seed)
set.seed(seed)
reticulate::py_set_seed(seed)
tensorflow::tf$random$set_seed(seed)
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] forecast_8.13     keras_2.3.0.0     stringr_1.4.0     reshape2_1.4.4   
## [5] ggplot2_3.3.2     dplyr_1.0.2       data.table_1.13.0
## 
## loaded via a namespace (and not attached):
##  [1] reticulate_1.14   zoo_1.8-8         tidyselect_1.1.0  xfun_0.18        
##  [5] purrr_0.3.4       urca_1.3-0        lattice_0.20-41   colorspace_1.4-1 
##  [9] vctrs_0.3.4       generics_0.0.2    htmltools_0.4.0   yaml_2.2.1       
## [13] base64enc_0.1-3   rlang_0.4.8       pillar_1.4.4      glue_1.4.1       
## [17] withr_2.2.0       rappdirs_0.3.1    TTR_0.24.2        lifecycle_0.2.0  
## [21] plyr_1.8.6        tensorflow_2.2.0  quantmod_0.4.17   timeDate_3043.102
## [25] munsell_0.5.0     gtable_0.3.0      evaluate_0.14     knitr_1.30       
## [29] tseries_0.10-47   tfruns_1.4        lmtest_0.9-38     parallel_4.0.0   
## [33] curl_4.3          xts_0.12.1        Rcpp_1.0.4.6      scales_1.1.1     
## [37] jsonlite_1.6.1    fracdiff_1.5-1    digest_0.6.25     stringi_1.4.6    
## [41] grid_4.0.0        quadprog_1.5-8    tools_4.0.0       magrittr_1.5     
## [45] tibble_3.0.1      crayon_1.3.4      whisker_0.4       pkgconfig_2.0.3  
## [49] zeallot_0.1.0     ellipsis_0.3.1    rmarkdown_2.4     R6_2.4.1         
## [53] nnet_7.3-13       nlme_3.1-147      compiler_4.0.0
##################################################################
#### Load data
##################################################################

all_mort <- fread("CHE_mort.csv")
all_mort$Gender <- as.factor(all_mort$Gender)

# The data has been downloaded from the Human Mortality Database (HMD).
# We have applied some pre-processing so that Lee-Carter can be applied;
# values that differ from the HMD are flagged in the csv file (column imputed_flag).

str(all_mort)
## Classes 'data.table' and 'data.frame':   13400 obs. of  7 variables:
##  $ Gender      : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Year        : int  1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 ...
##  $ Age         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Country     : chr  "CHE" "CHE" "CHE" "CHE" ...
##  $ imputed_flag: logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ mx          : num  0.02729 0.00305 0.00167 0.00123 0.00101 ...
##  $ logmx       : num  -3.6 -5.79 -6.39 -6.7 -6.9 ...
##  - attr(*, ".internal.selfref")=<externalptr>
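# Quick sanity check (a minimal sketch, using only the columns shown by str()
# above): count how many observations per gender were imputed, i.e. differ
# from the raw HMD figures.
all_mort[, .(n_imputed = sum(imputed_flag), n_total = .N), by = Gender]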
##################################################################
#### Heatmap of selected data
##################################################################

gender <- "Male"
#gender <- "Female"
m0 <- c(min(all_mort$logmx), max(all_mort$logmx))
# rows are calendar year t, columns are ages x
logmx <- t(matrix(
           as.matrix(all_mort[which(all_mort$Gender == gender), "logmx"]),
           nrow = 100,
           ncol = nrow(all_mort) / 2 / 100
         ))
image(z = logmx,
      useRaster = TRUE,
      zlim = m0,
      col = rev(rainbow(n = 60, start = 0, end = .72)),
      xaxt = 'n',
      yaxt = 'n',
      main = paste("Swiss ", gender, " raw log-mortality rates", sep = ""),
      ylab = "age x",
      xlab = "calendar year t"
)
axis(1, at = c(0:(2016 - 1950)) / (2016 - 1950), c(1950:2016))
axis(2, at = c(0:49) / 50, labels = c(0:49) * 2)
lines(x = rep((1999 - 1950) / (2016 - 1950), 2),
      y = c(0:1),
      lwd = 2)

gender <- "Male"

#gender <- "Female"
x <- 50
t <- 1999
logmx <- t(matrix(
            as.matrix(all_mort[which(all_mort$Gender == gender), "logmx"]),
            nrow = 100,
            ncol = nrow(all_mort) / 2 / 100
         ))
mx <- t(matrix(as.matrix(all_mort[which(all_mort$Gender == gender), "mx"]),
               nrow = 100,
               ncol = nrow(all_mort) / 2 / 100
      ))
par(mfrow = c(2, 2))
plot(x = min(all_mort$Year):max(all_mort$Year),
     y = logmx[, x + 1],
     xlab = "calendar years",
     ylab = "raw log-mortality rates",
     main = paste("Swiss ", gender, ", ", x, " year old", sep = ""),
     type = 'l',
     col = "blue"
)
plot(x = min(all_mort$Year):max(all_mort$Year),
     y = mx[, x + 1],
     xlab = "calendar years",
     ylab = "raw mortality rates",
     main = paste("Swiss ", gender, ", ", x, " year old", sep = ""),
     type = 'l',
     col = "blue"
)
plot(x = min(all_mort$Age):max(all_mort$Age),
     y = logmx[t - min(all_mort$Year) + 1, ],
     xlab = "age",
     ylab = "raw log-mortality rates",
     main = paste("Swiss ", gender, ", year ", t, sep = ""),
     type = 'l',
     col = "blue"
)
plot(x = min(all_mort$Age):max(all_mort$Age),
     y = mx[t - min(all_mort$Year) + 1, ],
     xlab = "age",
     ylab = "raw mortality rates",
     main = paste("Swiss ", gender, ", year ", t, sep = ""),
     type = 'l',
     col = "blue"
)

##################################################################
#### Fit Lee-Carter to the selected country
#### and extrapolate with Random Walk with Drift
##################################################################

# Lee-Carter model: log m_{t,x} = a_x + b_x * k_t

ObsYear <- 1999
gender <- "Male"
train <- all_mort[Year <= ObsYear][Gender == gender]
min(train$Year)
## [1] 1950
### fit via SVD
train[, ax := mean(logmx), by = (Age)]
train[, mx_adj := logmx - ax]
rates_mat <- as.matrix(train %>% 
                dcast.data.table(Age ~ Year, value.var = "mx_adj", sum))[, -1]
svd_fit <- svd(rates_mat)

ax <- train[, unique(ax)] # one a_x per age; unique() drops the per-year repetitions
bx <- svd_fit$u[, 1] * svd_fit$d[1]
kt <- svd_fit$v[, 1]

c1 <- mean(kt)
c2 <- sum(bx)
ax <- ax + c1 * bx
bx <- bx / c2
kt <- (kt - c1) * c2
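
# Sanity check (a minimal sketch): after the normalisation above, the standard
# Lee-Carter identifiability constraints should hold up to numerical precision,
# i.e. sum(bx) = 1 and mean(kt) = 0.
round(c(sum(bx), mean(kt)), 10)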

### extrapolation and forecast
test  <- all_mort[Year > ObsYear][Gender == gender]
t_forecast <- test[, unique(Year)] %>% length()
forecast_kt <- kt %>% forecast::rwf(t_forecast, drift = TRUE)
kt_forecast <- forecast_kt$mean
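
# rwf() with drift = TRUE estimates the drift as the mean first difference of
# kt, i.e. (kt_T - kt_1) / (T - 1). A minimal sketch reproducing the point
# forecasts by hand (should match forecast_kt$mean up to numerical tolerance):
T1 <- length(kt)
drift <- (kt[T1] - kt[1]) / (T1 - 1)
kt_manual <- kt[T1] + drift * (1:t_forecast)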


##################################################################
#### Illustrate and back-test Lee-Carter prediction
##################################################################

# illustration selected drift
plot_data <- c(kt, kt_forecast)
plot(plot_data,
     pch = 20,
     col = "red",
     cex = 2,
     cex.lab = 1.5,
     xaxt = 'n',
     ylab = "values k_t",
     xlab = "calendar year t",
     main = list(paste("estimated process k_t for ", gender, sep = ""), 
                 cex = 1.5)
)
points(kt, col = "blue", pch = 20, cex = 2)
axis(1,
     at = c(1:length(plot_data)),
     labels = c(1:length(plot_data)) + 1949)
abline(v = (length(kt) + 0.5), lwd = 2)

# in-sample and out-of-sample analysis
fitted_train <- (ax + bx %*% t(kt)) %>% melt()
train$pred_LC_svd <- fitted_train$value %>% exp()
fitted_test <- (ax + bx %*% t(kt_forecast)) %>% melt()
test$pred_LC_svd <- fitted_test$value %>% exp()
round(c((mean((train$mx - train$pred_LC_svd) ^ 2) * 10 ^ 4) , 
        (mean((test$mx - test$pred_LC_svd) ^ 2) * 10 ^ 4)), 4)
## [1] 8.8110 1.8152
##################################################################
### Designing LSTMs
##################################################################

# single hidden LSTM layer
LSTM1 <- function(T0, tau0, tau1, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_lstm(units = tau1,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               name = 'LSTM1'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau1, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# double hidden LSTM layers
LSTM2 <- function(T0, tau0, tau1, tau2, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_lstm(units = tau1,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               return_sequences = TRUE,
               name = 'LSTM1'
    ) %>%
    layer_lstm(units = tau2,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               name = 'LSTM2'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau2, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# triple hidden LSTM layers
LSTM3 <- function(T0, tau0, tau1, tau2, tau3, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_lstm(units = tau1,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               return_sequences = TRUE,
               name = 'LSTM1'
    ) %>%
    layer_lstm(units = tau2,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               return_sequences = TRUE,
               name = 'LSTM2'
    ) %>%
    layer_lstm(units = tau3,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               name = 'LSTM3'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau3, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# time-distributed layer on LSTMs
LSTM_TD <- function(T0, tau0, tau1, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_lstm(units = tau1,
               activation = 'tanh',
               recurrent_activation = 'tanh',
               return_sequences = TRUE,
               name = 'LSTM1'
    ) %>%
    time_distributed(layer_dense(units = 1,
                                 activation = k_exp,
                                 name = "Output"
                    ),
                    name = 'TD'
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# single hidden GRU layer
GRU1 <- function(T0, tau0, tau1, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_gru(units = tau1,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              name = 'GRU1'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau1, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# double hidden GRU layers
GRU2 <- function(T0, tau0, tau1, tau2, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_gru(units = tau1,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              return_sequences = TRUE,
              name = 'GRU1'
    ) %>%
    layer_gru(units = tau2,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              name = 'GRU2'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau2, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# triple hidden GRU layers
GRU3 <- function(T0, tau0, tau1, tau2, tau3, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_gru(units = tau1,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              return_sequences = TRUE,
              name = 'GRU1'
    ) %>%
    layer_gru(units = tau2,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              return_sequences = TRUE,
              name = 'GRU2'
    ) %>%
    layer_gru(units = tau3,
              activation = 'tanh',
              recurrent_activation = 'tanh',
              name = 'GRU3'
    ) %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau3, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}

# triple hidden LSTM layers both genders
LSTM3.Gender <- function(T0, tau0, tau1, tau2, tau3, y0 = 0, optimizer) {
    Input <- layer_input(shape = c(T0, tau0),
                         dtype = 'float32',
                         name = 'Input')
    Gender <- layer_input(shape = c(1),
                          dtype = 'float32',
                          name = 'Gender')
    RNN <- Input %>%
      layer_lstm(units = tau1,
                 activation = 'tanh',
                 recurrent_activation = 'tanh',
                 return_sequences = TRUE,
                 name = 'LSTM1'
      ) %>%
      layer_lstm(units = tau2,
                 activation = 'tanh',
                 recurrent_activation = 'tanh',
                 return_sequences = TRUE,
                 name = 'LSTM2'
      ) %>%
      layer_lstm(units = tau3,
                 activation = 'tanh',
                 recurrent_activation = 'tanh',
                 name = 'LSTM3'
      )
    Output <- list(RNN, Gender) %>% 
      layer_concatenate(name = "Concat") %>%
      layer_dense(units = 1,
                  activation = k_exp,
                  name = "Output",
                  weights = list(array(0, dim = c(tau3 + 1, 1)), 
                                 array(log(y0), dim = c(1)))
      )
    model <- keras_model(inputs = list(Input, Gender), outputs = c(Output))
    model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
  }

# triple hidden GRU layers both genders
GRU3.Gender <-
  function(T0, tau0, tau1, tau2, tau3, y0 = 0, optimizer) {
    Input <- layer_input(shape = c(T0, tau0),
                         dtype = 'float32',
                         name = 'Input')
    Gender <- layer_input(shape = c(1),
                          dtype = 'float32',
                          name = 'Gender')
    RNN <- Input %>%
      layer_gru(units = tau1,
                activation = 'tanh',
                recurrent_activation = 'tanh',
                return_sequences = TRUE,
                name = 'GRU1'
      ) %>%
      layer_gru(units = tau2,
                activation = 'tanh',
                recurrent_activation = 'tanh',
                return_sequences = TRUE,
                name = 'GRU2'
      ) %>%
      layer_gru(units = tau3,
                activation = 'tanh',
                recurrent_activation = 'tanh',
                name = 'GRU3'
      )
    Output <- list(RNN, Gender) %>% layer_concatenate(name = "Concat") %>%
      layer_dense(units = 1,
                  activation = k_exp,
                  name = "Output",
                  weights = list(array(0, dim = c(tau3 + 1, 1)),
                                 array(log(y0), dim = c(1)))
      )
    model <- keras_model(inputs = list(Input, Gender), outputs = c(Output))
    model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
  }
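
# Usage sketch for the two-input gender models (the hyperparameter values
# below are illustrative, not taken from the course material): these models
# expect a list of two inputs, the feature tensor and a 0/1 gender indicator.
# model <- LSTM3.Gender(T0 = 10, tau0 = 3, tau1 = 5, tau2 = 4, tau3 = 3,
#                       y0 = 0.1, optimizer = "nadam")
# model %>% predict(list(x_array, gender_indicator))  # placeholders, not defined here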

# double hidden FNN layers
FNN <- function(T0, tau0, tau1, tau2, y0 = 0, optimizer) {
  Input <- layer_input(shape = c(T0, tau0),
                       dtype = 'float32',
                       name = 'Input')
  Output <- Input %>%
    layer_reshape(target_shape = c(T0 * tau0), 
                  name = 'Reshape') %>%
    layer_dense(units = tau1,
                activation = 'tanh',
                name = 'Layer1') %>%
    layer_dense(units = tau2,
                activation = 'tanh',
                name = 'Layer2') %>%
    layer_dense(units = 1,
                activation = k_exp,
                name = "Output",
                weights = list(array(0, dim = c(tau2, 1)), 
                               array(log(y0), dim = c(1)))
    )
  model <- keras_model(inputs = list(Input), outputs = c(Output))
  model %>% compile(loss = 'mean_squared_error', optimizer = optimizer)
}
##################################################################
### LSTMs and GRUs
##################################################################

T0 <- 10
tau0 <- 3
tau1 <- 5
tau2 <- 4

summary(LSTM1(T0, tau0, tau1, 0, "nadam"))
## Model: "model"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## LSTM1 (LSTM)                        (None, 5)                       180         
## ________________________________________________________________________________
## Output (Dense)                      (None, 1)                       6           
## ================================================================================
## Total params: 186
## Trainable params: 186
## Non-trainable params: 0
## ________________________________________________________________________________
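# Parameter count check (a minimal sketch): an LSTM layer has four gates, each
# with a (tau0 + tau1) x tau1 weight matrix plus tau1 biases; the dense output
# layer adds tau1 weights plus one bias.
4 * ((tau0 + tau1) * tau1 + tau1)   # LSTM1: 180
tau1 + 1                            # Output: 6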
summary(LSTM2(T0, tau0, tau1, tau2, 0, "nadam"))
## Model: "model_1"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## LSTM1 (LSTM)                        (None, 10, 5)                   180         
## ________________________________________________________________________________
## LSTM2 (LSTM)                        (None, 4)                       160         
## ________________________________________________________________________________
## Output (Dense)                      (None, 1)                       5           
## ================================================================================
## Total params: 345
## Trainable params: 345
## Non-trainable params: 0
## ________________________________________________________________________________
summary(LSTM_TD(T0, tau0, tau1, 0, "nadam"))
## Model: "model_2"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## LSTM1 (LSTM)                        (None, 10, 5)                   180         
## ________________________________________________________________________________
## TD (TimeDistributed)                (None, 10, 1)                   6           
## ================================================================================
## Total params: 186
## Trainable params: 186
## Non-trainable params: 0
## ________________________________________________________________________________
summary(GRU1(T0, tau0, tau1, 0, "nadam"))
## Model: "model_3"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## GRU1 (GRU)                          (None, 5)                       135         
## ________________________________________________________________________________
## Output (Dense)                      (None, 1)                       6           
## ================================================================================
## Total params: 141
## Trainable params: 141
## Non-trainable params: 0
## ________________________________________________________________________________
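# Analogous count for the GRU, with three gates instead of four (assuming
# reset_after = FALSE, which is consistent with the 135 parameters above):
3 * ((tau0 + tau1) * tau1 + tau1)   # GRU1: 135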
summary(GRU2(T0, tau0, tau1, tau2, 0, "nadam"))
## Model: "model_4"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## GRU1 (GRU)                          (None, 10, 5)                   135         
## ________________________________________________________________________________
## GRU2 (GRU)                          (None, 4)                       120         
## ________________________________________________________________________________
## Output (Dense)                      (None, 1)                       5           
## ================================================================================
## Total params: 260
## Trainable params: 260
## Non-trainable params: 0
## ________________________________________________________________________________
summary(FNN(T0, tau0, tau1, tau2, 0, "nadam"))
## Model: "model_5"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## Input (InputLayer)                  [(None, 10, 3)]                 0           
## ________________________________________________________________________________
## Reshape (Reshape)                   (None, 30)                      0           
## ________________________________________________________________________________
## Layer1 (Dense)                      (None, 5)                       155         
## ________________________________________________________________________________
## Layer2 (Dense)                      (None, 4)                       24          
## ________________________________________________________________________________
## Output (Dense)                      (None, 1)                       5           
## ================================================================================
## Total params: 184
## Trainable params: 184
## Non-trainable params: 0
## ________________________________________________________________________________
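# The FNN counts follow the usual dense-layer formula (inputs + 1) * units:
(T0 * tau0 + 1) * tau1   # Layer1: 155
(tau1 + 1) * tau2        # Layer2: 24
tau2 + 1                 # Output: 5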
##################################################################
### Bringing the data into the right structure for a toy example
##################################################################

gender <- "Female"
ObsYear <- 2000

mort_rates <- all_mort[which(all_mort$Gender == gender), 
                      c("Year", "Age", "logmx")]
mort_rates <- dcast(mort_rates, Year ~ Age, value.var = "logmx")

T0 <- 10     # lookback period
tau0 <- 3    # dimension of x_t (should be odd for our application)
delta0 <- (tau0 - 1) / 2

toy_rates <- as.matrix(
  mort_rates[which(mort_rates$Year %in% c((ObsYear - T0):(ObsYear + 1))),])

xt <- array(NA, c(2, ncol(toy_rates) - tau0, T0, tau0))
YT <- array(NA, c(2, ncol(toy_rates) - tau0))

for (i in 1:2) {
  for (a0 in 1:(ncol(toy_rates) - tau0)) {
    xt[i, a0, , ] <- toy_rates[c(i:(T0 + i - 1)), c((a0 + 1):(a0 + tau0))]
    YT[i, a0] <- toy_rates[T0 + i, a0 + 1 + delta0]
  }
}
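
# Shape check (a minimal sketch): toy_rates has one Year column plus 100 age
# columns, so the loop yields ncol(toy_rates) - tau0 = 98 feature/label pairs
# for each of the two target years:
dim(xt)   # expected: 2 98 10 3
dim(YT)   # expected: 2 98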

plot(x = toy_rates[1:T0, 1],
     y = toy_rates[1:T0, 2],
     col = "white",
     xlab = "calendar years",
     ylab = "raw log-mortality rates",
     cex.lab = 1.5,
     cex = 1.5,
     main = list("data toy example", cex = 1.5),
     xlim = range(toy_rates[, 1]),
     ylim = range(toy_rates[, -1]),
     type = 'l'
)

for (a0 in 2:ncol(toy_rates)) {
  if (a0 %in% (c(1:100) * 3)) { # plot every third age only, to keep the chart readable
    lines(x = toy_rates[1:T0, 1], y = toy_rates[1:T0, a0])
    points(x = toy_rates[(T0 + 1):(T0 + 2), 1],
           y = toy_rates[(T0 + 1):(T0 + 2), a0],
           col = c("blue", "red"),
           pch = 20
    )
    lines(x = toy_rates[(T0):(T0 + 1), 1],
          y = toy_rates[(T0):(T0 + 1), a0],
          col = "blue",
          lty = 2
    )
    lines(x = toy_rates[(T0 + 1):(T0 + 2), 1],
          y = toy_rates[(T0 + 1):(T0 + 2), a0],
          col = "red",
          lty = 2
    )
  }
}