Day 2 Part 1: Fitting Volatility Models
Zero to Hero Bootcamp - Time Series Econometrics in R
Introduction
In this session, we will focus on fitting various volatility models to financial time series data, specifically the S&P 500 index. We will:
- Download and prepare the S&P 500 data.
- Perform exploratory data analysis (EDA).
- Fit different GARCH models.
- Evaluate model diagnostics.
- Forecast future volatility.
- Calculate Value at Risk (VaR) and Expected Shortfall (ES).
Loading Required Libraries
First, we load the necessary R packages for data manipulation, time series analysis, and modeling.
library(tidyverse)   # Data manipulation and visualization
library(tidyfinance) # Financial data tools
library(tsibble)     # Time series data manipulation
library(fable)       # Time series modeling
library(feasts)      # Time series features
library(rugarch)     # Univariate GARCH models
library(forecast)    # Forecasting functions
Data Acquisition and Preparation
We will work with the S&P 500 index data. If the data file does not exist locally, we download it with tidyfinance's download_data() function; otherwise, we read it from the existing CSV file.
if (!file.exists("sp500_index.csv")) {
  sp500_download <- download_data(
    "stock_prices",
    symbols = "^GSPC",
    start_date = "2014-11-18",
    end_date = "2024-11-18"
  )
  sp500_download %>% write_csv("sp500_index.csv")
} else {
  sp500_download <- read_csv("sp500_index.csv")
}
Rows: 2516 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): symbol
dbl (6): volume, open, low, high, close, adjusted_close
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Calculating Returns
We calculate the daily simple returns of the S&P 500 index from the adjusted closing prices. Because trading days are irregularly spaced on the calendar, we also replace the date column with a consecutive trading-day index so the series has a regular interval for tsibble.
sp500_returns <-
  sp500_download %>%
  select(symbol, date, price = adjusted_close) %>%
  mutate(return = price / lag(price) - 1) %>% # Simple daily return
  remove_missing() %>%                        # Drop the first row (no lagged price)
  select(-price) %>%
  mutate(date = row_number()) %>%             # Regular trading-day index
  print()
Warning: Removed 1 row containing missing values or values outside the scale
range.
# A tibble: 2,515 × 3
symbol date return
<chr> <int> <dbl>
1 ^GSPC 1 -0.00150
2 ^GSPC 2 0.00197
3 ^GSPC 3 0.00524
4 ^GSPC 4 0.00286
5 ^GSPC 5 -0.00115
6 ^GSPC 6 0.00281
7 ^GSPC 7 -0.00254
8 ^GSPC 8 -0.00683
9 ^GSPC 9 0.00638
10 ^GSPC 10 0.00376
# ℹ 2,505 more rows
Exploratory Data Analysis
Plotting Returns
We visualize the daily returns over time to observe any patterns or anomalies.
sp500_returns %>%
  ggplot(aes(x = date, y = return)) +
  geom_line() +
  labs(
    title = "S&P 500 Daily Returns",
    x = "Day",
    y = "Daily Return"
  )
Statistical Summary
We compute key statistical measures to understand the distribution of returns.
sp500_stats <-
  sp500_returns %>%
  summarise(
    mean_return = mean(return),
    std_dev = sd(return),
    skewness = moments::skewness(return),
    kurtosis = moments::kurtosis(return),
    jarque_bera_pvalue = tseries::jarque.bera.test(return)$p.value
  )
sp500_stats
# A tibble: 1 × 5
mean_return std_dev skewness kurtosis jarque_bera_pvalue
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0.000481 0.0112 -0.517 17.5 0
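The summary shows mild negative skewness and pronounced heavy tails: a kurtosis of 17.5 against 3 for a normal distribution. The Jarque-Bera statistic, JB = n/6 * (S^2 + (K - 3)^2 / 4), makes the rejection of normality concrete; plugging in the (rounded) sample moments above:
# Approximate Jarque-Bera statistic from the rounded summary values
2515 / 6 * ((-0.517)^2 + (17.5 - 3)^2 / 4) # ~ 22,000, versus a chi-squared(2) 5% critical value of about 5.99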
Return Distribution
We compare the empirical distribution of returns to a normal distribution with the same mean and standard deviation. Given the heavy tails found above, we expect visible departures in the tails and around the peak.
sp500_returns %>%
  ggplot(aes(x = return)) +
  geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "lightblue", color = "black") +
  geom_density(color = "red", linewidth = 1) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(sp500_returns$return),
      sd = sd(sp500_returns$return)
    ),
    color = "blue",
    linewidth = 1
  ) +
  labs(
    title = "Return Distribution vs Normal Distribution",
    x = "Daily Return",
    y = "Density"
  )
Time Series Characteristics
We examine the autocorrelation and partial autocorrelation of the returns.
sp500_returns %>%
  as_tsibble(index = date) %>%
  gg_tsdisplay(return, plot_type = "partial")
Modeling Volatility
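Before turning to GARCH specifications, it is worth confirming that the returns exhibit ARCH effects at all. A quick check, sketched here as a supplement to the plots above, is a Ljung-Box test on the squared returns; a near-zero p-value indicates autocorrelation in squared returns, i.e., volatility clustering:
# Ljung-Box test on squared returns as an informal check for ARCH effects
Box.test(sp500_returns$return^2, lag = 10, type = "Ljung-Box")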
Fitting an ARIMA Model
We fit an ARIMA model to the returns to capture any autoregressive patterns.
fit_auto_arima <-
  sp500_returns %>%
  as_tsibble(index = date) %>%
  model(arima = ARIMA(return))
fit_auto_arima
# A mable: 1 x 1
arima
<model>
1 <ARIMA(2,0,2) w/ mean>
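To inspect the estimated coefficients and information criteria behind the selected ARIMA(2,0,2), the mable can be passed to fable's report() (a quick supplementary step):
# Print coefficient estimates and information criteria for the selected model
fit_auto_arima %>% report()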
Specifying GARCH and ARMA Orders
We define the orders for the GARCH and ARMA components. The ARMA(2,2) mean equation mirrors the ARIMA(2,0,2) specification selected automatically above.
garch_order <- c(2, 2) # GARCH(p,q) order
arma_order <- c(2, 2) # ARMA(p,q) order
Defining GARCH Model Specifications
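For reference, the variance equation of the standard GARCH(2,2) used below is sigma_t^2 = omega + alpha_1*e_{t-1}^2 + alpha_2*e_{t-2}^2 + beta_1*sigma_{t-1}^2 + beta_2*sigma_{t-2}^2, where e_t denotes the mean-equation innovation. The apARCH, gjrGARCH, and TGARCH variants extend this recursion with asymmetry terms so that negative shocks can raise volatility more than positive shocks of the same size (the leverage effect).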
We specify multiple GARCH models to compare their performance.
garch_specs <-
  list(
    # Standard GARCH model specification
    sGARCH = ugarchspec(
      variance.model = list(
        model = "sGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std" # Student's t-distribution
    ),
    # Asymmetric Power ARCH model specification
    apARCH = ugarchspec(
      variance.model = list(
        model = "apARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    ),
    # GJR-GARCH model specification
    gjrGARCH = ugarchspec(
      variance.model = list(
        model = "gjrGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    ),
    # Threshold GARCH (TGARCH) model specification
    tGARCH = ugarchspec(
      variance.model = list(
        model = "fGARCH",
        submodel = "TGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    )
  )
Fitting the GARCH Models
We fit each GARCH model to the returns data.
fit_results <-
  map(
    garch_specs,
    ~ ugarchfit(spec = .x, data = sp500_returns$return)
  )
Model Diagnostics
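Before comparing the models numerically, it can be instructive to print one fitted object in full; rugarch's show method reports coefficient estimates, robust standard errors, and a battery of residual tests. For example, for the standard GARCH fit:
# Printing a uGARCHfit object displays the complete estimation summary
fit_results$sGARCH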
Defining a Diagnostic Function
We create a function to extract important diagnostic statistics from each model.
get_model_diagnostics <- function(model) {
  tibble(
    aic = infocriteria(model)[1], # Akaike information criterion
    bic = infocriteria(model)[2], # Bayesian information criterion
    log_likelihood = likelihood(model),
    persistence = persistence(model), # Combined ARCH and GARCH effects
    half_life = -log(2) / log(persistence(model)), # Trading days for a volatility shock to decay by half
    unconditional_variance = mean(sigma(model)^2) # Mean fitted conditional variance (an empirical proxy; rugarch::uncvariance() gives the model-implied value)
  )
}
Comparing Model Statistics
We apply the diagnostic function to each model and compile the results. Lower AIC and BIC values indicate a better fit after penalizing for parameters; persistence close to one means volatility shocks decay slowly.
fit_stats <- map_dfr(fit_results, get_model_diagnostics, .id = "model")
fit_stats
# A tibble: 4 × 7
model aic bic log_likelihood persistence half_life unconditional_variance
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sGARCH -6.70 -6.67 8436. 0.997 201. 0.000136
2 apARCH -6.74 -6.71 8495. 0.964 18.9 0.000132
3 gjrGARCH -6.73 -6.70 8477. 0.987 53.4 0.000141
4 tGARCH -6.75 -6.72 8500. 0.942 11.5 0.000135
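The asymmetric models (apARCH, gjrGARCH, tGARCH) achieve lower AIC and BIC than the symmetric sGARCH, consistent with a leverage effect in equity returns. The half-life column converts persistence into the number of trading days for a volatility shock to decay by half; as a quick check against the table, using the tGARCH persistence:
# Half-life implied by the tGARCH persistence estimate
-log(2) / log(0.942) # ~ 11.6 trading days, matching the table up to rounding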
Analyzing Residuals
We extract standardized residuals to check for remaining autocorrelation or patterns.
std_residuals <-
  imap_dfr(
    fit_results,
    ~ tibble(
      model = .y,
      date = sp500_returns$date,
      residuals = residuals(.x, standardize = TRUE) %>% as.numeric()
    )
  )
std_residuals
# A tibble: 10,060 × 3
model date residuals
<chr> <int> <dbl>
1 sGARCH 1 -0.215
2 sGARCH 2 0.0943
3 sGARCH 3 0.455
4 sGARCH 4 0.206
5 sGARCH 5 -0.250
6 sGARCH 6 0.233
7 sGARCH 7 -0.481
8 sGARCH 8 -1.08
9 sGARCH 9 0.773
10 sGARCH 10 0.396
# ℹ 10,050 more rows
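If the variance equations are adequate, the standardized residuals should show little remaining autocorrelation. As a compact numerical complement to the plots below, a Ljung-Box test can be applied per model (a minimal sketch using the residual tibble above):
# Ljung-Box test on standardized residuals, one p-value per model
std_residuals %>%
  group_by(model) %>%
  summarise(lb_pvalue = Box.test(residuals, lag = 10, type = "Ljung-Box")$p.value)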
Plotting Squared Residuals
We plot the squared standardized residuals to assess the adequacy of the volatility models.
std_residuals %>%
  ggplot(aes(x = date, y = residuals^2)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Squared Standardized Residuals",
    x = "Day",
    y = "Squared Standardized Residuals"
  ) +
  facet_wrap(~model)
Residual Diagnostics for tGARCH Model
We perform time series diagnostics on the residuals of the tGARCH model.
std_residuals %>%
  filter(model == "tGARCH") %>%
  as_tsibble(index = date) %>%
  gg_tsdisplay(residuals, plot_type = "partial")
Forecasting Volatility
Generating Volatility Forecasts
We forecast the future volatility over the next 30 days using each model.
n_ahead <- 30 # Number of days to forecast
forecasts <- map(
  fit_results,
  ~ ugarchforecast(.x, n.ahead = n_ahead)
)
Preparing Forecast Data
We extract the forecasted volatility and compute confidence intervals.
forecast_data <-
map2_df(
forecasts, names(forecasts),
~ {
sigma_forecast <- sigma(.x) %>% as.numeric()
conf_intervals <- qnorm(c(0.025, 0.975)) * sigma_forecast
tibble(
Day = 1:n_ahead,
Forecasted_Volatility = sigma_forecast,
Lower_CI = sigma_forecast + conf_intervals[1],
Upper_CI = sigma_forecast + conf_intervals[2],
Model = .y
)
}
)Plotting Forecasted Volatility
We visualize the forecasted volatility for each model.
ggplot(forecast_data, aes(x = Day, y = Forecasted_Volatility, color = Model)) +
  geom_line(linewidth = 1) +
  labs(
    title = "30-Day Ahead Volatility Forecasts",
    x = "Day",
    y = "Forecasted Volatility (Sigma)"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Risk Assessment: VaR and ES
Calculating VaR and ES
We compute the Value at Risk (VaR) and Expected Shortfall (ES) at the 95% confidence level for each model, using the one-day-ahead forecasts. Because rugarch's "std" distribution is the standardized Student-t (unit variance), the plain qt/dt quantities must be rescaled by sqrt((nu - 2)/nu).
alpha <- 0.05 # 95% confidence level
# Iterate over forecasts and fitted models
var_forecasts <- map2_dfr(forecasts, fit_results, function(forecast, model) {
  # One-day-ahead mean and volatility forecasts
  mu <- as.numeric(fitted(forecast))[1]
  sigma <- as.numeric(sigma(forecast))[1]
  # Shape parameter (degrees of freedom) of the Student-t distribution
  shape <- as.numeric(coef(model)["shape"])
  # Rescale from the plain t-distribution to the standardized (unit-variance) t
  scale <- sqrt((shape - 2) / shape)
  qt_value <- qt(alpha, df = shape)
  # VaR: alpha-quantile of the forecast return distribution
  var_95 <- mu + sigma * scale * qt_value
  # ES: expected return conditional on falling below the VaR
  dt_value <- dt(qt_value, df = shape)
  es_95 <- mu - sigma * scale * (dt_value / alpha) * (shape + qt_value^2) / (shape - 1)
  tibble(
    Model = model@model$modeldesc$vmodel,
    VaR_95 = var_95,
    ES_95 = es_95
  )
}, .id = "model")
Displaying Risk Measures
We present the calculated VaR and ES for each volatility model. Note that the tGARCH row reports its variance-model class as fGARCH because TGARCH is implemented as a submodel of rugarch's family GARCH.
var_forecasts
# A tibble: 4 × 4
model Model VaR_95 ES_95
<chr> <chr> <dbl> <dbl>
1 sGARCH sGARCH -0.0192 -0.0272
2 apARCH apARCH -0.0194 -0.0269
3 gjrGARCH gjrGARCH -0.0201 -0.0282
4 tGARCH fGARCH -0.0209 -0.0291
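In practice, VaR estimates should be validated against realized returns. As a minimal in-sample sketch (illustrative only; a proper backtest would use rolling out-of-sample forecasts), rugarch's VaRTest() runs the Kupiec and Christoffersen coverage tests:
# In-sample VaR exceedance backtest for the sGARCH fit (illustration only)
fit <- fit_results$sGARCH
q_std <- qdist("std", p = alpha, shape = coef(fit)["shape"]) # alpha-quantile of the standardized t
var_series <- as.numeric(fitted(fit)) + as.numeric(sigma(fit)) * q_std
VaRTest(alpha = alpha, actual = sp500_returns$return, VaR = var_series)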
Conclusion
In this session, we:
- Collected and prepared S&P 500 index data.
- Performed exploratory data analysis to understand the return characteristics.
- Specified and fitted various GARCH models to capture volatility clustering and leverage effects.
- Evaluated model diagnostics to select the most appropriate model.
- Forecasted future volatility over a 30-day horizon.
- Calculated risk measures such as Value at Risk (VaR) and Expected Shortfall (ES) to assess potential losses.
Together, these steps form a practical workflow for volatility modeling in financial econometrics, from data preparation through model selection to risk measurement.