Day 2 Part 1: Fitting Volatility Models
Zero to Hero Bootcamp - Time Series Econometrics in R
Introduction
In this session, we will focus on fitting various volatility models to financial time series data, specifically the S&P 500 index. We will:
- Download and prepare the S&P 500 data.
- Perform exploratory data analysis (EDA).
- Fit different GARCH models.
- Evaluate model diagnostics.
- Forecast future volatility.
- Calculate Value at Risk (VaR) and Expected Shortfall (ES).
Loading Required Libraries
First, we load the necessary R packages for data manipulation, time series analysis, and modeling.

library(tidyverse)   # Data manipulation and visualization
library(tidyfinance) # Financial data tools
library(tsibble)     # Time series data manipulation
library(fable)       # Time series modeling
library(feasts)      # Time series features
library(rugarch)     # Univariate GARCH models
library(forecast)    # Forecasting functions
Data Acquisition and Preparation
We will work with the S&P 500 index data. If the data file does not exist locally, we download it using the download_data
function; otherwise, we read it from the existing CSV file.
if (!file.exists("sp500_index.csv")) {
  sp500_download <- download_data(
    "stock_prices",
    symbols = "^GSPC",
    start_date = "2014-11-18",
    end_date = "2024-11-18"
  ) %>% write_csv("sp500_index.csv")
} else {
  sp500_download <- read_csv("sp500_index.csv")
}
Rows: 2516 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): symbol
dbl (6): volume, open, low, high, close, adjusted_close
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Calculating Returns
We calculate the daily returns of the S&P 500 index from the adjusted closing prices, then replace the calendar dates with a regular row index so the series forms a gap-free time series (no weekend or holiday gaps).
sp500_returns <-
  sp500_download %>%
  select(symbol, date, price = adjusted_close) %>%
  mutate(return = price / lag(price) - 1) %>%
  remove_missing() %>%
  select(-price) %>%
  mutate(date = row_number()) %>%
  print()
Warning: Removed 1 row containing missing values or values outside the scale
range.
# A tibble: 2,515 × 3
symbol date return
<chr> <int> <dbl>
1 ^GSPC 1 -0.00150
2 ^GSPC 2 0.00197
3 ^GSPC 3 0.00524
4 ^GSPC 4 0.00286
5 ^GSPC 5 -0.00115
6 ^GSPC 6 0.00281
7 ^GSPC 7 -0.00254
8 ^GSPC 8 -0.00683
9 ^GSPC 9 0.00638
10 ^GSPC 10 0.00376
# ℹ 2,505 more rows
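These are simple (arithmetic) returns. Volatility studies often work with log returns instead; a minimal variant of the pipeline above (the name sp500_log_returns and the column log_return are our own):

sp500_log_returns <-
  sp500_download %>%
  select(symbol, date, price = adjusted_close) %>%
  mutate(log_return = log(price / lag(price))) %>% # log return = log(P_t / P_{t-1})
  remove_missing() %>%
  select(-price) %>%
  mutate(date = row_number())

For daily equity data the two definitions are nearly identical, so we continue with simple returns.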
Exploratory Data Analysis
Plotting Returns
We visualize the daily returns over time to observe any patterns or anomalies.
sp500_returns %>%
  ggplot(aes(x = date, y = return)) +
  geom_line() +
  labs(
    title = "S&P 500 Daily Returns",
    x = "Day",
    y = "Daily Return"
  )
Statistical Summary
We compute key statistical measures to understand the distribution of returns.
sp500_stats <-
  sp500_returns %>%
  summarise(
    mean_return = mean(return),
    std_dev = sd(return),
    skewness = moments::skewness(return),
    kurtosis = moments::kurtosis(return),
    jarque_bera_pvalue = tseries::jarque.bera.test(return)$p.value
  )
sp500_stats
# A tibble: 1 × 5
mean_return std_dev skewness kurtosis jarque_bera_pvalue
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0.000481 0.0112 -0.517 17.5 0
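The excess kurtosis and the rejected Jarque-Bera test establish heavy tails, but not yet volatility clustering. A quick Ljung-Box test on the squared returns checks for ARCH effects before we commit to GARCH models (a sketch; the choice of 10 lags is arbitrary):

# Significant autocorrelation in squared returns indicates ARCH effects
Box.test(sp500_returns$return^2, lag = 10, type = "Ljung-Box")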
Return Distribution
We compare the empirical distribution of returns to the normal distribution.
sp500_returns %>%
  ggplot(aes(x = return)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 50, fill = "lightblue", color = "black"
  ) +
  geom_density(color = "red", linewidth = 1) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(sp500_returns$return),
      sd = sd(sp500_returns$return)
    ),
    color = "blue",
    linewidth = 1
  ) +
  labs(
    title = "Return Distribution vs Normal Distribution",
    x = "Daily Return",
    y = "Density"
  )
Time Series Characteristics
We examine the autocorrelation and partial autocorrelation of the returns.
sp500_returns %>%
  as_tsibble(index = date) %>%
  gg_tsdisplay(return, plot_type = "partial")
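The returns themselves are close to white noise, but volatility clustering appears in the squared returns. A quick check of their autocorrelation with feasts (a sketch using the same tsibble index):

sp500_returns %>%
  as_tsibble(index = date) %>%
  ACF(return^2) %>%
  autoplot() +
  labs(title = "ACF of Squared Returns")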
Modeling Volatility
Fitting an ARIMA Model
We fit an ARIMA model to the returns to capture any autoregressive patterns.
fit_auto_arima <-
  sp500_returns %>%
  as_tsibble(index = date) %>%
  model(arima = ARIMA(return))

fit_auto_arima
# A mable: 1 x 1
arima
<model>
1 <ARIMA(2,0,2) w/ mean>
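The automatically selected ARIMA(2,0,2) with mean motivates the ARMA(2,2) mean equation used in the GARCH specifications below. To inspect the estimated coefficients and information criteria (a sketch):

# Print the full model report for the selected ARIMA
fit_auto_arima %>% report()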
Specifying GARCH and ARMA Orders
We define the orders for the GARCH and ARMA components.
garch_order <- c(2, 2) # GARCH(p,q) order
arma_order <- c(2, 2)  # ARMA(p,q) order
Defining GARCH Model Specifications
We specify multiple GARCH models to compare their performance.
garch_specs <-
  list(
    # Standard GARCH model specification
    sGARCH = ugarchspec(
      variance.model = list(
        model = "sGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std" # Student's t-distribution
    ),
    # Asymmetric Power ARCH model specification
    apARCH = ugarchspec(
      variance.model = list(
        model = "apARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    ),
    # GJR-GARCH model specification
    gjrGARCH = ugarchspec(
      variance.model = list(
        model = "gjrGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    ),
    # Threshold GARCH (TGARCH) model specification
    tGARCH = ugarchspec(
      variance.model = list(
        model = "fGARCH",
        submodel = "TGARCH",
        garchOrder = garch_order
      ),
      mean.model = list(
        armaOrder = arma_order,
        include.mean = TRUE
      ),
      distribution.model = "std"
    )
  )
Fitting the GARCH Models
We fit each GARCH model to the returns data.
fit_results <-
  map(
    garch_specs,
    ~ ugarchfit(spec = .x, data = sp500_returns$return)
  )
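Each element of fit_results is a uGARCHfit object. Printing one displays rugarch's full estimation summary, and coef() returns the parameter vector (a quick sketch):

# Full estimation output for the standard GARCH fit
fit_results$sGARCH

# Named vector of estimated ARMA, GARCH, and shape parameters
coef(fit_results$sGARCH)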
Model Diagnostics
Defining a Diagnostic Function
We create a function to extract key diagnostics from each model: information criteria, log-likelihood, persistence (how slowly volatility shocks decay), the implied half-life (the number of days for a shock's impact to halve, obtained by solving persistence^h = 0.5), and the mean fitted variance as an estimate of the unconditional level.
get_model_diagnostics <- function(model) {
  tibble(
    aic = infocriteria(model)[1],
    bic = infocriteria(model)[2],
    log_likelihood = likelihood(model),
    persistence = persistence(model),
    half_life = -log(2) / log(persistence(model)),
    unconditional_variance = mean(sigma(model)^2)
  )
}
Comparing Model Statistics
We apply the diagnostic function to each model and compile the results.
fit_stats <- map_dfr(fit_results, get_model_diagnostics, .id = "model")
fit_stats
# A tibble: 4 × 7
model aic bic log_likelihood persistence half_life unconditional_variance
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sGARCH -6.70 -6.67 8436. 0.997 201. 0.000136
2 apARCH -6.74 -6.71 8495. 0.964 18.9 0.000132
3 gjrGA… -6.73 -6.70 8477. 0.987 53.4 0.000141
4 tGARCH -6.75 -6.72 8500. 0.942 11.5 0.000135
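The asymmetric models (apARCH, gjrGARCH, tGARCH) show markedly lower persistence and shorter half-lives than the standard GARCH. To visualize the asymmetry itself, rugarch provides a news impact curve; a sketch for the GJR-GARCH fit:

# News impact curve: conditional variance response to a shock z
ni <- newsimpact(z = NULL, object = fit_results$gjrGARCH)
plot(
  ni$zx, ni$zy,
  type = "l",
  xlab = "Shock (z)",
  ylab = "Conditional Variance",
  main = "News Impact Curve: gjrGARCH"
)

A steeper left arm of the curve indicates that negative shocks raise volatility more than positive shocks of the same size (the leverage effect).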
Analyzing Residuals
We extract standardized residuals to check for remaining autocorrelation or patterns.
residuals <-
  imap_dfr(
    fit_results,
    ~ tibble(
      model = .y,
      date = sp500_returns$date,
      residuals = residuals(.x, standardize = TRUE) %>% as.numeric()
    )
  )

residuals
# A tibble: 10,060 × 3
model date residuals
<chr> <int> <dbl>
1 sGARCH 1 -0.215
2 sGARCH 2 0.0943
3 sGARCH 3 0.455
4 sGARCH 4 0.206
5 sGARCH 5 -0.250
6 sGARCH 6 0.233
7 sGARCH 7 -0.481
8 sGARCH 8 -1.08
9 sGARCH 9 0.773
10 sGARCH 10 0.396
# ℹ 10,050 more rows
Plotting Squared Residuals
We plot the squared standardized residuals to assess the adequacy of the volatility models.
residuals %>%
  ggplot(aes(x = date, y = residuals^2)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Squared Standardized Residuals",
    x = "Day",
    y = "Residuals Squared"
  ) +
  facet_wrap(~model)
Residual Diagnostics for tGARCH Model
We perform time series diagnostics on the residuals of the tGARCH model.
residuals %>%
  filter(model == "tGARCH") %>%
  as_tsibble(index = date) %>%
  gg_tsdisplay(residuals, plot_type = "partial")
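As a formal complement to the visual check, Ljung-Box tests on the standardized residuals and their squares should show no remaining structure if the model is adequate (a sketch; the lag of 10 is arbitrary):

tgarch_resid <-
  residuals %>%
  filter(model == "tGARCH") %>%
  pull(residuals)

# Remaining autocorrelation in the standardized residuals
Box.test(tgarch_resid, lag = 10, type = "Ljung-Box")

# Remaining ARCH effects in the squared standardized residuals
Box.test(tgarch_resid^2, lag = 10, type = "Ljung-Box")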
Forecasting Volatility
Generating Volatility Forecasts
We forecast the future volatility over the next 30 days using each model.
n_ahead <- 30 # Number of days to forecast

forecasts <- map(
  fit_results,
  ~ ugarchforecast(.x, n.ahead = n_ahead)
)
Preparing Forecast Data
We extract the forecasted volatility from each model and attach approximate 95% normal bands scaled by the forecasted sigma.
forecast_data <-
  map2_df(
    forecasts,
    names(forecasts),
    ~ {
      sigma_forecast <- sigma(.x) %>% as.numeric()
      # Approximate 95% normal bands, scaled per day by the forecasted sigma
      lower <- qnorm(0.025) * sigma_forecast
      upper <- qnorm(0.975) * sigma_forecast
      tibble(
        Day = 1:n_ahead,
        Forecasted_Volatility = sigma_forecast,
        Lower_CI = sigma_forecast + lower,
        Upper_CI = sigma_forecast + upper,
        Model = .y
      )
    }
  )
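The forecasts are daily sigmas. For reporting, daily volatility is commonly annualized by scaling with the square root of the number of trading days; a sketch assuming 252 trading days per year (forecast_data_annual is our own name):

forecast_data_annual <-
  forecast_data %>%
  mutate(Annualized_Volatility = Forecasted_Volatility * sqrt(252))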
Plotting Forecasted Volatility
We visualize the forecasted volatility for each model.
ggplot(forecast_data, aes(x = Day, y = Forecasted_Volatility, color = Model)) +
  geom_line(linewidth = 1) +
  labs(
    title = "30-Day Ahead Volatility Forecasts",
    x = "Day",
    y = "Forecasted Volatility (Sigma)"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Risk Assessment: VaR and ES
Calculating VaR and ES
We compute the Value at Risk (VaR) and Expected Shortfall (ES) at the 95% confidence level for each model.
alpha <- 0.05 # 95% confidence level

# Iterate over forecasts and fitted models
var_forecasts <- map2_dfr(forecasts, fit_results, function(forecast, model) {
  # Extract mean and standard deviation from the forecast
  mu <- as.numeric(fitted(forecast))[1]
  sigma <- as.numeric(sigma(forecast))[1]

  # Extract the shape parameter (degrees of freedom)
  shape <- as.numeric(coef(model)["shape"])

  # Calculate VaR using the t-distribution quantile
  var_95 <- mu + sigma * qt(alpha, df = shape)

  # Calculate ES (Expected Shortfall)
  qt_value <- qt(alpha, df = shape)
  dt_value <- dt(qt_value, df = shape)
  es_95 <- mu - sigma * (dt_value / alpha) * (shape + qt_value^2) / (shape - 1)

  tibble(
    Model = model@model$modeldesc$vmodel,
    VaR_95 = var_95,
    ES_95 = es_95
  )
}, .id = "model")
Displaying Risk Measures
We present the calculated VaR and ES for each volatility model. Note that the tGARCH row reports its Model as "fGARCH" because rugarch implements TGARCH as a submodel of the fGARCH family.
var_forecasts
# A tibble: 4 × 4
model Model VaR_95 ES_95
<chr> <chr> <dbl> <dbl>
1 sGARCH sGARCH -0.0192 -0.0272
2 apARCH apARCH -0.0194 -0.0269
3 gjrGARCH gjrGARCH -0.0201 -0.0282
4 tGARCH fGARCH -0.0209 -0.0291
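As a rough sanity check (a sketch, not a formal backtest), we can ask how often historical daily returns fell below each model's one-day VaR; for a well-calibrated 95% VaR the breach rate should be close to 5%:

# Share of in-sample returns breaching each model's VaR level
var_forecasts %>%
  mutate(breach_rate = map_dbl(VaR_95, ~ mean(sp500_returns$return < .x)))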
Conclusion
In this session, we:
- Collected and prepared S&P 500 index data.
- Performed exploratory data analysis to understand the return characteristics.
- Specified and fitted various GARCH models to capture volatility clustering and leverage effects.
- Evaluated model diagnostics to select the most appropriate model.
- Forecasted future volatility over a 30-day horizon.
- Calculated risk measures such as Value at Risk (VaR) and Expected Shortfall (ES) to assess potential losses.
Together, these steps show why volatility modeling is central to financial econometrics: it underpins both risk management and forecasting.