library(tidyverse) # Data manipulation and visualization
library(tidyfinance) # Financial data tools
library(tsibble) # Time series data manipulation
library(feasts) # Time series features
library(rugarch) # Univariate GARCH models
library(PortfolioAnalytics) # Portfolio optimization
library(xts) # Extensible time series
Day 2 Part 3: Application to S&P 500 Stock Portfolio
Zero to Hero Bootcamp - Time Series Econometrics in R
Introduction
In this session, we will apply time series econometric techniques to an S&P 500 stock portfolio. The objectives are:
- Download and preprocess S&P 500 stock data.
- Construct a minimum variance portfolio.
- Fit a GARCH model to the portfolio returns.
- Evaluate the model and perform rolling forecasts.
- Assess Value at Risk (VaR) using the fitted model.
Loading Required Libraries
We begin by loading the necessary R packages for data manipulation, time series analysis, portfolio optimization, and modeling.
Data Acquisition
Downloading S&P 500 Constituents and Prices
We download the S&P 500 constituents and their historical adjusted closing prices. If the data already exists locally, we read it from the CSV file to save time.
if (!file.exists("sp500_download.csv")) {
# Download S&P 500 constituents
<- download_data("constituents", index = "S&P 500")
constituents
# Download historical stock prices for all constituents
<-
sp500_download download_data(
"stock_prices",
symbols = constituents %>% pull(symbol),
start_date = "2014-11-18",
end_date = "2024-11-18"
)
# Save the data to CSV for future use
%>% write_csv("sp500_download.csv")
sp500_download else {
} # Read the data from CSV if it exists
<- read_csv("sp500_download.csv")
sp500_download }
Rows: 1223542 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): symbol
dbl (6): volume, open, low, high, close, adjusted_close
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Data Preparation
Calculating Daily Returns
We calculate the daily returns for each stock in the S&P 500 index. We also ensure that we only keep stocks with complete data over the entire period.
<-
sp500_returns %>%
sp500_download select(symbol, date, adjusted_close) %>%
group_by(symbol) %>%
arrange(date) %>% # Ensure data is sorted by date
mutate(
return = adjusted_close / lag(adjusted_close) - 1
%>%
) ungroup() %>%
drop_na(return) %>% # Remove NAs resulting from lag
group_by(symbol) %>%
mutate(n_days = n()) %>%
ungroup() %>%
filter(n_days == max(n_days)) %>% # Keep symbols with full data
select(symbol, date, return)
Transforming Data to Wide Format
We pivot the data to a wide format where each column represents the returns of a stock, and each row represents a date.
<-
sp500_wide %>%
sp500_returns pivot_wider(
id_cols = date,
values_from = return,
names_from = symbol
%>%
) mutate(date = as_date(date))
sp500_wide
# A tibble: 2,515 × 469
date NVDA AAPL MSFT AMZN META GOOGL TSLA
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2014-11-19 -0.00373 -0.00693 -0.0107 0.00495 -0.0136 0.00494 -0.0386
2 2014-11-20 0.0165 0.0143 0.00995 0.0122 0.00368 -0.00629 0.00392
3 2014-11-21 0.00541 0.00138 -0.0148 0.00632 0.00204 0.00392 -0.0238
4 2014-11-24 0.00636 0.0185 -0.00813 0.00905 0.00353 0.00291 0.0162
5 2014-11-25 -0.000486 -0.00868 -0.00252 -0.00179 0.0219 0.00320 0.00555
6 2014-11-26 0.0170 0.0119 0.00590 -0.00439 0.0263 -0.00273 0.00141
7 2014-11-28 0.00239 -0.000588 0.00126 0.0152 0.00103 0.00246 -0.0158
8 2014-12-01 -0.0186 -0.0325 0.0169 -0.0373 -0.0335 -0.0172 -0.0527
9 2014-12-02 0.00146 -0.00382 -0.00329 0.000951 0.00479 -0.00196 -0.000907
10 2014-12-03 0.0257 0.0113 -0.00784 -0.0301 -0.00769 -0.00301 -0.00920
# ℹ 2,505 more rows
# ℹ 461 more variables: GOOG <dbl>, AVGO <dbl>, JPM <dbl>, LLY <dbl>,
# UNH <dbl>, XOM <dbl>, V <dbl>, MA <dbl>, COST <dbl>, HD <dbl>, PG <dbl>,
# NFLX <dbl>, WMT <dbl>, JNJ <dbl>, CRM <dbl>, BAC <dbl>, ORCL <dbl>,
# ABBV <dbl>, CVX <dbl>, WFC <dbl>, MRK <dbl>, KO <dbl>, CSCO <dbl>,
# ACN <dbl>, ADBE <dbl>, AMD <dbl>, PEP <dbl>, LIN <dbl>, NOW <dbl>,
# DIS <dbl>, MCD <dbl>, IBM <dbl>, ABT <dbl>, PM <dbl>, TMO <dbl>, …
Preparing Data for Portfolio Optimization
We convert the returns data into a data frame suitable for portfolio optimization. The returns
data frame will have dates as row names and stock symbols as column names.
<- sp500_wide %>%
returns select(-date) %>%
as.data.frame()
rownames(returns) <- sp500_wide$date
<- colnames(returns) funds
Portfolio Optimization
Specifying Portfolio Constraints and Objectives
We define the portfolio specification, including the constraints and the objective to minimize portfolio variance (standard deviation).
<-
init_portf portfolio.spec(assets = funds) %>%
add.constraint(type = "weight_sum", min_sum = 0.99, max_sum = 1.01) %>%
add.constraint(type = "box", min = 0, max = 1) %>%
add.objective(type = "risk", name = "StdDev")
- Weight Sum Constraint: The sum of the portfolio weights should be approximately 1.
- Box Constraint: Each asset weight should be between 0 and 1 (no short selling).
- Objective: Minimize the portfolio standard deviation (risk).
Optimizing the Portfolio
We use the optimize.portfolio
function to find the portfolio weights that minimize the portfolio’s standard deviation.
<-
min_sd optimize.portfolio(
R = returns,
portfolio = init_portf,
optimize_method = "osqp",
trace = TRUE
)
Note: The osqp
method is used for optimization due to its efficiency in handling quadratic programming problems.
Extracting Optimal Weights
We extract and round the optimal portfolio weights for easier interpretation.
<-
weights tibble(
symbol = names(min_sd$weights),
weight = round(min_sd$weights, 2)
)
Calculating Portfolio Returns
We calculate the daily returns of the optimized portfolio by weighting the individual stock returns.
<-
portfolio_returns %>%
sp500_returns left_join(weights, by = "symbol") %>%
group_by(date) %>%
summarise(
portfolio_return = sum(return * weight, na.rm = TRUE)
%>%
) mutate(portfolio_return_sq = portfolio_return^2)
Exploratory Data Analysis of Portfolio Returns
Plotting Portfolio Returns
We visualize the portfolio returns and their squares over time to observe any patterns or volatility clustering.
%>%
portfolio_returns pivot_longer(starts_with("portfolio")) %>%
ggplot(aes(date, value)) +
geom_line() +
facet_wrap(~name, scales = "free", nrow = 2) +
labs(
title = "Portfolio Returns and Squared Returns",
x = "Date",
y = "Value"
)
Time Series Modeling
Checking for Autocorrelation
We fit an ARIMA model to the portfolio returns to check for any autoregressive or moving average components.
%>%
portfolio_returns mutate(time = row_number()) %>%
as_tsibble(index = time) %>%
model(arima = fable::ARIMA(portfolio_return)) %>%
print()
# A mable: 1 x 1
arima
<model>
1 <ARIMA(2,0,2) w/ mean>
GARCH Model Specification and Fitting
Specifying the GARCH Model
We specify a Threshold GARCH (TGARCH) model to capture the asymmetric effects in volatility.
<-
spec ugarchspec(
variance.model = list(
model = "fGARCH",
submodel = "TGARCH",
garchOrder = c(1, 1)
),mean.model = list(
armaOrder = c(2, 2),
include.mean = TRUE
),distribution.model = "std" # Student's t-distribution
)
- Variance Model: TGARCH(1,1) to capture asymmetry in volatility.
- Mean Model: ARMA(2,2) to model the returns.
- Distribution: Student’s t-distribution to account for fat tails.
Fitting the GARCH Model
We fit the specified GARCH model to the portfolio returns.
<-
portfolio_fit ugarchfit(
spec = spec,
data = portfolio_returns$portfolio_return
)
Model Evaluation
Comparing Actual vs. Fitted Returns
We compare the actual portfolio returns to the fitted values from the GARCH model.
<-
comparison_data tibble(
date = portfolio_returns$date,
return_actual = portfolio_returns$portfolio_return,
return_fitted = fitted(portfolio_fit) %>% as.numeric()
)
Plotting the Comparison
%>%
comparison_data pivot_longer(-date) %>%
ggplot(aes(x = date, y = value, color = name)) +
geom_line() +
theme_minimal() +
labs(
title = "Actual vs. Fitted Portfolio Returns",
y = "Return",
x = "Date",
color = "Series"
+
) scale_color_brewer(
labels = c("Actual", "Fitted"),
palette = "Set1"
)
Analyzing Volatility Estimates
We analyze the estimated volatility from the GARCH model and compare it to the absolute returns.
<-
volatility_data tibble(
date = portfolio_returns$date,
return_abs = abs(portfolio_returns$portfolio_return),
sigma = sigma(portfolio_fit) %>% as.numeric()
)
Plotting Volatility Estimates
%>%
volatility_data pivot_longer(
cols = c(return_abs, sigma),
names_to = "measure",
values_to = "value"
%>%
) ggplot(aes(x = date, y = value, color = measure)) +
geom_line() +
labs(
title = "Estimated Volatility vs. Absolute Returns",
y = "Value",
x = "Date",
color = "Measure"
+
) theme_minimal()
Rolling Forecast and VaR Analysis
Preparing Data for Rolling Forecast
We convert the portfolio returns data into an xts
object required for the rolling forecast function.
<- xts(
df_portfolio_xts x = portfolio_returns$portfolio_return,
order.by = portfolio_returns$date
)
Performing Rolling Forecast with ugarchroll
We perform a rolling forecast to evaluate the model’s out-of-sample performance and calculate Value at Risk (VaR).
<-
roll_model ugarchroll(
spec = spec,
data = df_portfolio_xts,
n.ahead = 1,
forecast.length = 252, # Approximately one trading year
refit.every = 50,
refit.window = "moving",
calculate.VaR = TRUE,
VaR.alpha = c(0.01, 0.05)
) roll_model
*-------------------------------------*
* GARCH Roll *
*-------------------------------------*
No.Refits : 6
Refit Horizon : 50
No.Forecasts : 252
GARCH Model : fGARCH(1,1)
fGARCH SubModel : TGARCH
Distribution : std
Forecast Density:
Mu Sigma Skew Shape Shape(GIG) Realized
2023-11-16 -1e-04 0.0041 0 7.6336 0 0.0000
2023-11-17 3e-04 0.0038 0 7.6336 0 0.0001
2023-11-20 2e-04 0.0035 0 7.6336 0 0.0006
2023-11-21 4e-04 0.0033 0 7.6336 0 0.0023
2023-11-22 2e-04 0.0032 0 7.6336 0 0.0043
2023-11-24 2e-04 0.0032 0 7.6336 0 0.0026
..........................
Mu Sigma Skew Shape Shape(GIG) Realized
2024-11-08 7e-04 0.0047 0 7.3380 0 0.0055
2024-11-11 0e+00 0.0046 0 7.3380 0 0.0001
2024-11-12 6e-04 0.0042 0 7.3380 0 -0.0021
2024-11-13 3e-04 0.0044 0 7.3380 0 -0.0002
2024-11-14 6e-04 0.0040 0 7.7394 0 -0.0056
2024-11-15 2e-04 0.0048 0 7.7394 0 -0.0038
Elapsed: 12.67342 secs
- n.ahead: Forecast horizon (1 day ahead).
- forecast.length: Number of periods to forecast (252 trading days).
- refit.every: Refit the model every 50 observations.
- refit.window: Use a moving window for refitting.
- VaR.alpha: Confidence levels for VaR calculation (1% and 5%).
Reporting VaR Results
We generate a report of the VaR backtesting results at the 1% significance level.
report(
roll_model,type = "VaR",
VaR.alpha = 0.01,
conf.level = 0.95
)
VaR Backtest Report
===========================================
Model: fGARCH-std
Backtest Length: 252
Data:
==========================================
alpha: 1%
Expected Exceed: 2.5
Actual VaR Exceed: 2
Actual %: 0.8%
Unconditional Coverage (Kupiec)
Null-Hypothesis: Correct Exceedances
LR.uc Statistic: 0.117
LR.uc Critical: 3.841
LR.uc p-value: 0.733
Reject Null: NO
Conditional Coverage (Christoffersen)
Null-Hypothesis: Correct Exceedances and
Independence of Failures
LR.cc Statistic: 0.149
LR.cc Critical: 5.991
LR.cc p-value: 0.928
Reject Null: NO
Evaluating Forecast Performance
We report the forecast performance measures (FPM) to assess the accuracy of the model’s forecasts.
report(roll_model, type = "fpm")
GARCH Roll Mean Forecast Performance Measures
---------------------------------------------
Model : fGARCH
SubModel : TGARCH
No.Refits : 6
No.Forecasts: 252
Stats
MSE 0.0000181
MAE 0.0032990
DAC 0.5595000
Visualizing Rolling Forecast Results
We plot various diagnostics from the rolling forecast to assess the model’s performance.
Conditional Density Forecasts
par(mfrow = c(1, 1))
par(new = FALSE) # Reset plotting parameters
plot(roll_model, which = 1)
Actual vs. Forecasted Values (Mean)
par(mfrow = c(1, 1))
par(new = FALSE)
plot(roll_model, which = 2)
Actual vs. Forecasted Values (Volatility)
par(mfrow = c(1, 1))
par(new = FALSE)
plot(roll_model, which = 3)
VaR Backtesting Plot (5% VaR)
par(mfrow = c(1, 1))
par(new = FALSE)
plot(roll_model, which = 4)
Conclusion
In this session, we:
- Downloaded and prepared S&P 500 stock data.
- Constructed a minimum variance portfolio using portfolio optimization techniques.
- Fitted a TGARCH model to the portfolio returns to model volatility.
- Evaluated the model’s fit and compared actual vs. fitted returns.
- Performed a rolling forecast to assess the model’s out-of-sample performance.
- Calculated and analyzed Value at Risk (VaR) for risk assessment.
This practical application demonstrates the integration of portfolio optimization and time series econometric modeling for financial analysis and risk management.