The management of operational risk follows a standard process: (1) Risk Identification; (2) Risk Evaluation; (3) Risk Monitoring; (4) Risk Mitigation; (5) Risk Reporting. Quantifying risk is fundamental to the fulfilment of steps (2)-(5).
Based on empirical loss event data, the loss distribution is estimated by modelling both the probability and the magnitude of potential losses in the industry. To this end we employ the Loss Distribution Approach (LDA), a technique that compounds the loss frequency distribution with the loss severity distribution to produce an objective distribution of aggregate losses attributable to the materialization of specified operational risks. LDA is favored by large financial institutions for determining capital adequacy for operational risk1; it is equally well suited to our objective of quantifying custodial risk in the cryptocurrency universe2. We first formalize the model, then describe the construction process in detail below.
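Formally, writing $\lambda$ for the annual event intensity and $\mu$, $\sigma$ for the mean and standard deviation of log severities, the aggregate annual loss is the compound sum

$$S = \sum_{i=1}^{N} X_i, \qquad N \sim \operatorname{Poisson}(\lambda), \qquad X_i \stackrel{\text{i.i.d.}}{\sim} \operatorname{LogNormal}(\mu, \sigma^2),$$

whose distribution is obtained below by Monte Carlo simulation rather than by analytic convolution.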
1 LDA is one popular method to estimate potential losses and capital requirements in the Advanced Measurement Approach (AMA) under the Basel II regulations.
2 For a more detailed definition of “CCC-mitigable events”, please refer to “Cryptocurrency: Care, Custody, & Control”, New York University Stern School of Business, May 2020, co-authored by Au, Hoffman, Mattingly, McAteer, Takahashi.
3 Poisson-distributed random variates are generated with the inverse transform method.
# Import the necessary libraries
from scipy import stats
from scipy.stats import lognorm, norm, poisson
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from math import exp
# Read data on operational loss events into DataFrame
excel_data_df = pd.read_excel('LDA DATA.xlsx', sheet_name='Frequency')
# View sheet data
excel_data_df
freq = excel_data_df.groupby("YEAR")['CCC-MITIGABLE'].sum()
print(freq)
# Estimate lambda: total loss events divided by the number of years in the sample
lamb = np.sum(freq.values) / ((excel_data_df.YEAR.max() - excel_data_df.YEAR.min()) + 1)
print(lamb.round(3))
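# Sanity check on the estimate (an assumption: it holds only if every year
# in the sample recorded at least one event, so freq has one row per year)
assert np.isclose(lamb, freq.mean())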
# Draw random variables from a Poisson distribution specified with lambda = 11.636
p_rand_v = poisson.rvs(lamb, size=(10000))
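# Footnote 3 references the inverse transform method for Poisson variates;
# the sketch below illustrates it (poisson.rvs above is the production path,
# and this helper name is purely illustrative).
def poisson_inverse_transform(lam, rng=None):
    """Draw one Poisson(lam) variate by inverting the CDF (illustrative sketch)."""
    rng = rng or np.random.default_rng()
    u = rng.uniform()       # uniform draw on [0, 1)
    k, p = 0, np.exp(-lam)  # P(K = 0) = e^(-lam)
    cdf = p
    while u > cdf:          # walk up the CDF until it first covers u
        k += 1
        p *= lam / k        # Poisson recurrence: P(K=k) = P(K=k-1) * lam / k
        cdf += p
    return k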
# Bin the simulated draws to form the empirical probability mass function
hist = plt.hist(p_rand_v, bins=10000) # Collect frequency distribution
plt.close("all")
y = hist[0]/np.sum(hist[0]) # Normalize frequencies to probabilities
x = hist[1]
plt.figure(figsize=(10, 6))
plt.bar(x[:-1], y, width=0.7, align='center', color="#2c97f1")
plt.xlabel("Number of Loss Events", fontsize=12)
plt.ylabel("Probability", fontsize=12)
plt.title("Loss Event Probability Density Function", fontsize=18)
# Sort the randomly distributed data:
data = p_rand_v
data_sorted = np.sort(data)
# Calculate the empirical cumulative proportion of each sorted sample
p = 1. * np.arange(len(data)) / (len(data) - 1)
# Plot the cumulative distribution function
plt.figure(figsize=(10, 6))
plt.bar(data_sorted, p)
plt.xlim(0, 26)
plt.ylabel("Number of Loss Events", fontsize=12)
plt.ylabel("Probability", fontsize=12)
plt.title("Loss Event Cumulative Density Function", fontsize=18)
# Read data on severity of operational loss events into DataFrame
xl_data_df_sev = pd.read_excel('LDA DATA.xlsx', sheet_name='Severity')
# Preview the first rows of the sheet
xl_data_df_sev.head()
# View empirical annual loss data
sev = pd.DataFrame(xl_data_df_sev.groupby("Year")["$ Losses"].sum())
print(sev)
# Coerce loss data to a numeric type
xl_data_df_sev["$ Losses"] = pd.to_numeric(xl_data_df_sev["$ Losses"])
# Calculate natural log of losses
xl_data_df_sev["Log Losses"] = np.log(xl_data_df_sev["$ Losses"])
# Calculate first and second moments of log loss data
logmean= xl_data_df_sev["Log Losses"].mean()
logstd= xl_data_df_sev["Log Losses"].std()
logmean, logstd
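# Optional cross-check of the moment estimates above: SciPy's maximum-
# likelihood fit of the two-parameter lognormal with the location pinned
# at zero. The MLE shape uses the population (ddof=0) standard deviation,
# so it may differ slightly from the sample std computed above.
shape_mle, loc_mle, scale_mle = lognorm.fit(xl_data_df_sev["$ Losses"], floc=0)
print(shape_mle, np.log(scale_mle)) # compare with (logstd, logmean)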
# Read input "x" values for visualization of pdf and cdf of loss severity
dist_data_df = pd.read_excel('LDA DATA.xlsx', sheet_name='Distribution')
x = dist_data_df['x loss']
# Check user-defined function...
def lognorm_cdf(x, mu, sigma):
    shape = sigma
    loc = 0
    scale = exp(mu)
    return stats.lognorm.cdf(x, shape, loc, scale)
lognorm_cdf(x, logmean, logstd)
# Against stats function
cdf = lognorm.cdf(x, s=logstd, loc=0, scale=exp(logmean))
cdf
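# The user-defined function and the library call should agree elementwise
assert np.allclose(cdf, lognorm_cdf(x, logmean, logstd))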
# Check user-defined function...
def lognorm_pdf(x, mu, sigma):
    shape = sigma
    loc = 0
    scale = exp(mu)
    return stats.lognorm.pdf(x, shape, loc, scale)
lognorm_pdf(x, logmean, logstd)
# Against stats function
pdf = lognorm.pdf(x, s=logstd, loc=0, scale=exp(logmean))
pdf
# Visualize cdf
plt.figure(figsize=(10, 6))
plt.xlabel("Loss Magnitude ($)", fontsize=12)
plt.ylabel("Probability", fontsize=12)
plt.title("Cumulative Density Function - Loss Severity", fontsize=18)
cdf = lognorm.cdf(x, s=logstd, loc=0, scale=exp(logmean))
plt.plot(x, cdf, 'r')
# Draw simulated annual loss event counts from the Poisson distribution parametrized by lambda
r = np.random.poisson(lamb, size=1000000)
# Collect in dataframe
lda = pd.DataFrame(r, columns=["Simulated Annual Loss Events"])
lda
# Function to simulate one year's aggregate loss: draw x severity samples
# from the fitted lognormal and sum them.
def lda_severity(x):
    y = lognorm.rvs(s=logstd, loc=0, scale=exp(logmean), size=x)
    return sum(y)
# Apply Function to simulate annual losses.
lda['Simulated Annual Loss'] = lda['Simulated Annual Loss Events'].apply(lda_severity)
lda['Simulated Annual Loss'] = lda['Simulated Annual Loss']/1000000 # Express losses in $ millions
lda['Simulated Annual Loss'] = lda['Simulated Annual Loss'].round(decimals=1)
lda
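# A vectorized alternative to the .apply() loop above (a statistically
# equivalent sketch, typically much faster for one million scenarios):
# pool all severity draws, then sum them back into their simulation years.
# The names all_losses, year_ids, and annual are illustrative.
all_losses = lognorm.rvs(s=logstd, loc=0, scale=exp(logmean), size=r.sum())
year_ids = np.repeat(np.arange(len(r)), r) # simulation index of each draw
annual = np.bincount(year_ids, weights=all_losses, minlength=len(r)) / 1000000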
arr_ann_loss_sev = lda['Simulated Annual Loss'].to_numpy()
arr_ann_loss_sev
# Compute OpVaR at the desired quantiles of the simulated loss distribution
OpVaR_90 = np.percentile(arr_ann_loss_sev, q=90).round(decimals=3)
OpVaR_95 = np.percentile(arr_ann_loss_sev, q=95).round(decimals=3)
OpVaR_99 = np.percentile(arr_ann_loss_sev, q=99).round(decimals=3)
OpVaR_90, OpVaR_95, OpVaR_99
# Output results in tabular format
from tabulate import tabulate
ltable = [['90%', OpVaR_90], ['95%', OpVaR_95], ['99%', OpVaR_99]]
header = ['\033[1mConfidence Level\033[0m', '\033[1mAnnual OpVaR ($Millions)\033[0m']
print(tabulate(ltable,headers=header))
# Visualize the loss distribution (truncate extreme tail values for plotting)
df_loss_dist = lda[lda['Simulated Annual Loss'] < 4000]
df_loss_dist
Loss_Dist_df = pd.DataFrame(df_loss_dist['Simulated Annual Loss'])
Loss_Dist_df.hist(figsize = (12, 6), bins = 10000, alpha = 1, grid = True)
plt.xlabel('Simulated Annual Losses')
plt.ylabel('Frequency')
plt.title('Loss Distribution with OpVaR @ 95% Confidence Level', fontsize=20)
plt.axvline(x = OpVaR_95, color = 'r', linestyle = '--', label = "OpVaR @ 95%: {0:.2f}".format(OpVaR_95))
plt.xlim(0, 4000)
plt.legend(framealpha=1, frameon=True);
plt.show()
print (" At a 95% confidence level, losses will not exceed ${0:.3f} over a 1 year period.".format(OpVaR_95))