#This code is issued under the Creative Commons CC0 Public Domain Dedication
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def ecdf(x):
x_sort = np.sort(x)
y = np.arange(1, len(x_sort)+1)/float(len(x_sort))
return x_sort, y
def DKW_bounds(y, n, alpha=0.05):
# Compute Dvoretzky–Kiefer–Wolfowitz inequality
eps = np.sqrt(0.5 * np.log(2.0/alpha) /n)
lower = np.maximum(y - eps, 0)
upper = np.minimum(y + eps, 1)
return lower, upper
def pointwise_bound(y, n, alpha=0.05):
# Compute confidence intervals from an eCDF.
# Clopper-Pearson interval
lower = stats.beta.ppf(alpha/2, y*n, (1-y)*n + 1)
upper = stats.beta.ppf(1-alpha/2, y*n + 1, (1-y)*n)
# Primarily used for mapping nan to 0 or 1
lower = np.fmax(lower, 0)
upper = np.fmin(upper, 1)
return lower, upper
num_samps = 30
x = np.linspace(-4,4, num=500)
y = stats.norm.cdf(x)
x_rand = np.random.randn(num_samps)
x_ecdf, y_ecdf = ecdf(x_rand)
# Ensure the eCDF extends to the edges of the graph for the bounds
x_ecdf, y_ecdf = np.append([-4], x_ecdf), np.append([0], y_ecdf)
x_ecdf, y_ecdf = np.append(x_ecdf, [4]), np.append(y_ecdf, [1])
# Pass in number of points because you extended the length of x_ecdf
lower, upper = DKW_bounds(y_ecdf, n=num_samps)
lower_pw, upper_pw = pointwise_bound(y_ecdf, n=num_samps)
fig, axes = plt.subplots(figsize=(4,3.2))
axes.plot(x,y, '-g', linewidth=1.5, color='lightblue')
#Plot gets too crowded if you show the actual ecdf
#axes.step(x_ecdf, y_ecdf, 'k-', where='post', linewidth=1.5, color='lightblue')
axes.step(x_ecdf, lower, '-b', where='post', linewidth=1.5, color='purple')
axes.step(x_ecdf, upper, '-b', where='post', linewidth=1.5, color='purple')
axes.step(x_ecdf, lower_pw, '-b', where='post', linewidth=1.5, color='orange')
axes.step(x_ecdf, upper_pw, '-b', where='post', linewidth=1.5, color='orange')
axes.set_xlim(-3,3)
axes.grid()
axes.set_ylabel('P(x)')
axes.set_xlabel('x')
fig.savefig('DKW_bounds.svg')