6626070
2997924

MATH05, Non parametric tests

Back to the previous page: Statistics
List of posts to read before reading this article


Contents


Non parametric

from scipy import stats
from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

# Reference distribution: chi-squared with 5 degrees of freedom, and 100
# random draws from it that serve as the observed sample.
X = stats.chi2(df=5)
X_samples = X.rvs(100)

# Gaussian kernel density estimates of the sample.  `gaussian_kde` is taken
# from the public `scipy.stats` namespace; the `scipy.stats.kde` submodule
# path is deprecated.  The second estimate passes a scalar `bw_method` to use
# a smaller bandwidth factor (it is the curve labelled "KDE (low bw)" below).
kde = stats.gaussian_kde(X_samples)
kde_low_bw = stats.gaussian_kde(X_samples, bw_method=0.25)

# Evaluation grid for the density curves.
x = np.linspace(0, 20, 100)

fig, axes = plt.subplots(1, 3, figsize=(12, 3))

# Left panel: normalised histogram of the raw sample.  `density=True` is the
# replacement for the `normed` keyword, which Matplotlib no longer accepts.
axes[0].hist(X_samples, density=True, alpha=0.5, bins=25)

# Middle panel: both KDEs against the true chi-squared PDF.
axes[1].plot(x, kde(x), label="KDE")
axes[1].plot(x, kde_low_bw(x), label="KDE (low bw)")
axes[1].plot(x, X.pdf(x), label="True PDF")
axes[1].legend()

# Right panel: seaborn's combined histogram + KDE view of the sample.
# NOTE(review): `sns.distplot` is deprecated in seaborn; the documented
# replacement is `sns.histplot(X_samples, bins=25, kde=True, stat="density",
# ax=axes[2])`.  Kept here so the figure matches the one shown in the post.
sns.distplot(X_samples, bins=25, ax=axes[2])

download (23)

Additional KDE utilities
# Draw 10 new random samples from the estimated density `kde`.
kde.resample(10)
array([[ 2.21027713,  2.86300834,  5.63643055,  9.93925447, 11.0112984 ,
         5.53754038,  4.57539167,  0.18351943,  5.84327588,  5.67924786]])


def _kde_cdf(x):
    """Evaluate the cumulative distribution of the module-level ``kde`` at ``x``.

    The value is the estimated density integrated from minus infinity up
    to ``x``.
    """
    lower_limit = -np.inf
    return kde.integrate_box_1d(lower_limit, x)

def _kde_ppf(q):
    """Return the point at which the KDE's cumulative distribution equals ``q``.

    The root of ``kde_cdf(x) - q`` is located numerically with
    ``optimize.fsolve``, using the mean of the KDE's dataset as the
    starting guess.
    """
    def residual(x):
        # Zero exactly where the estimated CDF reaches the target level.
        return kde_cdf(x) - q

    start = kde.dataset.mean()
    roots = optimize.fsolve(residual, start)
    # fsolve returns an array of solutions; only the first one is wanted.
    return roots[0]
    
# Wrap the helpers with np.vectorize so they can be called on sequences of
# inputs; `_kde_ppf` itself relies on the vectorised `kde_cdf`.
kde_cdf = np.vectorize(_kde_cdf)
kde_ppf = np.vectorize(_kde_ppf)
# Quantiles of the estimated distribution at levels 0.05 and 0.95.
kde_ppf([0.05, 0.95])
array([0.53427617, 8.06347491])


List of posts followed by this article


Reference


OUTPUT