MATH05, Hypothesis tests
Back to the previous page | Statistics
List of posts to read before reading this article
Contents
- Binomial Test
- Normality Test
- Chi-squared Test
- One-sample z-Test
- One-sample t-Test
- Independent two-sample t-Test
- Paired two-sample t-Test
- Equal-variance Test
Binomial Test
# Binomial test, small sample: with seed 0 the Bernoulli(0.5) sample of
# size 10 contains 7 successes, giving a two-sided p-value of 0.34375 —
# H0: mu = 0.5 is not rejected.
from scipy import stats
import numpy as np
N, mu_0 = 10, 0.5          # sample size and success probability under H0
np.random.seed(0)          # reproducible draws (legacy RandomState stream)
x = stats.bernoulli(mu_0).rvs(N)   # 0/1 sample
n = np.count_nonzero(x)    # observed number of successes
# stats.binom_test was deprecated and removed in SciPy 1.12;
# stats.binomtest is the supported replacement (same exact two-sided test).
p_value = stats.binomtest(n, N).pvalue
p_value
0.3437499999999999
Visualization
# Binomial test, larger sample: with seed 0 the observed success count is
# close to N/2, so the two-sided p-value is large (~0.92) and
# H0: mu = 0.5 is not rejected.
from scipy import stats
import numpy as np
N, mu_0 = 100, 0.5
np.random.seed(0)          # reproducible draws (legacy RandomState stream)
x = stats.bernoulli(mu_0).rvs(N)
n = np.count_nonzero(x)    # observed number of successes
# stats.binom_test was deprecated and removed in SciPy 1.12;
# stats.binomtest is the supported replacement (same exact two-sided test).
p_value = stats.binomtest(n, N).pvalue
p_value
0.9204107626128206
Visualization
Normality Test
Common normality tests include the Shapiro–Wilk test and the Anderson–Darling test. The example below instead uses the two-sample Kolmogorov–Smirnov test (scipy.stats.ks_2samp), which checks whether two samples are drawn from the same distribution.
# Two-sample Kolmogorov–Smirnov test: do the two samples come from the
# same continuous distribution?  Here they deliberately do not
# (different mean and scale), and the test rejects at the 5% level.
from scipy import stats
import numpy as np
np.random.seed(0)
N1, N2 = 50, 100
# stats.norm.rvs(loc, scale, size) consumes the same RandomState stream
# as the frozen stats.norm(loc, scale).rvs(size) form.
x1 = stats.norm.rvs(loc=0, scale=1, size=N1)
x2 = stats.norm.rvs(loc=0.5, scale=1.5, size=N2)
stats.ks_2samp(x1, x2)
Ks_2sampResult(statistic=0.23000000000000004, pvalue=0.049516112814422863)
Visualization
# Visualization of the two samples compared by the KS test above:
# histograms with fitted normal curves.
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(0)
N1, N2 = 50, 100
x1 = stats.norm(0, 1).rvs(N1)
x2 = stats.norm(0.5, 1.5).rvs(N2)
# NOTE(review): sns.distplot is deprecated (removed in seaborn 0.14);
# displot/histplot are the replacements, but they drop the fit= option.
ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
# Fixed label typo: "2rd dataset" -> "2nd dataset".
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
# Dashed style distinguishes the first fitted curve from the second.
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()
Chi-squared Test
Goodness-of-fit test
# Chi-squared goodness-of-fit test: are the observed category counts
# consistent with a uniform distribution over K categories?
from scipy import stats
import numpy as np
N, K = 10, 4
mu_0 = np.full(K, 1.0 / K)        # H0: every category equally likely
np.random.seed(0)
x = np.random.choice(K, N, p=mu_0)
n = np.bincount(x, minlength=K)   # observed counts per category
stats.chisquare(n)
Power_divergenceResult(statistic=5.199999999999999, pvalue=0.157724450396663)
Visualization
Test of independence
# Chi-squared test of independence on a 2x2 contingency table
# (returns statistic, p-value, degrees of freedom, expected counts).
from scipy import stats
import numpy as np
row1 = [5, 15]
row2 = [10, 20]
obs = np.array([row1, row2])
stats.chi2_contingency(obs)
(0.0992063492063492, 0.7527841326498471, 1, array([[ 6., 14.], [ 9., 21.]]))
Visualization
One-sample z-Test
from scipy import stats
import numpy as np
def ztest_1samp(x, sigma2=1, mu=0):
    """One-sample z-test with known population variance.

    Returns the z statistic and the two-sided p-value for
    H0: E[x] == mu, given population variance ``sigma2``.
    """
    std_err = np.sqrt(sigma2 / len(x))       # standard error of the mean
    z = (x.mean() - mu) / std_err
    p_value = 2 * stats.norm().sf(np.abs(z)) # two-sided tail probability
    return z, p_value
# Demo: N(0, 1) sample of size 10; seed 0 happens to give a sample mean
# far enough from 0 that H0 is rejected at the 5% level.
N, mu_0 = 10, 0
np.random.seed(0)
x = stats.norm(mu_0).rvs(N)
ztest_1samp(x)
(2.3338341854824276, 0.019604406021683538)
Visualization
One-sample t-Test
# One-sample t-test: like the z-test above, but the population variance
# is unknown and estimated from the sample.
from scipy import stats
import numpy as np
N, mu_0 = 10, 0
np.random.seed(0)
# Same draws as stats.norm(mu_0).rvs(N): identical RandomState stream.
x = stats.norm.rvs(loc=mu_0, scale=1, size=N)
stats.ttest_1samp(x, popmean=0)
Ttest_1sampResult(statistic=2.28943967238967, pvalue=0.04781846490857058)
Visualization
Independent two-sample t-Test
Equal sample sizes, equal variance
# Independent two-sample t-test, equal sample sizes and equal variance.
from scipy import stats
import numpy as np  # was missing: np.random.seed below needs numpy
np.random.seed(12345678)
rvs1 = stats.norm.rvs(loc=5, scale=10, size=500)
rvs2 = stats.norm.rvs(loc=5, scale=10, size=500)
# Student's t-test (pooled variance) ...
stats.ttest_ind(rvs1, rvs2)
# ... and Welch's t-test (no equal-variance assumption); with equal
# sample sizes and variances the two give nearly identical p-values.
stats.ttest_ind(rvs1, rvs2, equal_var=False)
(0.26833823296239279, 0.78849443369564776)
(0.26833823296239279, 0.78849452749500748)
Visualization
Equal or unequal sample sizes, equal variance
# Independent two-sample t-test with pooled variance (equal_var=True),
# unequal sample sizes.
from scipy import stats
import numpy as np
N_1, mu_1, sigma_1 = 50, 0, 1
N_2, mu_2, sigma_2 = 100, 0.5, 1
np.random.seed(0)
# Same draws as the frozen-distribution form: identical RandomState stream.
x1 = stats.norm.rvs(loc=mu_1, scale=sigma_1, size=N_1)
x2 = stats.norm.rvs(loc=mu_2, scale=sigma_2, size=N_2)
stats.ttest_ind(x1, x2, equal_var=True)
Ttest_indResult(statistic=-2.6826951236616963, pvalue=0.008133970915722658)
Visualization
Equal or unequal sample sizes, unequal variances
# Welch's t-test (equal_var=False): independent samples whose variances
# are not assumed equal.
from scipy import stats
import numpy as np
N_1, mu_1, sigma_1 = 10, 0, 1
N_2, mu_2, sigma_2 = 10, 0.5, 1
np.random.seed(0)
# Same draws as the frozen-distribution form: identical RandomState stream.
x1 = stats.norm.rvs(loc=mu_1, scale=sigma_1, size=N_1)
x2 = stats.norm.rvs(loc=mu_2, scale=sigma_2, size=N_2)
stats.ttest_ind(x1, x2, equal_var=False)
Ttest_indResult(statistic=-0.4139968526988655, pvalue=0.6843504889824326)
Visualization
# Visualization of the two small samples from the Welch's t-test example:
# histograms with fitted normal curves.
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
N_1, mu_1, sigma_1 = 10, 0, 1
N_2, mu_2, sigma_2 = 10, 0.5, 1
np.random.seed(0)
x1 = stats.norm(mu_1, sigma_1).rvs(N_1)
x2 = stats.norm(mu_2, sigma_2).rvs(N_2)
# NOTE(review): sns.distplot is deprecated (removed in seaborn 0.14);
# displot/histplot are the replacements, but they drop the fit= option.
ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
# Dashed style distinguishes the first fitted curve from the second.
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()
Paired two-sample t-Test
# Paired two-sample t-test: x2 is x1 plus a small noisy shift, so the
# pairs are strongly correlated and the paired test detects the shift
# even with only N = 5 observations.
from scipy import stats
import numpy as np
N = 5
mu_1, mu_2 = 0, 0.4
np.random.seed(1)
# Same draws as the frozen-distribution form: identical RandomState stream.
x1 = stats.norm.rvs(loc=mu_1, size=N)
shift = stats.norm.rvs(loc=mu_2, scale=0.1, size=N)
x2 = x1 + shift
stats.ttest_rel(x1, x2)
Ttest_relResult(statistic=-5.662482449248929, pvalue=0.0047953456833781305)
Visualization
# Visualization of the paired samples from the paired t-test example:
# histograms with fitted normal curves.
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
N = 5
mu_1, mu_2 = 0, 0.4
np.random.seed(1)
x1 = stats.norm(mu_1).rvs(N)
x2 = x1 + stats.norm(mu_2, 0.1).rvs(N)
# NOTE(review): sns.distplot is deprecated (removed in seaborn 0.14);
# displot/histplot are the replacements, but they drop the fit= option.
ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
# Dashed style distinguishes the first fitted curve from the second.
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()
Equal-variance Test
# Equal-variance tests on two normal samples with different true scales:
# Bartlett assumes normality; Fligner-Killeen and Levene are robust
# alternatives.  All three print (statistic, p-value) results.
from scipy import stats
import numpy as np
N1, sigma_1 = 100, 1
N2, sigma_2 = 100, 1.2
np.random.seed(0)
# Same draws as the frozen-distribution form: identical RandomState stream.
x1 = stats.norm.rvs(loc=0, scale=sigma_1, size=N1)
x2 = stats.norm.rvs(loc=0, scale=sigma_2, size=N2)
for result in (stats.bartlett(x1, x2), stats.fligner(x1, x2), stats.levene(x1, x2)):
    print(result)
FlignerResult(statistic=7.224841990409457, pvalue=0.007190150106748367)
LeveneResult(statistic=7.680708947679437, pvalue=0.0061135154970207925)
Visualization
# Visualization of the two samples used in the equal-variance tests:
# histograms with fitted normal curves.
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
N1, sigma_1 = 100, 1
N2, sigma_2 = 100, 1.2
np.random.seed(0)
x1 = stats.norm(0, sigma_1).rvs(N1)
x2 = stats.norm(0, sigma_2).rvs(N2)
# NOTE(review): sns.distplot is deprecated (removed in seaborn 0.14);
# displot/histplot are the replacements, but they drop the fit= option.
ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
# Fixed label typo: "2rd dataset" -> "2nd dataset".
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
# Dashed style distinguishes the first fitted curve from the second.
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()
List of posts followed by this article
Reference