MATH05, Hypothesis tests

List of posts to read before reading this article



Binomial Test

from scipy import stats
import numpy as np

N, mu_0 = 10, 0.5                   # number of trials, success probability under H0
np.random.seed(0)
x = stats.bernoulli(mu_0).rvs(N)    # simulate N Bernoulli trials
n = np.count_nonzero(x)             # observed number of successes
stats.binom_test(n, N)              # two-sided binomial test of H0: p = 0.5
                                    # (newer SciPy: stats.binomtest(n, N).pvalue)

0.3437499999999999

Visualization
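A minimal sketch of one way to visualize this test, assuming matplotlib is available: draw the Binomial(N, mu_0) pmf under H0 and mark the observed number of successes.

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

N, mu_0 = 10, 0.5
np.random.seed(0)
x = stats.bernoulli(mu_0).rvs(N)
n = np.count_nonzero(x)               # observed number of successes

k = np.arange(N + 1)
pmf = stats.binom(N, mu_0).pmf(k)     # null distribution of the success count

plt.bar(k, pmf, color="lightgray", label="Binomial(N, mu_0) pmf under H0")
plt.axvline(n, color="r", linestyle="--", label="observed successes")
plt.xlabel("number of successes")
plt.ylabel("probability")
plt.legend()
plt.show()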




from scipy import stats
import numpy as np

N, mu_0 = 100, 0.5
np.random.seed(0)
x = stats.bernoulli(mu_0).rvs(N)
n = np.count_nonzero(x)
stats.binom_test(n, N)

0.9204107626128206

Visualization





Normality Test

Shapiro–Wilk test

Anderson–Darling test

The example below uses the two-sample Kolmogorov–Smirnov test (stats.ks_2samp), which checks whether two samples are drawn from the same distribution rather than testing a single sample for normality.
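As a minimal sketch of the two normality tests listed above (the sample here is hypothetical, drawn from a standard normal):

from scipy import stats
import numpy as np

np.random.seed(0)
x = stats.norm(0, 1).rvs(100)         # hypothetical sample to be tested for normality

# Shapiro-Wilk: H0 is that the sample comes from a normal distribution
print(stats.shapiro(x))

# Anderson-Darling: returns a statistic plus critical values for several
# significance levels instead of a single p-value
print(stats.anderson(x, dist='norm'))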

from scipy import stats
import numpy as np


np.random.seed(0)
N1, N2 = 50, 100

x1 = stats.norm(0, 1).rvs(N1)
x2 = stats.norm(0.5, 1.5).rvs(N2)

stats.ks_2samp(x1, x2)

Ks_2sampResult(statistic=0.23000000000000004, pvalue=0.049516112814422863)

Visualization
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

np.random.seed(0)
N1, N2 = 50, 100

x1 = stats.norm(0, 1).rvs(N1)
x2 = stats.norm(0.5, 1.5).rvs(N2)

ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()

[Figure: histograms of the two samples with fitted normal curves]






Chi-squared Test

goodness of fit test

from scipy import stats
import numpy as np

N, K = 10, 4                        # N draws over K categories
mu_0 = np.ones(K)/K                 # uniform category probabilities under H0
np.random.seed(0)
x = np.random.choice(K, N, p=mu_0)
n = np.bincount(x, minlength=K)     # observed counts per category
stats.chisquare(n)                  # goodness-of-fit test against uniform expected counts

Power_divergenceResult(statistic=5.199999999999999, pvalue=0.157724450396663)

Visualization
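A minimal sketch, assuming matplotlib: compare the observed category counts with the counts expected under the uniform null hypothesis.

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

N, K = 10, 4
mu_0 = np.ones(K)/K
np.random.seed(0)
x = np.random.choice(K, N, p=mu_0)
n = np.bincount(x, minlength=K)       # observed counts per category

k = np.arange(K)
plt.bar(k - 0.2, n, width=0.4, label="observed")
plt.bar(k + 0.2, np.full(K, N/K), width=0.4, label="expected under H0")
plt.xticks(k)
plt.xlabel("category")
plt.ylabel("count")
plt.legend()
plt.show()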




test of independence

from scipy import stats
import numpy as np

obs = np.array([[5, 15], [10, 20]])    # 2x2 contingency table of observed counts
stats.chi2_contingency(obs)            # returns (chi2, p-value, dof, expected counts)

(0.0992063492063492, 0.7527841326498471, 1, array([[ 6., 14.], [ 9., 21.]]))

Visualization
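A minimal sketch using seaborn heatmaps (assumed available, as in the other visualizations) to compare the observed table with the expected counts returned by stats.chi2_contingency:

from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

obs = np.array([[5, 15], [10, 20]])
chi2, p, dof, expected = stats.chi2_contingency(obs)

fig, axes = plt.subplots(1, 2)
sns.heatmap(obs, annot=True, fmt="d", cbar=False, ax=axes[0])
axes[0].set_title("observed")
sns.heatmap(expected, annot=True, fmt=".1f", cbar=False, ax=axes[1])
axes[1].set_title("expected under independence")
plt.show()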





One-sample z-Test

test_z

from scipy import stats
import numpy as np

def ztest_1samp(x, sigma2=1, mu=0):
    # one-sample z-test with known variance sigma2, H0: E[x] = mu
    z = (x.mean() - mu) / np.sqrt(sigma2/len(x))
    return z, 2 * stats.norm().sf(np.abs(z))    # statistic and two-sided p-value

N, mu_0 = 10, 0
np.random.seed(0)
x = stats.norm(mu_0).rvs(N)
ztest_1samp(x)

(2.3338341854824276, 0.019604406021683538)

Visualization
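A minimal sketch, assuming matplotlib: plot the standard normal density of the statistic under H0 and mark the observed z value.

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

def ztest_1samp(x, sigma2=1, mu=0):
    z = (x.mean() - mu) / np.sqrt(sigma2/len(x))
    return z, 2 * stats.norm().sf(np.abs(z))

N, mu_0 = 10, 0
np.random.seed(0)
x = stats.norm(mu_0).rvs(N)
z, p = ztest_1samp(x)

grid = np.linspace(-4, 4, 200)
plt.plot(grid, stats.norm().pdf(grid), label="N(0, 1) under H0")
plt.axvline(z, color="r", linestyle="--", label="observed z")
plt.legend()
plt.show()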





One-sample t-Test

from scipy import stats
import numpy as np

N, mu_0 = 10, 0
np.random.seed(0)
x = stats.norm(mu_0).rvs(N)
stats.ttest_1samp(x, popmean=0)

Ttest_1sampResult(statistic=2.28943967238967, pvalue=0.04781846490857058)

Visualization
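A minimal sketch, assuming matplotlib: draw the t distribution with N-1 degrees of freedom and mark the observed statistic.

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

N, mu_0 = 10, 0
np.random.seed(0)
x = stats.norm(mu_0).rvs(N)
result = stats.ttest_1samp(x, popmean=0)

grid = np.linspace(-4, 4, 200)
plt.plot(grid, stats.t(df=N-1).pdf(grid), label="t(N-1) under H0")
plt.axvline(result.statistic, color="r", linestyle="--", label="observed t")
plt.legend()
plt.show()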





Independent two-sample t-Test

Equal sample sizes, equal variance

test_t_for_mean_under_unknown_different_variances_of_two_group_with_large_samples
test_t_for_mean_under_unknown_equivalent_variances_of_two_group_with_small_samples

from scipy import stats
import numpy as np

np.random.seed(12345678)

rvs1 = stats.norm.rvs(loc=5, scale=10, size=500)
rvs2 = stats.norm.rvs(loc=5, scale=10, size=500)

stats.ttest_ind(rvs1, rvs2)                    # Student's t-test (pooled variance)
stats.ttest_ind(rvs1, rvs2, equal_var=False)   # Welch's t-test (no equal-variance assumption)

(0.26833823296239279, 0.78849443369564776)
(0.26833823296239279, 0.78849452749500748)

Visualization
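A minimal sketch with plain matplotlib histograms of the two simulated samples (both drawn from N(5, 10)):

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(12345678)
rvs1 = stats.norm.rvs(loc=5, scale=10, size=500)
rvs2 = stats.norm.rvs(loc=5, scale=10, size=500)

plt.hist(rvs1, bins=30, alpha=0.5, label="1st dataset")
plt.hist(rvs2, bins=30, alpha=0.5, label="2nd dataset")
plt.legend()
plt.show()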





Equal or unequal sample sizes, equal variance

from scipy import stats
import numpy as np

N_1, mu_1, sigma_1 = 50, 0, 1
N_2, mu_2, sigma_2 = 100, 0.5, 1

np.random.seed(0)
x1 = stats.norm(mu_1, sigma_1).rvs(N_1)
x2 = stats.norm(mu_2, sigma_2).rvs(N_2)
stats.ttest_ind(x1, x2, equal_var=True)

Ttest_indResult(statistic=-2.6826951236616963, pvalue=0.008133970915722658)

Visualization
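A minimal sketch with matplotlib histograms, density-normalized because the two sample sizes differ:

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

N_1, mu_1, sigma_1 = 50, 0, 1
N_2, mu_2, sigma_2 = 100, 0.5, 1

np.random.seed(0)
x1 = stats.norm(mu_1, sigma_1).rvs(N_1)
x2 = stats.norm(mu_2, sigma_2).rvs(N_2)

plt.hist(x1, bins=15, alpha=0.5, density=True, label="1st dataset")
plt.hist(x2, bins=15, alpha=0.5, density=True, label="2nd dataset")
plt.legend()
plt.show()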




Equal or unequal sample sizes, unequal variances

from scipy import stats
import numpy as np

N_1, mu_1, sigma_1 = 10, 0, 1
N_2, mu_2, sigma_2 = 10, 0.5, 1

np.random.seed(0)
x1 = stats.norm(mu_1, sigma_1).rvs(N_1)
x2 = stats.norm(mu_2, sigma_2).rvs(N_2)
stats.ttest_ind(x1, x2, equal_var=False)

Ttest_indResult(statistic=-0.4139968526988655, pvalue=0.6843504889824326)

Visualization
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

N_1, mu_1, sigma_1 = 10, 0, 1
N_2, mu_2, sigma_2 = 10, 0.5, 1

np.random.seed(0)
x1 = stats.norm(mu_1, sigma_1).rvs(N_1)
x2 = stats.norm(mu_2, sigma_2).rvs(N_2)

ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()

[Figure: histograms of the two datasets with fitted normal curves]






Paired two-sample t-Test

test_t_for_mean_of_paired_sample

from scipy import stats
import numpy as np

N = 5
mu_1, mu_2 = 0, 0.4

np.random.seed(1)
x1 = stats.norm(mu_1).rvs(N)
x2 = x1 + stats.norm(mu_2, 0.1).rvs(N)

stats.ttest_rel(x1, x2)

Ttest_relResult(statistic=-5.662482449248929, pvalue=0.0047953456833781305)

Visualization
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

N = 5
mu_1, mu_2 = 0, 0.4

np.random.seed(1)
x1 = stats.norm(mu_1).rvs(N)
x2 = x1 + stats.norm(mu_2, 0.1).rvs(N)

ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()

[Figure: histograms of the paired datasets with fitted normal curves]






Equal-variance Test

test_F_for_variance_rate_of_two_group
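SciPy has no built-in two-sample F-test for a variance ratio, so the helper below is a minimal sketch of it (the name ftest_var_ratio is hypothetical, and the test assumes approximately normal data); the Bartlett, Fligner and Levene tests used in the example that follows are the library-provided alternatives.

from scipy import stats
import numpy as np

def ftest_var_ratio(x1, x2):
    # F statistic: ratio of unbiased sample variances
    f = np.var(x1, ddof=1) / np.var(x2, ddof=1)
    df1, df2 = len(x1) - 1, len(x2) - 1
    # two-sided p-value from the F(df1, df2) distribution
    p = 2 * min(stats.f(df1, df2).cdf(f), stats.f(df1, df2).sf(f))
    return f, p

np.random.seed(0)
x1 = stats.norm(0, 1).rvs(100)
x2 = stats.norm(0, 1.2).rvs(100)
ftest_var_ratio(x1, x2)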

from scipy import stats
import numpy as np

N1, sigma_1 = 100, 1
N2, sigma_2 = 100, 1.2

np.random.seed(0)
x1 = stats.norm(0, sigma_1).rvs(N1)
x2 = stats.norm(0, sigma_2).rvs(N2)

print(stats.bartlett(x1, x2))   # Bartlett's test (assumes normality)
print(stats.fligner(x1, x2))    # Fligner-Killeen test (non-parametric, robust)
print(stats.levene(x1, x2))     # Levene's test (robust to non-normality)

BartlettResult(statistic=4.253473837232266, pvalue=0.039170128783651344)
FlignerResult(statistic=7.224841990409457, pvalue=0.007190150106748367)
LeveneResult(statistic=7.680708947679437, pvalue=0.0061135154970207925)

Visualization
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

N1, sigma_1 = 100, 1
N2, sigma_2 = 100, 1.2

np.random.seed(0)
x1 = stats.norm(0, sigma_1).rvs(N1)
x2 = stats.norm(0, sigma_2).rvs(N2)

ax = sns.distplot(x1, kde=False, fit=stats.norm, label="1st dataset")
ax = sns.distplot(x2, kde=False, fit=stats.norm, label="2nd dataset")
ax.lines[0].set_linestyle(":")
plt.legend()
plt.show()

[Figure: histograms of the two datasets with fitted normal curves]






List of posts followed by this article

