# Ruby source file (83 lines, 3.5 KiB)
module Statistics
  module StatisticalTest
    # F-test helpers: the F-score computation and a one-way ANOVA decision wrapper.
    class FTest
      # Calculates the one-way ANOVA F-test statistic.
      #
      # Every argument is assumed to be an array of numeric observations (one array per
      # group). Each group must respond to #size, #mean and #variance; those helpers are
      # defined outside this file and are not validated here.
      #
      # Returns a three-element array:
      #   [F-statistic (F-score), degrees of freedom numerator, degrees of freedom denominator]
      #
      # * Fewer than two groups: returns [nil, nil, nil].
      # * Exactly two groups: the F-score is the ratio between the larger and the smaller
      #   group variance, reported with degrees of freedom (k - 1, N - k) where k = 2.
      #   NOTE(review): a classic variance-ratio F-test is usually paired with
      #   (n1 - 1, n2 - 1) degrees of freedom; confirm the ANOVA-style values are intended.
      # * More than two groups: the F-score is the between-groups mean square divided by
      #   the within-groups mean square, with degrees of freedom (k - 1, N - k).
      #
      # No guard is applied against a zero denominator (e.g. a zero variance, or N == k);
      # the result is whatever Float division yields in that case.
      #
      # Formulas extracted from:
      # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/
      # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html
      def self.anova_f_score(*args)
        total_groups = args.size
        return [nil, nil, nil] if total_groups < 2

        # Flatten once and reuse it for both the element count and the overall mean.
        observations = args.flatten
        total_elements = observations.size

        # With only two groups we follow the classic F-test for equality of variances,
        # which is the ratio between the variances.
        if total_groups == 2
          variances = args.map(&:variance)

          return [variances.max / variances.min.to_f, 1, total_elements - 2]
        end

        df_between = total_groups - 1              # k - 1
        df_within = total_elements - total_groups  # N - k
        between_divisor = df_between.to_f
        within_divisor = df_within.to_f

        overall_mean = observations.mean
        sample_means = args.map(&:mean)

        # Variance between groups: sum over groups of n_i * (mean_i - overall_mean)^2 / (k - 1).
        # Each term is divided before it is accumulated, so the floating-point result
        # matches the historical behaviour of this method.
        variance_between_groups = args.each_with_index.reduce(0) do |summation, (group, index)|
          weighted_deviation = group.size * ((sample_means[index] - overall_mean)**2)

          summation + (weighted_deviation / between_divisor)
        end

        # Variance within groups: sum over every observation of (x - mean_i)^2 / (N - k).
        variance_within_groups = args.each_with_index.reduce(0) do |outer_summation, (group, index)|
          group_mean = sample_means[index]
          group_contribution = group.reduce(0) do |inner_summation, observation|
            inner_summation + (((observation - group_mean)**2) / within_divisor)
          end

          outer_summation + group_contribution
        end

        [variance_between_groups / variance_within_groups.to_f, df_between, df_within]
      end

      # Runs the one-way ANOVA test for the given alpha value and groups.
      #
      # Returns nil when anova_f_score cannot produce a score (any of its three results
      # is nil, which happens with fewer than two groups). Otherwise returns a hash with:
      #   probability:      value of the F distribution's cumulative function at the F-score
      #   p_value:          1 - probability
      #   alpha:            the alpha value received
      #   null:             true when alpha < p_value
      #   alternative:      true when p_value <= alpha
      #   confidence_level: 1 - alpha
      #
      # Keep in mind that the values of the :null and :alternative keys (true/false) do not
      # imply that the corresponding hypothesis is TRUE or FALSE. They are a notational
      # convenience to decide whether to reject the null hypothesis or not.
      def self.one_way_anova(alpha, *args)
        f_score, df1, df2 = anova_f_score(*args)

        return if [f_score, df1, df2].any?(&:nil?)

        probability = Distribution::F.new(df1, df2).cumulative_function(f_score)
        p_value = 1 - probability

        # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha
        # we can assume that if p_value <= alpha, we can safely reject the null hypothesis,
        # i.e. accept the alternative hypothesis.
        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end
    end
  end
end