debian-mirror-gitlab/ruby-statistics/lib/statistics/statistical_test/f_test.rb
2019-10-03 23:17:56 +05:30

83 lines
3.5 KiB
Ruby

module Statistics
  module StatisticalTest
    class FTest
      # Calculates the F statistic for the given groups.
      #
      # Every argument is assumed to be an array of numeric observations that
      # responds to `mean` and `variance` (these helpers are defined outside
      # this file).
      #
      # * With exactly two groups, the classic F-test for equality of variances
      #   is used: the statistic is the larger variance divided by the smaller.
      # * With more than two groups, the one-way ANOVA statistic is used: the
      #   variance between groups divided by the variance within groups.
      # * With fewer than two groups nothing can be computed and every element
      #   of the result is nil.
      #
      # Returns an array with three elements:
      # [F-statistic or F-score, degrees of freedom numerator, degrees of freedom denominator].
      #
      # Formulas extracted from:
      # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/
      # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html
      def self.anova_f_score(*args)
        total_groups = args.size
        return [nil, nil, nil] if total_groups < 2

        # Flatten once; both the total count and the overall mean are derived from it.
        all_observations = args.flatten
        total_elements = all_observations.size

        if total_groups == 2
          variances = args.map(&:variance)
          f_score = variances.max / variances.min.to_f

          # NOTE(review): these are the ANOVA degrees of freedom for k = 2
          # (k - 1 and N - k). The variance-ratio test is usually referred to an
          # F(n1 - 1, n2 - 1) distribution; kept as-is because callers may rely
          # on the current values -- confirm which one is intended.
          return [f_score, 1, total_elements - 2]
        end

        sample_means = args.map(&:mean)
        overall_mean = all_observations.mean

        between = variance_between_groups(args, sample_means, overall_mean)
        within = variance_within_groups(args, sample_means, total_elements)

        [between / within.to_f, total_groups - 1, total_elements - total_groups]
      end

      # Runs the one-way ANOVA test for the given alpha value and groups.
      #
      # Returns nil when the F statistic cannot be calculated (fewer than two
      # groups). Otherwise returns a hash with the cumulative probability of the
      # F-score, the p-value, the alpha, the confidence level and the decision.
      #
      # Keep in mind that the values of the :null and :alternative keys
      # (true/false) do not imply that the hypothesis itself is TRUE or FALSE.
      # They are only a notation to tell whether the null hypothesis is kept
      # (null: true) or rejected (alternative: true).
      def self.one_way_anova(alpha, *args)
        f_score, df1, df2 = anova_f_score(*args)
        return if f_score.nil? || df1.nil? || df2.nil?

        probability = Distribution::F.new(df1, df2).cumulative_function(f_score)
        p_value = 1 - probability

        # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha
        # we can assume that if p_value <= alpha, we can safely reject the null
        # hypothesis, i.e. accept the alternative hypothesis.
        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end

      # Variance between groups: sum over the groups of
      # size * (group mean - overall mean)^2, each term divided by (k - 1).
      # The division is applied per term to keep the accumulation order stable.
      def self.variance_between_groups(groups, sample_means, overall_mean)
        denominator = (groups.size - 1).to_f

        groups.zip(sample_means).reduce(0) do |summation, (group, group_mean)|
          summation + (group.size * ((group_mean - overall_mean)**2)) / denominator
        end
      end

      # Variance within groups: sum over every observation of
      # (observation - its group mean)^2, each term divided by (N - k).
      # Observations are accumulated per group first, then across groups.
      def self.variance_within_groups(groups, sample_means, total_elements)
        denominator = (total_elements - groups.size).to_f

        groups.zip(sample_means).reduce(0) do |outer_summation, (group, group_mean)|
          group_summation = group.reduce(0) do |inner_summation, observation|
            inner_summation + ((observation - group_mean)**2) / denominator
          end

          outer_summation + group_summation
        end
      end

      private_class_method :variance_between_groups, :variance_within_groups
    end
  end
end