debian-mirror-gitlab/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb
2019-10-03 23:17:56 +05:30

71 lines
2.6 KiB
Ruby

module Statistics
  # Spearman's rank-order correlation coefficient (rho), plus the ranking
  # helper used to turn raw observations into ranks.
  class SpearmanRankCoefficient
    # Ranks +data+ in descending order, so the largest value receives rank 1.
    #
    # Values that appear more than once share the average of the consecutive
    # ranks they jointly occupy; that average is always a Float (even when it
    # is a whole number). Values that appear once keep their Integer rank.
    # +coefficient+ relies on this Integer/Float distinction to detect ties.
    #
    # @param data [Array] mutually comparable values (must support +<=>+).
    # @param return_ranks_only [Boolean] when true, return only the ranks,
    #   aligned with +data+; when false, return the full bookkeeping hash.
    # @return [Array<Numeric>] ranks in the same order as +data+, or
    # @return [Hash] +{ value => { counter:, rank:, tie_rank: } }+ where
    #   +counter+ is the number of occurrences, +rank+ is the sum of the ranks
    #   occupied by those occurrences and +tie_rank+ is the rank to report.
    def self.rank(data:, return_ranks_only: true)
      descending_order_data = data.sort { |a, b| b <=> a }

      # Record, for every value, the 1-based position of the first element of
      # its run of equal (==) elements in the sorted data. A single pass here
      # replaces a linear find_index lookup per element.
      first_rank = {}
      run_start = 0
      descending_order_data.each_with_index do |value, index|
        run_start = index if index.positive? && value != descending_order_data[index - 1]
        first_rank[value] ||= run_start + 1
      end

      # Count occurrences; keys keep the order of first appearance in +data+.
      occurrences = Hash.new(0)
      data.each { |value| occurrences[value] += 1 }

      rankings = occurrences.each_with_object({}) do |(value, count), result|
        first = first_rank[value]
        # Sum of the consecutive ranks first, first + 1, ..., first + count - 1.
        rank_sum = (count * first) + (count * (count - 1) / 2)
        tie_rank = count > 1 ? rank_sum / count.to_f : first
        result[value] = { counter: count, rank: rank_sum, tie_rank: tie_rank }
      end

      return rankings unless return_ranks_only

      data.map { |value| rankings[value][:tie_rank] }
    end

    # Computes Spearman's rho for two equally sized lists of ranks.
    #
    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
    #
    # If any rank in either set is a Float the sets are treated as containing
    # ties and rho is the Pearson correlation of the ranks; otherwise the
    # simplified formula 1 - 6 * sum(d^2) / (n^3 - n) is used.
    #
    # @param set_one [Array<Numeric>] ranks of the first variable.
    # @param set_two [Array<Numeric>] ranks of the second variable.
    # @return [Float, nil] rho, or nil when both sets are empty. A single-case
    #   input yields NaN because the denominator is zero.
    # @raise [RuntimeError] when the sets differ in size.
    def self.coefficient(set_one, set_two)
      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
      return if set_one.size.zero?

      have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }

      if have_tie_ranks
        # Means are only needed on this path and are computed locally, so the
        # method does not depend on an Enumerable#mean extension being loaded.
        set_one_mean = set_one.sum / set_one.size.to_f
        set_two_mean = set_two.sum / set_two.size.to_f

        numerator = 0
        squared_differences_set_one = 0
        squared_differences_set_two = 0

        set_one.zip(set_two) do |rank_one, rank_two|
          local_diff_one = rank_one - set_one_mean
          local_diff_two = rank_two - set_two_mean

          squared_differences_set_one += local_diff_one**2
          squared_differences_set_two += local_diff_two**2
          numerator += local_diff_one * local_diff_two
        end

        denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)

        numerator / denominator.to_f # This is rho or spearman's coefficient.
      else
        sum_squared_differences = set_one.zip(set_two).sum do |rank_one, rank_two|
          (rank_one - rank_two)**2
        end

        numerator = 6 * sum_squared_differences
        denominator = (set_one.size**3) - set_one.size

        1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
      end
    end
  end
end