71 lines
2.6 KiB
Ruby
71 lines
2.6 KiB
Ruby
module Statistics
  # Spearman's rank-order correlation: ranking of raw values (with averaged
  # ranks for ties) and computation of the rho coefficient from two rank sets.
  class SpearmanRankCoefficient
    # Ranks +data+ in descending order: the largest value gets rank 1.
    #
    # Values that compare equal (+==+) are treated as a tie and all receive
    # the average of the positions they occupy, as a Float (e.g. two values
    # sharing positions 2 and 3 both get 2.5). Untied values keep their
    # Integer position; `coefficient` relies on that Integer/Float distinction
    # to detect ties.
    #
    # @param data [Array] values mutually comparable with +<=>+.
    # @param return_ranks_only [Boolean] selects the return shape (see below).
    # @return [Array] when +return_ranks_only+ is true: the rank of every
    #   element, in the same order as +data+.
    # @return [Hash] otherwise: each distinct value (in order of first
    #   appearance in +data+) mapped to
    #   +{ counter: occurrences, rank: sum of positions, tie_rank: rank }+.
    def self.rank(data:, return_ranks_only: true)
      descending_order_data = data.sort { |a, b| b <=> a }

      # Equal values are adjacent once sorted, so a single pass that extends
      # the current run of equal values accumulates every tie without
      # re-scanning the array for each element.
      stats_by_value = {}
      previous = nil
      run = nil

      descending_order_data.each_with_index do |value, index|
        position = index + 1 # ranks are 1-based

        if run && previous == value
          run[:rank] += position
          run[:counter] += 1
          run[:tie_rank] = run[:rank] / run[:counter].to_f
        else
          run = { counter: 1, rank: position, tie_rank: position }
        end

        # Values that are == but not eql? (1 and 1.0) are different Hash keys;
        # pointing both at the same run makes them share one tie rank.
        stats_by_value[value] = run
        previous = value
      end

      if return_ranks_only
        data.map { |value| stats_by_value[value][:tie_rank] }
      else
        # Rebuild in order of first appearance and hand out copies so callers
        # never receive two keys aliasing the same mutable Hash.
        data.each_with_object({}) do |value, rankings|
          rankings[value] ||= stats_by_value[value].dup
        end
      end
    end

    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
    #
    # Computes Spearman's rho for two equally sized sets of ranks.
    #
    # If any rank in either set is a Float the sets are assumed to contain
    # tied (averaged) ranks and the product-moment formula over the ranks is
    # used; otherwise the shortcut 1 - 6 * sum(d^2) / (n^3 - n) is applied.
    # When the chosen formula's denominator is zero (for instance a single
    # pair of ranks) the division yields a non-finite Float rather than
    # raising.
    #
    # @param set_one [Array<Numeric>] ranks of the first variable.
    # @param set_two [Array<Numeric>] ranks of the second variable.
    # @return [Float, nil] rho, or nil when both sets are empty.
    # @raise [RuntimeError] when the sets differ in size.
    def self.coefficient(set_one, set_two)
      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
      return if set_one.empty? # sizes are equal here, so both sets are empty

      cases = set_one.size
      have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }

      if have_tie_ranks
        # Means are only needed by this branch, and are computed here so the
        # method does not depend on a non-core Array#mean being defined.
        set_one_mean = set_one.reduce(:+) / cases.to_f
        set_two_mean = set_two.reduce(:+) / cases.to_f

        numerator = 0
        squared_differences_set_one = 0
        squared_differences_set_two = 0

        set_one.zip(set_two) do |rank_one, rank_two|
          local_diff_one = rank_one - set_one_mean
          local_diff_two = rank_two - set_two_mean

          squared_differences_set_one += local_diff_one**2
          squared_differences_set_two += local_diff_two**2

          numerator += local_diff_one * local_diff_two
        end

        denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)

        numerator / denominator.to_f # This is rho or spearman's coefficient.
      else
        sum_squared_differences = set_one.zip(set_two).reduce(0) do |memo, (rank_one, rank_two)|
          memo + ((rank_one - rank_two)**2)
        end

        numerator = 6 * sum_squared_differences
        denominator = (cases**3) - cases

        1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
      end
    end
  end
end
|