diff --git a/ruby-statistics/.gitignore b/ruby-statistics/.gitignore deleted file mode 100644 index 73540b9aab..0000000000 --- a/ruby-statistics/.gitignore +++ /dev/null @@ -1,65 +0,0 @@ -*.gem -*.rbc -/.config -/coverage/ -/InstalledFiles -/pkg/ -/spec/reports/ -/spec/examples.txt -/test/tmp/ -/test/version_tmp/ -/tmp/ - -# Used by dotenv library to load environment variables. -# .env - -## Specific to RubyMotion: -.dat* -.repl_history -build/ -*.bridgesupport -build-iPhoneOS/ -build-iPhoneSimulator/ - -## Specific to RubyMotion (use of CocoaPods): -# -# We recommend against adding the Pods directory to your .gitignore. However -# you should judge for yourself, the pros and cons are mentioned at: -# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control -# -# vendor/Pods/ - -## Documentation cache and generated files: -/.yardoc/ -/_yardoc/ -/doc/ -/rdoc/ - -## Environment normalization: -/.bundle/ -/vendor/bundle -/lib/bundler/man/ - -# for a library or gem, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# Gemfile.lock -.ruby-version -.ruby-gemset - -# unless supporting rvm < 1.11.0 or doing something fancy, ignore this: -.rvmrc -/.bundle/ -/.yardoc -/Gemfile.lock -/_yardoc/ -/coverage/ -/doc/ -/pkg/ -/spec/reports/ -/tmp/ - -# rspec failure tracking -.rspec_status - -# byebug -.byebug_history diff --git a/ruby-statistics/.rspec b/ruby-statistics/.rspec deleted file mode 100644 index 8c18f1abdd..0000000000 --- a/ruby-statistics/.rspec +++ /dev/null @@ -1,2 +0,0 @@ ---format documentation ---color diff --git a/ruby-statistics/.travis.yml b/ruby-statistics/.travis.yml deleted file mode 100644 index 606201e265..0000000000 --- a/ruby-statistics/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -sudo: false -language: ruby -rvm: - - 2.3.7 - - 2.4.4 - - 2.5.1 - - 2.6.0 -before_install: gem update --system && gem install bundler diff --git a/ruby-statistics/CODE_OF_CONDUCT.md b/ruby-statistics/CODE_OF_CONDUCT.md deleted file mode 100644 index 9889d44c59..0000000000 --- a/ruby-statistics/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,74 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of experience, -nationality, personal appearance, race, religion, or sexual identity and -orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at ezapata@altavistaed.com. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/ruby-statistics/CONTRIBUTING.md b/ruby-statistics/CONTRIBUTING.md deleted file mode 100644 index 7315d24786..0000000000 --- a/ruby-statistics/CONTRIBUTING.md +++ /dev/null @@ -1 +0,0 @@ -Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/). diff --git a/ruby-statistics/Gemfile b/ruby-statistics/Gemfile deleted file mode 100644 index d006398a9e..0000000000 --- a/ruby-statistics/Gemfile +++ /dev/null @@ -1,6 +0,0 @@ -source "https://rubygems.org" - -git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } - -# Specify your gem's dependencies in statistics.gemspec -gemspec diff --git a/ruby-statistics/LICENSE b/ruby-statistics/LICENSE deleted file mode 100644 index d58962a2a8..0000000000 --- a/ruby-statistics/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Esteban Zapata Rojas - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/ruby-statistics/LICENSE.txt b/ruby-statistics/LICENSE.txt deleted file mode 100644 index b2d0a42d7f..0000000000 --- a/ruby-statistics/LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2017 esteban zapata - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/ruby-statistics/README.md b/ruby-statistics/README.md deleted file mode 100644 index b85f775e03..0000000000 --- a/ruby-statistics/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Ruby Statistics - -![](https://travis-ci.org/estebanz01/ruby-statistics.svg?branch=master) - -A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar. - -Unit test runs under the following ruby versions: -* Ruby 2.2. -* Ruby 2.3.1. -* Ruby 2.4.0. -* Ruby 2.5.0. - -We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html). - -Some logic and algorithms are extractions or adaptations from other authors, which are referenced in the comments. -This software is released under the MIT License. - -## Installation - -Add this line to your application's Gemfile: - -```ruby -gem 'ruby-statistics' -``` - -And then execute: - - $ bundle - -Or install it yourself as: - - $ gem install ruby-statistics - -## Basic Usage - -just require the `statistics` gem in order to load it. If you don't have defined the `Distribution` namespace, the gem will assign an alias, reducing the number of namespaces needed to use a class. - -Right now you can load: - -* The whole statistics gem. `require 'statistics'` -* A namespace. `require 'statistics/distribution'` -* A class. `require 'statistics/distribution/normal'` - -Feel free to use the one that is more convenient to you. - -### Hello-World Example -```ruby -require 'statistics' - -poisson = Distribution::Poisson.new(l) # Using Distribution alias. -normal = Statistics::Distribution::StandardNormal.new # Using all namespaces. -``` - -## Documentation -You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki) - -## Development - -After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. - -To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). - -## Contributing - -Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. - -## License - -The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). - -## Code of Conduct - -Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/estebanz01/ruby-statistics/blob/master/CODE_OF_CONDUCT.md). - -## Contact - -You can contact me via: -* [Github](https://github.com/estebanz01) -* [Twitter](https://twitter.com/estebanz01) diff --git a/ruby-statistics/Rakefile b/ruby-statistics/Rakefile deleted file mode 100644 index b7e9ed549b..0000000000 --- a/ruby-statistics/Rakefile +++ /dev/null @@ -1,6 +0,0 @@ -require "bundler/gem_tasks" -require "rspec/core/rake_task" - -RSpec::Core::RakeTask.new(:spec) - -task :default => :spec diff --git a/ruby-statistics/bin/console b/ruby-statistics/bin/console deleted file mode 100755 index 35213b9e9e..0000000000 --- a/ruby-statistics/bin/console +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require "bundler/setup" -require "statistics" - -# You can add fixtures and/or initialization code here to make experimenting -# with your gem easier. You can also use a different console, if you like. - -# (If you use this, don't forget to add pry to your Gemfile!) -# require "pry" -# Pry.start - -require "irb" -IRB.start(__FILE__) diff --git a/ruby-statistics/bin/setup b/ruby-statistics/bin/setup deleted file mode 100755 index dce67d860a..0000000000 --- a/ruby-statistics/bin/setup +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -IFS=$'\n\t' -set -vx - -bundle install - -# Do any other automated setup that you need to do here diff --git a/ruby-statistics/lib/enumerable.rb b/ruby-statistics/lib/enumerable.rb deleted file mode 100644 index 00627afc76..0000000000 --- a/ruby-statistics/lib/enumerable.rb +++ /dev/null @@ -1,15 +0,0 @@ -# TODO: Avoid monkey-patching. -module Enumerable - def mean - self.reduce(:+) / self.length.to_f - end - - def variance - mean = self.mean - self.reduce(0) { |memo, value| memo + ((value - mean) ** 2) } / (self.length - 1).to_f - end - - def standard_deviation - Math.sqrt(self.variance) - end -end diff --git a/ruby-statistics/lib/math.rb b/ruby-statistics/lib/math.rb deleted file mode 100644 index 9be39ba446..0000000000 --- a/ruby-statistics/lib/math.rb +++ /dev/null @@ -1,120 +0,0 @@ -module Math - def self.factorial(n) - return if n < 0 - - n = n.to_i # Only integers. - - return 1 if n == 0 || n == 1 - Math.gamma(n + 1) # Math.gamma(x) == (n - 1)! for integer values - end - - def self.combination(n, r) - self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_f # n!/(r! * [n - r]!) - end - - def self.permutation(n, k) - self.factorial(n)/self.factorial(n - k).to_f - end - - # Function adapted from the python implementation that exists in https://en.wikipedia.org/wiki/Simpson%27s_rule#Sample_implementation - # Finite integral in the interval [a, b] split up in n-intervals - def self.simpson_rule(a, b, n, &block) - unless n.even? - puts "The composite simpson's rule needs even intervals!" - return - end - - h = (b - a)/n.to_f - resA = yield(a) - resB = yield(b) - - sum = resA + resB - - (1..n).step(2).each do |number| - res = yield(a + number * h) - sum += 4 * res - end - - (1..(n-1)).step(2).each do |number| - res = yield(a + number * h) - sum += 2 * res - end - - return sum * h / 3.0 - end - - def self.lower_incomplete_gamma_function(s, x) - # The greater the iterations, the better. That's why we are iterating 10_000 * x times - self.simpson_rule(0, x, (10_000 * x.round).round) do |t| - (t ** (s - 1)) * Math.exp(-t) - end - end - - def self.beta_function(x, y) - return 1 if x == 1 && y == 1 - - (Math.gamma(x) * Math.gamma(y))/Math.gamma(x + y) - end - - ### This implementation is an adaptation of the incomplete beta function made in C by - ### Lewis Van Winkle, which released the code under the zlib license. - ### The whole math behind this code is described in the following post: https://codeplea.com/incomplete-beta-function-c - def self.incomplete_beta_function(x, alp, bet) - return if x < 0.0 - return 1.0 if x > 1.0 - - tiny = 1.0E-50 - - if x > ((alp + 1.0)/(alp + bet + 2.0)) - return 1.0 - self.incomplete_beta_function(1.0 - x, bet, alp) - end - - # To avoid overflow problems, the implementation applies the logarithm properties - # to calculate in a faster and safer way the values. - lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze - front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze - - # This is the non-log version of the left part of the formula (before the continuous fraction) - # down_left = alp * self.beta_function(alp, bet) - # upper_left = (x ** alp) * ((1.0 - x) ** bet) - # front = upper_left/down_left - - f, c, d = 1.0, 1.0, 0.0 - - returned_value = nil - - # Let's do more iterations than the proposed implementation (200 iters) - (0..500).each do |number| - m = number/2 - - numerator = if number == 0 - 1.0 - elsif number % 2 == 0 - (m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m)) - else - top = -((alp + m) * (alp + bet + m) * x) - down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0)) - - top/down - end - - d = 1.0 + numerator * d - d = tiny if d.abs < tiny - d = 1.0 / d - - c = 1.0 + numerator / c - c = tiny if c.abs < tiny - - cd = (c*d).freeze - f = f * cd - - - if (1.0 - cd).abs < 1.0E-10 - returned_value = front * (f - 1.0) - break - end - end - - returned_value - end -end diff --git a/ruby-statistics/lib/statistics.rb b/ruby-statistics/lib/statistics.rb deleted file mode 100644 index 2e443c214a..0000000000 --- a/ruby-statistics/lib/statistics.rb +++ /dev/null @@ -1,7 +0,0 @@ -require File.dirname(__FILE__) + '/enumerable' -require File.dirname(__FILE__) + '/math' -Dir[ File.dirname(__FILE__) + '/statistics/**/*.rb'].each {|file| require file } - -module Statistics - # Your code goes here... -end diff --git a/ruby-statistics/lib/statistics/distribution.rb b/ruby-statistics/lib/statistics/distribution.rb deleted file mode 100644 index 291eab9cc7..0000000000 --- a/ruby-statistics/lib/statistics/distribution.rb +++ /dev/null @@ -1,11 +0,0 @@ -Dir[File.dirname(__FILE__) + '/distribution/**/*.rb'].each {|file| require file } - -module Statistics - module Distribution - end -end - -# If Distribution is not defined, setup alias. -if defined?(Statistics) && !(defined?(Distribution)) - Distribution = Statistics::Distribution -end diff --git a/ruby-statistics/lib/statistics/distribution/bernoulli.rb b/ruby-statistics/lib/statistics/distribution/bernoulli.rb deleted file mode 100644 index 30ab789386..0000000000 --- a/ruby-statistics/lib/statistics/distribution/bernoulli.rb +++ /dev/null @@ -1,35 +0,0 @@ -module Statistics - module Distribution - class Bernoulli - def self.density_function(n, p) - return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. - - case n - when 0 then 1.0 - p - when 1 then p - end - end - - def self.cumulative_function(n, p) - return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. - - case n - when 0 then 1.0 - p - when 1 then 1.0 - end - end - - def self.variance(p) - p * (1.0 - p) - end - - def self.skewness(p) - (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p)) - end - - def self.kurtosis(p) - (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p)) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/beta.rb b/ruby-statistics/lib/statistics/distribution/beta.rb deleted file mode 100644 index 90970954b7..0000000000 --- a/ruby-statistics/lib/statistics/distribution/beta.rb +++ /dev/null @@ -1,36 +0,0 @@ -module Statistics - module Distribution - class Beta - attr_accessor :alpha, :beta - - def initialize(alp, bet) - self.alpha = alp.to_f - self.beta = bet.to_f - end - - def cumulative_function(value) - Math.incomplete_beta_function(value, alpha, beta) - end - - def density_function(value) - return 0 if value < 0 || value > 1 # Density function defined in the [0,1] interval - - num = (value**(alpha - 1)) * ((1 - value)**(beta - 1)) - den = Math.beta_function(alpha, beta) - - num/den - end - - def mode - return unless alpha > 1 && beta > 1 - - (alpha - 1)/(alpha + beta - 2) - end - - def mean - return if alpha + beta == 0 - alpha / (alpha + beta) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/binomial.rb b/ruby-statistics/lib/statistics/distribution/binomial.rb deleted file mode 100644 index 3ce513ca00..0000000000 --- a/ruby-statistics/lib/statistics/distribution/binomial.rb +++ /dev/null @@ -1,49 +0,0 @@ -module Statistics - module Distribution - class Binomial - attr_accessor :number_of_trials, :probability_per_trial - def initialize(n, p) - self.number_of_trials = n.to_i - self.probability_per_trial = p - end - - def probability_mass_function(k) - return if k < 0 || k > number_of_trials - k = k.to_i - - Math.combination(number_of_trials, k) * - (probability_per_trial ** k) * ((1 - probability_per_trial) ** (number_of_trials - k)) - end - - def cumulative_function(k) - return if k < 0 || k > number_of_trials - k = k.to_i - - p = 1 - probability_per_trial - Math.incomplete_beta_function(p, number_of_trials - k, 1 + k) - end - - def mean - number_of_trials * probability_per_trial - end - - def variance - mean * (1 - probability_per_trial) - end - - def mode - test = (number_of_trials + 1) * probability_per_trial - - returned = if test == 0 || (test % 1 != 0) - test.floor - elsif (test % 1 == 0) && (test >= 1 && test <= number_of_trials) - [test, test - 1] - elsif test == number_of_trials + 1 - number_of_trials - end - - returned - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/chi_squared.rb b/ruby-statistics/lib/statistics/distribution/chi_squared.rb deleted file mode 100644 index 0996999036..0000000000 --- a/ruby-statistics/lib/statistics/distribution/chi_squared.rb +++ /dev/null @@ -1,37 +0,0 @@ -module Statistics - module Distribution - class ChiSquared - attr_accessor :degrees_of_freedom - - alias_method :mean, :degrees_of_freedom - - def initialize(k) - self.degrees_of_freedom = k - end - - def cumulative_function(value) - k = degrees_of_freedom/2.0 - Math.lower_incomplete_gamma_function(k, value/2.0)/Math.gamma(k) - end - - def density_function(value) - return 0 if value < 0 - - common = degrees_of_freedom/2.0 - - left_down = (2 ** common) * Math.gamma(common) - right = (value ** (common - 1)) * Math.exp(-(value/2.0)) - - (1.0/left_down) * right - end - - def mode - [degrees_of_freedom - 2, 0].max - end - - def variance - degrees_of_freedom * 2 - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/empirical.rb b/ruby-statistics/lib/statistics/distribution/empirical.rb deleted file mode 100644 index 63d72bdfed..0000000000 --- a/ruby-statistics/lib/statistics/distribution/empirical.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Statistics - module Distribution - class Empirical - attr_accessor :samples - - def initialize(samples:) - self.samples = samples - end - - # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution - def cumulative_function(x:) - cumulative_sum = samples.reduce(0) do |summation, sample| - summation += if sample <= x - 1 - else - 0 - end - - summation - end - - cumulative_sum / samples.size.to_f - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/f.rb b/ruby-statistics/lib/statistics/distribution/f.rb deleted file mode 100644 index af3af9bc09..0000000000 --- a/ruby-statistics/lib/statistics/distribution/f.rb +++ /dev/null @@ -1,46 +0,0 @@ -module Statistics - module Distribution - class F - attr_accessor :d1, :d2 # Degrees of freedom #1 and #2 - - def initialize(k, j) - self.d1 = k - self.d2 = j - end - - # Formula extracted from http://www.itl.nist.gov/div898/handbook/eda/section3/eda3665.htm#CDF - def cumulative_function(value) - k = d2/(d2 + d1 * value.to_f) - - 1 - Math.incomplete_beta_function(k, d2/2.0, d1/2.0) - end - - def density_function(value) - return if d1 < 0 || d2 < 0 # F-pdf is well defined for the [0, +infinity) interval. - - val = value.to_f - upper = ((d1 * val) ** d1) * (d2**d2) - lower = (d1 * val + d2) ** (d1 + d2) - up = Math.sqrt(upper/lower.to_f) - down = val * Math.beta_function(d1/2.0, d2/2.0) - - up/down.to_f - end - - def mean - return if d2 <= 2 - - d2/(d2 - 2).to_f - end - - def mode - return if d1 <= 2 - - left = (d1 - 2)/d1.to_f - right = d2/(d2 + 2).to_f - - left * right - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/geometric.rb b/ruby-statistics/lib/statistics/distribution/geometric.rb deleted file mode 100644 index fe59eb505b..0000000000 --- a/ruby-statistics/lib/statistics/distribution/geometric.rb +++ /dev/null @@ -1,76 +0,0 @@ -module Statistics - module Distribution - class Geometric - attr_accessor :probability_of_success, :always_success_allowed - - def initialize(p, always_success: false) - self.probability_of_success = p.to_f - self.always_success_allowed = always_success - end - - def density_function(k) - k = k.to_i - - if always_success_allowed - return if k < 0 - - ((1.0 - probability_of_success) ** k) * probability_of_success - else - return if k <= 0 - - ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success - end - end - - def cumulative_function(k) - k = k.to_i - - if always_success_allowed - return if k < 0 - - 1.0 - ((1.0 - probability_of_success) ** (k + 1.0)) - else - return if k <= 0 - - 1.0 - ((1.0 - probability_of_success) ** k) - end - end - - def mean - if always_success_allowed - (1.0 - probability_of_success) / probability_of_success - else - 1.0 / probability_of_success - end - end - - def median - if always_success_allowed - (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - 1.0 - else - (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - end - end - - def mode - if always_success_allowed - 0.0 - else - 1.0 - end - end - - def variance - (1.0 - probability_of_success) / (probability_of_success ** 2) - end - - def skewness - (2.0 - probability_of_success) / Math.sqrt(1.0 - probability_of_success) - end - - def kurtosis - 6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success)) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/logseries.rb b/ruby-statistics/lib/statistics/distribution/logseries.rb deleted file mode 100644 index 359f9c619e..0000000000 --- a/ruby-statistics/lib/statistics/distribution/logseries.rb +++ /dev/null @@ -1,51 +0,0 @@ -module Statistics - module Distribution - class LogSeries - def self.density_function(k, p) - return if k <= 0 - k = k.to_i - - left = (-1.0 / Math.log(1.0 - p)) - right = (p ** k).to_f - - left * right / k - end - - def self.cumulative_function(k, p) - return if k <= 0 - - # Sadly, the incomplete beta function is converging - # too fast to zero and breaking the calculation on logs. - # So, we default to the basic definition of the CDF which is - # the integral (-Inf, K) of the PDF, with P(X <= x) which can - # be solved as a summation of all PDFs from 1 to K. Note that the summation approach - # only applies to discrete distributions. - # - # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p) - # 1.0 + right - - result = 0.0 - 1.upto(k) do |number| - result += self.density_function(number, p) - end - - result - end - - def self.mode - 1.0 - end - - def self.mean(p) - (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p)) - end - - def self.variance(p) - up = p + Math.log(1.0 - p) - down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2) - - (-1.0 * p) * (up / down.to_f) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/negative_binomial.rb b/ruby-statistics/lib/statistics/distribution/negative_binomial.rb deleted file mode 100644 index 34dc451cae..0000000000 --- a/ruby-statistics/lib/statistics/distribution/negative_binomial.rb +++ /dev/null @@ -1,51 +0,0 @@ -module Statistics - module Distribution - class NegativeBinomial - attr_accessor :number_of_failures, :probability_per_trial - - def initialize(r, p) - self.number_of_failures = r.to_i - self.probability_per_trial = p - end - - def probability_mass_function(k) - return if number_of_failures < 0 || k < 0 || k > number_of_failures - - left = Math.combination(k + number_of_failures - 1, k) - right = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k) - - left * right - end - - def cumulative_function(k) - return if k < 0 || k > number_of_failures - k = k.to_i - - 1.0 - Math.incomplete_beta_function(probability_per_trial, k + 1, number_of_failures) - end - - def mean - (probability_per_trial * number_of_failures)/(1 - probability_per_trial).to_f - end - - def variance - (probability_per_trial * number_of_failures)/((1 - probability_per_trial) ** 2).to_f - end - - def skewness - (1 + probability_per_trial).to_f / Math.sqrt(probability_per_trial * number_of_failures) - end - - def mode - if number_of_failures > 1 - up = probability_per_trial * (number_of_failures - 1) - down = (1 - probability_per_trial).to_f - - (up/down).floor - elsif number_of_failures <= 1 - 0.0 - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/normal.rb b/ruby-statistics/lib/statistics/distribution/normal.rb deleted file mode 100644 index 8feef0247f..0000000000 --- a/ruby-statistics/lib/statistics/distribution/normal.rb +++ /dev/null @@ -1,139 +0,0 @@ -module Statistics - module Distribution - class Normal - attr_accessor :mean, :standard_deviation, :variance - alias_method :mode, :mean - - def initialize(avg, std) - self.mean = avg.to_f - self.standard_deviation = std.to_f - self.variance = std.to_f**2 - end - - def cumulative_function(value) - (1/2.0) * (1.0 + Math.erf((value - mean)/(standard_deviation * Math.sqrt(2.0)))) - end - - def density_function(value) - return 0 if standard_deviation <= 0 - - up_right = (value - mean)**2.0 - down_right = 2.0 * variance - right = Math.exp(-(up_right/down_right)) - left_down = Math.sqrt(2.0 * Math::PI * variance) - left_up = 1.0 - - (left_up/(left_down) * right) - end - - ## Marsaglia polar method implementation for random gaussian (normal) number generation. - # References: - # https://en.wikipedia.org/wiki/Marsaglia_polar_method - # https://math.stackexchange.com/questions/69245/transform-uniform-distribution-to-normal-distribution-using-lindeberg-l%C3%A9vy-clt - # https://www.projectrhea.org/rhea/index.php/The_principles_for_how_to_generate_random_samples_from_a_Gaussian_distribution - - def random(elements: 1, seed: Random.new_seed) - results = [] - - # Setup seed - srand(seed) - - # Number of random numbers to be generated. - elements.times do - x, y, r = 0.0, 0.0, 0.0 - - # Find an (x, y) point in the x^2 + y^2 < 1 circumference. - loop do - x = 2.0 * rand - 1.0 - y = 2.0 * rand - 1.0 - - r = (x ** 2) + (y ** 2) - - break unless r >= 1.0 || r == 0 - end - - # Project the random point to the required random distance - r = Math.sqrt(-2.0 * Math.log(r) / r) - - # Transform the random distance to a gaussian value and append it to the results array - results << mean + x * r * standard_deviation - end - - if elements == 1 - results.first - else - results - end - end - end - - class StandardNormal < Normal - def initialize - super(0, 1) # Mean = 0, Std = 1 - end - - def density_function(value) - pow = (value**2)/2.0 - euler = Math.exp(-pow) - - euler/Math.sqrt(2 * Math::PI) - end - end - - # Inverse Standard Normal distribution: - # References: - # https://en.wikipedia.org/wiki/Inverse_distribution - # http://www.source-code.biz/snippets/vbasic/9.htm - class InverseStandardNormal < StandardNormal - A1 = -39.6968302866538 - A2 = 220.946098424521 - A3 = -275.928510446969 - A4 = 138.357751867269 - A5 = -30.6647980661472 - A6 = 2.50662827745924 - B1 = -54.4760987982241 - B2 = 161.585836858041 - B3 = -155.698979859887 - B4 = 66.8013118877197 - B5 = -13.2806815528857 - C1 = -7.78489400243029E-03 - C2 = -0.322396458041136 - C3 = -2.40075827716184 - C4 = -2.54973253934373 - C5 = 4.37466414146497 - C6 = 2.93816398269878 - D1 = 7.78469570904146E-03 - D2 = 0.32246712907004 - D3 = 2.445134137143 - D4 = 3.75440866190742 - P_LOW = 0.02425 - P_HIGH = 1 - P_LOW - - def density_function(_) - raise NotImplementedError - end - - def random(elements: 1, seed: Random.new_seed) - raise NotImplementedError - end - - def cumulative_function(value) - return if value < 0.0 || value > 1.0 - return -1.0 * Float::INFINITY if value.zero? - return Float::INFINITY if value == 1.0 - - if value < P_LOW - q = Math.sqrt((Math.log(value) * -2.0)) - (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0) - elsif value <= P_HIGH - q = value - 0.5 - r = q ** 2 - (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q / (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0) - else - q = Math.sqrt((Math.log(1 - value) * -2.0)) - - (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1) - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/poisson.rb b/ruby-statistics/lib/statistics/distribution/poisson.rb deleted file mode 100644 index b317624ad9..0000000000 --- a/ruby-statistics/lib/statistics/distribution/poisson.rb +++ /dev/null @@ -1,38 +0,0 @@ -module Statistics - module Distribution - class Poisson - attr_accessor :expected_number_of_occurrences - - alias_method :mean, :expected_number_of_occurrences - alias_method :variance, :expected_number_of_occurrences - - def initialize(l) - self.expected_number_of_occurrences = l - end - - def probability_mass_function(k) - return if k < 0 || expected_number_of_occurrences < 0 - - k = k.to_i - - upper = (expected_number_of_occurrences ** k) * Math.exp(-expected_number_of_occurrences) - lower = Math.factorial(k) - - upper/lower.to_f - end - - def cumulative_function(k) - return if k < 0 || expected_number_of_occurrences < 0 - - k = k.to_i - - upper = Math.lower_incomplete_gamma_function((k + 1).floor, expected_number_of_occurrences) - lower = Math.factorial(k.floor) - - # We need the right tail, i.e.: The upper incomplete gamma function. This can be - # achieved by doing a substraction between 1 and the lower incomplete gamma function. - 1 - (upper/lower.to_f) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/t_student.rb b/ruby-statistics/lib/statistics/distribution/t_student.rb deleted file mode 100644 index 66b05780ba..0000000000 --- a/ruby-statistics/lib/statistics/distribution/t_student.rb +++ /dev/null @@ -1,82 +0,0 @@ -module Statistics - module Distribution - class TStudent - attr_accessor :degrees_of_freedom - attr_reader :mode - - def initialize(v) - self.degrees_of_freedom = v - @mode = 0 - end - - ### Extracted from https://codeplea.com/incomplete-beta-function-c - ### This function is shared under zlib license and the author is Lewis Van Winkle - def cumulative_function(value) - upper = (value + Math.sqrt(value * value + degrees_of_freedom)) - lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom)) - - x = upper/lower - - alpha = degrees_of_freedom/2.0 - beta = degrees_of_freedom/2.0 - - Math.incomplete_beta_function(x, alpha, beta) - end - - def density_function(value) - return if degrees_of_freedom <= 0 - - upper = Math.gamma((degrees_of_freedom + 1)/2.0) - lower = Math.sqrt(degrees_of_freedom * Math::PI) * Math.gamma(degrees_of_freedom/2.0) - left = upper/lower - right = (1 + ((value ** 2)/degrees_of_freedom.to_f)) ** -((degrees_of_freedom + 1)/2.0) - - left * right - end - - def mean - 0 if degrees_of_freedom > 1 - end - - def variance - if degrees_of_freedom > 1 && degrees_of_freedom <= 2 - Float::INFINITY - elsif degrees_of_freedom > 2 - degrees_of_freedom/(degrees_of_freedom - 2.0) - end - end - - # Quantile function extracted from http://www.jennessent.com/arcview/idf.htm - # TODO: Make it truly Student's T sample. - def random(elements: 1, seed: Random.new_seed) - warn 'This is an alpha version code. The generated sample is similar to an uniform distribution' - srand(seed) - - v = degrees_of_freedom - results = [] - - # Because the Quantile function of a student-t distribution is between (-Infinity, y) - # we setup an small threshold in order to properly compute the integral - threshold = 10_000.0e-12 - - elements.times do - y = rand - results << Math.simpson_rule(threshold, y, 10_000) do |t| - up = Math.gamma((v+1)/2.0) - down = Math.sqrt(Math::PI * v) * Math.gamma(v/2.0) - right = (1 + ((y ** 2)/v.to_f)) ** ((v+1)/2.0) - left = up/down.to_f - - left * right - end - end - - if elements == 1 - results.first - else - results - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/uniform.rb b/ruby-statistics/lib/statistics/distribution/uniform.rb deleted file mode 100644 index 4550159bc9..0000000000 --- a/ruby-statistics/lib/statistics/distribution/uniform.rb +++ /dev/null @@ -1,40 +0,0 @@ -module Statistics - module Distribution - class Uniform - attr_accessor :left, :right - - def initialize(a, b) - self.left = a.to_f - self.right = b.to_f - end - - def density_function(value) - if value >= left && value <= right - 1/(right - left) - else - 0 - end - end - - def cumulative_function(value) - if value < left - 0 - elsif value >= left && value <= right - (value - left)/(right - left) - else - 1 - end - end - - def mean - (1/2.0) * ( left + right ) - end - alias_method :median, :mean - - - def variance - (1/12.0) * ( right - left ) ** 2 - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/weibull.rb b/ruby-statistics/lib/statistics/distribution/weibull.rb deleted file mode 100644 index 90c876855e..0000000000 --- a/ruby-statistics/lib/statistics/distribution/weibull.rb +++ /dev/null @@ -1,66 +0,0 @@ -module Statistics - module Distribution - class Weibull - attr_accessor :shape, :scale # k and lambda - - def initialize(k, lamb) - self.shape = k.to_f - self.scale = lamb.to_f - end - - def cumulative_function(random_value) - return 0 if random_value < 0 - - 1 - Math.exp(-((random_value/scale) ** shape)) - end - - def density_function(value) - return if shape <= 0 || scale <= 0 - return 0 if value < 0 - - left = shape/scale - center = (value/scale)**(shape - 1) - right = Math.exp(-((value/scale)**shape)) - - left * center * right - end - - def mean - scale * Math.gamma(1 + (1/shape)) - end - - def mode - return 0 if shape <= 1 - - scale * (((shape - 1)/shape) ** (1/shape)) - end - - def variance - left = Math.gamma(1 + (2/shape)) - right = Math.gamma(1 + (1/shape)) ** 2 - - (scale ** 2) * (left - right) - end - - # Using the inverse CDF function, also called quantile, we can calculate - # a random sample that follows a weibull distribution. - # - # Formula extracted from https://www.taygeta.com/random/weibull.html - def random(elements: 1, seed: Random.new_seed) - results = [] - - srand(seed) - - elements.times do - results << ((-1/scale) * Math.log(1 - rand)) ** (1/shape) - end - - if elements == 1 - results.first - else - results - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb b/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb deleted file mode 100644 index cbdd7a03ae..0000000000 --- a/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb +++ /dev/null @@ -1,71 +0,0 @@ -module Statistics - class SpearmanRankCoefficient - def self.rank(data:, return_ranks_only: true) - descending_order_data = data.sort { |a, b| b <=> a } - rankings = {} - - data.each do |value| - # If we have ties, the find_index method will only retrieve the index of the - # first element in the list (i.e, the most close to the left of the array), - # so when a tie is detected, we increase the temporal ranking by the number of - # counted elements at that particular time and then we increase the counter. - temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index - - if rankings.fetch(value, false) - rankings[value][:rank] += (temporal_ranking + rankings[value][:counter]) - rankings[value][:counter] += 1 - rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f - else - rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking } - end - end - - if return_ranks_only - data.map do |value| - rankings[value][:tie_rank] - end - else - rankings - end - end - - # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php - def self.coefficient(set_one, set_two) - raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size - return if set_one.size == 0 && set_two.size == 0 - - set_one_mean, set_two_mean = set_one.mean, set_two.mean - have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) } - - if have_tie_ranks - numerator = 0 - squared_differences_set_one = 0 - squared_differences_set_two = 0 - - set_one.size.times do |idx| - local_diff_one = (set_one[idx] - set_one_mean) - local_diff_two = (set_two[idx] - set_two_mean) - - squared_differences_set_one += local_diff_one ** 2 - squared_differences_set_two += local_diff_two ** 2 - - numerator += local_diff_one * local_diff_two - end - - denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two) - - numerator / denominator.to_f # This is rho or spearman's coefficient. - else - sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)| - memo += ((rank_one - set_two[index]) ** 2) - memo - end - - numerator = 6 * sum_squared_differences - denominator = ((set_one.size ** 3) - set_one.size) - - 1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient. - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test.rb b/ruby-statistics/lib/statistics/statistical_test.rb deleted file mode 100644 index 5ce3c234db..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test.rb +++ /dev/null @@ -1,11 +0,0 @@ -Dir[File.dirname(__FILE__) + '/statistical_test/**/*.rb'].each {|file| require file } - -module Statistics - module StatisticalTest - end -end - -# If StatisticalTest is not defined, setup alias. -if defined?(Statistics) && !(defined?(StatisticalTest)) - StatisticalTest = Statistics::StatisticalTest -end diff --git a/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb b/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb deleted file mode 100644 index ce96b34f77..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb +++ /dev/null @@ -1,42 +0,0 @@ -module Statistics - module StatisticalTest - class ChiSquaredTest - def self.chi_statistic(expected, observed) - # If the expected is a number, we asumme that all expected observations - # has the same probability to occur, hence we expect to see the same number - # of expected observations per each observed value - statistic = if expected.is_a? Numeric - observed.reduce(0) do |memo, observed_value| - up = (observed_value - expected) ** 2 - memo += (up/expected.to_f) - end - else - expected.each_with_index.reduce(0) do |memo, (expected_value, index)| - up = (observed[index] - expected_value) ** 2 - memo += (up/expected_value.to_f) - end - end - - [statistic, observed.size - 1] - end - - def self.goodness_of_fit(alpha, expected, observed) - chi_score, df = *self.chi_statistic(expected, observed) # Splat array result - - return if chi_score.nil? || df.nil? - - probability = Distribution::ChiSquared.new(df).cumulative_function(chi_score) - p_value = 1 - probability - - # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha - # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. - { probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/f_test.rb b/ruby-statistics/lib/statistics/statistical_test/f_test.rb deleted file mode 100644 index 94be720c96..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/f_test.rb +++ /dev/null @@ -1,83 +0,0 @@ -module Statistics - module StatisticalTest - class FTest - # This method calculates the one-way ANOVA F-test statistic. - # We assume that all specified arguments are arrays. - # It returns an array with three elements: - # [F-statistic or F-score, degrees of freedom numerator, degrees of freedom denominator]. - # - # Formulas extracted from: - # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/ - # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html - def self.anova_f_score(*args) - # If only two groups have been specified as arguments, we follow the classic F-Test for - # equality of variances, which is the ratio between the variances. - f_score = nil - df1 = nil - df2 = nil - - if args.size == 2 - variances = [args[0].variance, args[1].variance] - - f_score = variances.max/variances.min.to_f - df1 = 1 # k-1 (k = 2) - df2 = args.flatten.size - 2 # N-k (k = 2) - elsif args.size > 2 - total_groups = args.size - total_elements = args.flatten.size - overall_mean = args.flatten.mean - - sample_sizes = args.map(&:size) - sample_means = args.map(&:mean) - sample_stds = args.map(&:standard_deviation) - - # Variance between groups - iterator = sample_sizes.each_with_index - - variance_between_groups = iterator.reduce(0) do |summation, (size, index)| - inner_calculation = size * ((sample_means[index] - overall_mean) ** 2) - - summation += (inner_calculation / (total_groups - 1).to_f) - end - - # Variance within groups - variance_within_groups = (0...total_groups).reduce(0) do |outer_summation, group_index| - outer_summation += args[group_index].reduce(0) do |inner_sumation, observation| - inner_calculation = ((observation - sample_means[group_index]) ** 2) - inner_sumation += (inner_calculation / (total_elements - total_groups).to_f) - end - end - - f_score = variance_between_groups/variance_within_groups.to_f - df1 = total_groups - 1 - df2 = total_elements - total_groups - end - - [f_score, df1, df2] - end - - # This method expects the alpha value and the groups to calculate the one-way ANOVA test. - # It returns a hash with multiple information and the test result (if reject the null hypotesis or not). - # Keep in mind that the values for the alternative key (true/false) does not imply that the alternative hypothesis - # is TRUE or FALSE. It's a minor notation advantage to decide if reject the null hypothesis or not. - - def self.one_way_anova(alpha, *args) - f_score, df1, df2 = *self.anova_f_score(*args) # Splat array result - - return if f_score.nil? || df1.nil? || df2.nil? - - probability = Distribution::F.new(df1, df2).cumulative_function(f_score) - p_value = 1 - probability - - # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha - # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. - { probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb b/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb deleted file mode 100644 index 19c5b12fc1..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +++ /dev/null @@ -1,70 +0,0 @@ -module Statistics - module StatisticalTest - class KolmogorovSmirnovTest - # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test - def self.two_samples(group_one:, group_two:, alpha: 0.05) - samples = group_one + group_two # We can use unbalaced group samples - - ecdf_one = Distribution::Empirical.new(samples: group_one) - ecdf_two = Distribution::Empirical.new(samples: group_two) - - d_max = samples.sort.map do |sample| - d1 = ecdf_one.cumulative_function(x: sample) - d2 = ecdf_two.cumulative_function(x: sample) - - (d1 - d2).abs - end.max - - # TODO: Validate calculation of Common alpha. - common_alpha = Math.sqrt((-0.5 * Math.log(alpha))) - radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f - - critical_d = common_alpha * Math.sqrt(radicand) - # critical_d = self.critical_d(alpha: alpha, n: samples.size) - - # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution - # defined. We reject the null hypotesis if Dmax is > than Dcritical. - { d_max: d_max, - d_critical: critical_d, - total_samples: samples.size, - alpha: alpha, - null: d_max <= critical_d, - alternative: d_max > critical_d, - confidence_level: 1.0 - alpha } - end - - # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper, - # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest - # normality test". - # In this paper, the authors defines a couple of 6th-degree polynomial functions that allow us - # to find an aproximation of the real critical value. This is based in the conclusions made by - # Dagnelie (1968), where indicates that critical values given by Lilliefors can be approximated - # numerically. - # - # In general, the formula found is: - # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha). - # - # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle - # of Monte Carlo simulations. - # - # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf - # def self.critical_d(alpha:, n:) - # confidence = 1.0 - alpha - - # a_alpha = 6.32207539843126 -17.1398870006148 * confidence + - # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) + - # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) - - # 18.48090137098585 * (confidence ** 6) - - # b_alpha = 12.940399038404 - 53.458334259532 * confidence + - # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) + - # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) + - # 92.123451358715 * (confidence ** 6) - - # Math.sqrt(1.0 / (a_alpha * n + b_alpha)) - # end - end - - KSTest = KolmogorovSmirnovTest # Alias - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/t_test.rb b/ruby-statistics/lib/statistics/statistical_test/t_test.rb deleted file mode 100644 index 9fdd55ac23..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/t_test.rb +++ /dev/null @@ -1,92 +0,0 @@ -module Statistics - module StatisticalTest - class TTest - # Errors for Zero std - class ZeroStdError < StandardError - STD_ERROR_MSG = 'Standard deviation for the difference or group is zero. Please, reconsider sample contents'.freeze - end - - # Perform a T-Test for one or two samples. - # For the tails param, we need a symbol: :one_tail or :two_tail - def self.perform(alpha, tails, *args) - return if args.size < 2 - - degrees_of_freedom = 0 - - # If the comparison mean has been specified - t_score = if args[0].is_a? Numeric - data_mean = args[1].mean - data_std = args[1].standard_deviation - - raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if data_std == 0 - - comparison_mean = args[0] - degrees_of_freedom = args[1].size - - (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_f).to_f - else - sample_left_mean = args[0].mean - sample_left_variance = args[0].variance - sample_right_variance = args[1].variance - sample_right_mean = args[1].mean - degrees_of_freedom = args.flatten.size - 2 - - left_root = sample_left_variance/args[0].size.to_f - right_root = sample_right_variance/args[1].size.to_f - - standard_error = Math.sqrt(left_root + right_root) - - (sample_left_mean - sample_right_mean).abs/standard_error.to_f - end - - t_distribution = Distribution::TStudent.new(degrees_of_freedom) - probability = t_distribution.cumulative_function(t_score) - - # Steps grabbed from https://support.minitab.com/en-us/minitab/18/help-and-how-to/statistics/basic-statistics/supporting-topics/basics/manually-calculate-a-p-value/ - # See https://github.com/estebanz01/ruby-statistics/issues/23 - p_value = if tails == :two_tail - 2 * (1 - t_distribution.cumulative_function(t_score.abs)) - else - 1 - probability - end - - { t_score: t_score, - probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - - def self.paired_test(alpha, tails, left_group, right_group) - raise StandardError, 'both samples are the same' if left_group == right_group - - # Handy snippet grabbed from https://stackoverflow.com/questions/2682411/ruby-sum-corresponding-members-of-two-or-more-arrays - differences = [left_group, right_group].transpose.map { |value| value.reduce(:-) } - - degrees_of_freedom = differences.size - 1 - difference_std = differences.standard_deviation - - raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if difference_std == 0 - - down = difference_std/Math.sqrt(differences.size) - - t_score = (differences.mean - 0)/down.to_f - - probability = Distribution::TStudent.new(degrees_of_freedom).cumulative_function(t_score) - - p_value = 1 - probability - p_value *= 2 if tails == :two_tail - - { t_score: t_score, - probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb b/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb deleted file mode 100644 index dcfff81247..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb +++ /dev/null @@ -1,95 +0,0 @@ -module Statistics - module StatisticalTest - class WilcoxonRankSumTest - def rank(elements) - ranked_elements = {} - - elements.sort.each_with_index do |element, index| - if ranked_elements.fetch(element, false) - # This allow us to solve the ties easily when performing the rank summation per group - ranked_elements[element][:counter] += 1 - ranked_elements[element][:rank] += (index + 1) - else - ranked_elements[element] = { counter: 1, rank: (index + 1) } - end - end - - # ranked_elements = [{ x => { counter: 1, rank: y } ] - ranked_elements - end - - # Steps to perform the calculation are based on http://www.mit.edu/~6.s085/notes/lecture5.pdf - def perform(alpha, tails, group_one, group_two) - # Size for each group - n1, n2 = group_one.size, group_two.size - - # Rank all data - total_ranks = rank(group_one + group_two) - - # sum rankings per group - r1 = ranked_sum_for(total_ranks, group_one) - r2 = ranked_sum_for(total_ranks, group_two) - - # calculate U statistic - u1 = (n1 * (n1 + 1)/2.0) - r1 - u2 = (n2 * (n2 + 1)/2.0 ) - r2 - - u_statistic = [u1.abs, u2.abs].min - - median_u = (n1 * n2)/2.0 - - ties = total_ranks.values.select { |element| element[:counter] > 1 } - - std_u = if ties.size > 0 - corrected_sigma(ties, n1, n2) - else - Math.sqrt((n1 * n2 * (n1 + n2 + 1))/12.0) - end - - z = (u_statistic - median_u)/std_u - - # Most literature are not very specific about the normal distribution to be used. - # We ran multiple tests with a Normal(median_u, std_u) and Normal(0, 1) and we found - # the latter to be more aligned with the results. - probability = Distribution::StandardNormal.new.cumulative_function(z.abs) - p_value = 1 - probability - p_value *= 2 if tails == :two_tail - - { probability: probability, - u: u_statistic, - z: z, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - - # Formula extracted from http://www.statstutor.ac.uk/resources/uploaded/mannwhitney.pdf - private def corrected_sigma(ties, total_group_one, total_group_two) - n = total_group_one + total_group_two - - rank_sum = ties.reduce(0) do |memo, t| - memo += ((t[:counter] ** 3) - t[:counter])/12.0 - end - - left = (total_group_one * total_group_two)/(n * (n - 1)).to_f - right = (((n ** 3) - n)/12.0) - rank_sum - - Math.sqrt(left * right) - end - - private def ranked_sum_for(total, group) - # sum rankings per group - group.reduce(0) do |memo, element| - rank_of_element = total[element][:rank] / total[element][:counter].to_f - memo += rank_of_element - end - end - end - - # Both test are the same. To keep the selected name, we just alias the class - # with the implementation. - MannWhitneyU = WilcoxonRankSumTest - end -end diff --git a/ruby-statistics/lib/statistics/version.rb b/ruby-statistics/lib/statistics/version.rb deleted file mode 100644 index 7c6cd46de5..0000000000 --- a/ruby-statistics/lib/statistics/version.rb +++ /dev/null @@ -1,3 +0,0 @@ -module Statistics - VERSION = "2.1.1" -end diff --git a/ruby-statistics/ruby-statistics.gemspec b/ruby-statistics/ruby-statistics.gemspec deleted file mode 100644 index b5ce2cebf1..0000000000 --- a/ruby-statistics/ruby-statistics.gemspec +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -lib = File.expand_path("../lib", __FILE__) -$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) -require "statistics/version" - -Gem::Specification.new do |spec| - spec.name = "ruby-statistics" - spec.version = Statistics::VERSION - spec.authors = ["esteban zapata"] - spec.email = ["estebanz01@outlook.com"] - - spec.summary = %q{A ruby gem for som specific statistics. Inspired by the jStat js library.} - spec.description = %q{This gem is intended to accomplish the same purpose as jStat js library: - to provide ruby with statistical capabilities without the need - of a statistical programming language like R or Octave. Some functions - and capabilities are an implementation from other authors and are - referenced properly in the class/method.} - spec.homepage = "https://github.com/estebanz01/ruby-statistics" - spec.license = "MIT" - - # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' - # to allow pushing to a single host or delete this section to allow pushing to any host. - spec.files = `git ls-files -z`.split("\x0").reject do |f| - f.match(%r{^(test|spec|features)/}) - end - spec.bindir = "exe" - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } - spec.require_paths = ["lib"] - - spec.add_development_dependency "rake", '~> 12.0', '>= 12.0.0' - spec.add_development_dependency "rspec", '~> 3.6', '>= 3.6.0' - spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1' - spec.add_development_dependency 'byebug', '~> 9.1.0', '>= 9.1.0' -end