From 8a1b454a2bd658c487c89766eebdbf4722af108d Mon Sep 17 00:00:00 2001 From: Sruthi Chandran Date: Wed, 16 Oct 2019 22:08:35 +0530 Subject: [PATCH 1/3] New upstream version 12.2.8 --- snowplow-tracker/LICENSE-2.0.txt | 202 ++++++++++ snowplow-tracker/README.md | 74 ++++ snowplow-tracker/lib/snowplow-tracker.rb | 24 ++ .../lib/snowplow-tracker/contracts.rb | 29 ++ .../lib/snowplow-tracker/emitters.rb | 280 +++++++++++++ .../lib/snowplow-tracker/payload.rb | 73 ++++ .../snowplow-tracker/self_describing_json.rb | 34 ++ .../lib/snowplow-tracker/subject.rb | 139 +++++++ .../lib/snowplow-tracker/timestamp.rb | 46 +++ .../lib/snowplow-tracker/tracker.rb | 371 ++++++++++++++++++ .../lib/snowplow-tracker/version.rb | 19 + snowplow-tracker/snowplow-tracker.gemspec | 41 ++ 12 files changed, 1332 insertions(+) create mode 100644 snowplow-tracker/LICENSE-2.0.txt create mode 100644 snowplow-tracker/README.md create mode 100644 snowplow-tracker/lib/snowplow-tracker.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/contracts.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/emitters.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/payload.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/self_describing_json.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/subject.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/timestamp.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/tracker.rb create mode 100644 snowplow-tracker/lib/snowplow-tracker/version.rb create mode 100644 snowplow-tracker/snowplow-tracker.gemspec diff --git a/snowplow-tracker/LICENSE-2.0.txt b/snowplow-tracker/LICENSE-2.0.txt new file mode 100644 index 0000000000..7a4a3ea242 --- /dev/null +++ b/snowplow-tracker/LICENSE-2.0.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/snowplow-tracker/README.md b/snowplow-tracker/README.md new file mode 100644 index 0000000000..dac689f899 --- /dev/null +++ b/snowplow-tracker/README.md @@ -0,0 +1,74 @@ +# Ruby Analytics for Snowplow +[![Gem Version](https://badge.fury.io/rb/snowplow-tracker.svg)](http://badge.fury.io/rb/snowplow-tracker) +[![Build Status](https://travis-ci.org/snowplow/snowplow-ruby-tracker.png?branch=master)](https://travis-ci.org/snowplow/snowplow-ruby-tracker) +[![Code Climate](https://codeclimate.com/github/snowplow/snowplow-ruby-tracker.png)](https://codeclimate.com/github/snowplow/snowplow-ruby-tracker) +[![Coverage Status](https://coveralls.io/repos/snowplow/snowplow-ruby-tracker/badge.png)](https://coveralls.io/r/snowplow/snowplow-ruby-tracker) +[![License][license-image]][license] + +## Overview + +Add analytics to your Ruby and Rails apps and gems with the **[Snowplow] [snowplow]** event tracker for **[Ruby] [ruby]**. + +With this tracker you can collect event data from your **[Ruby] [ruby]** applications, **[Ruby on Rails] [rails]** web applications and **[Ruby gems] [rubygems]**. 
+ +## Quickstart + +Assuming git, **[Vagrant] [vagrant-install]** and **[VirtualBox] [virtualbox-install]** installed: + +```bash + host$ git clone https://github.com/snowplow/snowplow-ruby-tracker.git + host$ cd snowplow-ruby-tracker + host$ vagrant up && vagrant ssh +guest$ cd /vagrant +guest$ gem install bundler +guest$ bundle install +guest$ rspec +``` + +## Publishing + +```bash + host$ vagrant push +``` + +## Find out more + +| Technical Docs | Setup Guide | Roadmap | Contributing | +|---------------------------------|---------------------------|-------------------------|-----------------------------------| +| ![i1] [techdocs-image] | ![i2] [setup-image] | ![i3] [roadmap-image] | ![i4] [contributing-image] | +| **[Technical Docs] [techdocs]** | **[Setup Guide] [setup]** | **[Roadmap] [roadmap]** | **[Contributing] [contributing]** | + +## Copyright and license + +The Snowplow Ruby Tracker is copyright 2013-2016 Snowplow Analytics Ltd. + +Licensed under the **[Apache License, Version 2.0] [license]** (the "License"); +you may not use this software except in compliance with the License. + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +[license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat +[license]: http://www.apache.org/licenses/LICENSE-2.0 + +[ruby]: https://www.ruby-lang.org/en/ +[rails]: http://rubyonrails.org/ +[rubygems]: https://rubygems.org/ + +[snowplow]: http://snowplowanalytics.com + +[vagrant-install]: http://docs.vagrantup.com/v2/installation/index.html +[virtualbox-install]: https://www.virtualbox.org/wiki/Downloads + +[techdocs-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png +[setup-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png +[roadmap-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/roadmap.png +[contributing-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png + +[techdocs]: https://github.com/snowplow/snowplow/wiki/Ruby-Tracker +[setup]: https://github.com/snowplow/snowplow/wiki/Ruby-Tracker-Setup +[roadmap]: https://github.com/snowplow/snowplow/wiki/Ruby-Tracker-Roadmap +[contributing]: https://github.com/snowplow/snowplow/wiki/Ruby-Tracker-Contributing diff --git a/snowplow-tracker/lib/snowplow-tracker.rb b/snowplow-tracker/lib/snowplow-tracker.rb new file mode 100644 index 0000000000..a08defef22 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker.rb @@ -0,0 +1,24 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun (mailto:snowplow-user@googlegroups.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'snowplow-tracker/contracts.rb' +require 'snowplow-tracker/version.rb' +require 'snowplow-tracker/self_describing_json.rb' +require 'snowplow-tracker/payload.rb' +require 'snowplow-tracker/subject.rb' +require 'snowplow-tracker/emitters.rb' +require 'snowplow-tracker/timestamp.rb' +require 'snowplow-tracker/tracker.rb' + diff --git a/snowplow-tracker/lib/snowplow-tracker/contracts.rb b/snowplow-tracker/lib/snowplow-tracker/contracts.rb new file mode 100644 index 0000000000..0ce2907b24 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/contracts.rb @@ -0,0 +1,29 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'contracts' + +module SnowplowTracker + + ORIGINAL_FAILURE_CALLBACK = Contract.method(:failure_callback) + + def self.disable_contracts + Contract.define_singleton_method(:failure_callback) {|data| true} + end + + def self.enable_contracts + Contract.define_singleton_method(:failure_callback, ORIGINAL_FAILURE_CALLBACK) + end +end diff --git a/snowplow-tracker/lib/snowplow-tracker/emitters.rb b/snowplow-tracker/lib/snowplow-tracker/emitters.rb new file mode 100644 index 0000000000..09c75d199e --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/emitters.rb @@ -0,0 +1,280 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'net/https' +require 'set' +require 'logger' +require 'contracts' + +module SnowplowTracker + + LOGGER = Logger.new(STDERR) + LOGGER.level = Logger::INFO + + class Emitter + + include Contracts + + @@ConfigHash = ({ + :protocol => Maybe[Or['http', 'https']], + :port => Maybe[Num], + :method => Maybe[Or['get', 'post']], + :buffer_size => Maybe[Num], + :on_success => Maybe[Func[Num => Any]], + :on_failure => Maybe[Func[Num, Hash => Any]], + :thread_count => Maybe[Num] + }) + + @@StrictConfigHash = And[@@ConfigHash, lambda { |x| + x.class == Hash and Set.new(x.keys).subset? Set.new(@@ConfigHash.keys) + }] + + @@DefaultConfig = { + :protocol => 'http', + :method => 'get' + } + + Contract String, @@StrictConfigHash => lambda { |x| x.is_a? Emitter } + def initialize(endpoint, config={}) + config = @@DefaultConfig.merge(config) + @lock = Monitor.new + @collector_uri = as_collector_uri(endpoint, config[:protocol], config[:port], config[:method]) + @buffer = [] + if not config[:buffer_size].nil? + @buffer_size = config[:buffer_size] + elsif config[:method] == 'get' + @buffer_size = 1 + else + @buffer_size = 10 + end + @method = config[:method] + @on_success = config[:on_success] + @on_failure = config[:on_failure] + LOGGER.info("#{self.class} initialized with endpoint #{@collector_uri}") + + self + end + + # Build the collector URI from the configuration hash + # + Contract String, String, Maybe[Num], String => String + def as_collector_uri(endpoint, protocol, port, method) + port_string = port == nil ? '' : ":#{port.to_s}" + path = method == 'get' ? 
'/i' : '/com.snowplowanalytics.snowplow/tp2' + + "#{protocol}://#{endpoint}#{port_string}#{path}" + end + + # Add an event to the buffer and flush it if maximum size has been reached + # + Contract Hash => nil + def input(payload) + payload.each { |k,v| payload[k] = v.to_s} + @lock.synchronize do + @buffer.push(payload) + if @buffer.size >= @buffer_size + flush + end + end + + nil + end + + # Flush the buffer + # + Contract Bool => nil + def flush(async=true) + @lock.synchronize do + send_requests(@buffer) + @buffer = [] + end + nil + end + + # Send all events in the buffer to the collector + # + Contract ArrayOf[Hash] => nil + def send_requests(evts) + if evts.size < 1 + LOGGER.info("Skipping sending events since buffer is empty") + return + end + LOGGER.info("Attempting to send #{evts.size} request#{evts.size == 1 ? '' : 's'}") + + evts.each do |event| + event['stm'] = (Time.now.to_f * 1000).to_i.to_s # add the sent timestamp, overwrite if already exists + end + + if @method == 'post' + post_succeeded = false + begin + request = http_post(SelfDescribingJson.new( + 'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4', + evts + ).to_json) + post_succeeded = is_good_status_code(request.code) + rescue StandardError => se + LOGGER.warn(se) + end + if post_succeeded + unless @on_success.nil? + @on_success.call(evts.size) + end + else + unless @on_failure.nil? + @on_failure.call(0, evts) + end + end + + elsif @method == 'get' + success_count = 0 + unsent_requests = [] + evts.each do |evt| + get_succeeded = false + begin + request = http_get(evt) + get_succeeded = is_good_status_code(request.code) + rescue StandardError => se + LOGGER.warn(se) + end + if get_succeeded + success_count += 1 + else + unsent_requests << evt + end + end + if unsent_requests.size == 0 + unless @on_success.nil? + @on_success.call(success_count) + end + else + unless @on_failure.nil? 
+ @on_failure.call(success_count, unsent_requests) + end + end + end + + nil + end + + # Send a GET request + # + Contract Hash => lambda { |x| x.is_a? Net::HTTPResponse } + def http_get(payload) + destination = URI(@collector_uri + '?' + URI.encode_www_form(payload)) + LOGGER.info("Sending GET request to #{@collector_uri}...") + LOGGER.debug("Payload: #{payload}") + http = Net::HTTP.new(destination.host, destination.port) + request = Net::HTTP::Get.new(destination.request_uri) + if destination.scheme == 'https' + http.use_ssl = true + end + response = http.request(request) + LOGGER.add(is_good_status_code(response.code) ? Logger::INFO : Logger::WARN) { + "GET request to #{@collector_uri} finished with status code #{response.code}" + } + + response + end + + # Send a POST request + # + Contract Hash => lambda { |x| x.is_a? Net::HTTPResponse } + def http_post(payload) + LOGGER.info("Sending POST request to #{@collector_uri}...") + LOGGER.debug("Payload: #{payload}") + destination = URI(@collector_uri) + http = Net::HTTP.new(destination.host, destination.port) + request = Net::HTTP::Post.new(destination.request_uri) + if destination.scheme == 'https' + http.use_ssl = true + end + request.body = payload.to_json + request.set_content_type('application/json; charset=utf-8') + response = http.request(request) + LOGGER.add(is_good_status_code(response.code) ? Logger::INFO : Logger::WARN) { + "POST request to #{@collector_uri} finished with status code #{response.code}" + } + + response + end + + # Only 2xx and 3xx status codes are considered successes + # + Contract String => Bool + def is_good_status_code(status_code) + status_code.to_i >= 200 && status_code.to_i < 400 + end + + private :as_collector_uri, + :http_get, + :http_post + + end + + + class AsyncEmitter < Emitter + + Contract String, @@StrictConfigHash => lambda { |x| x.is_a? 
Emitter } + def initialize(endpoint, config={}) + @queue = Queue.new() + # @all_processed_condition and @results_unprocessed are used to emulate Python's Queue.task_done() + @queue.extend(MonitorMixin) + @all_processed_condition = @queue.new_cond + @results_unprocessed = 0 + (config[:thread_count] || 1).times do + t = Thread.new do + consume + end + end + super(endpoint, config) + end + + def consume + loop do + work_unit = @queue.pop + send_requests(work_unit) + @queue.synchronize do + @results_unprocessed -= 1 + @all_processed_condition.broadcast + end + end + end + + # Flush the buffer + # If async is false, block until the queue is empty + # + def flush(async=true) + loop do + @lock.synchronize do + @queue.synchronize do + @results_unprocessed += 1 + end + @queue << @buffer + @buffer = [] + end + if not async + LOGGER.info('Starting synchronous flush') + @queue.synchronize do + @all_processed_condition.wait_while { @results_unprocessed > 0 } + LOGGER.info('Finished synchronous flush') + end + end + break if @buffer.size < 1 + end + end + end + +end diff --git a/snowplow-tracker/lib/snowplow-tracker/payload.rb b/snowplow-tracker/lib/snowplow-tracker/payload.rb new file mode 100644 index 0000000000..383f525269 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/payload.rb @@ -0,0 +1,73 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'base64' +require 'json' +require 'net/http' +require 'contracts' + +module SnowplowTracker + + class Payload + + include Contracts + + attr_reader :context + + Contract nil => Payload + def initialize + @context = {} + self + end + + # Add a single name-value pair to @context + # + Contract String, Or[String, Bool, Num, nil] => Or[String, Bool, Num, nil] + def add(name, value) + if value != "" and not value.nil? + @context[name] = value + end + end + + # Add each name-value pair in dict to @context + # + Contract Hash => Hash + def add_dict(dict) + for f in dict + self.add(f[0], f[1]) + end + end + + # Stringify a JSON and add it to @context + # + Contract Maybe[Hash], Bool, String, String => Maybe[String] + def add_json(dict, encode_base64, type_when_encoded, type_when_not_encoded) + + if dict.nil? + return + end + + dict_string = JSON.generate(dict) + + if encode_base64 + self.add(type_when_encoded, Base64.strict_encode64(dict_string)) + else + self.add(type_when_not_encoded, dict_string) + end + + end + + end +end diff --git a/snowplow-tracker/lib/snowplow-tracker/self_describing_json.rb b/snowplow-tracker/lib/snowplow-tracker/self_describing_json.rb new file mode 100644 index 0000000000..7b917c1b00 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/self_describing_json.rb @@ -0,0 +1,34 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
+# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +module SnowplowTracker + + class SelfDescribingJson + + def initialize(schema, data) + @schema = schema + @data = data + end + + def to_json + { + :schema => @schema, + :data => @data + } + end + + end + +end diff --git a/snowplow-tracker/lib/snowplow-tracker/subject.rb b/snowplow-tracker/lib/snowplow-tracker/subject.rb new file mode 100644 index 0000000000..09d2bdfb60 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/subject.rb @@ -0,0 +1,139 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'contracts' + +module SnowplowTracker + + class Subject + + include Contracts + + @@default_platform = 'srv' + @@supported_platforms = ['pc', 'tv', 'mob', 'cnsl', 'iot'] + + attr_reader :standard_nv_pairs + + Contract None => Subject + def initialize + @standard_nv_pairs = {"p" => @@default_platform} + self + end + + # Specify the platform + # + Contract String => Subject + def set_platform(value) + if @@supported_platforms.include?(value) + @standard_nv_pairs['p'] = value + else + raise "#{value} is not a supported platform" + end + + self + end + + # Set the business-defined user ID for a user + # + Contract String => Subject + def set_user_id(user_id) + @standard_nv_pairs['uid'] = user_id + self + end + + # Set fingerprint for the user + # + Contract Num => Subject + def set_fingerprint(fingerprint) + @standard_nv_pairs['fp'] = fingerprint + self + end + + # Set the screen resolution for a device + # + Contract Num, Num => Subject + def set_screen_resolution(width, height) + @standard_nv_pairs['res'] = "#{width}x#{height}" + self + end + + # Set the dimensions of the current viewport + # + Contract Num, Num => Subject + def set_viewport(width, height) + @standard_nv_pairs['vp'] = "#{width}x#{height}" + self + end + + # Set the color depth of the device in bits per pixel + # + Contract Num => Subject + def set_color_depth(depth) + @standard_nv_pairs['cd'] = depth + self + end + + # Set the timezone field + # + Contract String => Subject + def set_timezone(timezone) + @standard_nv_pairs['tz'] = timezone + self + end + + # Set the language field + # + Contract String => Subject + def set_lang(lang) + @standard_nv_pairs['lang'] = lang + self + end + + # Set the domain user ID + # + Contract String => Subject + def set_domain_user_id(duid) + @standard_nv_pairs['duid'] = duid + 
self + end + + # Set the IP address field + # + Contract String => Subject + def set_ip_address(ip) + @standard_nv_pairs['ip'] = ip + self + end + + # Set the user agent + # + Contract String => Subject + def set_useragent(ua) + @standard_nv_pairs['ua'] = ua + self + end + + # Set the network user ID field + # This overwrites the nuid field set by the collector + # + Contract String => Subject + def set_network_user_id(nuid) + @standard_nv_pairs['tnuid'] = nuid + self + end + + end + +end diff --git a/snowplow-tracker/lib/snowplow-tracker/timestamp.rb b/snowplow-tracker/lib/snowplow-tracker/timestamp.rb new file mode 100644 index 0000000000..d81a12850c --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/timestamp.rb @@ -0,0 +1,46 @@ +# Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun, Ed Lewis (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2016 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +module SnowplowTracker + + class Timestamp + + attr_reader :type + attr_reader :value + + def initialize(type, value) + @type = type + @value = value + end + + end + + class TrueTimestamp < Timestamp + + def initialize(value) + super 'ttm', value + end + + end + + class DeviceTimestamp < Timestamp + + def initialize(value) + super 'dtm', value + end + + end + +end \ No newline at end of file diff --git a/snowplow-tracker/lib/snowplow-tracker/tracker.rb b/snowplow-tracker/lib/snowplow-tracker/tracker.rb new file mode 100644 index 0000000000..f73dcef505 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/tracker.rb @@ -0,0 +1,371 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +require 'contracts' +require 'securerandom' +require 'set' + +module SnowplowTracker + + class Tracker + + include Contracts + + @@EmitterInput = Or[lambda {|x| x.is_a? Emitter}, ArrayOf[lambda {|x| x.is_a? 
Emitter}]] + + @@required_transaction_keys = Set.new(%w(order_id total_value)) + @@recognised_transaction_keys = Set.new(%w(order_id total_value affiliation tax_value shipping city state country currency)) + + @@Transaction = lambda { |x| + return false unless x.class == Hash + transaction_keys = Set.new(x.keys) + @@required_transaction_keys.subset? transaction_keys and + transaction_keys.subset? @@recognised_transaction_keys + } + + @@required_item_keys = Set.new(%w(sku price quantity)) + @@recognised_item_keys = Set.new(%w(sku price quantity name category context)) + + @@Item = lambda { |x| + return false unless x.class == Hash + item_keys = Set.new(x.keys) + @@required_item_keys.subset? item_keys and + item_keys.subset? @@recognised_item_keys + } + + @@required_augmented_item_keys = Set.new(%w(sku price quantity tstamp order_id)) + @@recognised_augmented_item_keys = Set.new(%w(sku price quantity name category context tstamp order_id currency)) + + @@AugmentedItem = lambda { |x| + return false unless x.class == Hash + augmented_item_keys = Set.new(x.keys) + @@required_augmented_item_keys.subset? augmented_item_keys and + augmented_item_keys.subset? @@recognised_augmented_item_keys + } + + @@ContextsInput = ArrayOf[SelfDescribingJson] + + @@version = TRACKER_VERSION + @@default_encode_base64 = true + + @@base_schema_path = "iglu:com.snowplowanalytics.snowplow" + @@schema_tag = "jsonschema" + @@context_schema = "#{@@base_schema_path}/contexts/#{@@schema_tag}/1-0-1" + @@unstruct_event_schema = "#{@@base_schema_path}/unstruct_event/#{@@schema_tag}/1-0-0" + + Contract @@EmitterInput, Maybe[Subject], Maybe[String], Maybe[String], Bool => Tracker + def initialize(emitters, subject=nil, namespace=nil, app_id=nil, encode_base64=@@default_encode_base64) + @emitters = Array(emitters) + if subject.nil? 
+ @subject = Subject.new + else + @subject = subject + end + @standard_nv_pairs = { + 'tna' => namespace, + 'tv' => @@version, + 'aid' => app_id + } + @config = { + 'encode_base64' => encode_base64 + } + + self + end + + # Call subject methods from tracker instance + # + Subject.instance_methods(false).each do |name| + define_method name, ->(*splat) do + @subject.method(name.to_sym).call(*splat) + + self + end + end + + # Generates a type-4 UUID to identify this event + Contract nil => String + def get_event_id() + SecureRandom.uuid + end + + # Generates the timestamp (in milliseconds) to be attached to each event + # + Contract nil => Num + def get_timestamp + (Time.now.to_f * 1000).to_i + end + + # Builds a self-describing JSON from an array of custom contexts + # + Contract @@ContextsInput => Hash + def build_context(context) + SelfDescribingJson.new( + @@context_schema, + context.map {|c| c.to_json} + ).to_json + end + + # Tracking methods + + # Attaches all the fields in @standard_nv_pairs to the request + # Only attaches the context vendor if the event has a custom context + # + Contract Payload => nil + def track(pb) + pb.add_dict(@subject.standard_nv_pairs) + pb.add_dict(@standard_nv_pairs) + pb.add('eid', get_event_id()) + @emitters.each{ |emitter| emitter.input(pb.context)} + + nil + end + + # Log a visit to this page with an inserted device timestamp + # + Contract String, Maybe[String], Maybe[String], Maybe[@@ContextsInput], Maybe[Num] => Tracker + def track_page_view(page_url, page_title=nil, referrer=nil, context=nil, tstamp=nil) + if tstamp.nil? 
+ tstamp = get_timestamp + end + + track_page_view(page_url, page_title, referrer, context, DeviceTimestamp.new(tstamp)) + end + + # Log a visit to this page + # + Contract String, Maybe[String], Maybe[String], Maybe[@@ContextsInput], SnowplowTracker::Timestamp => Tracker + def track_page_view(page_url, page_title=nil, referrer=nil, context=nil, tstamp=nil) + pb = Payload.new + pb.add('e', 'pv') + pb.add('url', page_url) + pb.add('page', page_title) + pb.add('refr', referrer) + + unless context.nil? + pb.add_json(build_context(context), @config['encode_base64'], 'cx', 'co') + end + + pb.add(tstamp.type, tstamp.value) + + track(pb) + + self + end + + # Track a single item within an ecommerce transaction + # Not part of the public API + # + Contract @@AugmentedItem => self + def track_ecommerce_transaction_item(argmap) + pb = Payload.new + pb.add('e', 'ti') + pb.add('ti_id', argmap['order_id']) + pb.add('ti_sk', argmap['sku']) + pb.add('ti_pr', argmap['price']) + pb.add('ti_qu', argmap['quantity']) + pb.add('ti_nm', argmap['name']) + pb.add('ti_ca', argmap['category']) + pb.add('ti_cu', argmap['currency']) + unless argmap['context'].nil? + pb.add_json(build_context(argmap['context']), @config['encode_base64'], 'cx', 'co') + end + pb.add(argmap['tstamp'].type, argmap['tstamp'].value) + track(pb) + + self + end + + # Track an ecommerce transaction and all the items in it + # Set the timestamp as the device timestamp + Contract @@Transaction, ArrayOf[@@Item], Maybe[@@ContextsInput], Maybe[Num] => Tracker + def track_ecommerce_transaction(transaction, + items, + context=nil, + tstamp=nil) + if tstamp.nil? 
+ tstamp = get_timestamp + end + + track_ecommerce_transaction(transaction, items, context, DeviceTimestamp.new(tstamp)) + end + + # Track an ecommerce transaction and all the items in it + # + Contract @@Transaction, ArrayOf[@@Item], Maybe[@@ContextsInput], Timestamp => Tracker + def track_ecommerce_transaction(transaction, items, + context=nil, tstamp=nil) + pb = Payload.new + pb.add('e', 'tr') + pb.add('tr_id', transaction['order_id']) + pb.add('tr_tt', transaction['total_value']) + pb.add('tr_af', transaction['affiliation']) + pb.add('tr_tx', transaction['tax_value']) + pb.add('tr_sh', transaction['shipping']) + pb.add('tr_ci', transaction['city']) + pb.add('tr_st', transaction['state']) + pb.add('tr_co', transaction['country']) + pb.add('tr_cu', transaction['currency']) + unless context.nil? + pb.add_json(build_context(context), @config['encode_base64'], 'cx', 'co') + end + + pb.add(tstamp.type, tstamp.value) + + track(pb) + + for item in items + item['tstamp'] = tstamp + item['order_id'] = transaction['order_id'] + item['currency'] = transaction['currency'] + track_ecommerce_transaction_item(item) + end + + self + end + + # Track a structured event + # set the timestamp to the device timestamp + Contract String, String, Maybe[String], Maybe[String], Maybe[Num], Maybe[@@ContextsInput], Maybe[Num] => Tracker + def track_struct_event(category, action, label=nil, property=nil, value=nil, context=nil, tstamp=nil) + if tstamp.nil? 
+ tstamp = get_timestamp + end + + track_struct_event(category, action, label, property, value, context, DeviceTimestamp.new(tstamp)) + end + # Track a structured event + # + Contract String, String, Maybe[String], Maybe[String], Maybe[Num], Maybe[@@ContextsInput], Timestamp => Tracker + def track_struct_event(category, action, label=nil, property=nil, value=nil, context=nil, tstamp=nil) + pb = Payload.new + pb.add('e', 'se') + pb.add('se_ca', category) + pb.add('se_ac', action) + pb.add('se_la', label) + pb.add('se_pr', property) + pb.add('se_va', value) + unless context.nil? + pb.add_json(build_context(context), @config['encode_base64'], 'cx', 'co') + end + + pb.add(tstamp.type, tstamp.value) + track(pb) + + self + end + + # Track a screen view event + # + Contract Maybe[String], Maybe[String], Maybe[@@ContextsInput], Or[Timestamp, Num, nil] => Tracker + def track_screen_view(name=nil, id=nil, context=nil, tstamp=nil) + screen_view_properties = {} + unless name.nil? + screen_view_properties['name'] = name + end + unless id.nil? 
+ screen_view_properties['id'] = id + end + screen_view_schema = "#{@@base_schema_path}/screen_view/#{@@schema_tag}/1-0-0" + + event_json = SelfDescribingJson.new(screen_view_schema, screen_view_properties) + + self.track_unstruct_event(event_json, context, tstamp) + + self + end + + # Better name for track unstruct event + # + Contract SelfDescribingJson, Maybe[@@ContextsInput], Timestamp => Tracker + def track_self_describing_event(event_json, context=nil, tstamp=nil) + track_unstruct_event(event_json, context, tstamp) + end + + # Better name for track unstruct event + # set the timestamp to the device timestamp + Contract SelfDescribingJson, Maybe[@@ContextsInput], Maybe[Num] => Tracker + def track_self_describing_event(event_json, context=nil, tstamp=nil) + track_unstruct_event(event_json, context, tstamp) + end + + # Track an unstructured event + # set the timestamp to the device timstamp + Contract SelfDescribingJson, Maybe[@@ContextsInput], Maybe[Num] => Tracker + def track_unstruct_event(event_json, context=nil, tstamp=nil) + if tstamp.nil? + tstamp = get_timestamp + end + + track_unstruct_event(event_json, context, DeviceTimestamp.new(tstamp)) + end + + # Track an unstructured event + # + Contract SelfDescribingJson, Maybe[@@ContextsInput], Timestamp => Tracker + def track_unstruct_event(event_json, context=nil, tstamp=nil) + pb = Payload.new + pb.add('e', 'ue') + + envelope = SelfDescribingJson.new(@@unstruct_event_schema, event_json.to_json) + + pb.add_json(envelope.to_json, @config['encode_base64'], 'ue_px', 'ue_pr') + + unless context.nil? 
+ pb.add_json(build_context(context), @config['encode_base64'], 'cx', 'co') + end + + pb.add(tstamp.type, tstamp.value) + + track(pb) + + self + end + + # Flush all events stored in all emitters + # + Contract Bool => Tracker + def flush(async=false) + @emitters.each do |emitter| + emitter.flush(async) + end + + self + end + + # Set the subject of the events fired by the tracker + # + Contract Subject => Tracker + def set_subject(subject) + @subject = subject + self + end + + # Add a new emitter + # + Contract Emitter => Tracker + def add_emitter(emitter) + @emitters.push(emitter) + self + end + + private :get_timestamp, + :build_context, + :track, + :track_ecommerce_transaction_item + + end + +end diff --git a/snowplow-tracker/lib/snowplow-tracker/version.rb b/snowplow-tracker/lib/snowplow-tracker/version.rb new file mode 100644 index 0000000000..18bde7bf60 --- /dev/null +++ b/snowplow-tracker/lib/snowplow-tracker/version.rb @@ -0,0 +1,19 @@ +# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License Version 2.0. +# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ +# Author:: Alex Dean, Fred Blundun (mailto:support@snowplowanalytics.com) +# Copyright:: Copyright (c) 2013-2014 Snowplow Analytics Ltd +# License:: Apache License Version 2.0 + +module SnowplowTracker + VERSION = '0.6.1' + TRACKER_VERSION = "rb-#{VERSION}" +end diff --git a/snowplow-tracker/snowplow-tracker.gemspec b/snowplow-tracker/snowplow-tracker.gemspec new file mode 100644 index 0000000000..c30cb26829 --- /dev/null +++ b/snowplow-tracker/snowplow-tracker.gemspec @@ -0,0 +1,41 @@ +######################################################### +# This file has been automatically generated by gem2tgz # +######################################################### +# -*- encoding: utf-8 -*- +# stub: snowplow-tracker 0.6.1 ruby lib + +Gem::Specification.new do |s| + s.name = "snowplow-tracker".freeze + s.version = "0.6.1" + + s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= + s.require_paths = ["lib".freeze] + s.authors = ["Alexander Dean".freeze, "Fred Blundun".freeze] + s.date = "2016-12-26" + s.description = "With this tracker you can collect event data from your Ruby applications, Ruby on Rails web applications and Ruby gems.".freeze + s.email = "support@snowplowanalytics.com".freeze + s.files = ["LICENSE-2.0.txt".freeze, "README.md".freeze, "lib/snowplow-tracker.rb".freeze, "lib/snowplow-tracker/contracts.rb".freeze, "lib/snowplow-tracker/emitters.rb".freeze, "lib/snowplow-tracker/payload.rb".freeze, "lib/snowplow-tracker/self_describing_json.rb".freeze, "lib/snowplow-tracker/subject.rb".freeze, "lib/snowplow-tracker/timestamp.rb".freeze, "lib/snowplow-tracker/tracker.rb".freeze, "lib/snowplow-tracker/version.rb".freeze] + s.homepage = "http://github.com/snowplow/snowplow-ruby-tracker".freeze + s.licenses = ["Apache License 2.0".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 2.0.0".freeze) + s.rubygems_version = "2.5.2.1".freeze + s.summary = "Ruby Analytics for Snowplow".freeze + + if 
s.respond_to? :specification_version then + s.specification_version = 4 + + if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then + s.add_runtime_dependency(%q<contracts>.freeze, ["<= 0.11", "~> 0.7"]) + s.add_development_dependency(%q<rspec>.freeze, ["~> 2.14.1"]) + s.add_development_dependency(%q<webmock>.freeze, ["~> 1.17.4"]) + else + s.add_dependency(%q<contracts>.freeze, ["<= 0.11", "~> 0.7"]) + s.add_dependency(%q<rspec>.freeze, ["~> 2.14.1"]) + s.add_dependency(%q<webmock>.freeze, ["~> 1.17.4"]) + end + else + s.add_dependency(%q<contracts>.freeze, ["<= 0.11", "~> 0.7"]) + s.add_dependency(%q<rspec>.freeze, ["~> 2.14.1"]) + s.add_dependency(%q<webmock>.freeze, ["~> 1.17.4"]) + end +end From 193d502479f80e2d94c60c691d0d7e5ef83217d3 Mon Sep 17 00:00:00 2001 From: Sruthi Chandran Date: Wed, 16 Oct 2019 22:29:52 +0530 Subject: [PATCH 2/3] New upstream version 12.2.8 --- ruby-statistics/.gitignore | 65 -------- ruby-statistics/.rspec | 2 - ruby-statistics/.travis.yml | 8 - ruby-statistics/CODE_OF_CONDUCT.md | 74 ---------- ruby-statistics/CONTRIBUTING.md | 1 - ruby-statistics/Gemfile | 6 - ruby-statistics/LICENSE | 21 --- ruby-statistics/LICENSE.txt | 21 --- ruby-statistics/README.md | 79 ---------- ruby-statistics/Rakefile | 6 - ruby-statistics/bin/console | 14 -- ruby-statistics/bin/setup | 8 - ruby-statistics/lib/enumerable.rb | 15 -- ruby-statistics/lib/math.rb | 120 --------------- ruby-statistics/lib/statistics.rb | 7 - .../lib/statistics/distribution.rb | 11 -- .../lib/statistics/distribution/bernoulli.rb | 35 ----- .../lib/statistics/distribution/beta.rb | 36 ----- .../lib/statistics/distribution/binomial.rb | 49 ------ .../statistics/distribution/chi_squared.rb | 37 ----- .../lib/statistics/distribution/empirical.rb | 26 ---- .../lib/statistics/distribution/f.rb | 46 ------ .../lib/statistics/distribution/geometric.rb | 76 ---------- .../lib/statistics/distribution/logseries.rb | 51 ------- .../distribution/negative_binomial.rb | 51 ------- .../lib/statistics/distribution/normal.rb | 139 ------------------ 
.../lib/statistics/distribution/poisson.rb | 38 ----- .../lib/statistics/distribution/t_student.rb | 82 ----------- .../lib/statistics/distribution/uniform.rb | 40 ----- .../lib/statistics/distribution/weibull.rb | 66 --------- .../statistics/spearman_rank_coefficient.rb | 71 --------- .../lib/statistics/statistical_test.rb | 11 -- .../statistical_test/chi_squared_test.rb | 42 ------ .../lib/statistics/statistical_test/f_test.rb | 83 ----------- .../kolmogorov_smirnov_test.rb | 70 --------- .../lib/statistics/statistical_test/t_test.rb | 92 ------------ .../wilcoxon_rank_sum_test.rb | 95 ------------ ruby-statistics/lib/statistics/version.rb | 3 - ruby-statistics/ruby-statistics.gemspec | 34 ----- 39 files changed, 1731 deletions(-) delete mode 100644 ruby-statistics/.gitignore delete mode 100644 ruby-statistics/.rspec delete mode 100644 ruby-statistics/.travis.yml delete mode 100644 ruby-statistics/CODE_OF_CONDUCT.md delete mode 100644 ruby-statistics/CONTRIBUTING.md delete mode 100644 ruby-statistics/Gemfile delete mode 100644 ruby-statistics/LICENSE delete mode 100644 ruby-statistics/LICENSE.txt delete mode 100644 ruby-statistics/README.md delete mode 100644 ruby-statistics/Rakefile delete mode 100755 ruby-statistics/bin/console delete mode 100755 ruby-statistics/bin/setup delete mode 100644 ruby-statistics/lib/enumerable.rb delete mode 100644 ruby-statistics/lib/math.rb delete mode 100644 ruby-statistics/lib/statistics.rb delete mode 100644 ruby-statistics/lib/statistics/distribution.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/bernoulli.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/beta.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/binomial.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/chi_squared.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/empirical.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/f.rb delete mode 100644 
ruby-statistics/lib/statistics/distribution/geometric.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/logseries.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/negative_binomial.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/normal.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/poisson.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/t_student.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/uniform.rb delete mode 100644 ruby-statistics/lib/statistics/distribution/weibull.rb delete mode 100644 ruby-statistics/lib/statistics/spearman_rank_coefficient.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test/f_test.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test/t_test.rb delete mode 100644 ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb delete mode 100644 ruby-statistics/lib/statistics/version.rb delete mode 100644 ruby-statistics/ruby-statistics.gemspec diff --git a/ruby-statistics/.gitignore b/ruby-statistics/.gitignore deleted file mode 100644 index 73540b9aab..0000000000 --- a/ruby-statistics/.gitignore +++ /dev/null @@ -1,65 +0,0 @@ -*.gem -*.rbc -/.config -/coverage/ -/InstalledFiles -/pkg/ -/spec/reports/ -/spec/examples.txt -/test/tmp/ -/test/version_tmp/ -/tmp/ - -# Used by dotenv library to load environment variables. -# .env - -## Specific to RubyMotion: -.dat* -.repl_history -build/ -*.bridgesupport -build-iPhoneOS/ -build-iPhoneSimulator/ - -## Specific to RubyMotion (use of CocoaPods): -# -# We recommend against adding the Pods directory to your .gitignore. 
However -# you should judge for yourself, the pros and cons are mentioned at: -# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control -# -# vendor/Pods/ - -## Documentation cache and generated files: -/.yardoc/ -/_yardoc/ -/doc/ -/rdoc/ - -## Environment normalization: -/.bundle/ -/vendor/bundle -/lib/bundler/man/ - -# for a library or gem, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# Gemfile.lock -.ruby-version -.ruby-gemset - -# unless supporting rvm < 1.11.0 or doing something fancy, ignore this: -.rvmrc -/.bundle/ -/.yardoc -/Gemfile.lock -/_yardoc/ -/coverage/ -/doc/ -/pkg/ -/spec/reports/ -/tmp/ - -# rspec failure tracking -.rspec_status - -# byebug -.byebug_history diff --git a/ruby-statistics/.rspec b/ruby-statistics/.rspec deleted file mode 100644 index 8c18f1abdd..0000000000 --- a/ruby-statistics/.rspec +++ /dev/null @@ -1,2 +0,0 @@ ---format documentation ---color diff --git a/ruby-statistics/.travis.yml b/ruby-statistics/.travis.yml deleted file mode 100644 index 606201e265..0000000000 --- a/ruby-statistics/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -sudo: false -language: ruby -rvm: - - 2.3.7 - - 2.4.4 - - 2.5.1 - - 2.6.0 -before_install: gem update --system && gem install bundler diff --git a/ruby-statistics/CODE_OF_CONDUCT.md b/ruby-statistics/CODE_OF_CONDUCT.md deleted file mode 100644 index 9889d44c59..0000000000 --- a/ruby-statistics/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,74 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of experience, -nationality, personal appearance, race, religion, or sexual 
identity and -orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at ezapata@altavistaed.com. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/ruby-statistics/CONTRIBUTING.md b/ruby-statistics/CONTRIBUTING.md deleted file mode 100644 index 7315d24786..0000000000 --- a/ruby-statistics/CONTRIBUTING.md +++ /dev/null @@ -1 +0,0 @@ -Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/). 
diff --git a/ruby-statistics/Gemfile b/ruby-statistics/Gemfile deleted file mode 100644 index d006398a9e..0000000000 --- a/ruby-statistics/Gemfile +++ /dev/null @@ -1,6 +0,0 @@ -source "https://rubygems.org" - -git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } - -# Specify your gem's dependencies in statistics.gemspec -gemspec diff --git a/ruby-statistics/LICENSE b/ruby-statistics/LICENSE deleted file mode 100644 index d58962a2a8..0000000000 --- a/ruby-statistics/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Esteban Zapata Rojas - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/ruby-statistics/LICENSE.txt b/ruby-statistics/LICENSE.txt deleted file mode 100644 index b2d0a42d7f..0000000000 --- a/ruby-statistics/LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2017 esteban zapata - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/ruby-statistics/README.md b/ruby-statistics/README.md deleted file mode 100644 index b85f775e03..0000000000 --- a/ruby-statistics/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Ruby Statistics - -![](https://travis-ci.org/estebanz01/ruby-statistics.svg?branch=master) - -A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar. - -Unit test runs under the following ruby versions: -* Ruby 2.2. -* Ruby 2.3.1. -* Ruby 2.4.0. -* Ruby 2.5.0. 
- -We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html). - -Some logic and algorithms are extractions or adaptations from other authors, which are referenced in the comments. -This software is released under the MIT License. - -## Installation - -Add this line to your application's Gemfile: - -```ruby -gem 'ruby-statistics' -``` - -And then execute: - - $ bundle - -Or install it yourself as: - - $ gem install ruby-statistics - -## Basic Usage - -just require the `statistics` gem in order to load it. If you don't have defined the `Distribution` namespace, the gem will assign an alias, reducing the number of namespaces needed to use a class. - -Right now you can load: - -* The whole statistics gem. `require 'statistics'` -* A namespace. `require 'statistics/distribution'` -* A class. `require 'statistics/distribution/normal'` - -Feel free to use the one that is more convenient to you. - -### Hello-World Example -```ruby -require 'statistics' - -poisson = Distribution::Poisson.new(l) # Using Distribution alias. -normal = Statistics::Distribution::StandardNormal.new # Using all namespaces. -``` - -## Documentation -You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki) - -## Development - -After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. - -To install this gem onto your local machine, run `bundle exec rake install`. 
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). - -## Contributing - -Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. - -## License - -The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). - -## Code of Conduct - -Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/estebanz01/ruby-statistics/blob/master/CODE_OF_CONDUCT.md). - -## Contact - -You can contact me via: -* [Github](https://github.com/estebanz01) -* [Twitter](https://twitter.com/estebanz01) diff --git a/ruby-statistics/Rakefile b/ruby-statistics/Rakefile deleted file mode 100644 index b7e9ed549b..0000000000 --- a/ruby-statistics/Rakefile +++ /dev/null @@ -1,6 +0,0 @@ -require "bundler/gem_tasks" -require "rspec/core/rake_task" - -RSpec::Core::RakeTask.new(:spec) - -task :default => :spec diff --git a/ruby-statistics/bin/console b/ruby-statistics/bin/console deleted file mode 100755 index 35213b9e9e..0000000000 --- a/ruby-statistics/bin/console +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require "bundler/setup" -require "statistics" - -# You can add fixtures and/or initialization code here to make experimenting -# with your gem easier. You can also use a different console, if you like. - -# (If you use this, don't forget to add pry to your Gemfile!) 
-# require "pry" -# Pry.start - -require "irb" -IRB.start(__FILE__) diff --git a/ruby-statistics/bin/setup b/ruby-statistics/bin/setup deleted file mode 100755 index dce67d860a..0000000000 --- a/ruby-statistics/bin/setup +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -IFS=$'\n\t' -set -vx - -bundle install - -# Do any other automated setup that you need to do here diff --git a/ruby-statistics/lib/enumerable.rb b/ruby-statistics/lib/enumerable.rb deleted file mode 100644 index 00627afc76..0000000000 --- a/ruby-statistics/lib/enumerable.rb +++ /dev/null @@ -1,15 +0,0 @@ -# TODO: Avoid monkey-patching. -module Enumerable - def mean - self.reduce(:+) / self.length.to_f - end - - def variance - mean = self.mean - self.reduce(0) { |memo, value| memo + ((value - mean) ** 2) } / (self.length - 1).to_f - end - - def standard_deviation - Math.sqrt(self.variance) - end -end diff --git a/ruby-statistics/lib/math.rb b/ruby-statistics/lib/math.rb deleted file mode 100644 index 9be39ba446..0000000000 --- a/ruby-statistics/lib/math.rb +++ /dev/null @@ -1,120 +0,0 @@ -module Math - def self.factorial(n) - return if n < 0 - - n = n.to_i # Only integers. - - return 1 if n == 0 || n == 1 - Math.gamma(n + 1) # Math.gamma(x) == (n - 1)! for integer values - end - - def self.combination(n, r) - self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_f # n!/(r! * [n - r]!) - end - - def self.permutation(n, k) - self.factorial(n)/self.factorial(n - k).to_f - end - - # Function adapted from the python implementation that exists in https://en.wikipedia.org/wiki/Simpson%27s_rule#Sample_implementation - # Finite integral in the interval [a, b] split up in n-intervals - def self.simpson_rule(a, b, n, &block) - unless n.even? - puts "The composite simpson's rule needs even intervals!" 
- return - end - - h = (b - a)/n.to_f - resA = yield(a) - resB = yield(b) - - sum = resA + resB - - (1..n).step(2).each do |number| - res = yield(a + number * h) - sum += 4 * res - end - - (1..(n-1)).step(2).each do |number| - res = yield(a + number * h) - sum += 2 * res - end - - return sum * h / 3.0 - end - - def self.lower_incomplete_gamma_function(s, x) - # The greater the iterations, the better. That's why we are iterating 10_000 * x times - self.simpson_rule(0, x, (10_000 * x.round).round) do |t| - (t ** (s - 1)) * Math.exp(-t) - end - end - - def self.beta_function(x, y) - return 1 if x == 1 && y == 1 - - (Math.gamma(x) * Math.gamma(y))/Math.gamma(x + y) - end - - ### This implementation is an adaptation of the incomplete beta function made in C by - ### Lewis Van Winkle, which released the code under the zlib license. - ### The whole math behind this code is described in the following post: https://codeplea.com/incomplete-beta-function-c - def self.incomplete_beta_function(x, alp, bet) - return if x < 0.0 - return 1.0 if x > 1.0 - - tiny = 1.0E-50 - - if x > ((alp + 1.0)/(alp + bet + 2.0)) - return 1.0 - self.incomplete_beta_function(1.0 - x, bet, alp) - end - - # To avoid overflow problems, the implementation applies the logarithm properties - # to calculate in a faster and safer way the values. 
- lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze - front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze - - # This is the non-log version of the left part of the formula (before the continuous fraction) - # down_left = alp * self.beta_function(alp, bet) - # upper_left = (x ** alp) * ((1.0 - x) ** bet) - # front = upper_left/down_left - - f, c, d = 1.0, 1.0, 0.0 - - returned_value = nil - - # Let's do more iterations than the proposed implementation (200 iters) - (0..500).each do |number| - m = number/2 - - numerator = if number == 0 - 1.0 - elsif number % 2 == 0 - (m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m)) - else - top = -((alp + m) * (alp + bet + m) * x) - down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0)) - - top/down - end - - d = 1.0 + numerator * d - d = tiny if d.abs < tiny - d = 1.0 / d - - c = 1.0 + numerator / c - c = tiny if c.abs < tiny - - cd = (c*d).freeze - f = f * cd - - - if (1.0 - cd).abs < 1.0E-10 - returned_value = front * (f - 1.0) - break - end - end - - returned_value - end -end diff --git a/ruby-statistics/lib/statistics.rb b/ruby-statistics/lib/statistics.rb deleted file mode 100644 index 2e443c214a..0000000000 --- a/ruby-statistics/lib/statistics.rb +++ /dev/null @@ -1,7 +0,0 @@ -require File.dirname(__FILE__) + '/enumerable' -require File.dirname(__FILE__) + '/math' -Dir[ File.dirname(__FILE__) + '/statistics/**/*.rb'].each {|file| require file } - -module Statistics - # Your code goes here... -end diff --git a/ruby-statistics/lib/statistics/distribution.rb b/ruby-statistics/lib/statistics/distribution.rb deleted file mode 100644 index 291eab9cc7..0000000000 --- a/ruby-statistics/lib/statistics/distribution.rb +++ /dev/null @@ -1,11 +0,0 @@ -Dir[File.dirname(__FILE__) + '/distribution/**/*.rb'].each {|file| require file } - -module Statistics - module Distribution - end -end - -# If Distribution is not defined, setup alias. 
-if defined?(Statistics) && !(defined?(Distribution)) - Distribution = Statistics::Distribution -end diff --git a/ruby-statistics/lib/statistics/distribution/bernoulli.rb b/ruby-statistics/lib/statistics/distribution/bernoulli.rb deleted file mode 100644 index 30ab789386..0000000000 --- a/ruby-statistics/lib/statistics/distribution/bernoulli.rb +++ /dev/null @@ -1,35 +0,0 @@ -module Statistics - module Distribution - class Bernoulli - def self.density_function(n, p) - return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. - - case n - when 0 then 1.0 - p - when 1 then p - end - end - - def self.cumulative_function(n, p) - return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. - - case n - when 0 then 1.0 - p - when 1 then 1.0 - end - end - - def self.variance(p) - p * (1.0 - p) - end - - def self.skewness(p) - (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p)) - end - - def self.kurtosis(p) - (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p)) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/beta.rb b/ruby-statistics/lib/statistics/distribution/beta.rb deleted file mode 100644 index 90970954b7..0000000000 --- a/ruby-statistics/lib/statistics/distribution/beta.rb +++ /dev/null @@ -1,36 +0,0 @@ -module Statistics - module Distribution - class Beta - attr_accessor :alpha, :beta - - def initialize(alp, bet) - self.alpha = alp.to_f - self.beta = bet.to_f - end - - def cumulative_function(value) - Math.incomplete_beta_function(value, alpha, beta) - end - - def density_function(value) - return 0 if value < 0 || value > 1 # Density function defined in the [0,1] interval - - num = (value**(alpha - 1)) * ((1 - value)**(beta - 1)) - den = Math.beta_function(alpha, beta) - - num/den - end - - def mode - return unless alpha > 1 && beta > 1 - - (alpha - 1)/(alpha + beta - 2) - end - - def mean - return if alpha + beta == 0 - alpha / (alpha + beta) - end - end - end -end diff --git 
a/ruby-statistics/lib/statistics/distribution/binomial.rb b/ruby-statistics/lib/statistics/distribution/binomial.rb deleted file mode 100644 index 3ce513ca00..0000000000 --- a/ruby-statistics/lib/statistics/distribution/binomial.rb +++ /dev/null @@ -1,49 +0,0 @@ -module Statistics - module Distribution - class Binomial - attr_accessor :number_of_trials, :probability_per_trial - def initialize(n, p) - self.number_of_trials = n.to_i - self.probability_per_trial = p - end - - def probability_mass_function(k) - return if k < 0 || k > number_of_trials - k = k.to_i - - Math.combination(number_of_trials, k) * - (probability_per_trial ** k) * ((1 - probability_per_trial) ** (number_of_trials - k)) - end - - def cumulative_function(k) - return if k < 0 || k > number_of_trials - k = k.to_i - - p = 1 - probability_per_trial - Math.incomplete_beta_function(p, number_of_trials - k, 1 + k) - end - - def mean - number_of_trials * probability_per_trial - end - - def variance - mean * (1 - probability_per_trial) - end - - def mode - test = (number_of_trials + 1) * probability_per_trial - - returned = if test == 0 || (test % 1 != 0) - test.floor - elsif (test % 1 == 0) && (test >= 1 && test <= number_of_trials) - [test, test - 1] - elsif test == number_of_trials + 1 - number_of_trials - end - - returned - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/chi_squared.rb b/ruby-statistics/lib/statistics/distribution/chi_squared.rb deleted file mode 100644 index 0996999036..0000000000 --- a/ruby-statistics/lib/statistics/distribution/chi_squared.rb +++ /dev/null @@ -1,37 +0,0 @@ -module Statistics - module Distribution - class ChiSquared - attr_accessor :degrees_of_freedom - - alias_method :mean, :degrees_of_freedom - - def initialize(k) - self.degrees_of_freedom = k - end - - def cumulative_function(value) - k = degrees_of_freedom/2.0 - Math.lower_incomplete_gamma_function(k, value/2.0)/Math.gamma(k) - end - - def density_function(value) - return 0 if value 
< 0 - - common = degrees_of_freedom/2.0 - - left_down = (2 ** common) * Math.gamma(common) - right = (value ** (common - 1)) * Math.exp(-(value/2.0)) - - (1.0/left_down) * right - end - - def mode - [degrees_of_freedom - 2, 0].max - end - - def variance - degrees_of_freedom * 2 - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/empirical.rb b/ruby-statistics/lib/statistics/distribution/empirical.rb deleted file mode 100644 index 63d72bdfed..0000000000 --- a/ruby-statistics/lib/statistics/distribution/empirical.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Statistics - module Distribution - class Empirical - attr_accessor :samples - - def initialize(samples:) - self.samples = samples - end - - # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution - def cumulative_function(x:) - cumulative_sum = samples.reduce(0) do |summation, sample| - summation += if sample <= x - 1 - else - 0 - end - - summation - end - - cumulative_sum / samples.size.to_f - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/f.rb b/ruby-statistics/lib/statistics/distribution/f.rb deleted file mode 100644 index af3af9bc09..0000000000 --- a/ruby-statistics/lib/statistics/distribution/f.rb +++ /dev/null @@ -1,46 +0,0 @@ -module Statistics - module Distribution - class F - attr_accessor :d1, :d2 # Degrees of freedom #1 and #2 - - def initialize(k, j) - self.d1 = k - self.d2 = j - end - - # Formula extracted from http://www.itl.nist.gov/div898/handbook/eda/section3/eda3665.htm#CDF - def cumulative_function(value) - k = d2/(d2 + d1 * value.to_f) - - 1 - Math.incomplete_beta_function(k, d2/2.0, d1/2.0) - end - - def density_function(value) - return if d1 < 0 || d2 < 0 # F-pdf is well defined for the [0, +infinity) interval. 
- - val = value.to_f - upper = ((d1 * val) ** d1) * (d2**d2) - lower = (d1 * val + d2) ** (d1 + d2) - up = Math.sqrt(upper/lower.to_f) - down = val * Math.beta_function(d1/2.0, d2/2.0) - - up/down.to_f - end - - def mean - return if d2 <= 2 - - d2/(d2 - 2).to_f - end - - def mode - return if d1 <= 2 - - left = (d1 - 2)/d1.to_f - right = d2/(d2 + 2).to_f - - left * right - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/geometric.rb b/ruby-statistics/lib/statistics/distribution/geometric.rb deleted file mode 100644 index fe59eb505b..0000000000 --- a/ruby-statistics/lib/statistics/distribution/geometric.rb +++ /dev/null @@ -1,76 +0,0 @@ -module Statistics - module Distribution - class Geometric - attr_accessor :probability_of_success, :always_success_allowed - - def initialize(p, always_success: false) - self.probability_of_success = p.to_f - self.always_success_allowed = always_success - end - - def density_function(k) - k = k.to_i - - if always_success_allowed - return if k < 0 - - ((1.0 - probability_of_success) ** k) * probability_of_success - else - return if k <= 0 - - ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success - end - end - - def cumulative_function(k) - k = k.to_i - - if always_success_allowed - return if k < 0 - - 1.0 - ((1.0 - probability_of_success) ** (k + 1.0)) - else - return if k <= 0 - - 1.0 - ((1.0 - probability_of_success) ** k) - end - end - - def mean - if always_success_allowed - (1.0 - probability_of_success) / probability_of_success - else - 1.0 / probability_of_success - end - end - - def median - if always_success_allowed - (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - 1.0 - else - (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - end - end - - def mode - if always_success_allowed - 0.0 - else - 1.0 - end - end - - def variance - (1.0 - probability_of_success) / (probability_of_success ** 2) - end - - def skewness - (2.0 - probability_of_success) / Math.sqrt(1.0 - 
probability_of_success) - end - - def kurtosis - 6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success)) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/logseries.rb b/ruby-statistics/lib/statistics/distribution/logseries.rb deleted file mode 100644 index 359f9c619e..0000000000 --- a/ruby-statistics/lib/statistics/distribution/logseries.rb +++ /dev/null @@ -1,51 +0,0 @@ -module Statistics - module Distribution - class LogSeries - def self.density_function(k, p) - return if k <= 0 - k = k.to_i - - left = (-1.0 / Math.log(1.0 - p)) - right = (p ** k).to_f - - left * right / k - end - - def self.cumulative_function(k, p) - return if k <= 0 - - # Sadly, the incomplete beta function is converging - # too fast to zero and breaking the calculation on logs. - # So, we default to the basic definition of the CDF which is - # the integral (-Inf, K) of the PDF, with P(X <= x) which can - # be solved as a summation of all PDFs from 1 to K. Note that the summation approach - # only applies to discrete distributions. 
- # - # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p) - # 1.0 + right - - result = 0.0 - 1.upto(k) do |number| - result += self.density_function(number, p) - end - - result - end - - def self.mode - 1.0 - end - - def self.mean(p) - (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p)) - end - - def self.variance(p) - up = p + Math.log(1.0 - p) - down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2) - - (-1.0 * p) * (up / down.to_f) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/negative_binomial.rb b/ruby-statistics/lib/statistics/distribution/negative_binomial.rb deleted file mode 100644 index 34dc451cae..0000000000 --- a/ruby-statistics/lib/statistics/distribution/negative_binomial.rb +++ /dev/null @@ -1,51 +0,0 @@ -module Statistics - module Distribution - class NegativeBinomial - attr_accessor :number_of_failures, :probability_per_trial - - def initialize(r, p) - self.number_of_failures = r.to_i - self.probability_per_trial = p - end - - def probability_mass_function(k) - return if number_of_failures < 0 || k < 0 || k > number_of_failures - - left = Math.combination(k + number_of_failures - 1, k) - right = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k) - - left * right - end - - def cumulative_function(k) - return if k < 0 || k > number_of_failures - k = k.to_i - - 1.0 - Math.incomplete_beta_function(probability_per_trial, k + 1, number_of_failures) - end - - def mean - (probability_per_trial * number_of_failures)/(1 - probability_per_trial).to_f - end - - def variance - (probability_per_trial * number_of_failures)/((1 - probability_per_trial) ** 2).to_f - end - - def skewness - (1 + probability_per_trial).to_f / Math.sqrt(probability_per_trial * number_of_failures) - end - - def mode - if number_of_failures > 1 - up = probability_per_trial * (number_of_failures - 1) - down = (1 - probability_per_trial).to_f - - (up/down).floor - elsif number_of_failures <= 1 - 0.0 - end - 
end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/normal.rb b/ruby-statistics/lib/statistics/distribution/normal.rb deleted file mode 100644 index 8feef0247f..0000000000 --- a/ruby-statistics/lib/statistics/distribution/normal.rb +++ /dev/null @@ -1,139 +0,0 @@ -module Statistics - module Distribution - class Normal - attr_accessor :mean, :standard_deviation, :variance - alias_method :mode, :mean - - def initialize(avg, std) - self.mean = avg.to_f - self.standard_deviation = std.to_f - self.variance = std.to_f**2 - end - - def cumulative_function(value) - (1/2.0) * (1.0 + Math.erf((value - mean)/(standard_deviation * Math.sqrt(2.0)))) - end - - def density_function(value) - return 0 if standard_deviation <= 0 - - up_right = (value - mean)**2.0 - down_right = 2.0 * variance - right = Math.exp(-(up_right/down_right)) - left_down = Math.sqrt(2.0 * Math::PI * variance) - left_up = 1.0 - - (left_up/(left_down) * right) - end - - ## Marsaglia polar method implementation for random gaussian (normal) number generation. - # References: - # https://en.wikipedia.org/wiki/Marsaglia_polar_method - # https://math.stackexchange.com/questions/69245/transform-uniform-distribution-to-normal-distribution-using-lindeberg-l%C3%A9vy-clt - # https://www.projectrhea.org/rhea/index.php/The_principles_for_how_to_generate_random_samples_from_a_Gaussian_distribution - - def random(elements: 1, seed: Random.new_seed) - results = [] - - # Setup seed - srand(seed) - - # Number of random numbers to be generated. - elements.times do - x, y, r = 0.0, 0.0, 0.0 - - # Find an (x, y) point in the x^2 + y^2 < 1 circumference. 
- loop do - x = 2.0 * rand - 1.0 - y = 2.0 * rand - 1.0 - - r = (x ** 2) + (y ** 2) - - break unless r >= 1.0 || r == 0 - end - - # Project the random point to the required random distance - r = Math.sqrt(-2.0 * Math.log(r) / r) - - # Transform the random distance to a gaussian value and append it to the results array - results << mean + x * r * standard_deviation - end - - if elements == 1 - results.first - else - results - end - end - end - - class StandardNormal < Normal - def initialize - super(0, 1) # Mean = 0, Std = 1 - end - - def density_function(value) - pow = (value**2)/2.0 - euler = Math.exp(-pow) - - euler/Math.sqrt(2 * Math::PI) - end - end - - # Inverse Standard Normal distribution: - # References: - # https://en.wikipedia.org/wiki/Inverse_distribution - # http://www.source-code.biz/snippets/vbasic/9.htm - class InverseStandardNormal < StandardNormal - A1 = -39.6968302866538 - A2 = 220.946098424521 - A3 = -275.928510446969 - A4 = 138.357751867269 - A5 = -30.6647980661472 - A6 = 2.50662827745924 - B1 = -54.4760987982241 - B2 = 161.585836858041 - B3 = -155.698979859887 - B4 = 66.8013118877197 - B5 = -13.2806815528857 - C1 = -7.78489400243029E-03 - C2 = -0.322396458041136 - C3 = -2.40075827716184 - C4 = -2.54973253934373 - C5 = 4.37466414146497 - C6 = 2.93816398269878 - D1 = 7.78469570904146E-03 - D2 = 0.32246712907004 - D3 = 2.445134137143 - D4 = 3.75440866190742 - P_LOW = 0.02425 - P_HIGH = 1 - P_LOW - - def density_function(_) - raise NotImplementedError - end - - def random(elements: 1, seed: Random.new_seed) - raise NotImplementedError - end - - def cumulative_function(value) - return if value < 0.0 || value > 1.0 - return -1.0 * Float::INFINITY if value.zero? 
- return Float::INFINITY if value == 1.0 - - if value < P_LOW - q = Math.sqrt((Math.log(value) * -2.0)) - (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0) - elsif value <= P_HIGH - q = value - 0.5 - r = q ** 2 - (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q / (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0) - else - q = Math.sqrt((Math.log(1 - value) * -2.0)) - - (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1) - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/poisson.rb b/ruby-statistics/lib/statistics/distribution/poisson.rb deleted file mode 100644 index b317624ad9..0000000000 --- a/ruby-statistics/lib/statistics/distribution/poisson.rb +++ /dev/null @@ -1,38 +0,0 @@ -module Statistics - module Distribution - class Poisson - attr_accessor :expected_number_of_occurrences - - alias_method :mean, :expected_number_of_occurrences - alias_method :variance, :expected_number_of_occurrences - - def initialize(l) - self.expected_number_of_occurrences = l - end - - def probability_mass_function(k) - return if k < 0 || expected_number_of_occurrences < 0 - - k = k.to_i - - upper = (expected_number_of_occurrences ** k) * Math.exp(-expected_number_of_occurrences) - lower = Math.factorial(k) - - upper/lower.to_f - end - - def cumulative_function(k) - return if k < 0 || expected_number_of_occurrences < 0 - - k = k.to_i - - upper = Math.lower_incomplete_gamma_function((k + 1).floor, expected_number_of_occurrences) - lower = Math.factorial(k.floor) - - # We need the right tail, i.e.: The upper incomplete gamma function. This can be - # achieved by doing a substraction between 1 and the lower incomplete gamma function. 
- 1 - (upper/lower.to_f) - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/t_student.rb b/ruby-statistics/lib/statistics/distribution/t_student.rb deleted file mode 100644 index 66b05780ba..0000000000 --- a/ruby-statistics/lib/statistics/distribution/t_student.rb +++ /dev/null @@ -1,82 +0,0 @@ -module Statistics - module Distribution - class TStudent - attr_accessor :degrees_of_freedom - attr_reader :mode - - def initialize(v) - self.degrees_of_freedom = v - @mode = 0 - end - - ### Extracted from https://codeplea.com/incomplete-beta-function-c - ### This function is shared under zlib license and the author is Lewis Van Winkle - def cumulative_function(value) - upper = (value + Math.sqrt(value * value + degrees_of_freedom)) - lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom)) - - x = upper/lower - - alpha = degrees_of_freedom/2.0 - beta = degrees_of_freedom/2.0 - - Math.incomplete_beta_function(x, alpha, beta) - end - - def density_function(value) - return if degrees_of_freedom <= 0 - - upper = Math.gamma((degrees_of_freedom + 1)/2.0) - lower = Math.sqrt(degrees_of_freedom * Math::PI) * Math.gamma(degrees_of_freedom/2.0) - left = upper/lower - right = (1 + ((value ** 2)/degrees_of_freedom.to_f)) ** -((degrees_of_freedom + 1)/2.0) - - left * right - end - - def mean - 0 if degrees_of_freedom > 1 - end - - def variance - if degrees_of_freedom > 1 && degrees_of_freedom <= 2 - Float::INFINITY - elsif degrees_of_freedom > 2 - degrees_of_freedom/(degrees_of_freedom - 2.0) - end - end - - # Quantile function extracted from http://www.jennessent.com/arcview/idf.htm - # TODO: Make it truly Student's T sample. - def random(elements: 1, seed: Random.new_seed) - warn 'This is an alpha version code. 
The generated sample is similar to an uniform distribution' - srand(seed) - - v = degrees_of_freedom - results = [] - - # Because the Quantile function of a student-t distribution is between (-Infinity, y) - # we setup an small threshold in order to properly compute the integral - threshold = 10_000.0e-12 - - elements.times do - y = rand - results << Math.simpson_rule(threshold, y, 10_000) do |t| - up = Math.gamma((v+1)/2.0) - down = Math.sqrt(Math::PI * v) * Math.gamma(v/2.0) - right = (1 + ((y ** 2)/v.to_f)) ** ((v+1)/2.0) - left = up/down.to_f - - left * right - end - end - - if elements == 1 - results.first - else - results - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/uniform.rb b/ruby-statistics/lib/statistics/distribution/uniform.rb deleted file mode 100644 index 4550159bc9..0000000000 --- a/ruby-statistics/lib/statistics/distribution/uniform.rb +++ /dev/null @@ -1,40 +0,0 @@ -module Statistics - module Distribution - class Uniform - attr_accessor :left, :right - - def initialize(a, b) - self.left = a.to_f - self.right = b.to_f - end - - def density_function(value) - if value >= left && value <= right - 1/(right - left) - else - 0 - end - end - - def cumulative_function(value) - if value < left - 0 - elsif value >= left && value <= right - (value - left)/(right - left) - else - 1 - end - end - - def mean - (1/2.0) * ( left + right ) - end - alias_method :median, :mean - - - def variance - (1/12.0) * ( right - left ) ** 2 - end - end - end -end diff --git a/ruby-statistics/lib/statistics/distribution/weibull.rb b/ruby-statistics/lib/statistics/distribution/weibull.rb deleted file mode 100644 index 90c876855e..0000000000 --- a/ruby-statistics/lib/statistics/distribution/weibull.rb +++ /dev/null @@ -1,66 +0,0 @@ -module Statistics - module Distribution - class Weibull - attr_accessor :shape, :scale # k and lambda - - def initialize(k, lamb) - self.shape = k.to_f - self.scale = lamb.to_f - end - - def 
cumulative_function(random_value) - return 0 if random_value < 0 - - 1 - Math.exp(-((random_value/scale) ** shape)) - end - - def density_function(value) - return if shape <= 0 || scale <= 0 - return 0 if value < 0 - - left = shape/scale - center = (value/scale)**(shape - 1) - right = Math.exp(-((value/scale)**shape)) - - left * center * right - end - - def mean - scale * Math.gamma(1 + (1/shape)) - end - - def mode - return 0 if shape <= 1 - - scale * (((shape - 1)/shape) ** (1/shape)) - end - - def variance - left = Math.gamma(1 + (2/shape)) - right = Math.gamma(1 + (1/shape)) ** 2 - - (scale ** 2) * (left - right) - end - - # Using the inverse CDF function, also called quantile, we can calculate - # a random sample that follows a weibull distribution. - # - # Formula extracted from https://www.taygeta.com/random/weibull.html - def random(elements: 1, seed: Random.new_seed) - results = [] - - srand(seed) - - elements.times do - results << ((-1/scale) * Math.log(1 - rand)) ** (1/shape) - end - - if elements == 1 - results.first - else - results - end - end - end - end -end diff --git a/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb b/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb deleted file mode 100644 index cbdd7a03ae..0000000000 --- a/ruby-statistics/lib/statistics/spearman_rank_coefficient.rb +++ /dev/null @@ -1,71 +0,0 @@ -module Statistics - class SpearmanRankCoefficient - def self.rank(data:, return_ranks_only: true) - descending_order_data = data.sort { |a, b| b <=> a } - rankings = {} - - data.each do |value| - # If we have ties, the find_index method will only retrieve the index of the - # first element in the list (i.e, the most close to the left of the array), - # so when a tie is detected, we increase the temporal ranking by the number of - # counted elements at that particular time and then we increase the counter. 
- temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index - - if rankings.fetch(value, false) - rankings[value][:rank] += (temporal_ranking + rankings[value][:counter]) - rankings[value][:counter] += 1 - rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f - else - rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking } - end - end - - if return_ranks_only - data.map do |value| - rankings[value][:tie_rank] - end - else - rankings - end - end - - # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php - def self.coefficient(set_one, set_two) - raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size - return if set_one.size == 0 && set_two.size == 0 - - set_one_mean, set_two_mean = set_one.mean, set_two.mean - have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) } - - if have_tie_ranks - numerator = 0 - squared_differences_set_one = 0 - squared_differences_set_two = 0 - - set_one.size.times do |idx| - local_diff_one = (set_one[idx] - set_one_mean) - local_diff_two = (set_two[idx] - set_two_mean) - - squared_differences_set_one += local_diff_one ** 2 - squared_differences_set_two += local_diff_two ** 2 - - numerator += local_diff_one * local_diff_two - end - - denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two) - - numerator / denominator.to_f # This is rho or spearman's coefficient. - else - sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)| - memo += ((rank_one - set_two[index]) ** 2) - memo - end - - numerator = 6 * sum_squared_differences - denominator = ((set_one.size ** 3) - set_one.size) - - 1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient. 
- end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test.rb b/ruby-statistics/lib/statistics/statistical_test.rb deleted file mode 100644 index 5ce3c234db..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test.rb +++ /dev/null @@ -1,11 +0,0 @@ -Dir[File.dirname(__FILE__) + '/statistical_test/**/*.rb'].each {|file| require file } - -module Statistics - module StatisticalTest - end -end - -# If StatisticalTest is not defined, setup alias. -if defined?(Statistics) && !(defined?(StatisticalTest)) - StatisticalTest = Statistics::StatisticalTest -end diff --git a/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb b/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb deleted file mode 100644 index ce96b34f77..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/chi_squared_test.rb +++ /dev/null @@ -1,42 +0,0 @@ -module Statistics - module StatisticalTest - class ChiSquaredTest - def self.chi_statistic(expected, observed) - # If the expected is a number, we asumme that all expected observations - # has the same probability to occur, hence we expect to see the same number - # of expected observations per each observed value - statistic = if expected.is_a? Numeric - observed.reduce(0) do |memo, observed_value| - up = (observed_value - expected) ** 2 - memo += (up/expected.to_f) - end - else - expected.each_with_index.reduce(0) do |memo, (expected_value, index)| - up = (observed[index] - expected_value) ** 2 - memo += (up/expected_value.to_f) - end - end - - [statistic, observed.size - 1] - end - - def self.goodness_of_fit(alpha, expected, observed) - chi_score, df = *self.chi_statistic(expected, observed) # Splat array result - - return if chi_score.nil? || df.nil? 
- - probability = Distribution::ChiSquared.new(df).cumulative_function(chi_score) - p_value = 1 - probability - - # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha - # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. - { probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/f_test.rb b/ruby-statistics/lib/statistics/statistical_test/f_test.rb deleted file mode 100644 index 94be720c96..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/f_test.rb +++ /dev/null @@ -1,83 +0,0 @@ -module Statistics - module StatisticalTest - class FTest - # This method calculates the one-way ANOVA F-test statistic. - # We assume that all specified arguments are arrays. - # It returns an array with three elements: - # [F-statistic or F-score, degrees of freedom numerator, degrees of freedom denominator]. - # - # Formulas extracted from: - # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/ - # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html - def self.anova_f_score(*args) - # If only two groups have been specified as arguments, we follow the classic F-Test for - # equality of variances, which is the ratio between the variances. 
- f_score = nil - df1 = nil - df2 = nil - - if args.size == 2 - variances = [args[0].variance, args[1].variance] - - f_score = variances.max/variances.min.to_f - df1 = 1 # k-1 (k = 2) - df2 = args.flatten.size - 2 # N-k (k = 2) - elsif args.size > 2 - total_groups = args.size - total_elements = args.flatten.size - overall_mean = args.flatten.mean - - sample_sizes = args.map(&:size) - sample_means = args.map(&:mean) - sample_stds = args.map(&:standard_deviation) - - # Variance between groups - iterator = sample_sizes.each_with_index - - variance_between_groups = iterator.reduce(0) do |summation, (size, index)| - inner_calculation = size * ((sample_means[index] - overall_mean) ** 2) - - summation += (inner_calculation / (total_groups - 1).to_f) - end - - # Variance within groups - variance_within_groups = (0...total_groups).reduce(0) do |outer_summation, group_index| - outer_summation += args[group_index].reduce(0) do |inner_sumation, observation| - inner_calculation = ((observation - sample_means[group_index]) ** 2) - inner_sumation += (inner_calculation / (total_elements - total_groups).to_f) - end - end - - f_score = variance_between_groups/variance_within_groups.to_f - df1 = total_groups - 1 - df2 = total_elements - total_groups - end - - [f_score, df1, df2] - end - - # This method expects the alpha value and the groups to calculate the one-way ANOVA test. - # It returns a hash with multiple information and the test result (if reject the null hypotesis or not). - # Keep in mind that the values for the alternative key (true/false) does not imply that the alternative hypothesis - # is TRUE or FALSE. It's a minor notation advantage to decide if reject the null hypothesis or not. - - def self.one_way_anova(alpha, *args) - f_score, df1, df2 = *self.anova_f_score(*args) # Splat array result - - return if f_score.nil? || df1.nil? || df2.nil? 
- - probability = Distribution::F.new(df1, df2).cumulative_function(f_score) - p_value = 1 - probability - - # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha - # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. - { probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb b/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb deleted file mode 100644 index 19c5b12fc1..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +++ /dev/null @@ -1,70 +0,0 @@ -module Statistics - module StatisticalTest - class KolmogorovSmirnovTest - # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test - def self.two_samples(group_one:, group_two:, alpha: 0.05) - samples = group_one + group_two # We can use unbalaced group samples - - ecdf_one = Distribution::Empirical.new(samples: group_one) - ecdf_two = Distribution::Empirical.new(samples: group_two) - - d_max = samples.sort.map do |sample| - d1 = ecdf_one.cumulative_function(x: sample) - d2 = ecdf_two.cumulative_function(x: sample) - - (d1 - d2).abs - end.max - - # TODO: Validate calculation of Common alpha. - common_alpha = Math.sqrt((-0.5 * Math.log(alpha))) - radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f - - critical_d = common_alpha * Math.sqrt(radicand) - # critical_d = self.critical_d(alpha: alpha, n: samples.size) - - # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution - # defined. 
We reject the null hypotesis if Dmax is > than Dcritical. - { d_max: d_max, - d_critical: critical_d, - total_samples: samples.size, - alpha: alpha, - null: d_max <= critical_d, - alternative: d_max > critical_d, - confidence_level: 1.0 - alpha } - end - - # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper, - # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest - # normality test". - # In this paper, the authors defines a couple of 6th-degree polynomial functions that allow us - # to find an aproximation of the real critical value. This is based in the conclusions made by - # Dagnelie (1968), where indicates that critical values given by Lilliefors can be approximated - # numerically. - # - # In general, the formula found is: - # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha). - # - # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle - # of Monte Carlo simulations. 
- # - # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf - # def self.critical_d(alpha:, n:) - # confidence = 1.0 - alpha - - # a_alpha = 6.32207539843126 -17.1398870006148 * confidence + - # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) + - # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) - - # 18.48090137098585 * (confidence ** 6) - - # b_alpha = 12.940399038404 - 53.458334259532 * confidence + - # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) + - # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) + - # 92.123451358715 * (confidence ** 6) - - # Math.sqrt(1.0 / (a_alpha * n + b_alpha)) - # end - end - - KSTest = KolmogorovSmirnovTest # Alias - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/t_test.rb b/ruby-statistics/lib/statistics/statistical_test/t_test.rb deleted file mode 100644 index 9fdd55ac23..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/t_test.rb +++ /dev/null @@ -1,92 +0,0 @@ -module Statistics - module StatisticalTest - class TTest - # Errors for Zero std - class ZeroStdError < StandardError - STD_ERROR_MSG = 'Standard deviation for the difference or group is zero. Please, reconsider sample contents'.freeze - end - - # Perform a T-Test for one or two samples. - # For the tails param, we need a symbol: :one_tail or :two_tail - def self.perform(alpha, tails, *args) - return if args.size < 2 - - degrees_of_freedom = 0 - - # If the comparison mean has been specified - t_score = if args[0].is_a? 
Numeric - data_mean = args[1].mean - data_std = args[1].standard_deviation - - raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if data_std == 0 - - comparison_mean = args[0] - degrees_of_freedom = args[1].size - - (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_f).to_f - else - sample_left_mean = args[0].mean - sample_left_variance = args[0].variance - sample_right_variance = args[1].variance - sample_right_mean = args[1].mean - degrees_of_freedom = args.flatten.size - 2 - - left_root = sample_left_variance/args[0].size.to_f - right_root = sample_right_variance/args[1].size.to_f - - standard_error = Math.sqrt(left_root + right_root) - - (sample_left_mean - sample_right_mean).abs/standard_error.to_f - end - - t_distribution = Distribution::TStudent.new(degrees_of_freedom) - probability = t_distribution.cumulative_function(t_score) - - # Steps grabbed from https://support.minitab.com/en-us/minitab/18/help-and-how-to/statistics/basic-statistics/supporting-topics/basics/manually-calculate-a-p-value/ - # See https://github.com/estebanz01/ruby-statistics/issues/23 - p_value = if tails == :two_tail - 2 * (1 - t_distribution.cumulative_function(t_score.abs)) - else - 1 - probability - end - - { t_score: t_score, - probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - - def self.paired_test(alpha, tails, left_group, right_group) - raise StandardError, 'both samples are the same' if left_group == right_group - - # Handy snippet grabbed from https://stackoverflow.com/questions/2682411/ruby-sum-corresponding-members-of-two-or-more-arrays - differences = [left_group, right_group].transpose.map { |value| value.reduce(:-) } - - degrees_of_freedom = differences.size - 1 - difference_std = differences.standard_deviation - - raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if difference_std == 0 - - down = difference_std/Math.sqrt(differences.size) - - 
t_score = (differences.mean - 0)/down.to_f - - probability = Distribution::TStudent.new(degrees_of_freedom).cumulative_function(t_score) - - p_value = 1 - probability - p_value *= 2 if tails == :two_tail - - { t_score: t_score, - probability: probability, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - end - end -end diff --git a/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb b/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb deleted file mode 100644 index dcfff81247..0000000000 --- a/ruby-statistics/lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb +++ /dev/null @@ -1,95 +0,0 @@ -module Statistics - module StatisticalTest - class WilcoxonRankSumTest - def rank(elements) - ranked_elements = {} - - elements.sort.each_with_index do |element, index| - if ranked_elements.fetch(element, false) - # This allow us to solve the ties easily when performing the rank summation per group - ranked_elements[element][:counter] += 1 - ranked_elements[element][:rank] += (index + 1) - else - ranked_elements[element] = { counter: 1, rank: (index + 1) } - end - end - - # ranked_elements = [{ x => { counter: 1, rank: y } ] - ranked_elements - end - - # Steps to perform the calculation are based on http://www.mit.edu/~6.s085/notes/lecture5.pdf - def perform(alpha, tails, group_one, group_two) - # Size for each group - n1, n2 = group_one.size, group_two.size - - # Rank all data - total_ranks = rank(group_one + group_two) - - # sum rankings per group - r1 = ranked_sum_for(total_ranks, group_one) - r2 = ranked_sum_for(total_ranks, group_two) - - # calculate U statistic - u1 = (n1 * (n1 + 1)/2.0) - r1 - u2 = (n2 * (n2 + 1)/2.0 ) - r2 - - u_statistic = [u1.abs, u2.abs].min - - median_u = (n1 * n2)/2.0 - - ties = total_ranks.values.select { |element| element[:counter] > 1 } - - std_u = if ties.size > 0 - corrected_sigma(ties, n1, n2) - else - 
Math.sqrt((n1 * n2 * (n1 + n2 + 1))/12.0) - end - - z = (u_statistic - median_u)/std_u - - # Most literature are not very specific about the normal distribution to be used. - # We ran multiple tests with a Normal(median_u, std_u) and Normal(0, 1) and we found - # the latter to be more aligned with the results. - probability = Distribution::StandardNormal.new.cumulative_function(z.abs) - p_value = 1 - probability - p_value *= 2 if tails == :two_tail - - { probability: probability, - u: u_statistic, - z: z, - p_value: p_value, - alpha: alpha, - null: alpha < p_value, - alternative: p_value <= alpha, - confidence_level: 1 - alpha } - end - - # Formula extracted from http://www.statstutor.ac.uk/resources/uploaded/mannwhitney.pdf - private def corrected_sigma(ties, total_group_one, total_group_two) - n = total_group_one + total_group_two - - rank_sum = ties.reduce(0) do |memo, t| - memo += ((t[:counter] ** 3) - t[:counter])/12.0 - end - - left = (total_group_one * total_group_two)/(n * (n - 1)).to_f - right = (((n ** 3) - n)/12.0) - rank_sum - - Math.sqrt(left * right) - end - - private def ranked_sum_for(total, group) - # sum rankings per group - group.reduce(0) do |memo, element| - rank_of_element = total[element][:rank] / total[element][:counter].to_f - memo += rank_of_element - end - end - end - - # Both test are the same. To keep the selected name, we just alias the class - # with the implementation. 
- MannWhitneyU = WilcoxonRankSumTest - end -end diff --git a/ruby-statistics/lib/statistics/version.rb b/ruby-statistics/lib/statistics/version.rb deleted file mode 100644 index 7c6cd46de5..0000000000 --- a/ruby-statistics/lib/statistics/version.rb +++ /dev/null @@ -1,3 +0,0 @@ -module Statistics - VERSION = "2.1.1" -end diff --git a/ruby-statistics/ruby-statistics.gemspec b/ruby-statistics/ruby-statistics.gemspec deleted file mode 100644 index b5ce2cebf1..0000000000 --- a/ruby-statistics/ruby-statistics.gemspec +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -lib = File.expand_path("../lib", __FILE__) -$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) -require "statistics/version" - -Gem::Specification.new do |spec| - spec.name = "ruby-statistics" - spec.version = Statistics::VERSION - spec.authors = ["esteban zapata"] - spec.email = ["estebanz01@outlook.com"] - - spec.summary = %q{A ruby gem for som specific statistics. Inspired by the jStat js library.} - spec.description = %q{This gem is intended to accomplish the same purpose as jStat js library: - to provide ruby with statistical capabilities without the need - of a statistical programming language like R or Octave. Some functions - and capabilities are an implementation from other authors and are - referenced properly in the class/method.} - spec.homepage = "https://github.com/estebanz01/ruby-statistics" - spec.license = "MIT" - - # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' - # to allow pushing to a single host or delete this section to allow pushing to any host. 
- spec.files = `git ls-files -z`.split("\x0").reject do |f| - f.match(%r{^(test|spec|features)/}) - end - spec.bindir = "exe" - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } - spec.require_paths = ["lib"] - - spec.add_development_dependency "rake", '~> 12.0', '>= 12.0.0' - spec.add_development_dependency "rspec", '~> 3.6', '>= 3.6.0' - spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1' - spec.add_development_dependency 'byebug', '~> 9.1.0', '>= 9.1.0' -end