diff --git a/Cargo.lock b/Cargo.lock index acddbfe..935b825 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -518,6 +518,17 @@ dependencies = [ "instant", ] +[[package]] +name = "federate-core" +version = "0.1.0" +dependencies = [ + "async-trait", + "db-core", + "serde", + "thiserror", + "url", +] + [[package]] name = "flate2" version = "1.0.23" @@ -1148,6 +1159,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "mktemp" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "975de676448231fcde04b9149d2543077e166b78fc29eae5aa219e7928410da2" +dependencies = [ + "uuid", +] + [[package]] name = "native-tls" version = "0.2.10" @@ -1521,6 +1541,22 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "publiccodeyml" +version = "0.1.0" +dependencies = [ + "actix-rt", + "async-trait", + "db-core", + "federate-core", + "mktemp", + "serde", + "serde_yaml", + "thiserror", + "tokio", + "url", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1912,6 +1948,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707d15895415db6628332b737c838b88c598522e4dc70647e59b72312924aebc" +dependencies = [ + "indexmap", + "ryu", + "serde", + "yaml-rust", +] + [[package]] name = "sha-1" version = "0.8.2" @@ -2365,9 +2413,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.18.1" +version = "1.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce653fb475565de9f6fb0614b28bca8df2c430c0cf84bcd9c843f15de5414cc" +checksum = "4903bf0427cf68dddd5aa6a93220756f8be0c34fcfa9f5e6191e103e15a31395" dependencies = [ "bytes", "libc", @@ -2686,6 +2734,9 @@ name = "uuid" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies 
= [ + "getrandom", +] [[package]] name = "validator" diff --git a/Cargo.toml b/Cargo.toml index 126de5d..90cf77a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,18 @@ edition = "2021" license = "AGPLv3 or later version" build = "build.rs" +[workspace] +exclude = ["db/migrator"] +members = [ + ".", + "db/db-core", + "db/db-sqlx-sqlite", + "forge/forge-core", + "forge/gitea", + "federate/federate-core", + "federate/publiccodeyml" +] + [dependencies] actix-rt = "2.7" async-trait = "0.1.51" @@ -56,7 +68,3 @@ path = "./forge/forge-core" [dependencies.sqlx] features = ["runtime-actix-rustls", "uuid", "postgres", "time", "offline", "sqlite"] version = "0.5.11" - -[workspace] -exclude = ["db/migrator"] -members = [".", "db/db-core", "db/db-sqlx-sqlite", "forge/forge-core", "forge/gitea"] diff --git a/Makefile b/Makefile index ee27ab0..fc1aed6 100644 --- a/Makefile +++ b/Makefile @@ -3,17 +3,35 @@ define launch_test_env python ./scripts/gitea.py endef -define test_sqlite_db +define test_databases + cd db/db-core &&\ + cargo test --no-fail-fast cd db/db-sqlx-sqlite &&\ DATABASE_URL=${SQLITE_DATABASE_URL}\ cargo test --no-fail-fast endef -define test_gitea_forge +define test_forges + cd forge/forge-core && \ + cargo test --no-fail-fast cd forge/gitea && \ cargo test --no-fail-fast endef +define test_federation + cd federate/federate-core && \ + cargo test --no-fail-fast + cd federate/publiccodeyml && \ + cargo test --no-fail-fast +endef + +define test_workspaces + $(call test_databases) + $(call test_forges) + $(call test_federation) + cargo test --no-fail-fast +endef + default: ## Debug build cargo build @@ -29,13 +47,15 @@ coverage: migrate ## Generate coverage report in HTML format check: ## Check for syntax errors on all workspaces cargo check --workspace --tests --all-features - cd db/migrator && cargo check --tests --all-features - cd forge/forge-core && cargo check --tests --all-features - cd forge/gitea && cargo check --tests --all-features cd db/db-sqlx-sqlite 
&&\ DATABASE_URL=${SQLITE_DATABASE_URL}\ cargo check cd db/db-core/ && cargo check + cd db/migrator && cargo check --tests --all-features + cd forge/forge-core && cargo check --tests --all-features + cd forge/gitea && cargo check --tests --all-features + cd federate/federate-core && cargo check --tests --all-features + cd federate/publiccodeyml && cargo check --tests --all-features dev-env: ## Download development dependencies $(call launch_test_env) @@ -73,9 +93,7 @@ sqlx-offline-data: ## prepare sqlx offline data --all-features test: migrate ## Run tests $(call launch_test_env) - $(call test_sqlite_db) - $(call test_gitea_forge) - cargo test --no-fail-fast + $(call test_workspaces) # cd database/db-sqlx-postgres &&\ # DATABASE_URL=${POSTGRES_DATABASE_URL}\ diff --git a/spider.rs b/spider.rs new file mode 100644 index 0000000..a953fe8 --- /dev/null +++ b/spider.rs @@ -0,0 +1,117 @@ +/* + * ForgeFlux StarChart - A federated software forge spider + * Copyright © 2022 Aravinth Manivannan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+use std::time::Duration;
+use tokio::time;
+use url::Url;
+
+use crate::data::Data;
+use crate::gitea::SearchResults;
+
+/// Path of the repository search endpoint requested by [`Data::crawl`].
+const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
+/// Path of the nodeinfo endpoint requested by [`Data::is_gitea`].
+const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
+
+impl Data {
+    /// Walks the paginated repository search endpoint of the instance at `hostname` and
+    /// returns every non-empty page of results, in page order.
+    ///
+    /// Pages are requested starting at 1 with `settings.crawler.items_per_api_call` items
+    /// each. Crawling stops at the first page whose `data` is empty; that page is not
+    /// included in the returned vector. Between two consecutive requests the crawler waits
+    /// `settings.crawler.wait_before_next_api_call` seconds.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `hostname` is not a parseable URL, if a request cannot be sent, or if a
+    /// response body cannot be deserialized into [`SearchResults`]. The return type has no
+    /// way to report these failures to the caller.
+    pub async fn crawl(&self, hostname: &str) -> Vec<SearchResults> {
+        let mut url = Url::parse(hostname).expect("hostname must be a valid URL");
+        url.set_path(REPO_SEARCH_PATH);
+        let wait = Duration::from_secs(self.settings.crawler.wait_before_next_api_call);
+
+        let mut repos = Vec::new();
+        let mut page = 1;
+        loop {
+            let mut url = url.clone();
+            url.set_query(Some(&format!(
+                "page={page}&limit={}",
+                self.settings.crawler.items_per_api_call
+            )));
+            let res: SearchResults = self
+                .client
+                .get(url)
+                .send()
+                .await
+                .expect("repository search request failed")
+                .json()
+                .await
+                .expect("repository search response is not valid SearchResults JSON");
+
+            // An empty page marks the end of the listing: return right away instead of
+            // waiting out another interval first.
+            if res.data.is_empty() {
+                return repos;
+            }
+
+            // TODO implement save
+            repos.push(res);
+            page += 1;
+
+            // Only pause when another request is going to follow.
+            time::sleep(wait).await;
+        }
+    }
+
+    /// purpose: interact with instance running on provided hostname and verify if the
+    /// instance is a Gitea instance.
+    ///
+    /// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
+    ///
+    /// Returns `true` only when the nodeinfo document's `software.name` is the string
+    /// `"gitea"`. Any failure along the way (unparseable `hostname`, request error, body
+    /// that is not JSON, missing or non-string field) yields `false` rather than a panic,
+    /// since a host that cannot answer the probe has not been verified as Gitea.
+    pub async fn is_gitea(&self, hostname: &str) -> bool {
+        const GITEA_IDENTIFIER: &str = "gitea";
+        let mut url = match Url::parse(hostname) {
+            Ok(url) => url,
+            Err(_) => return false,
+        };
+        url.set_path(GITEA_NODEINFO);
+
+        let resp = match self.client.get(url).send().await {
+            Ok(resp) => resp,
+            Err(_) => return false,
+        };
+        let res: serde_json::Value = match resp.json().await {
+            Ok(res) => res,
+            Err(_) => return false,
+        };
+
+        // Indexing a `serde_json::Value` with a missing key yields `Null`, so this cannot
+        // panic on unexpected document shapes.
+        res["software"]["name"].as_str() == Some(GITEA_IDENTIFIER)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::settings::Settings;
+    pub const GITEA_HOST: &str = "http://localhost:8080";
+
+    #[actix_rt::test]
+    async fn is_gitea_works() {
+        let data = Data::new(Settings::new().unwrap()).await;
+        assert!(data.is_gitea(GITEA_HOST).await);
+    }
+
+    #[actix_rt::test]
+    async fn crawl_gitea() {
+        use crate::tests::sqlx_sqlite;
+        let data = Data::new(Settings::new().unwrap()).await;
+        // NOTE(review): this handle is never read by the test; it is kept (underscore-prefixed)
+        // in case `get_data` performs set-up as a side effect -- confirm and drop if not.
+        let _db = sqlx_sqlite::get_data();
+        let res = data.crawl(GITEA_HOST).await;
+        let elements: usize = res.iter().map(|r| r.data.len()).sum();
+        assert_eq!(res.len(), 5);
+        assert_eq!(elements, 100);
+    }
+}