diff --git a/federate/federate-core/src/tests.rs b/federate/federate-core/src/tests.rs index ced3e5d..81b8fbd 100644 --- a/federate/federate-core/src/tests.rs +++ b/federate/federate-core/src/tests.rs @@ -21,7 +21,7 @@ use crate::*; /// adding forge works pub async fn adding_forge_works<'a, T: Federate>( ff: &T, - create_forge_msg: CreateForge, + create_forge_msg: CreateForge<'a>, create_user_msg: AddUser<'a>, add_repo_msg: AddRepository<'a>, ) { diff --git a/federate/publiccodeyml/Cargo.toml b/federate/publiccodeyml/Cargo.toml index 432ba09..d68af01 100644 --- a/federate/publiccodeyml/Cargo.toml +++ b/federate/publiccodeyml/Cargo.toml @@ -16,11 +16,18 @@ path = "src/lib.rs" [dependencies] async-trait = "0.1.51" serde = { version = "1", features = ["derive"]} -serde_yaml = "0.8.24" +serde_yaml = "0.9" tokio = { version = "1.18.2", features = ["fs"]} thiserror = "1.0.30" url = { version = "2.2.2", features = ["serde"] } tar = "0.4.38" +log = "0.4.16" +mktemp = "0.4.1" + +[dependencies.reqwest] +features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"] +version = "0.11.10" + [dependencies.db-core] path = "../../db/db-core" diff --git a/federate/publiccodeyml/src/lib.rs b/federate/publiccodeyml/src/lib.rs index 74f25c5..1b714c0 100644 --- a/federate/publiccodeyml/src/lib.rs +++ b/federate/publiccodeyml/src/lib.rs @@ -15,16 +15,23 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ +use std::fs as StdFs; use std::path::{Path, PathBuf}; use async_trait::async_trait; +use log::info; +use mktemp::Temp; +use reqwest::Client; use serde::Serialize; +use tar::Archive; use tokio::fs; +use tokio::io::AsyncWriteExt; use url::Url; use db_core::prelude::*; use federate_core::Federate; +use federate_core::{LatestResp, ROUTES}; pub mod errors; pub mod schema; @@ -295,4 +302,122 @@ impl Federate for PccFederate { let latest = times.pop().unwrap(); Ok(format!("{}.tar", latest)) } + + /// import archive from another Starchart instance + async fn import( + &self, + mut starchart_url: Url, + client: &Client, + db: &Box, + ) -> Result<(), Self::Error> { + info!("[import][{starchart_url}] import latest tarball from starchart instance"); + + let mut url = starchart_url.clone(); + url.set_path(ROUTES.get_latest); + let resp: LatestResp = client.get(url).send().await.unwrap().json().await.unwrap(); + let mut url = starchart_url.clone(); + url.set_path(&format!("/federate/{}", resp.latest)); + println!("{:?}", url); + let file = client.get(url).send().await.unwrap().bytes().await.unwrap(); + let tmp = Temp::new_dir().unwrap(); + let import_file = tmp.as_path().join("import.tar.gz"); + { + let mut f = fs::File::create(&import_file).await.unwrap(); + f.write_all(&file).await.unwrap(); + } + + let f = StdFs::File::open(&import_file).unwrap(); + let uncompressed = tmp.as_path().join("untar"); + fs::create_dir(&uncompressed).await.unwrap(); + + let mut ar = Archive::new(f); + ar.unpack(&uncompressed).unwrap(); + + let mut instance_dir_contents = fs::read_dir(&uncompressed).await.unwrap(); + while let Some(instance_dir_entry) = instance_dir_contents.next_entry().await.unwrap() { + if !instance_dir_entry.file_type().await.unwrap().is_dir() { + continue; + } + + let instance_file = instance_dir_entry.path().join(INSTANCE_INFO_FILE); + let instance = fs::read_to_string(instance_file).await.unwrap(); + let mut instance: CreateForge = serde_yaml::from_str(&instance).unwrap(); + instance.starchart_url = Some(starchart_url.as_str()); + + if !db.forge_exists(&instance.url).await.unwrap() { + info!("[import][{}] Creating forge", &instance.url); + + db.create_forge_instance(&instance).await.unwrap(); + } else if !self.forge_exists(&instance.url).await.unwrap() { + self.create_forge_instance(&instance).await.unwrap(); + } + + let mut dir_contents = fs::read_dir(&instance_dir_entry.path()).await.unwrap(); + while let Some(dir_entry) = dir_contents.next_entry().await.unwrap() { + if !dir_entry.file_type().await.unwrap().is_dir() { + continue; + } + let username = dir_entry.file_name(); + let username = username.to_str().unwrap(); + + if !db.user_exists(username, Some(&instance.url)).await.unwrap() { + info!("[import][{}] Creating user: {username}", instance.url); + + let user_file = instance_dir_entry + .path() + .join(&username) + .join(USER_INFO_FILE); + let user_file_content = fs::read_to_string(user_file).await.unwrap(); + let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap(); + user.import = true; + + db.add_user(&user).await.unwrap(); + } + if !self.user_exists(username, &instance.url).await.unwrap() { + let user_file = instance_dir_entry + .path() + .join(&username) + .join(USER_INFO_FILE); + let user_file_content = fs::read_to_string(user_file).await.unwrap(); + let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap(); + user.import = true; + + self.create_user(&user).await.unwrap(); + } + + let mut repositories = fs::read_dir(dir_entry.path()).await.unwrap(); + while let Some(repo) = repositories.next_entry().await.unwrap() { + if !repo.file_type().await.unwrap().is_dir() { + continue; + } + let repo_file = repo.path().join(REPO_INFO_FILE); + println!("repo_file: {:?}", repo_file); + let publiccodeyml_repository: schema::Repository = + serde_yaml::from_str(&fs::read_to_string(repo_file).await.unwrap()) + .unwrap(); + let add_repo = publiccodeyml_repository.to_add_repository(true); + + if !db + .repository_exists(add_repo.name, username, &add_repo.url) + .await + .unwrap() + { + info!( + "[import][{}] Creating repository: {}", + instance.url, add_repo.name + ); + db.create_repository(&add_repo).await.unwrap(); + } + if !self + .repository_exists(add_repo.name, username, &add_repo.url) + .await + .unwrap() + { + self.create_repository(&add_repo).await.unwrap(); + } + } + } + } + Ok(()) + } } diff --git a/federate/publiccodeyml/src/tests.rs b/federate/publiccodeyml/src/tests.rs index 77ad120..31be287 100644 --- a/federate/publiccodeyml/src/tests.rs +++ b/federate/publiccodeyml/src/tests.rs @@ -37,7 +37,7 @@ async fn everything_works() { let create_forge_msg = CreateForge { url: url.clone(), forge_type: ForgeImplementation::Gitea, - import: false, + starchart_url: None, }; let add_user_msg = AddUser { diff --git a/spider.rs b/spider.rs deleted file mode 100644 index a953fe8..0000000 --- a/spider.rs +++ /dev/null @@ -1,117 +0,0 @@ -/* - * ForgeFlux StarChart - A federated software forge spider - * Copyright © 2022 Aravinth Manivannan - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -use std::time::Duration; -use tokio::time; -use url::Url; - -use crate::data::Data; -use crate::gitea::SearchResults; - -const REPO_SEARCH_PATH: &str = "/api/v1/repos/search"; -const GITEA_NODEINFO: &str = "/api/v1/nodeinfo"; - -impl Data { - pub async fn crawl(&self, hostname: &str) -> Vec { - let mut page = 1; - let mut url = Url::parse(hostname).unwrap(); - url.set_path(REPO_SEARCH_PATH); - let mut repos = Vec::new(); - loop { - let mut url = url.clone(); - url.set_query(Some(&format!( - "page={page}&limit={}", - self.settings.crawler.items_per_api_call - ))); - let res: SearchResults = self - .client - .get(url) - .send() - .await - .unwrap() - .json() - .await - .unwrap(); - // TODO implement save - time::sleep(Duration::new( - self.settings.crawler.wait_before_next_api_call, - 0, - )) - .await; - if res.data.is_empty() { - return repos; - } - - for d in res.data.iter() { - - } - - repos.push(res); - page += 1; - } - } - - /// purpose: interact with instance running on provided hostname and verify if the instance is a - /// Gitea instance. - /// - /// will get nodeinfo information, which contains an identifier to uniquely identify Gitea - pub async fn is_gitea(&self, hostname: &str) -> bool { - const GITEA_IDENTIFIER: &str = "gitea"; - let mut url = Url::parse(hostname).unwrap(); - url.set_path(GITEA_NODEINFO); - - let res: serde_json::Value = self - .client - .get(url) - .send() - .await - .unwrap() - .json() - .await - .unwrap(); - if let serde_json::Value::String(software) = &res["software"]["name"] { - software == GITEA_IDENTIFIER - } else { - false - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::settings::Settings; - pub const GITEA_HOST: &str = "http://localhost:8080"; - - #[actix_rt::test] - async fn is_gitea_works() { - let data = Data::new(Settings::new().unwrap()).await; - assert!(data.is_gitea(GITEA_HOST).await); - } - - #[actix_rt::test] - async fn crawl_gitea() { - use crate::tests::sqlx_sqlite; - let data = Data::new(Settings::new().unwrap()).await; - let db = sqlx_sqlite::get_data(); - let res = data.crawl(GITEA_HOST).await; - let mut elements = 0; - res.iter().for_each(|r| elements += r.data.len()); - assert_eq!(res.len(), 5); - assert_eq!(elements, 100); - } -}