diff --git a/federate/federate-core/src/tests.rs b/federate/federate-core/src/tests.rs
index ced3e5d..81b8fbd 100644
--- a/federate/federate-core/src/tests.rs
+++ b/federate/federate-core/src/tests.rs
@@ -21,7 +21,7 @@ use crate::*;
/// adding forge works
pub async fn adding_forge_works<'a, T: Federate>(
ff: &T,
- create_forge_msg: CreateForge,
+ create_forge_msg: CreateForge<'a>,
create_user_msg: AddUser<'a>,
add_repo_msg: AddRepository<'a>,
) {
diff --git a/federate/publiccodeyml/Cargo.toml b/federate/publiccodeyml/Cargo.toml
index 432ba09..d68af01 100644
--- a/federate/publiccodeyml/Cargo.toml
+++ b/federate/publiccodeyml/Cargo.toml
@@ -16,11 +16,18 @@ path = "src/lib.rs"
[dependencies]
async-trait = "0.1.51"
serde = { version = "1", features = ["derive"]}
-serde_yaml = "0.8.24"
+serde_yaml = "0.9"
tokio = { version = "1.18.2", features = ["fs"]}
thiserror = "1.0.30"
url = { version = "2.2.2", features = ["serde"] }
tar = "0.4.38"
+log = "0.4.16"
+mktemp = "0.4.1"
+
+[dependencies.reqwest]
+features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
+version = "0.11.10"
+
[dependencies.db-core]
path = "../../db/db-core"
diff --git a/federate/publiccodeyml/src/lib.rs b/federate/publiccodeyml/src/lib.rs
index 74f25c5..1b714c0 100644
--- a/federate/publiccodeyml/src/lib.rs
+++ b/federate/publiccodeyml/src/lib.rs
@@ -15,16 +15,23 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+use std::fs as StdFs;
use std::path::{Path, PathBuf};
use async_trait::async_trait;
+use log::info;
+use mktemp::Temp;
+use reqwest::Client;
use serde::Serialize;
+use tar::Archive;
use tokio::fs;
+use tokio::io::AsyncWriteExt;
use url::Url;
use db_core::prelude::*;
use federate_core::Federate;
+use federate_core::{LatestResp, ROUTES};
pub mod errors;
pub mod schema;
@@ -295,4 +302,122 @@ impl Federate for PccFederate {
let latest = times.pop().unwrap();
Ok(format!("{}.tar", latest))
}
+
+ /// import archive from another Starchart instance
+ async fn import(
+ &self,
+ mut starchart_url: Url,
+ client: &Client,
+        db: &Box<dyn SCDatabase>,
+ ) -> Result<(), Self::Error> {
+ info!("[import][{starchart_url}] import latest tarball from starchart instance");
+
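+        // ask the remote Starchart instance for the name of its newest export tarball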
+ let mut url = starchart_url.clone();
+ url.set_path(ROUTES.get_latest);
+ let resp: LatestResp = client.get(url).send().await.unwrap().json().await.unwrap();
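+        // download the advertised tarball and write it to a temporary directory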
+ let mut url = starchart_url.clone();
+ url.set_path(&format!("/federate/{}", resp.latest));
+ println!("{:?}", url);
+ let file = client.get(url).send().await.unwrap().bytes().await.unwrap();
+ let tmp = Temp::new_dir().unwrap();
+ let import_file = tmp.as_path().join("import.tar.gz");
+ {
+ let mut f = fs::File::create(&import_file).await.unwrap();
+ f.write_all(&file).await.unwrap();
+ }
+
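+        // unpack the tarball into a scratch directory before walking its contents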
+ let f = StdFs::File::open(&import_file).unwrap();
+ let uncompressed = tmp.as_path().join("untar");
+ fs::create_dir(&uncompressed).await.unwrap();
+
+ let mut ar = Archive::new(f);
+ ar.unpack(&uncompressed).unwrap();
+
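+        // each top-level directory inside the archive describes one forge instance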
+ let mut instance_dir_contents = fs::read_dir(&uncompressed).await.unwrap();
+ while let Some(instance_dir_entry) = instance_dir_contents.next_entry().await.unwrap() {
+ if !instance_dir_entry.file_type().await.unwrap().is_dir() {
+ continue;
+ }
+
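+            // read the instance metadata and record which Starchart it was imported from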
+ let instance_file = instance_dir_entry.path().join(INSTANCE_INFO_FILE);
+ let instance = fs::read_to_string(instance_file).await.unwrap();
+ let mut instance: CreateForge = serde_yaml::from_str(&instance).unwrap();
+ instance.starchart_url = Some(starchart_url.as_str());
+
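+            // register the forge in the local database and on-disk store if it is not already known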
+ if !db.forge_exists(&instance.url).await.unwrap() {
+ info!("[import][{}] Creating forge", &instance.url);
+
+ db.create_forge_instance(&instance).await.unwrap();
+ } else if !self.forge_exists(&instance.url).await.unwrap() {
+ self.create_forge_instance(&instance).await.unwrap();
+ }
+
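+            // every subdirectory of an instance directory corresponds to one user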
+ let mut dir_contents = fs::read_dir(&instance_dir_entry.path()).await.unwrap();
+ while let Some(dir_entry) = dir_contents.next_entry().await.unwrap() {
+ if !dir_entry.file_type().await.unwrap().is_dir() {
+ continue;
+ }
+ let username = dir_entry.file_name();
+ let username = username.to_str().unwrap();
+
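+                // import the user into the database and the local store, flagging the record as imported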
+ if !db.user_exists(username, Some(&instance.url)).await.unwrap() {
+ info!("[import][{}] Creating user: {username}", instance.url);
+
+ let user_file = instance_dir_entry
+ .path()
+ .join(&username)
+ .join(USER_INFO_FILE);
+ let user_file_content = fs::read_to_string(user_file).await.unwrap();
+ let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
+ user.import = true;
+
+ db.add_user(&user).await.unwrap();
+ }
+ if !self.user_exists(username, &instance.url).await.unwrap() {
+ let user_file = instance_dir_entry
+ .path()
+ .join(&username)
+ .join(USER_INFO_FILE);
+ let user_file_content = fs::read_to_string(user_file).await.unwrap();
+ let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
+ user.import = true;
+
+ self.create_user(&user).await.unwrap();
+ }
+
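+                // repositories live one level below the user directory; import each one the same way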
+ let mut repositories = fs::read_dir(dir_entry.path()).await.unwrap();
+ while let Some(repo) = repositories.next_entry().await.unwrap() {
+ if !repo.file_type().await.unwrap().is_dir() {
+ continue;
+ }
+ let repo_file = repo.path().join(REPO_INFO_FILE);
+ println!("repo_file: {:?}", repo_file);
+ let publiccodeyml_repository: schema::Repository =
+ serde_yaml::from_str(&fs::read_to_string(repo_file).await.unwrap())
+ .unwrap();
+ let add_repo = publiccodeyml_repository.to_add_repository(true);
+
+ if !db
+ .repository_exists(add_repo.name, username, &add_repo.url)
+ .await
+ .unwrap()
+ {
+ info!(
+ "[import][{}] Creating repository: {}",
+ instance.url, add_repo.name
+ );
+ db.create_repository(&add_repo).await.unwrap();
+ }
+ if !self
+ .repository_exists(add_repo.name, username, &add_repo.url)
+ .await
+ .unwrap()
+ {
+ self.create_repository(&add_repo).await.unwrap();
+ }
+ }
+ }
+ }
+ Ok(())
+ }
}
diff --git a/federate/publiccodeyml/src/tests.rs b/federate/publiccodeyml/src/tests.rs
index 77ad120..31be287 100644
--- a/federate/publiccodeyml/src/tests.rs
+++ b/federate/publiccodeyml/src/tests.rs
@@ -37,7 +37,7 @@ async fn everything_works() {
let create_forge_msg = CreateForge {
url: url.clone(),
forge_type: ForgeImplementation::Gitea,
- import: false,
+ starchart_url: None,
};
let add_user_msg = AddUser {
diff --git a/spider.rs b/spider.rs
deleted file mode 100644
index a953fe8..0000000
--- a/spider.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * ForgeFlux StarChart - A federated software forge spider
- * Copyright © 2022 Aravinth Manivannan
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-use std::time::Duration;
-use tokio::time;
-use url::Url;
-
-use crate::data::Data;
-use crate::gitea::SearchResults;
-
-const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
-const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
-
-impl Data {
-    pub async fn crawl(&self, hostname: &str) -> Vec<SearchResults> {
- let mut page = 1;
- let mut url = Url::parse(hostname).unwrap();
- url.set_path(REPO_SEARCH_PATH);
- let mut repos = Vec::new();
- loop {
- let mut url = url.clone();
- url.set_query(Some(&format!(
- "page={page}&limit={}",
- self.settings.crawler.items_per_api_call
- )));
- let res: SearchResults = self
- .client
- .get(url)
- .send()
- .await
- .unwrap()
- .json()
- .await
- .unwrap();
- // TODO implement save
- time::sleep(Duration::new(
- self.settings.crawler.wait_before_next_api_call,
- 0,
- ))
- .await;
- if res.data.is_empty() {
- return repos;
- }
-
- for d in res.data.iter() {
-
- }
-
- repos.push(res);
- page += 1;
- }
- }
-
- /// purpose: interact with instance running on provided hostname and verify if the instance is a
- /// Gitea instance.
- ///
- /// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
- pub async fn is_gitea(&self, hostname: &str) -> bool {
- const GITEA_IDENTIFIER: &str = "gitea";
- let mut url = Url::parse(hostname).unwrap();
- url.set_path(GITEA_NODEINFO);
-
- let res: serde_json::Value = self
- .client
- .get(url)
- .send()
- .await
- .unwrap()
- .json()
- .await
- .unwrap();
- if let serde_json::Value::String(software) = &res["software"]["name"] {
- software == GITEA_IDENTIFIER
- } else {
- false
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::settings::Settings;
- pub const GITEA_HOST: &str = "http://localhost:8080";
-
- #[actix_rt::test]
- async fn is_gitea_works() {
- let data = Data::new(Settings::new().unwrap()).await;
- assert!(data.is_gitea(GITEA_HOST).await);
- }
-
- #[actix_rt::test]
- async fn crawl_gitea() {
- use crate::tests::sqlx_sqlite;
- let data = Data::new(Settings::new().unwrap()).await;
- let db = sqlx_sqlite::get_data();
- let res = data.crawl(GITEA_HOST).await;
- let mut elements = 0;
- res.iter().for_each(|r| elements += r.data.len());
- assert_eq!(res.len(), 5);
- assert_eq!(elements, 100);
- }
-}