feat: impl import for publiccodeyml
This commit is contained in:
parent
c5a36ca685
commit
30c245ea8f
5 changed files with 135 additions and 120 deletions
|
@ -21,7 +21,7 @@ use crate::*;
|
||||||
/// adding forge works
|
/// adding forge works
|
||||||
pub async fn adding_forge_works<'a, T: Federate>(
|
pub async fn adding_forge_works<'a, T: Federate>(
|
||||||
ff: &T,
|
ff: &T,
|
||||||
create_forge_msg: CreateForge,
|
create_forge_msg: CreateForge<'a>,
|
||||||
create_user_msg: AddUser<'a>,
|
create_user_msg: AddUser<'a>,
|
||||||
add_repo_msg: AddRepository<'a>,
|
add_repo_msg: AddRepository<'a>,
|
||||||
) {
|
) {
|
||||||
|
|
|
@ -16,11 +16,18 @@ path = "src/lib.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-trait = "0.1.51"
|
async-trait = "0.1.51"
|
||||||
serde = { version = "1", features = ["derive"]}
|
serde = { version = "1", features = ["derive"]}
|
||||||
serde_yaml = "0.8.24"
|
serde_yaml = "0.9"
|
||||||
tokio = { version = "1.18.2", features = ["fs"]}
|
tokio = { version = "1.18.2", features = ["fs"]}
|
||||||
thiserror = "1.0.30"
|
thiserror = "1.0.30"
|
||||||
url = { version = "2.2.2", features = ["serde"] }
|
url = { version = "2.2.2", features = ["serde"] }
|
||||||
tar = "0.4.38"
|
tar = "0.4.38"
|
||||||
|
log = "0.4.16"
|
||||||
|
mktemp = "0.4.1"
|
||||||
|
|
||||||
|
[dependencies.reqwest]
|
||||||
|
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
|
||||||
|
version = "0.11.10"
|
||||||
|
|
||||||
|
|
||||||
[dependencies.db-core]
|
[dependencies.db-core]
|
||||||
path = "../../db/db-core"
|
path = "../../db/db-core"
|
||||||
|
|
|
@ -15,16 +15,23 @@
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
use std::fs as StdFs;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use log::info;
|
||||||
|
use mktemp::Temp;
|
||||||
|
use reqwest::Client;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use tar::Archive;
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use db_core::prelude::*;
|
use db_core::prelude::*;
|
||||||
|
|
||||||
use federate_core::Federate;
|
use federate_core::Federate;
|
||||||
|
use federate_core::{LatestResp, ROUTES};
|
||||||
|
|
||||||
pub mod errors;
|
pub mod errors;
|
||||||
pub mod schema;
|
pub mod schema;
|
||||||
|
@ -295,4 +302,122 @@ impl Federate for PccFederate {
|
||||||
let latest = times.pop().unwrap();
|
let latest = times.pop().unwrap();
|
||||||
Ok(format!("{}.tar", latest))
|
Ok(format!("{}.tar", latest))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// import archive from another Starchart instance
|
||||||
|
async fn import(
|
||||||
|
&self,
|
||||||
|
mut starchart_url: Url,
|
||||||
|
client: &Client,
|
||||||
|
db: &Box<dyn SCDatabase>,
|
||||||
|
) -> Result<(), Self::Error> {
|
||||||
|
info!("[import][{starchart_url}] import latest tarball from starchart instance");
|
||||||
|
|
||||||
|
let mut url = starchart_url.clone();
|
||||||
|
url.set_path(ROUTES.get_latest);
|
||||||
|
let resp: LatestResp = client.get(url).send().await.unwrap().json().await.unwrap();
|
||||||
|
let mut url = starchart_url.clone();
|
||||||
|
url.set_path(&format!("/federate/{}", resp.latest));
|
||||||
|
println!("{:?}", url);
|
||||||
|
let file = client.get(url).send().await.unwrap().bytes().await.unwrap();
|
||||||
|
let tmp = Temp::new_dir().unwrap();
|
||||||
|
let import_file = tmp.as_path().join("import.tar.gz");
|
||||||
|
{
|
||||||
|
let mut f = fs::File::create(&import_file).await.unwrap();
|
||||||
|
f.write_all(&file).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let f = StdFs::File::open(&import_file).unwrap();
|
||||||
|
let uncompressed = tmp.as_path().join("untar");
|
||||||
|
fs::create_dir(&uncompressed).await.unwrap();
|
||||||
|
|
||||||
|
let mut ar = Archive::new(f);
|
||||||
|
ar.unpack(&uncompressed).unwrap();
|
||||||
|
|
||||||
|
let mut instance_dir_contents = fs::read_dir(&uncompressed).await.unwrap();
|
||||||
|
while let Some(instance_dir_entry) = instance_dir_contents.next_entry().await.unwrap() {
|
||||||
|
if !instance_dir_entry.file_type().await.unwrap().is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let instance_file = instance_dir_entry.path().join(INSTANCE_INFO_FILE);
|
||||||
|
let instance = fs::read_to_string(instance_file).await.unwrap();
|
||||||
|
let mut instance: CreateForge = serde_yaml::from_str(&instance).unwrap();
|
||||||
|
instance.starchart_url = Some(starchart_url.as_str());
|
||||||
|
|
||||||
|
if !db.forge_exists(&instance.url).await.unwrap() {
|
||||||
|
info!("[import][{}] Creating forge", &instance.url);
|
||||||
|
|
||||||
|
db.create_forge_instance(&instance).await.unwrap();
|
||||||
|
} else if !self.forge_exists(&instance.url).await.unwrap() {
|
||||||
|
self.create_forge_instance(&instance).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut dir_contents = fs::read_dir(&instance_dir_entry.path()).await.unwrap();
|
||||||
|
while let Some(dir_entry) = dir_contents.next_entry().await.unwrap() {
|
||||||
|
if !dir_entry.file_type().await.unwrap().is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let username = dir_entry.file_name();
|
||||||
|
let username = username.to_str().unwrap();
|
||||||
|
|
||||||
|
if !db.user_exists(username, Some(&instance.url)).await.unwrap() {
|
||||||
|
info!("[import][{}] Creating user: {username}", instance.url);
|
||||||
|
|
||||||
|
let user_file = instance_dir_entry
|
||||||
|
.path()
|
||||||
|
.join(&username)
|
||||||
|
.join(USER_INFO_FILE);
|
||||||
|
let user_file_content = fs::read_to_string(user_file).await.unwrap();
|
||||||
|
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
|
||||||
|
user.import = true;
|
||||||
|
|
||||||
|
db.add_user(&user).await.unwrap();
|
||||||
|
}
|
||||||
|
if !self.user_exists(username, &instance.url).await.unwrap() {
|
||||||
|
let user_file = instance_dir_entry
|
||||||
|
.path()
|
||||||
|
.join(&username)
|
||||||
|
.join(USER_INFO_FILE);
|
||||||
|
let user_file_content = fs::read_to_string(user_file).await.unwrap();
|
||||||
|
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
|
||||||
|
user.import = true;
|
||||||
|
|
||||||
|
self.create_user(&user).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut repositories = fs::read_dir(dir_entry.path()).await.unwrap();
|
||||||
|
while let Some(repo) = repositories.next_entry().await.unwrap() {
|
||||||
|
if !repo.file_type().await.unwrap().is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let repo_file = repo.path().join(REPO_INFO_FILE);
|
||||||
|
println!("repo_file: {:?}", repo_file);
|
||||||
|
let publiccodeyml_repository: schema::Repository =
|
||||||
|
serde_yaml::from_str(&fs::read_to_string(repo_file).await.unwrap())
|
||||||
|
.unwrap();
|
||||||
|
let add_repo = publiccodeyml_repository.to_add_repository(true);
|
||||||
|
|
||||||
|
if !db
|
||||||
|
.repository_exists(add_repo.name, username, &add_repo.url)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
info!(
|
||||||
|
"[import][{}] Creating repository: {}",
|
||||||
|
instance.url, add_repo.name
|
||||||
|
);
|
||||||
|
db.create_repository(&add_repo).await.unwrap();
|
||||||
|
}
|
||||||
|
if !self
|
||||||
|
.repository_exists(add_repo.name, username, &add_repo.url)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
self.create_repository(&add_repo).await.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ async fn everything_works() {
|
||||||
let create_forge_msg = CreateForge {
|
let create_forge_msg = CreateForge {
|
||||||
url: url.clone(),
|
url: url.clone(),
|
||||||
forge_type: ForgeImplementation::Gitea,
|
forge_type: ForgeImplementation::Gitea,
|
||||||
import: false,
|
starchart_url: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let add_user_msg = AddUser {
|
let add_user_msg = AddUser {
|
||||||
|
|
117
spider.rs
117
spider.rs
|
@ -1,117 +0,0 @@
|
||||||
/*
|
|
||||||
* ForgeFlux StarChart - A federated software forge spider
|
|
||||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU Affero General Public License as
|
|
||||||
* published by the Free Software Foundation, either version 3 of the
|
|
||||||
* License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU Affero General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
use std::time::Duration;
|
|
||||||
use tokio::time;
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
use crate::data::Data;
|
|
||||||
use crate::gitea::SearchResults;
|
|
||||||
|
|
||||||
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
|
|
||||||
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
|
|
||||||
|
|
||||||
impl Data {
|
|
||||||
pub async fn crawl(&self, hostname: &str) -> Vec<SearchResults> {
|
|
||||||
let mut page = 1;
|
|
||||||
let mut url = Url::parse(hostname).unwrap();
|
|
||||||
url.set_path(REPO_SEARCH_PATH);
|
|
||||||
let mut repos = Vec::new();
|
|
||||||
loop {
|
|
||||||
let mut url = url.clone();
|
|
||||||
url.set_query(Some(&format!(
|
|
||||||
"page={page}&limit={}",
|
|
||||||
self.settings.crawler.items_per_api_call
|
|
||||||
)));
|
|
||||||
let res: SearchResults = self
|
|
||||||
.client
|
|
||||||
.get(url)
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.json()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
// TODO implement save
|
|
||||||
time::sleep(Duration::new(
|
|
||||||
self.settings.crawler.wait_before_next_api_call,
|
|
||||||
0,
|
|
||||||
))
|
|
||||||
.await;
|
|
||||||
if res.data.is_empty() {
|
|
||||||
return repos;
|
|
||||||
}
|
|
||||||
|
|
||||||
for d in res.data.iter() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
repos.push(res);
|
|
||||||
page += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// purpose: interact with instance running on provided hostname and verify if the instance is a
|
|
||||||
/// Gitea instance.
|
|
||||||
///
|
|
||||||
/// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
|
|
||||||
pub async fn is_gitea(&self, hostname: &str) -> bool {
|
|
||||||
const GITEA_IDENTIFIER: &str = "gitea";
|
|
||||||
let mut url = Url::parse(hostname).unwrap();
|
|
||||||
url.set_path(GITEA_NODEINFO);
|
|
||||||
|
|
||||||
let res: serde_json::Value = self
|
|
||||||
.client
|
|
||||||
.get(url)
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.json()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
if let serde_json::Value::String(software) = &res["software"]["name"] {
|
|
||||||
software == GITEA_IDENTIFIER
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::settings::Settings;

    /// Address of the Gitea instance both tests talk to.
    pub const GITEA_HOST: &str = "http://localhost:8080";

    /// The instance at `GITEA_HOST` must be recognised as Gitea.
    #[actix_rt::test]
    async fn is_gitea_works() {
        let settings = Settings::new().unwrap();
        let data = Data::new(settings).await;
        assert!(data.is_gitea(GITEA_HOST).await);
    }

    /// Crawling `GITEA_HOST` must yield 5 pages holding 100 results in total.
    #[actix_rt::test]
    async fn crawl_gitea() {
        use crate::tests::sqlx_sqlite;

        let settings = Settings::new().unwrap();
        let data = Data::new(settings).await;
        // Obtained but not used by any assertion below.
        let _db = sqlx_sqlite::get_data();

        let pages = data.crawl(GITEA_HOST).await;
        let mut elements = 0;
        for page in pages.iter() {
            elements += page.data.len();
        }

        assert_eq!(pages.len(), 5);
        assert_eq!(elements, 100);
    }
}
|
|
Loading…
Reference in a new issue