feat: impl import for publiccodeyml
This commit is contained in:
parent
c5a36ca685
commit
30c245ea8f
5 changed files with 135 additions and 120 deletions
|
@ -21,7 +21,7 @@ use crate::*;
|
|||
/// adding forge works
|
||||
pub async fn adding_forge_works<'a, T: Federate>(
|
||||
ff: &T,
|
||||
create_forge_msg: CreateForge,
|
||||
create_forge_msg: CreateForge<'a>,
|
||||
create_user_msg: AddUser<'a>,
|
||||
add_repo_msg: AddRepository<'a>,
|
||||
) {
|
||||
|
|
|
@ -16,11 +16,18 @@ path = "src/lib.rs"
|
|||
[dependencies]
|
||||
async-trait = "0.1.51"
|
||||
serde = { version = "1", features = ["derive"]}
|
||||
serde_yaml = "0.8.24"
|
||||
serde_yaml = "0.9"
|
||||
tokio = { version = "1.18.2", features = ["fs"]}
|
||||
thiserror = "1.0.30"
|
||||
url = { version = "2.2.2", features = ["serde"] }
|
||||
tar = "0.4.38"
|
||||
log = "0.4.16"
|
||||
mktemp = "0.4.1"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
|
||||
version = "0.11.10"
|
||||
|
||||
|
||||
[dependencies.db-core]
|
||||
path = "../../db/db-core"
|
||||
|
|
|
@ -15,16 +15,23 @@
|
|||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::fs as StdFs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
use mktemp::Temp;
|
||||
use reqwest::Client;
|
||||
use serde::Serialize;
|
||||
use tar::Archive;
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use url::Url;
|
||||
|
||||
use db_core::prelude::*;
|
||||
|
||||
use federate_core::Federate;
|
||||
use federate_core::{LatestResp, ROUTES};
|
||||
|
||||
pub mod errors;
|
||||
pub mod schema;
|
||||
|
@ -295,4 +302,122 @@ impl Federate for PccFederate {
|
|||
let latest = times.pop().unwrap();
|
||||
Ok(format!("{}.tar", latest))
|
||||
}
|
||||
|
||||
/// import archive from another Starchart instance
|
||||
async fn import(
|
||||
&self,
|
||||
mut starchart_url: Url,
|
||||
client: &Client,
|
||||
db: &Box<dyn SCDatabase>,
|
||||
) -> Result<(), Self::Error> {
|
||||
info!("[import][{starchart_url}] import latest tarball from starchart instance");
|
||||
|
||||
let mut url = starchart_url.clone();
|
||||
url.set_path(ROUTES.get_latest);
|
||||
let resp: LatestResp = client.get(url).send().await.unwrap().json().await.unwrap();
|
||||
let mut url = starchart_url.clone();
|
||||
url.set_path(&format!("/federate/{}", resp.latest));
|
||||
println!("{:?}", url);
|
||||
let file = client.get(url).send().await.unwrap().bytes().await.unwrap();
|
||||
let tmp = Temp::new_dir().unwrap();
|
||||
let import_file = tmp.as_path().join("import.tar.gz");
|
||||
{
|
||||
let mut f = fs::File::create(&import_file).await.unwrap();
|
||||
f.write_all(&file).await.unwrap();
|
||||
}
|
||||
|
||||
let f = StdFs::File::open(&import_file).unwrap();
|
||||
let uncompressed = tmp.as_path().join("untar");
|
||||
fs::create_dir(&uncompressed).await.unwrap();
|
||||
|
||||
let mut ar = Archive::new(f);
|
||||
ar.unpack(&uncompressed).unwrap();
|
||||
|
||||
let mut instance_dir_contents = fs::read_dir(&uncompressed).await.unwrap();
|
||||
while let Some(instance_dir_entry) = instance_dir_contents.next_entry().await.unwrap() {
|
||||
if !instance_dir_entry.file_type().await.unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let instance_file = instance_dir_entry.path().join(INSTANCE_INFO_FILE);
|
||||
let instance = fs::read_to_string(instance_file).await.unwrap();
|
||||
let mut instance: CreateForge = serde_yaml::from_str(&instance).unwrap();
|
||||
instance.starchart_url = Some(starchart_url.as_str());
|
||||
|
||||
if !db.forge_exists(&instance.url).await.unwrap() {
|
||||
info!("[import][{}] Creating forge", &instance.url);
|
||||
|
||||
db.create_forge_instance(&instance).await.unwrap();
|
||||
} else if !self.forge_exists(&instance.url).await.unwrap() {
|
||||
self.create_forge_instance(&instance).await.unwrap();
|
||||
}
|
||||
|
||||
let mut dir_contents = fs::read_dir(&instance_dir_entry.path()).await.unwrap();
|
||||
while let Some(dir_entry) = dir_contents.next_entry().await.unwrap() {
|
||||
if !dir_entry.file_type().await.unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
let username = dir_entry.file_name();
|
||||
let username = username.to_str().unwrap();
|
||||
|
||||
if !db.user_exists(username, Some(&instance.url)).await.unwrap() {
|
||||
info!("[import][{}] Creating user: {username}", instance.url);
|
||||
|
||||
let user_file = instance_dir_entry
|
||||
.path()
|
||||
.join(&username)
|
||||
.join(USER_INFO_FILE);
|
||||
let user_file_content = fs::read_to_string(user_file).await.unwrap();
|
||||
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
|
||||
user.import = true;
|
||||
|
||||
db.add_user(&user).await.unwrap();
|
||||
}
|
||||
if !self.user_exists(username, &instance.url).await.unwrap() {
|
||||
let user_file = instance_dir_entry
|
||||
.path()
|
||||
.join(&username)
|
||||
.join(USER_INFO_FILE);
|
||||
let user_file_content = fs::read_to_string(user_file).await.unwrap();
|
||||
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
|
||||
user.import = true;
|
||||
|
||||
self.create_user(&user).await.unwrap();
|
||||
}
|
||||
|
||||
let mut repositories = fs::read_dir(dir_entry.path()).await.unwrap();
|
||||
while let Some(repo) = repositories.next_entry().await.unwrap() {
|
||||
if !repo.file_type().await.unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
let repo_file = repo.path().join(REPO_INFO_FILE);
|
||||
println!("repo_file: {:?}", repo_file);
|
||||
let publiccodeyml_repository: schema::Repository =
|
||||
serde_yaml::from_str(&fs::read_to_string(repo_file).await.unwrap())
|
||||
.unwrap();
|
||||
let add_repo = publiccodeyml_repository.to_add_repository(true);
|
||||
|
||||
if !db
|
||||
.repository_exists(add_repo.name, username, &add_repo.url)
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
info!(
|
||||
"[import][{}] Creating repository: {}",
|
||||
instance.url, add_repo.name
|
||||
);
|
||||
db.create_repository(&add_repo).await.unwrap();
|
||||
}
|
||||
if !self
|
||||
.repository_exists(add_repo.name, username, &add_repo.url)
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
self.create_repository(&add_repo).await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ async fn everything_works() {
|
|||
let create_forge_msg = CreateForge {
|
||||
url: url.clone(),
|
||||
forge_type: ForgeImplementation::Gitea,
|
||||
import: false,
|
||||
starchart_url: None,
|
||||
};
|
||||
|
||||
let add_user_msg = AddUser {
|
||||
|
|
117
spider.rs
117
spider.rs
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::time::Duration;
|
||||
use tokio::time;
|
||||
use url::Url;
|
||||
|
||||
use crate::data::Data;
|
||||
use crate::gitea::SearchResults;
|
||||
|
||||
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
|
||||
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
|
||||
|
||||
impl Data {
|
||||
pub async fn crawl(&self, hostname: &str) -> Vec<SearchResults> {
|
||||
let mut page = 1;
|
||||
let mut url = Url::parse(hostname).unwrap();
|
||||
url.set_path(REPO_SEARCH_PATH);
|
||||
let mut repos = Vec::new();
|
||||
loop {
|
||||
let mut url = url.clone();
|
||||
url.set_query(Some(&format!(
|
||||
"page={page}&limit={}",
|
||||
self.settings.crawler.items_per_api_call
|
||||
)));
|
||||
let res: SearchResults = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
// TODO implement save
|
||||
time::sleep(Duration::new(
|
||||
self.settings.crawler.wait_before_next_api_call,
|
||||
0,
|
||||
))
|
||||
.await;
|
||||
if res.data.is_empty() {
|
||||
return repos;
|
||||
}
|
||||
|
||||
for d in res.data.iter() {
|
||||
|
||||
}
|
||||
|
||||
repos.push(res);
|
||||
page += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// purpose: interact with instance running on provided hostname and verify if the instance is a
|
||||
/// Gitea instance.
|
||||
///
|
||||
/// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
|
||||
pub async fn is_gitea(&self, hostname: &str) -> bool {
|
||||
const GITEA_IDENTIFIER: &str = "gitea";
|
||||
let mut url = Url::parse(hostname).unwrap();
|
||||
url.set_path(GITEA_NODEINFO);
|
||||
|
||||
let res: serde_json::Value = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
if let serde_json::Value::String(software) = &res["software"]["name"] {
|
||||
software == GITEA_IDENTIFIER
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::settings::Settings;
    // Integration-test Gitea instance; must be running locally before
    // these tests are executed.
    pub const GITEA_HOST: &str = "http://localhost:8080";

    // Smoke test: the local test instance must be detected as Gitea.
    #[actix_rt::test]
    async fn is_gitea_works() {
        let data = Data::new(Settings::new().unwrap()).await;
        assert!(data.is_gitea(GITEA_HOST).await);
    }

    // Crawls the whole test instance and checks page/element counts.
    #[actix_rt::test]
    async fn crawl_gitea() {
        use crate::tests::sqlx_sqlite;
        let data = Data::new(Settings::new().unwrap()).await;
        // NOTE(review): `db` is never used below — presumably kept for
        // the side effects of get_data() (database setup); confirm or
        // remove.
        let db = sqlx_sqlite::get_data();
        let res = data.crawl(GITEA_HOST).await;
        let mut elements = 0;
        res.iter().for_each(|r| elements += r.data.len());
        // Fixture assumption: the seeded test instance serves exactly
        // 100 repositories across 5 pages — TODO confirm against the
        // seeding script.
        assert_eq!(res.len(), 5);
        assert_eq!(elements, 100);
    }
}
|
Loading…
Reference in a new issue