feat: impl import for publiccodeyml

This commit is contained in:
Aravinth Manivannan 2023-03-02 17:39:14 +05:30
parent c5a36ca685
commit 30c245ea8f
Signed by: realaravinth
GPG key ID: AD9F0F08E855ED88
5 changed files with 135 additions and 120 deletions

View file

@ -21,7 +21,7 @@ use crate::*;
/// adding forge works /// adding forge works
pub async fn adding_forge_works<'a, T: Federate>( pub async fn adding_forge_works<'a, T: Federate>(
ff: &T, ff: &T,
create_forge_msg: CreateForge, create_forge_msg: CreateForge<'a>,
create_user_msg: AddUser<'a>, create_user_msg: AddUser<'a>,
add_repo_msg: AddRepository<'a>, add_repo_msg: AddRepository<'a>,
) { ) {

View file

@ -16,11 +16,18 @@ path = "src/lib.rs"
[dependencies] [dependencies]
async-trait = "0.1.51" async-trait = "0.1.51"
serde = { version = "1", features = ["derive"]} serde = { version = "1", features = ["derive"]}
serde_yaml = "0.8.24" serde_yaml = "0.9"
tokio = { version = "1.18.2", features = ["fs"]} tokio = { version = "1.18.2", features = ["fs"]}
thiserror = "1.0.30" thiserror = "1.0.30"
url = { version = "2.2.2", features = ["serde"] } url = { version = "2.2.2", features = ["serde"] }
tar = "0.4.38" tar = "0.4.38"
log = "0.4.16"
mktemp = "0.4.1"
[dependencies.reqwest]
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
version = "0.11.10"
[dependencies.db-core] [dependencies.db-core]
path = "../../db/db-core" path = "../../db/db-core"

View file

@ -15,16 +15,23 @@
* You should have received a copy of the GNU Affero General Public License * You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
use std::fs as StdFs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use async_trait::async_trait; use async_trait::async_trait;
use log::info;
use mktemp::Temp;
use reqwest::Client;
use serde::Serialize; use serde::Serialize;
use tar::Archive;
use tokio::fs; use tokio::fs;
use tokio::io::AsyncWriteExt;
use url::Url; use url::Url;
use db_core::prelude::*; use db_core::prelude::*;
use federate_core::Federate; use federate_core::Federate;
use federate_core::{LatestResp, ROUTES};
pub mod errors; pub mod errors;
pub mod schema; pub mod schema;
@ -295,4 +302,122 @@ impl Federate for PccFederate {
let latest = times.pop().unwrap(); let latest = times.pop().unwrap();
Ok(format!("{}.tar", latest)) Ok(format!("{}.tar", latest))
} }
/// import archive from another Starchart instance
async fn import(
&self,
mut starchart_url: Url,
client: &Client,
db: &Box<dyn SCDatabase>,
) -> Result<(), Self::Error> {
info!("[import][{starchart_url}] import latest tarball from starchart instance");
let mut url = starchart_url.clone();
url.set_path(ROUTES.get_latest);
let resp: LatestResp = client.get(url).send().await.unwrap().json().await.unwrap();
let mut url = starchart_url.clone();
url.set_path(&format!("/federate/{}", resp.latest));
println!("{:?}", url);
let file = client.get(url).send().await.unwrap().bytes().await.unwrap();
let tmp = Temp::new_dir().unwrap();
let import_file = tmp.as_path().join("import.tar.gz");
{
let mut f = fs::File::create(&import_file).await.unwrap();
f.write_all(&file).await.unwrap();
}
let f = StdFs::File::open(&import_file).unwrap();
let uncompressed = tmp.as_path().join("untar");
fs::create_dir(&uncompressed).await.unwrap();
let mut ar = Archive::new(f);
ar.unpack(&uncompressed).unwrap();
let mut instance_dir_contents = fs::read_dir(&uncompressed).await.unwrap();
while let Some(instance_dir_entry) = instance_dir_contents.next_entry().await.unwrap() {
if !instance_dir_entry.file_type().await.unwrap().is_dir() {
continue;
}
let instance_file = instance_dir_entry.path().join(INSTANCE_INFO_FILE);
let instance = fs::read_to_string(instance_file).await.unwrap();
let mut instance: CreateForge = serde_yaml::from_str(&instance).unwrap();
instance.starchart_url = Some(starchart_url.as_str());
if !db.forge_exists(&instance.url).await.unwrap() {
info!("[import][{}] Creating forge", &instance.url);
db.create_forge_instance(&instance).await.unwrap();
} else if !self.forge_exists(&instance.url).await.unwrap() {
self.create_forge_instance(&instance).await.unwrap();
}
let mut dir_contents = fs::read_dir(&instance_dir_entry.path()).await.unwrap();
while let Some(dir_entry) = dir_contents.next_entry().await.unwrap() {
if !dir_entry.file_type().await.unwrap().is_dir() {
continue;
}
let username = dir_entry.file_name();
let username = username.to_str().unwrap();
if !db.user_exists(username, Some(&instance.url)).await.unwrap() {
info!("[import][{}] Creating user: {username}", instance.url);
let user_file = instance_dir_entry
.path()
.join(&username)
.join(USER_INFO_FILE);
let user_file_content = fs::read_to_string(user_file).await.unwrap();
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
user.import = true;
db.add_user(&user).await.unwrap();
}
if !self.user_exists(username, &instance.url).await.unwrap() {
let user_file = instance_dir_entry
.path()
.join(&username)
.join(USER_INFO_FILE);
let user_file_content = fs::read_to_string(user_file).await.unwrap();
let mut user: AddUser<'_> = serde_yaml::from_str(&user_file_content).unwrap();
user.import = true;
self.create_user(&user).await.unwrap();
}
let mut repositories = fs::read_dir(dir_entry.path()).await.unwrap();
while let Some(repo) = repositories.next_entry().await.unwrap() {
if !repo.file_type().await.unwrap().is_dir() {
continue;
}
let repo_file = repo.path().join(REPO_INFO_FILE);
println!("repo_file: {:?}", repo_file);
let publiccodeyml_repository: schema::Repository =
serde_yaml::from_str(&fs::read_to_string(repo_file).await.unwrap())
.unwrap();
let add_repo = publiccodeyml_repository.to_add_repository(true);
if !db
.repository_exists(add_repo.name, username, &add_repo.url)
.await
.unwrap()
{
info!(
"[import][{}] Creating repository: {}",
instance.url, add_repo.name
);
db.create_repository(&add_repo).await.unwrap();
}
if !self
.repository_exists(add_repo.name, username, &add_repo.url)
.await
.unwrap()
{
self.create_repository(&add_repo).await.unwrap();
}
}
}
}
Ok(())
}
} }

View file

@ -37,7 +37,7 @@ async fn everything_works() {
let create_forge_msg = CreateForge { let create_forge_msg = CreateForge {
url: url.clone(), url: url.clone(),
forge_type: ForgeImplementation::Gitea, forge_type: ForgeImplementation::Gitea,
import: false, starchart_url: None,
}; };
let add_user_msg = AddUser { let add_user_msg = AddUser {

117
spider.rs
View file

@ -1,117 +0,0 @@
/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::time::Duration;
use tokio::time;
use url::Url;
use crate::data::Data;
use crate::gitea::SearchResults;
/// URL path that `Data::crawl` sets on the host URL to page through repositories.
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
/// URL path that `Data::is_gitea` sets on the host URL to fetch nodeinfo.
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
impl Data {
    /// Page through the repository search API of the instance at `hostname`
    /// and collect every non-empty page of results.
    ///
    /// Requests start at page 1 and ask for
    /// `settings.crawler.items_per_api_call` items per page. After each
    /// request the task sleeps for `settings.crawler.wait_before_next_api_call`
    /// seconds. Crawling stops at the first page whose `data` is empty; that
    /// page is not included in the returned vector.
    ///
    /// # Panics
    ///
    /// Panics if `hostname` is not a valid URL, if a request fails, or if a
    /// response body cannot be deserialized into `SearchResults`.
    pub async fn crawl(&self, hostname: &str) -> Vec<SearchResults> {
        let mut url = Url::parse(hostname).unwrap();
        url.set_path(REPO_SEARCH_PATH);

        let mut repos = Vec::new();
        let mut page = 1;
        loop {
            let mut url = url.clone();
            url.set_query(Some(&format!(
                "page={page}&limit={}",
                self.settings.crawler.items_per_api_call
            )));
            let res: SearchResults = self
                .client
                .get(url)
                .send()
                .await
                .unwrap()
                .json()
                .await
                .unwrap();

            // TODO implement save
            time::sleep(Duration::from_secs(
                self.settings.crawler.wait_before_next_api_call,
            ))
            .await;

            if res.data.is_empty() {
                return repos;
            }
            repos.push(res);
            page += 1;
        }
    }

    /// purpose: interact with instance running on provided hostname and verify if the instance is a
    /// Gitea instance.
    ///
    /// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
    ///
    /// Returns `true` only when the `software.name` field of the nodeinfo
    /// document is the string `"gitea"`; a missing or non-string field yields
    /// `false`.
    ///
    /// # Panics
    ///
    /// Panics if `hostname` is not a valid URL, if the request fails, or if
    /// the response body is not valid JSON.
    pub async fn is_gitea(&self, hostname: &str) -> bool {
        const GITEA_IDENTIFIER: &str = "gitea";

        let mut url = Url::parse(hostname).unwrap();
        url.set_path(GITEA_NODEINFO);
        let res: serde_json::Value = self
            .client
            .get(url)
            .send()
            .await
            .unwrap()
            .json()
            .await
            .unwrap();

        // Indexing a `Value` with a missing key yields `Null`, for which
        // `as_str` is `None`, so absent fields compare unequal.
        res["software"]["name"].as_str() == Some(GITEA_IDENTIFIER)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::settings::Settings;

    /// Address the tests use for the Gitea instance they talk to.
    pub const GITEA_HOST: &str = "http://localhost:8080";

    /// The instance at `GITEA_HOST` must be recognised as Gitea.
    #[actix_rt::test]
    async fn is_gitea_works() {
        let settings = Settings::new().unwrap();
        let data = Data::new(settings).await;
        assert!(data.is_gitea(GITEA_HOST).await);
    }

    /// Crawling `GITEA_HOST` must yield 5 pages holding 100 results in total.
    #[actix_rt::test]
    async fn crawl_gitea() {
        use crate::tests::sqlx_sqlite;

        let settings = Settings::new().unwrap();
        let data = Data::new(settings).await;
        let _db = sqlx_sqlite::get_data();

        let pages = data.crawl(GITEA_HOST).await;
        let elements: usize = pages.iter().map(|page| page.data.len()).sum();

        assert_eq!(pages.len(), 5);
        assert_eq!(elements, 100);
    }
}