feat: impl forgeflux forge driver
This commit is contained in:
parent
d1606fa2ac
commit
a9e482272e
4 changed files with 294 additions and 0 deletions
37
forge/forgeflux/Cargo.toml
Normal file
37
forge/forgeflux/Cargo.toml
Normal file
|
@ -0,0 +1,37 @@
|
|||
[package]
|
||||
name = "forgeflux"
|
||||
version = "0.1.0"
|
||||
authors = ["realaravinth <realaravinth@batsense.net>"]
|
||||
description = "ForgeFlux StarChart - Federated forge spider"
|
||||
documentation = "https://forgeflux.org/"
|
||||
edition = "2021"
|
||||
license = "AGPLv3 or later version"
|
||||
|
||||
|
||||
[lib]
|
||||
name = "forgeflux"
|
||||
path = "src/lib.rs"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.51"
|
||||
url = { version = "2.2.2", features = ["serde"] }
|
||||
# `rt` is needed for `tokio::spawn` / `tokio::task::JoinHandle` used in src/lib.rs;
# declare it here instead of relying on another crate enabling it transitively.
tokio = { version = "1.17", features = ["rt", "time"] }
|
||||
|
||||
[dependencies.forge-core]
|
||||
path = "../forge-core"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
|
||||
version = "0.11.10"
|
||||
|
||||
[dependencies.serde]
|
||||
features = ["derive"]
|
||||
version = "1"
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.7"
|
187
forge/forgeflux/src/lib.rs
Normal file
187
forge/forgeflux/src/lib.rs
Normal file
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::task::JoinHandle;
|
||||
use url::Url;
|
||||
|
||||
use db_core::ForgeImplementation;
|
||||
use forge_core::dev::*;
|
||||
use forge_core::Repository;
|
||||
|
||||
pub mod schema;
|
||||
|
||||
/// URL path of the repository search endpoint that `crawl` sets on the instance URL.
const REPO_SEARCH_PATH: &str = "/search/repositories";
|
||||
/// NodeInfo 2.0 path. Not referenced by the code visible in this file.
// NOTE(review): presumably intended for a real probe in `is_forge` — confirm.
const FORGEFLUX_NODEINFO: &str = "/nodeinfo/2.0";
|
||||
/// Software identifier string. Not referenced by the code visible in this file.
// NOTE(review): presumably the value expected in the NodeInfo response — confirm.
const FORGEFLUX_IDENTIFIER: &str = "forgeflux";
|
||||
|
||||
/// Forge driver for a single ForgeFlux instance.
#[derive(Clone)]
|
||||
pub struct ForgeFlux {
|
||||
/// Instance URL exactly as passed to `ForgeFlux::new`; `crawl` uses it as
/// the base for the repository search request.
pub instance_url: Url,
|
||||
/// HTTP client used for every request made by this driver.
pub client: Client,
|
||||
/// URL parsed from `db_core::clean_url(&instance_url)`; returned by
/// `get_url` and copied into every `Repository` produced by `crawl`.
url: Url,
|
||||
}
|
||||
|
||||
impl ForgeFlux {
|
||||
pub fn new(instance_url: Url, client: Client) -> Self {
|
||||
let url = Url::parse(&db_core::clean_url(&instance_url)).unwrap();
|
||||
|
||||
Self {
|
||||
instance_url,
|
||||
client,
|
||||
url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for ForgeFlux {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.url == other.url && self.instance_url == other.instance_url
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SCForge for ForgeFlux {
|
||||
async fn is_forge(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn get_url(&self) -> &Url {
|
||||
&self.url
|
||||
}
|
||||
|
||||
fn forge_type(&self) -> ForgeImplementation {
|
||||
ForgeImplementation::ForgeFlux
|
||||
}
|
||||
|
||||
async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp {
|
||||
fn empty_is_none(s: &str) -> Option<String> {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
let mut tags = Tags::default();
|
||||
let mut users = UserMap::default();
|
||||
let mut internal_users = UserMap::default();
|
||||
let mut repos = Repositories::default();
|
||||
|
||||
let instance_url = self.instance_url.clone();
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(REPO_SEARCH_PATH);
|
||||
url.set_query(Some(&format!("page={page}&limit={limit}")));
|
||||
let mut res: Vec<schema::Repository> = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut sleep_fut: Option<JoinHandle<()>> = None;
|
||||
for repo in res.drain(0..) {
|
||||
let user = if internal_users.contains_key(&repo.attributed_to.to_string()) {
|
||||
if let Some(sleep_fut) = sleep_fut {
|
||||
sleep_fut.await.unwrap();
|
||||
}
|
||||
|
||||
let user: schema::User = self
|
||||
.client
|
||||
.get(repo.attributed_to.clone())
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::new(
|
||||
rate_limit, 0,
|
||||
))));
|
||||
|
||||
let profile_photo = if let Some(profile_photo) = user.icon {
|
||||
Some(profile_photo.url.to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let f_user = Arc::new(User {
|
||||
url: user.id.clone(),
|
||||
username: Arc::new(user.preferred_username),
|
||||
html_link: user.id.to_string(),
|
||||
profile_photo,
|
||||
});
|
||||
|
||||
users.insert(f_user.username.clone(), f_user.clone());
|
||||
users.insert(Arc::new(f_user.url.to_string()), f_user.clone());
|
||||
f_user
|
||||
} else {
|
||||
internal_users
|
||||
.get(&repo.attributed_to.to_string())
|
||||
.unwrap()
|
||||
.clone()
|
||||
};
|
||||
|
||||
let frepo = Repository {
|
||||
url: self.url.clone(),
|
||||
website: None,
|
||||
name: repo.name,
|
||||
owner: user,
|
||||
html_link: repo.id.to_string(),
|
||||
tags: None,
|
||||
description: Some(repo.summary),
|
||||
};
|
||||
|
||||
repos.push(frepo);
|
||||
}
|
||||
CrawlResp { repos, tags, users }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use url::Url;

    /// Number of repositories expected on the test instance.
    pub const NET_REPOSITORIES: u64 = 0;
    /// Page size requested on every crawl.
    pub const PER_CRAWL: u64 = 10;

    /// Smoke test against the instance named by the `FORGEFLUX_HOST`
    /// environment variable.
    ///
    /// While `NET_REPOSITORIES` is 0 the page count is 0, so the crawl loop
    /// below does not execute.
    #[actix_rt::test]
    async fn forgeflux_works() {
        let host = std::env::var("FORGEFLUX_HOST").unwrap();
        let instance_url = Url::parse(&host).unwrap();
        let ctx = ForgeFlux::new(instance_url, Client::new());

        assert!(ctx.is_forge().await);

        let steps = NET_REPOSITORIES / PER_CRAWL;
        for page in 0..steps {
            let res = ctx.crawl(PER_CRAWL, page, 0).await;
            assert_eq!(res.repos.len() as u64, PER_CRAWL);
        }
    }
}
|
69
forge/forgeflux/src/schema.rs
Normal file
69
forge/forgeflux/src/schema.rs
Normal file
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
|
||||
/// Image attached to a [`User`] through its `icon` field.
///
/// Field names are (de)serialized in camelCase, e.g. `mediaType`.
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ProfilePhoto {
|
||||
/// Location of the image.
pub url: Url,
|
||||
/// Serialized as `mediaType`.
// NOTE(review): presumably a MIME type such as `image/png` — confirm.
pub media_type: String,
|
||||
}
|
||||
|
||||
/// User document as deserialized from a ForgeFlux instance.
///
/// Field names are (de)serialized in camelCase, e.g. `preferredUsername`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct User {
|
||||
/// Required `name` field of the document.
pub name: String,
|
||||
/// Serialized as `preferredUsername`.
pub preferred_username: String,
|
||||
/// URL identifying the user.
pub id: Url,
|
||||
/// Optional `url` field; absent or `null` deserializes to `None`.
pub url: Option<Url>,
|
||||
/// Optional profile image; absent or `null` deserializes to `None`.
pub icon: Option<ProfilePhoto>,
|
||||
}
|
||||
|
||||
/// Repository document as deserialized from a ForgeFlux instance.
///
/// Field names are (de)serialized in camelCase, e.g. `cloneUri` and
/// `attributedTo`. Every field is required.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Repository {
|
||||
/// URL identifying the repository.
pub id: Url,
|
||||
/// Serialized as `cloneUri`.
pub clone_uri: Url,
|
||||
/// Required `inbox` URL of the document.
pub inbox: Url,
|
||||
/// Repository name.
pub name: String,
|
||||
/// Serialized as `attributedTo`; URL of the owning user's document.
pub attributed_to: Url,
|
||||
/// Required `summary` string of the document.
pub summary: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs;

    /// Checks that recorded ForgeFlux responses deserialize into the structs
    /// defined in this module.
    ///
    /// Every line of each fixture file is parsed on its own as a JSON array
    /// of [`Repository`]; the test panics on the first line that fails.
    #[test]
    fn schema_doesnt_panic() {
        const FIXTURES: [&str; 1] = ["./tests/schema/forgeflux/net.forgeflux.org.json"];

        for path in FIXTURES {
            let contents = fs::read_to_string(path).unwrap();
            contents.lines().for_each(|line| {
                let _: Vec<Repository> =
                    serde_json::from_str(line).expect("Forgeflux schema paniced");
            });
        }
    }
}
|
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue