starchart/forge/forgeflux/src/lib.rs

198 lines
5.8 KiB
Rust

/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::sync::Arc;
use std::time::Duration;
use reqwest::Client;
use tokio::task::JoinHandle;
use url::Url;
use db_core::ForgeImplementation;
use forge_core::dev::*;
use forge_core::Repository;
pub mod schema;
/// Path of the repository search endpoint; `crawl` sets it on the instance URL.
const REPO_SEARCH_PATH: &str = "/search/repositories";
/// Path of the NodeInfo document that `is_forge` requests from the instance.
const FORGEFLUX_NODEINFO: &str = "/nodeinfo/2.0";
/// Software name the NodeInfo document must report for `is_forge` to return `true`.
const FORGEFLUX_IDENTIFIER: &str = "forgeflux";
/// Handle for talking to a single ForgeFlux instance.
#[derive(Clone)]
pub struct ForgeFlux {
/// URL of the instance exactly as it was passed to `ForgeFlux::new`; request URLs are derived from it.
pub instance_url: Url,
/// HTTP client used for every request sent to the instance.
pub client: Client,
/// `instance_url` after being passed through `db_core::clean_url`; this is what `get_url` returns.
url: Url,
}
impl ForgeFlux {
    /// Creates a handle for the ForgeFlux instance located at `instance_url`.
    ///
    /// The URL is stored twice: verbatim in `instance_url`, and in cleaned form
    /// (the output of `db_core::clean_url`, parsed back into a `Url`) in the
    /// private `url` field.
    ///
    /// # Panics
    ///
    /// Panics if the string produced by `db_core::clean_url` cannot be parsed
    /// as a URL.
    pub fn new(instance_url: Url, client: Client) -> Self {
        let cleaned = db_core::clean_url(&instance_url);
        let url = Url::parse(&cleaned).unwrap();

        Self {
            instance_url,
            client,
            url,
        }
    }
}
impl PartialEq for ForgeFlux {
    /// Two handles are equal when both their cleaned URL and their original
    /// instance URL match; the HTTP client is not part of the comparison.
    fn eq(&self, other: &Self) -> bool {
        if self.url != other.url {
            return false;
        }
        self.instance_url == other.instance_url
    }
}
#[async_trait]
impl SCForge for ForgeFlux {
    /// Reports whether the instance identifies itself as ForgeFlux.
    ///
    /// Requests the NodeInfo document at `FORGEFLUX_NODEINFO` and returns
    /// `true` only when the response status is 200 and the document's
    /// software name equals `FORGEFLUX_IDENTIFIER`.
    ///
    /// Any failure along the way (the request cannot be sent, the status is
    /// not 200, the body does not deserialize as `schema::Nodeinfo`) yields
    /// `false` rather than a panic: a host that cannot be verified is not
    /// treated as a ForgeFlux instance.
    async fn is_forge(&self) -> bool {
        let mut node_info_url = self.instance_url.clone();
        node_info_url.set_path(FORGEFLUX_NODEINFO);

        let resp = match self.client.get(node_info_url).send().await {
            Ok(resp) => resp,
            Err(_) => return false,
        };
        if resp.status() != 200 {
            return false;
        }

        match resp.json::<schema::Nodeinfo>().await {
            Ok(info) => info.software.name == FORGEFLUX_IDENTIFIER,
            Err(_) => false,
        }
    }

    /// Returns the cleaned URL computed when this handle was constructed.
    fn get_url(&self) -> &Url {
        &self.url
    }

    /// Identifies this implementation as `ForgeImplementation::ForgeFlux`.
    fn forge_type(&self) -> ForgeImplementation {
        ForgeImplementation::ForgeFlux
    }

    /// Fetches one page of repositories together with their owners.
    ///
    /// * `limit` and `page` are forwarded verbatim as the `limit` and `page`
    ///   query parameters of the repository search request.
    /// * `rate_limit` is the number of seconds that must elapse between two
    ///   consecutive owner fetches.
    ///
    /// Each distinct owner (keyed by the repository's `attributed_to` URL) is
    /// fetched at most once per call. In the returned `users` map every owner
    /// is registered under both its username and its URL. The returned `tags`
    /// set is always empty, and a repository summary that is blank after
    /// trimming is reported as `None`.
    ///
    /// # Panics
    ///
    /// Panics if the repository search request or any owner request fails,
    /// if a response body cannot be deserialized, or if the spawned
    /// rate-limit timer task fails.
    async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp {
        /// Trims `s` and maps a blank result to `None`.
        fn empty_is_none(s: &str) -> Option<String> {
            let s = s.trim();
            if s.is_empty() {
                None
            } else {
                Some(s.to_owned())
            }
        }

        let tags = Tags::default();
        let mut users = UserMap::default();
        // Owners already fetched during this call, keyed by `attributed_to`.
        let mut internal_users = UserMap::default();
        let mut repos = Repositories::default();

        let mut url = self.instance_url.clone();
        url.set_path(REPO_SEARCH_PATH);
        url.set_query(Some(&format!("page={page}&limit={limit}")));

        let res: Vec<schema::Repository> = self
            .client
            .get(url)
            .send()
            .await
            .expect("repository search request failed")
            .json()
            .await
            .expect("repository search response is not a list of repositories");

        // Timer started right after the previous owner fetch; it is awaited
        // before the next fetch so that work done in between counts towards
        // the rate-limit interval.
        let mut sleep_fut: Option<JoinHandle<()>> = None;

        for repo in res {
            let owner_key = repo.attributed_to.to_string();
            let cached = internal_users.get(&owner_key).cloned();

            let user = match cached {
                Some(known) => known,
                None => {
                    if let Some(pending) = sleep_fut.take() {
                        pending.await.expect("rate-limit timer task failed");
                    }

                    let user: schema::User = self
                        .client
                        .get(repo.attributed_to.clone())
                        .send()
                        .await
                        .expect("owner request failed")
                        .json()
                        .await
                        .expect("owner response is not a user document");

                    sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::from_secs(
                        rate_limit,
                    ))));

                    let f_user = Arc::new(User {
                        url: user.id.clone(),
                        username: Arc::new(user.preferred_username),
                        html_link: user.id.to_string(),
                        profile_photo: user.icon.map(|icon| icon.url.to_string()),
                    });

                    users.insert(f_user.username.clone(), f_user.clone());
                    users.insert(Arc::new(f_user.url.to_string()), f_user.clone());
                    internal_users.insert(Arc::new(owner_key), f_user.clone());
                    f_user
                }
            };

            repos.push(Repository {
                url: repo.id.clone(),
                website: None,
                name: repo.name,
                owner: user,
                html_link: repo.id.to_string(),
                tags: None,
                description: empty_is_none(&repo.summary),
            });
        }

        CrawlResp { repos, tags, users }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use url::Url;
// Total number of repositories the test pages through.
// NOTE(review): with the value 0, `steps` below is 0 and the crawl loop body
// never runs, so only `is_forge` is exercised — confirm this is intended.
pub const NET_REPOSITORIES: u64 = 0;
// Page size passed to `crawl` as `limit`; every crawled page is asserted to hold exactly this many repositories.
pub const PER_CRAWL: u64 = 10;
// Integration test: reads the instance URL from the FORGEFLUX_HOST environment
// variable and sends real requests to it. Panics if the variable is unset or
// is not a valid URL.
#[actix_rt::test]
async fn forgeflux_works() {
let ctx = ForgeFlux::new(
Url::parse(&std::env::var("FORGEFLUX_HOST").unwrap()).unwrap(),
Client::new(),
);
assert!(ctx.is_forge().await);
// Integer division: the number of full pages to crawl.
let steps = NET_REPOSITORIES / PER_CRAWL;
for i in 0..steps {
// A rate limit of 0 means no wait between owner fetches.
let res = ctx.crawl(PER_CRAWL, i, 0).await;
assert_eq!(res.repos.len() as u64, PER_CRAWL);
}
}
}