From a9e482272e171f2160e2abff32dac2ac9f579b7b Mon Sep 17 00:00:00 2001 From: Aravinth Manivannan Date: Sun, 9 Feb 2025 13:00:03 +0530 Subject: [PATCH] feat: impl forgeflux forge driver --- forge/forgeflux/Cargo.toml | 37 ++++ forge/forgeflux/src/lib.rs | 187 ++++++++++++++++++ forge/forgeflux/src/schema.rs | 69 +++++++ .../schema/forgeflux/net.forgeflux.org.json | 1 + 4 files changed, 294 insertions(+) create mode 100644 forge/forgeflux/Cargo.toml create mode 100644 forge/forgeflux/src/lib.rs create mode 100644 forge/forgeflux/src/schema.rs create mode 100644 forge/forgeflux/tests/schema/forgeflux/net.forgeflux.org.json diff --git a/forge/forgeflux/Cargo.toml b/forge/forgeflux/Cargo.toml new file mode 100644 index 0000000..519876d --- /dev/null +++ b/forge/forgeflux/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "forgeflux" +version = "0.1.0" +authors = ["realaravinth "] +description = "ForgeFlux StarChart - Federated forge spider" +documentation = "https://forgeflux.org/" +edition = "2021" +license = "AGPLv3 or later version" + + +[lib] +name = "forgeflux" +path = "src/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +async-trait = "0.1.51" +url = { version = "2.2.2", features = ["serde"] } +tokio = { version = "1.17", features = ["time"] } + +[dependencies.forge-core] +path = "../forge-core" + +[dependencies.reqwest] +features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"] +version = "0.11.10" + +[dependencies.serde] +features = ["derive"] +version = "1" + +[dependencies.serde_json] +version = "1" + +[dev-dependencies] +actix-rt = "2.7" diff --git a/forge/forgeflux/src/lib.rs b/forge/forgeflux/src/lib.rs new file mode 100644 index 0000000..1bd7097 --- /dev/null +++ b/forge/forgeflux/src/lib.rs @@ -0,0 +1,187 @@ +/* + * ForgeFlux StarChart - A federated software forge spider + * Copyright © 2022 Aravinth Manivannan + * + * This program is free software: you can 
redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+use std::sync::Arc;
+use std::time::Duration;
+
+use reqwest::Client;
+use tokio::task::JoinHandle;
+use url::Url;
+
+use db_core::ForgeImplementation;
+use forge_core::dev::*;
+use forge_core::Repository;
+
+pub mod schema;
+
+const REPO_SEARCH_PATH: &str = "/search/repositories";
+const FORGEFLUX_NODEINFO: &str = "/nodeinfo/2.0";
+const FORGEFLUX_IDENTIFIER: &str = "forgeflux";
+
+/// Forge driver for a ForgeFlux instance.
+#[derive(Clone)]
+pub struct ForgeFlux {
+    /// Instance URL as supplied by the caller; crawl requests are built from it.
+    pub instance_url: Url,
+    /// HTTP client used for every request.
+    pub client: Client,
+    /// `instance_url` after `db_core::clean_url`; returned by `get_url`.
+    url: Url,
+}
+
+impl ForgeFlux {
+    /// Creates a driver for the instance at `instance_url`.
+    ///
+    /// Panics if the output of `db_core::clean_url` is not a valid URL.
+    pub fn new(instance_url: Url, client: Client) -> Self {
+        let url = Url::parse(&db_core::clean_url(&instance_url)).unwrap();
+
+        Self {
+            instance_url,
+            client,
+            url,
+        }
+    }
+}
+
+impl PartialEq for ForgeFlux {
+    fn eq(&self, other: &Self) -> bool {
+        self.url == other.url && self.instance_url == other.instance_url
+    }
+}
+
+#[async_trait]
+impl SCForge for ForgeFlux {
+    // NOTE(review): always `true`. `FORGEFLUX_NODEINFO` and `FORGEFLUX_IDENTIFIER` are unused
+    // in this file, so a nodeinfo probe was presumably intended here -- confirm.
+    async fn is_forge(&self) -> bool {
+        true
+    }
+
+    fn get_url(&self) -> &Url {
+        &self.url
+    }
+
+    fn forge_type(&self) -> ForgeImplementation {
+        ForgeImplementation::ForgeFlux
+    }
+
+    /// Fetches page `page` (`limit` entries) of the repository search listing and
+    /// resolves each repository's owner, requesting every distinct owner only once
+    /// and pausing `rate_limit` seconds between owner requests.
+    ///
+    /// Panics if a request fails or a response does not match `schema`.
+    async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp {
+        let tags = Tags::default();
+        let mut users = UserMap::default();
+        // Owners resolved so far, keyed by the repository's `attributedTo` URL.
+        let mut internal_users = UserMap::default();
+        let mut repos = Repositories::default();
+
+        let mut url = self.instance_url.clone();
+        url.set_path(REPO_SEARCH_PATH);
+        url.set_query(Some(&format!("page={page}&limit={limit}")));
+        let res: Vec<schema::Repository> = self
+            .client
+            .get(url)
+            .send()
+            .await
+            .unwrap()
+            .json()
+            .await
+            .unwrap();
+
+        let mut sleep_fut: Option<JoinHandle<()>> = None;
+        for repo in res {
+            let owner_url = repo.attributed_to.to_string();
+            let user = if let Some(known) = internal_users.get(&owner_url).cloned() {
+                known
+            } else {
+                // Wait out the pause started after the previous owner request.
+                if let Some(pause) = sleep_fut.take() {
+                    pause.await.unwrap();
+                }
+
+                let user: schema::User = self
+                    .client
+                    .get(repo.attributed_to.clone())
+                    .send()
+                    .await
+                    .unwrap()
+                    .json()
+                    .await
+                    .unwrap();
+
+                sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::new(
+                    rate_limit, 0,
+                ))));
+
+                let f_user = Arc::new(User {
+                    url: user.id.clone(),
+                    username: Arc::new(user.preferred_username),
+                    html_link: user.id.to_string(),
+                    profile_photo: user.icon.map(|icon| icon.url.to_string()),
+                });
+
+                users.insert(f_user.username.clone(), f_user.clone());
+                users.insert(Arc::new(f_user.url.to_string()), f_user.clone());
+                // Remember the owner so later repositories on this page reuse it.
+                internal_users.insert(Arc::new(owner_url), f_user.clone());
+                f_user
+            };
+
+            let frepo = Repository {
+                url: self.url.clone(),
+                website: None,
+                name: repo.name,
+                owner: user,
+                html_link: repo.id.to_string(),
+                tags: None,
+                description: Some(repo.summary),
+            };
+
+            repos.push(frepo);
+        }
+        CrawlResp { repos, tags, users }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use url::Url;
+
+    // With 0 here `steps` is 0, so the crawl loop in the test below never runs.
+    pub const NET_REPOSITORIES: u64 = 0;
+    pub const PER_CRAWL: u64 = 10;
+
+    #[actix_rt::test]
+    async fn forgeflux_works() {
+        let ctx = ForgeFlux::new(
+            Url::parse(&std::env::var("FORGEFLUX_HOST").unwrap()).unwrap(),
+            Client::new(),
+        );
+
+        assert!(ctx.is_forge().await);
+        let steps = NET_REPOSITORIES / PER_CRAWL;
+
+        for i in 0..steps
{
+            let res = ctx.crawl(PER_CRAWL, i, 0).await;
+            assert_eq!(res.repos.len() as u64, PER_CRAWL);
+        }
+    }
+}
diff --git a/forge/forgeflux/src/schema.rs b/forge/forgeflux/src/schema.rs
new file mode 100644
index 0000000..1f47a7f
--- /dev/null
+++ b/forge/forgeflux/src/schema.rs
@@ -0,0 +1,69 @@
+/*
+ * ForgeFlux StarChart - A federated software forge spider
+ * Copyright © 2022 Aravinth Manivannan
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+use serde::{Deserialize, Serialize};
+use url::Url;
+
+/// Image referenced by a user's `icon` property.
+#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct ProfilePhoto {
+    pub url: Url,
+    pub media_type: String,
+}
+
+/// Actor that owns a repository, as served at the repository's `attributedTo` URL.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct User {
+    pub name: String,
+    pub preferred_username: String,
+    pub id: Url,
+    pub url: Option<Url>, // NOTE(review): inner type assumed to be `Url` -- confirm
+    pub icon: Option<ProfilePhoto>,
+}
+
+/// Repository entry of the search listing; JSON keys not listed here are ignored.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Repository {
+    pub id: Url,
+    pub clone_uri: Url,
+    pub inbox: Url,
+    pub name: String,
+    pub attributed_to: Url,
+    pub summary: String,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use super::*;
+
+    /// Checks that every line of the recorded ForgeFlux response parses as `Vec<Repository>`.
+    #[test]
+    fn schema_doesnt_panic() {
+        let files = ["./tests/schema/forgeflux/net.forgeflux.org.json"];
+        for file in files.iter() {
+            let contents = fs::read_to_string(file).unwrap();
+            for line in contents.lines() {
+                let _: Vec<Repository> =
+                    serde_json::from_str(line).expect("Forgeflux schema paniced");
+            }
+        }
+    }
+}
diff --git a/forge/forgeflux/tests/schema/forgeflux/net.forgeflux.org.json b/forge/forgeflux/tests/schema/forgeflux/net.forgeflux.org.json
new file mode 100644
index 0000000..2513e28
--- /dev/null
+++ b/forge/forgeflux/tests/schema/forgeflux/net.forgeflux.org.json
@@ -0,0 +1 @@
+[{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux","cloneUri":"https://git.batsense.net/forgeflux/forgeflux.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux##main-key","owner":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9Bd24ypLB4tmrbwfQ1VE\n+mhyqpTBmQzJjCrr+0CniQf0pVpMJH7UmIKw3MPmItPNtaZz+bD2WXGm3X1iWOwc\nGazPmP7ruKXh8v2W4ofnzcWsExtlUjPbcSE8CSvhE8xEjNkFUnSIuarcMJ0SCQjD\nwk9F8SJNyNs5KUq4hAmUzbY+GUvo5q24EsYT5ksomfY6L/eE0Awfcy8HMLXZBrOF\nuNzej7MszeRQoz7v9n+TKDJnAmndo30NfUQ209gmcJ3qb2vvFaArYPVOU+EGR7wX\ncJnZe0OXbCfWlSaW54f5ldWAmmUCCq/UznkAVejSm78EDoANZdYGJvgTYNAPAKZL\nywIDAQAB\n-----END PUBLIC
KEY-----\n"},"inbox":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/inbox","outbox":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/outbox","followers":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/followers","team":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/team","ticketsTrackedBy":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/ticket","sendPatchesTo":"https://net.forgeflux.org/git.batsense.net/forgeflux/forgeflux/patch","name":"forgeflux","attributedTo":"https://net.forgeflux.org/git.batsense.net/forgeflux","summary":"demo instance: https://net.forgeflux.org","preferredUsername":"ForgeFlux_ForgeFlux_repository_git.batsense.net"},{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events","cloneUri":"https://git.batsense.net/realaravinth/throwaway-forgeflux-events.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events##main-key","owner":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAm1pokKi0oegWgu0MWHXs\nngDl5DcAhWtvfFtUv0SeZhoeLlRFN2XG988v8WJIwfSSMLXh3njWdn/npJNzOGq1\nf0atjORJpolX4Dyv0AKWBQg4d5rg9CJl9yalCWHbFkoipA8nVdiEv7ekgOlzDtBR\nPF1oDxpNRhibcPzolrkS2B9bwtY643IJK/8O/jH8xzj/4rL2CylpEPFOrpCAH0Dt\nOlNBm2jIB+oHhhqjwGGvZ5VdR9GDZ3hsWLTvuuDCsRVkSKTruxyzwOprIGVcZ9ti\nj+NUzYJKfkf9pkgHBtbkCOAX9RmdgyH1Q99KSjvIhPCwzHg16v6olH6kGBiGwpjC\nbQIDAQAB\n-----END PUBLIC 
KEY-----\n"},"inbox":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/inbox","outbox":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/outbox","followers":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/followers","team":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/team","ticketsTrackedBy":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/ticket","sendPatchesTo":"https://net.forgeflux.org/git.batsense.net/realaravinth/throwaway-forgeflux-events/patch","name":"throwaway-forgeflux-events","attributedTo":"https://net.forgeflux.org/git.batsense.net/realaravinth","summary":"throwaway repository to test stuff","preferredUsername":"realaravinth_throwaway-forgeflux-events_repository_git.batsense.net"},{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/github.com/dat-adi/orkanskader","cloneUri":"https://github.com/dat-adi/orkanskader.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/github.com/dat-adi/orkanskader##main-key","owner":"https://net.forgeflux.org/github.com/dat-adi/orkanskader","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAzSNzY0IXkWC527f1BU7S\n32eMOm9BJjGDWxVFLR8AsA7PR827wesXgxuceyq797crLZkX12BV4EypuWmKTvBt\nb/Te8lXSmgqGMoZ/RyuzuKS4fSMAuuikWkc12EwikVFoWZFIM8drLYCdmjXpFWk8\nXOiTLr8I/1H5lGOoBGcGAstPB2ydUilWp6Ukgm9MWs+aC229QaXud/UCFO8AgquQ\nHPaTP0ElnJaehOdNDQJ5G5Vuqdk2Hpk5+HOT0tHZNGly78wvjgWZSiJESgFIoMqG\nnEeqdO2KMpBkhKEn0vXA3RYxditSkijH5F4ThZq+Wfgcy1NRSpQx3A7Ux+id4M4O\ntwIDAQAB\n-----END PUBLIC 
KEY-----\n"},"inbox":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/inbox","outbox":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/outbox","followers":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/followers","team":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/team","ticketsTrackedBy":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/ticket","sendPatchesTo":"https://net.forgeflux.org/github.com/dat-adi/orkanskader/patch","name":"orkanskader","attributedTo":"https://net.forgeflux.org/github.com/dat-adi","summary":"Modelling the impact of Hurricane Damage to Residential Property","preferredUsername":"dat-adi_orkanskader_repository_github.com"},{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/github.com/dat-adi/dotfiles","cloneUri":"https://github.com/dat-adi/dotfiles.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/github.com/dat-adi/dotfiles##main-key","owner":"https://net.forgeflux.org/github.com/dat-adi/dotfiles","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2ObGCViuxFWf7IITlbmb\nkNDpkzwOjNmcspXGSo87SpqTPxKXcTU6QaBH8iljCG9/U29zRjwa1Ychdu7qap3s\n0fBRN5XI65Y0DwWNDrekalbTUIPC9Pqqrj54xQlyHjKbBtDfMgslAuO1zBWrIpZz\nAvFFQURedNnF+FD5PjfYZCW/m9Axvm9y4Ba4CXBjdPlTDYRw2Bmfdsl5wmyMOVpW\nl0st3YpB8+cSYe0MSgKAspi3nrIBNEoZR53KeD4Deex0z4W+u6HfPiRvsAWolDR+\nO0E6Jzb073O9K9BsDYOv9usl1c5BZyz71HwfEI1aB2zLTF9tcwkEvq8kgC3Z0g+K\nfwIDAQAB\n-----END PUBLIC 
KEY-----\n"},"inbox":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/inbox","outbox":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/outbox","followers":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/followers","team":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/team","ticketsTrackedBy":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/ticket","sendPatchesTo":"https://net.forgeflux.org/github.com/dat-adi/dotfiles/patch","name":"dotfiles","attributedTo":"https://net.forgeflux.org/github.com/dat-adi","summary":"A repository to contain the configuration files of my editors, and settings of the IDEs as well. Maybe a bit more.","preferredUsername":"dat-adi_dotfiles_repository_github.com"},{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/github.com/dat-adi/dat-adi","cloneUri":"https://github.com/dat-adi/dat-adi.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/github.com/dat-adi/dat-adi##main-key","owner":"https://net.forgeflux.org/github.com/dat-adi/dat-adi","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvo7nTh3oPCPkwu3qi91L\n2+MSu+XqFnhaPWAdFMANTTZMPhrKOngkM7u9jTvvAf2SOi5VlF/dl+S1365JLtZF\neNe7dyecTdykqc3SkxIXoIG4rYF32wGTrUXgMT3nQ986bEBmDi8izCk0q8BsTQ/O\n9ZL/DvBv4wh2Z25c3ckOjjdtrkquOlKtDoeeR7jMwQzT7jgzse3XzyV9jjs0zkRV\nxLUejJ6WMXbcnIqx9zn71hl4+RHejCfsJBPoZcNaYOLCV5860+UKfTrR1MSm0APt\nZDjzdlyFzTL3oQOq6eY0/wAtj0s4PqB3SBOotUfdRPMxJQxMc5lCtrxdrkEtWkA1\nTwIDAQAB\n-----END PUBLIC 
KEY-----\n"},"inbox":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/inbox","outbox":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/outbox","followers":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/followers","team":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/team","ticketsTrackedBy":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/ticket","sendPatchesTo":"https://net.forgeflux.org/github.com/dat-adi/dat-adi/patch","name":"dat-adi","attributedTo":"https://net.forgeflux.org/github.com/dat-adi","summary":"A short summary of who you're looking at.","preferredUsername":"dat-adi_dat-adi_repository_github.com"},{"@context":["https://www.w3.org/ns/activitystreams","https://w3id.org/security/v1","https://forgefed.org/ns"],"id":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha","cloneUri":"https://git.batsense.net/mcaptcha/mcaptcha.git","type":["Repository","TicketTracker","PatchTracker"],"publicKey":{"id":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha##main-key","owner":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha","publicKeyPem":"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAx1EJ5y6x/4OEd7pryB5u\n/3RcYfWWEv2pNyHDqpwKtPZKpbve9lwPOn4iaQlc55zvHv90kIChCk83AbfQlmBu\nSPzPdf11+3ghFp24cQeo7T93BOA+PUe/x1hYWk4+x8hcKbO5OTNWR8H1DHU1vLFx\noU1/N5yeSxLYlTnQgHC6WvxET8txHACw8UxD9VKZUn7H0CpqjIqIuUgEbHzi0Q74\nEhtzYWJmjgty4JeuMTf4l3UnXW5sW97CRmbcYkvO0e9T5yYNHja92F2lbPC/q9mP\nBjFKyfFerNm8yW2prOTCindyUok4PmXTIOnwfg4U4Hs/3G8iqDyai3wMcsvtYp8X\nIwIDAQAB\n-----END PUBLIC 
KEY-----\n"},"inbox":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/inbox","outbox":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/outbox","followers":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/followers","team":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/team","ticketsTrackedBy":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/ticket","sendPatchesTo":"https://net.forgeflux.org/git.batsense.net/mcaptcha/mcaptcha/patch","name":"mcaptcha","attributedTo":"https://net.forgeflux.org/git.batsense.net/mcaptcha","summary":"","preferredUsername":"mcaptcha_mcaptcha_repository_git.batsense.net"}] \ No newline at end of file