feat: implement starchart for gitea
This commit is contained in:
parent
72a6ce9564
commit
d2f5f25c87
4 changed files with 406 additions and 0 deletions
33
forge/gitea/Cargo.toml
Normal file
33
forge/gitea/Cargo.toml
Normal file
|
@ -0,0 +1,33 @@
|
|||
[package]
name = "gitea"
version = "0.1.0"
authors = ["realaravinth <realaravinth@batsense.net>"]
description = "ForgeFlux StarChart - Federated forge spider"
documentation = "https://forgeflux.org/"
edition = "2021"
# Cargo expects an SPDX license expression in this field
license = "AGPL-3.0-or-later"



# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
async-trait = "0.1.51"
url = { version = "2.2.2", features = ["serde"] }

[dependencies.forge-core]
path = "../forge-core"

[dependencies.reqwest]
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
version = "0.11.10"

[dependencies.serde]
features = ["derive"]
version = "1"

[dependencies.serde_json]
version = "1"

[dev-dependencies]
actix-rt = "2.7"
|
199
forge/gitea/src/lib.rs
Normal file
199
forge/gitea/src/lib.rs
Normal file
|
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::sync::Arc;
|
||||
|
||||
use reqwest::Client;
|
||||
use url::Url;
|
||||
|
||||
use db_core::prelude::*;
|
||||
use forge_core::dev::*;
|
||||
|
||||
pub mod schema;
|
||||
|
||||
/// URL path of the repository search endpoint queried while crawling.
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";

/// URL path of the nodeinfo document inspected to identify the forge software.
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";

/// Value of `software.name` in the nodeinfo document that marks an instance as Gitea.
const GITEA_IDENTIFIER: &str = "gitea";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Gitea {
|
||||
pub instance_url: Url,
|
||||
pub client: Client,
|
||||
hostname: String,
|
||||
}
|
||||
|
||||
impl Gitea {
|
||||
pub fn new(instance_url: Url, client: Client) -> Self {
|
||||
let hostname = db_core::get_hostname(&instance_url);
|
||||
|
||||
Self {
|
||||
instance_url,
|
||||
client,
|
||||
hostname,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Gitea {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.hostname == other.hostname && self.instance_url == other.instance_url
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SCForge for Gitea {
|
||||
async fn is_forge(&self) -> bool {
|
||||
let mut url = self.instance_url.clone();
|
||||
url.set_path(GITEA_NODEINFO);
|
||||
|
||||
let res: serde_json::Value = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
if let serde_json::Value::String(software) = &res["software"]["name"] {
|
||||
software == GITEA_IDENTIFIER
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn get_hostname(&self) -> &str {
|
||||
&self.hostname
|
||||
}
|
||||
|
||||
fn forge_type(&self) -> ForgeImplementation {
|
||||
ForgeImplementation::Gitea
|
||||
}
|
||||
|
||||
async fn crawl(&self, limit: u64, page: u64) -> CrawlResp {
|
||||
fn empty_is_none(s: &str) -> Option<String> {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
let mut tags = Tags::default();
|
||||
let mut users = UserMap::default();
|
||||
let mut repos = Repositories::default();
|
||||
|
||||
let instance_url = self.instance_url.clone();
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(REPO_SEARCH_PATH);
|
||||
url.set_query(Some(&format!("page={page}&limit={limit}")));
|
||||
let mut res: schema::SearchResults = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
fn to_user(u: schema::User, g: &Gitea) -> Arc<User<'_>> {
|
||||
let mut profile_url = g.instance_url.clone();
|
||||
profile_url.set_path(&u.username);
|
||||
let username = Arc::new(u.username);
|
||||
Arc::new(User {
|
||||
username,
|
||||
html_link: profile_url.to_string(),
|
||||
profile_photo: Some(u.avatar_url),
|
||||
hostname: &g.hostname,
|
||||
})
|
||||
}
|
||||
|
||||
for repo in res.data.drain(0..) {
|
||||
let user = if !users.contains_key(&repo.owner.username) {
|
||||
let u = to_user(repo.owner, self);
|
||||
let username = u.username.clone();
|
||||
users.insert(username.clone().clone(), u.clone());
|
||||
u
|
||||
} else {
|
||||
users.get(&repo.owner.username).unwrap().clone()
|
||||
};
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(&format!(
|
||||
"/api/v1/repos/{}/{}/topics",
|
||||
&user.username, repo.name
|
||||
));
|
||||
|
||||
let mut topics: schema::Topics = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut rtopics = Vec::with_capacity(topics.topics.len());
|
||||
for t in topics.topics.drain(0..) {
|
||||
let t = Arc::new(t);
|
||||
if !tags.contains(&t) {
|
||||
tags.insert(t.clone());
|
||||
}
|
||||
rtopics.push(t);
|
||||
}
|
||||
|
||||
let frepo = Repository {
|
||||
hostname: &self.hostname,
|
||||
website: empty_is_none(&repo.website),
|
||||
name: repo.name,
|
||||
owner: user,
|
||||
html_link: repo.html_url,
|
||||
tags: Some(rtopics),
|
||||
description: Some(repo.description),
|
||||
};
|
||||
|
||||
repos.push(frepo);
|
||||
}
|
||||
CrawlResp { repos, tags, users }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use url::Url;

    /// Address of the Gitea instance the test talks to.
    pub const GITEA_HOST: &str = "http://localhost:8080";
    /// Total number of repositories the test walks through.
    pub const NET_REPOSITORIES: u64 = 100;
    /// Number of repositories requested per crawl call.
    pub const PER_CRAWL: u64 = 10;

    /// Probes the instance at `GITEA_HOST`, then crawls it page by page and expects
    /// every page to be full.
    #[actix_rt::test]
    async fn gitea_works() {
        let instance = Url::parse(GITEA_HOST).unwrap();
        let ctx = Gitea::new(instance, Client::new());
        assert!(ctx.is_forge().await);

        let steps = NET_REPOSITORIES / PER_CRAWL;

        // NOTE(review): page numbering starts at 0 here — confirm how the Gitea API
        // treats page 0 versus page 1
        for page in 0..steps {
            let crawled = ctx.crawl(PER_CRAWL, page).await;
            assert_eq!(crawled.repos.len() as u64, PER_CRAWL);
        }
    }
}
|
172
forge/gitea/src/schema.rs
Normal file
172
forge/gitea/src/schema.rs
Normal file
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SearchResults {
|
||||
pub ok: bool,
|
||||
pub data: Vec<Repository>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)]
|
||||
pub struct User {
|
||||
pub id: usize,
|
||||
pub login: String,
|
||||
pub full_name: String,
|
||||
pub email: String,
|
||||
pub avatar_url: String,
|
||||
pub language: String,
|
||||
pub is_admin: bool,
|
||||
pub last_login: String,
|
||||
pub created: String,
|
||||
pub restricted: bool,
|
||||
pub active: bool,
|
||||
pub prohibit_login: bool,
|
||||
pub location: String,
|
||||
pub website: String,
|
||||
pub description: String,
|
||||
pub visibility: String,
|
||||
pub followers_count: usize,
|
||||
pub following_count: usize,
|
||||
pub starred_repos_count: usize,
|
||||
pub username: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Repository {
|
||||
pub name: String,
|
||||
pub full_name: String,
|
||||
pub description: String,
|
||||
pub empty: bool,
|
||||
pub private: bool,
|
||||
pub fork: bool,
|
||||
pub template: bool,
|
||||
pub parent: Option<Box<Repository>>,
|
||||
pub mirror: bool,
|
||||
pub size: usize,
|
||||
pub html_url: String,
|
||||
pub ssh_url: String,
|
||||
pub clone_url: String,
|
||||
pub original_url: String,
|
||||
pub owner: User,
|
||||
pub website: String,
|
||||
pub stars_count: usize,
|
||||
pub forks_count: usize,
|
||||
pub watchers_count: usize,
|
||||
pub open_issues_count: usize,
|
||||
pub open_pr_counter: usize,
|
||||
pub release_counter: usize,
|
||||
pub default_branch: String,
|
||||
pub archived: bool,
|
||||
pub created_at: String,
|
||||
pub updated_at: String,
|
||||
pub internal_tracker: InternalIssueTracker,
|
||||
pub has_issues: bool,
|
||||
pub has_wiki: bool,
|
||||
pub has_pull_requests: bool,
|
||||
pub has_projects: bool,
|
||||
pub ignore_whitespace_conflicts: bool,
|
||||
pub allow_merge_commits: bool,
|
||||
pub allow_rebase: bool,
|
||||
pub allow_rebase_explicit: bool,
|
||||
pub allow_squash_merge: bool,
|
||||
pub default_merge_style: String,
|
||||
pub avatar_url: String,
|
||||
pub internal: bool,
|
||||
pub mirror_interval: String,
|
||||
pub mirror_updated: String,
|
||||
pub repo_transfer: Option<Team>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InternalIssueTracker {
|
||||
pub enable_time_tracker: bool,
|
||||
pub allow_only_contributors_to_track_time: bool,
|
||||
pub enable_issue_dependencies: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RepoTransfer {
|
||||
pub doer: User,
|
||||
pub recipient: User,
|
||||
pub teams: Option<Team>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
|
||||
pub struct Organization {
|
||||
pub avatar_url: String,
|
||||
pub description: String,
|
||||
pub full_name: String,
|
||||
pub id: u64,
|
||||
pub location: String,
|
||||
pub repo_admin_change_team_access: bool,
|
||||
pub username: String,
|
||||
pub visibility: String,
|
||||
pub website: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Permission {
|
||||
None,
|
||||
Read,
|
||||
Write,
|
||||
Admin,
|
||||
Owner,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Team {
|
||||
pub can_create_org_repo: bool,
|
||||
pub description: String,
|
||||
pub id: u64,
|
||||
pub includes_all_repositories: bool,
|
||||
pub name: String,
|
||||
pub organization: Organization,
|
||||
pub permission: Permission,
|
||||
pub units: Vec<String>,
|
||||
pub units_map: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Topics {
|
||||
pub topics: Vec<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs;

    /// Checks that recorded Gitea responses deserialize into the structs defined in
    /// this module without panicking. Go has no way to describe nullable values, and
    /// my (@realaravinth) knowledge of the Gitea codebase is very limited, so the
    /// schema is validated against real responses instead.
    ///
    /// Each line of every listed file is parsed as one `SearchResults` document.
    #[test]
    fn schema_doesnt_panic() {
        let files = ["./tests/schema/gitea/git.batsense.net.json"];
        for path in files.iter() {
            let contents = fs::read_to_string(path).unwrap();
            contents.lines().for_each(|line| {
                let _: SearchResults = serde_json::from_str(line).expect("Gitea schema paniced");
            });
        }
    }
}
|
2
forge/gitea/tests/schema/gitea/git.batsense.net.json
Normal file
2
forge/gitea/tests/schema/gitea/git.batsense.net.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue