feat: implement starchart for gitea

This commit is contained in:
Aravinth Manivannan 2022-05-17 01:04:17 +05:30
parent 72a6ce9564
commit d2f5f25c87
Signed by: realaravinth
GPG key ID: AD9F0F08E855ED88
4 changed files with 406 additions and 0 deletions

33
forge/gitea/Cargo.toml Normal file
View file

@ -0,0 +1,33 @@
[package]
name = "gitea"
version = "0.1.0"
authors = ["realaravinth <realaravinth@batsense.net>"]
description = "ForgeFlux StarChart - Federated forge spider"
documentation = "https://forgeflux.org/"
edition = "2021"
license = "AGPL-3.0-or-later"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1.51"
url = { version = "2.2.2", features = ["serde"] }
[dependencies.forge-core]
path = "../forge-core"
[dependencies.reqwest]
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
version = "0.11.10"
[dependencies.serde]
features = ["derive"]
version = "1"
[dependencies.serde_json]
version = "1"
[dev-dependencies]
actix-rt = "2.7"

199
forge/gitea/src/lib.rs Normal file
View file

@ -0,0 +1,199 @@
/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::sync::Arc;
use reqwest::Client;
use url::Url;
use db_core::prelude::*;
use forge_core::dev::*;
pub mod schema;
/// URL path of the repository search endpoint that `crawl` queries.
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
/// URL path of the nodeinfo endpoint that `is_forge` queries.
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
/// Value `is_forge` expects at `software.name` in the nodeinfo response.
const GITEA_IDENTIFIER: &str = "gitea";
/// Handle for talking to a single Gitea instance over HTTP.
#[derive(Clone)]
pub struct Gitea {
/// Base URL of the instance; each request sets its own path on a clone of this URL.
pub instance_url: Url,
/// HTTP client used for every request sent to the instance.
pub client: Client,
// Result of `db_core::get_hostname(&instance_url)`, computed once in `Gitea::new`.
hostname: String,
}
impl Gitea {
    /// Builds a handle for the instance at `instance_url` that sends its requests
    /// through `client`.
    ///
    /// The hostname is derived from `instance_url` via `db_core::get_hostname` and cached.
    pub fn new(instance_url: Url, client: Client) -> Self {
        Self {
            // Evaluated first, while `instance_url` can still be borrowed.
            hostname: db_core::get_hostname(&instance_url),
            instance_url,
            client,
        }
    }
}
impl PartialEq for Gitea {
    /// Two handles are equal when both their hostname and their instance URL match;
    /// the HTTP client is not part of the comparison.
    fn eq(&self, other: &Self) -> bool {
        if self.hostname != other.hostname {
            return false;
        }
        self.instance_url == other.instance_url
    }
}
#[async_trait]
impl SCForge for Gitea {
    /// Reports whether the instance identifies itself as Gitea.
    ///
    /// Fetches the nodeinfo document and compares its `software.name` entry with
    /// `GITEA_IDENTIFIER`. Any failure along the way (request error, body that is not
    /// JSON, missing or non-string entry) yields `false` rather than a panic, so hosts
    /// that are unreachable or run other software can be probed safely.
    async fn is_forge(&self) -> bool {
        let mut url = self.instance_url.clone();
        url.set_path(GITEA_NODEINFO);

        let resp = match self.client.get(url).send().await {
            Ok(resp) => resp,
            Err(_) => return false,
        };
        let nodeinfo = match resp.json::<serde_json::Value>().await {
            Ok(nodeinfo) => nodeinfo,
            Err(_) => return false,
        };

        // Indexing a `Value` with a missing key yields `Null`, for which `as_str` is `None`.
        nodeinfo["software"]["name"].as_str() == Some(GITEA_IDENTIFIER)
    }

    /// Returns the hostname computed for this instance when the handle was created.
    fn get_hostname(&self) -> &str {
        &self.hostname
    }

    /// Identifies this implementation as the Gitea one.
    fn forge_type(&self) -> ForgeImplementation {
        ForgeImplementation::Gitea
    }

    /// Fetches one page of the repository search results and converts it into a
    /// [`CrawlResp`].
    ///
    /// `limit` and `page` are forwarded verbatim as the `limit` and `page` query
    /// parameters of the search request. For every repository returned, a second
    /// request fetches its topics. Owners are collected once per username in
    /// `users`, and every topic seen is collected in `tags`.
    ///
    /// # Panics
    ///
    /// Panics if the search request or any topics request fails, or if a response
    /// body cannot be deserialized into the expected schema type. `CrawlResp` has no
    /// way to carry an error.
    async fn crawl(&self, limit: u64, page: u64) -> CrawlResp {
        /// Trims `s`; whitespace-only input maps to `None`.
        fn empty_is_none(s: &str) -> Option<String> {
            let s = s.trim();
            if s.is_empty() {
                None
            } else {
                Some(s.to_owned())
            }
        }

        /// Converts an API user into the shared user record; the profile link is the
        /// instance URL with the username as its path.
        fn to_user(u: schema::User, g: &Gitea) -> Arc<User<'_>> {
            let mut profile_url = g.instance_url.clone();
            profile_url.set_path(&u.username);
            let username = Arc::new(u.username);
            Arc::new(User {
                username,
                html_link: profile_url.to_string(),
                profile_photo: Some(u.avatar_url),
                hostname: &g.hostname,
            })
        }

        let mut tags = Tags::default();
        let mut users = UserMap::default();
        let mut repos = Repositories::default();

        let mut url = self.instance_url.clone();
        url.set_path(REPO_SEARCH_PATH);
        url.set_query(Some(&format!("page={page}&limit={limit}")));
        let res: schema::SearchResults = self
            .client
            .get(url)
            .send()
            .await
            .unwrap()
            .json()
            .await
            .unwrap();

        for repo in res.data {
            // Reuse the record of an owner that was already seen on this page; a single
            // lookup decides between reuse and insertion.
            let user = match users.get(&repo.owner.username) {
                Some(known) => Arc::clone(known),
                None => {
                    let u = to_user(repo.owner, self);
                    users.insert(Arc::clone(&u.username), Arc::clone(&u));
                    u
                }
            };

            let mut url = self.instance_url.clone();
            url.set_path(&format!(
                "/api/v1/repos/{}/{}/topics",
                &user.username, repo.name
            ));
            let topics: schema::Topics = self
                .client
                .get(url)
                .send()
                .await
                .unwrap()
                .json()
                .await
                .unwrap();

            let mut rtopics = Vec::with_capacity(topics.topics.len());
            for t in topics.topics {
                let t = Arc::new(t);
                if !tags.contains(&t) {
                    tags.insert(Arc::clone(&t));
                }
                rtopics.push(t);
            }

            let frepo = Repository {
                hostname: &self.hostname,
                website: empty_is_none(&repo.website),
                name: repo.name,
                owner: user,
                html_link: repo.html_url,
                tags: Some(rtopics),
                description: Some(repo.description),
            };
            repos.push(frepo);
        }

        CrawlResp { repos, tags, users }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use url::Url;

    pub const GITEA_HOST: &str = "http://localhost:8080";
    pub const NET_REPOSITORIES: u64 = 100;
    pub const PER_CRAWL: u64 = 10;

    /// Exercises `is_forge` and `crawl` against the instance at `GITEA_HOST`.
    ///
    /// Every crawled page is expected to hold exactly `PER_CRAWL` repositories, so the
    /// instance presumably has to be seeded with at least `NET_REPOSITORIES` of them.
    #[actix_rt::test]
    async fn gitea_works() {
        let instance = Url::parse(GITEA_HOST).unwrap();
        let ctx = Gitea::new(instance, Client::new());
        assert!(ctx.is_forge().await);

        for page in 0..(NET_REPOSITORIES / PER_CRAWL) {
            let res = ctx.crawl(PER_CRAWL, page).await;
            assert_eq!(res.repos.len() as u64, PER_CRAWL);
        }
    }
}

172
forge/gitea/src/schema.rs Normal file
View file

@ -0,0 +1,172 @@
/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Body of a repository search response: a status flag plus the list of matching
/// repositories.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchResults {
/// Status flag sent alongside the results (presumably `true` on success; verify against Gitea).
pub ok: bool,
/// Repositories contained in this page of results.
pub data: Vec<Repository>,
}
/// User account as embedded in API responses; within this module it is the type of
/// `Repository::owner`, `RepoTransfer::doer` and `RepoTransfer::recipient`.
///
/// No field is optional, so deserialization fails when any of them is missing or `null`.
#[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)]
pub struct User {
pub id: usize,
pub login: String,
pub full_name: String,
pub email: String,
pub avatar_url: String,
pub language: String,
pub is_admin: bool,
// Kept as a plain string; the format is whatever the server sends (not parsed here).
pub last_login: String,
// Kept as a plain string; the format is whatever the server sends (not parsed here).
pub created: String,
pub restricted: bool,
pub active: bool,
pub prohibit_login: bool,
pub location: String,
pub website: String,
pub description: String,
pub visibility: String,
pub followers_count: usize,
pub following_count: usize,
pub starred_repos_count: usize,
pub username: String,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Repository {
pub name: String,
pub full_name: String,
pub description: String,
pub empty: bool,
pub private: bool,
pub fork: bool,
pub template: bool,
pub parent: Option<Box<Repository>>,
pub mirror: bool,
pub size: usize,
pub html_url: String,
pub ssh_url: String,
pub clone_url: String,
pub original_url: String,
pub owner: User,
pub website: String,
pub stars_count: usize,
pub forks_count: usize,
pub watchers_count: usize,
pub open_issues_count: usize,
pub open_pr_counter: usize,
pub release_counter: usize,
pub default_branch: String,
pub archived: bool,
pub created_at: String,
pub updated_at: String,
pub internal_tracker: InternalIssueTracker,
pub has_issues: bool,
pub has_wiki: bool,
pub has_pull_requests: bool,
pub has_projects: bool,
pub ignore_whitespace_conflicts: bool,
pub allow_merge_commits: bool,
pub allow_rebase: bool,
pub allow_rebase_explicit: bool,
pub allow_squash_merge: bool,
pub default_merge_style: String,
pub avatar_url: String,
pub internal: bool,
pub mirror_interval: String,
pub mirror_updated: String,
pub repo_transfer: Option<Team>,
}
/// Settings object found under `Repository::internal_tracker`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InternalIssueTracker {
pub enable_time_tracker: bool,
pub allow_only_contributors_to_track_time: bool,
pub enable_issue_dependencies: bool,
}
/// Pending repository transfer as described by Gitea's API: who started it, who
/// receives the repository, and the teams involved.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepoTransfer {
    pub doer: User,
    pub recipient: User,
    /// Gitea serializes this as a JSON array (or `null` when there are no teams),
    /// so it is a list rather than a single team object.
    pub teams: Option<Vec<Team>>,
}
/// Organization object as embedded in `Team::organization`.
///
/// No field is optional, so deserialization fails when any of them is missing or `null`.
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
pub struct Organization {
pub avatar_url: String,
pub description: String,
pub full_name: String,
pub id: u64,
pub location: String,
pub repo_admin_change_team_access: bool,
pub username: String,
pub visibility: String,
pub website: String,
}
/// Permission level carried by `Team::permission`.
///
/// Because of `rename_all = "lowercase"`, the variants are read from and written as
/// `"none"`, `"read"`, `"write"`, `"admin"` and `"owner"`.
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Permission {
None,
Read,
Write,
Admin,
Owner,
}
/// Team object, including the organization it belongs to and its permission level.
///
/// No field is optional, so deserialization fails when any of them is missing or `null`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Team {
pub can_create_org_repo: bool,
pub description: String,
pub id: u64,
pub includes_all_repositories: bool,
pub name: String,
pub organization: Organization,
pub permission: Permission,
pub units: Vec<String>,
// NOTE(review): presumably maps a unit name to its permission string; confirm against Gitea.
pub units_map: HashMap<String, String>,
}
/// Body of the response to a repository's `/topics` request.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Topics {
/// Topic names attached to the repository.
pub topics: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Checks that recorded Gitea responses deserialize, via serde, into the structs
    /// defined in this module. Go has no way to describe nullable values and my
    /// (@realaravinth) knowledge of the Gitea codebase is very limited, so the schema
    /// is validated against real payloads instead: one JSON document per line.
    #[test]
    fn schema_doesnt_panic() {
        const FIXTURES: [&str; 1] = ["./tests/schema/gitea/git.batsense.net.json"];

        for path in FIXTURES {
            let contents = fs::read_to_string(path).unwrap();
            contents.lines().for_each(|line| {
                let _: SearchResults = serde_json::from_str(line).expect("Gitea schema paniced");
            });
        }
    }
}

File diff suppressed because one or more lines are too long