feat: migrate gitea interfaces to use forge/gitea interfaces

This commit is contained in:
Aravinth Manivannan 2022-05-17 01:04:44 +05:30
parent d2f5f25c87
commit aefc691033
Signed by: realaravinth
GPG key ID: AD9F0F08E855ED88
6 changed files with 82 additions and 369 deletions

46
src/forge.rs Normal file
View file

@ -0,0 +1,46 @@
/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use async_trait::async_trait;
use db_core::prelude::*;
/// Interface a software-forge backend exposes to StarChart.
///
/// Implementors must be `Send + Sync` and, through the [`CloneSPForge`]
/// supertrait, must be able to produce a boxed copy of themselves so that a
/// `Box<dyn SCForge>` can be cloned.
#[async_trait]
pub trait SCForge: Send + Sync + CloneSPForge {
    /// Asynchronously yields a boolean verdict about the target instance.
    ///
    /// NOTE(review): going by the name, `true` presumably means the instance
    /// is a forge this implementation understands; confirm against the
    /// implementors.
    async fn is_forge(&self) -> bool;

    /// Asynchronously fetches repositories as [`AddRepository`] messages.
    ///
    /// `limit` and `page` are forwarded by the caller; they presumably select
    /// the page size and the page number of a paginated listing (confirm
    /// against the implementors).
    async fn get_repositories(&self, limit: usize, page: usize) -> Vec<AddRepository>;
}
/// Helper trait that makes `Box<dyn SCForge>` cloneable.
///
/// `Clone` itself cannot be a supertrait of a trait that is used as a trait
/// object, so cloning is routed through this object-safe method instead.
pub trait CloneSPForge {
/// Returns a boxed copy of this forge.
///
/// Despite the `_db` suffix in its name, the value returned is a
/// `Box<dyn SCForge>`, not a database handle.
fn clone_db(&self) -> Box<dyn SCForge>;
}
/// Blanket implementation: every `'static` type that is both [`SCForge`] and
/// `Clone` gets [`CloneSPForge`] for free.
impl<T: SCForge + Clone + 'static> CloneSPForge for T {
    /// Clones the concrete value and erases its type behind `Box<dyn SCForge>`.
    fn clone_db(&self) -> Box<dyn SCForge> {
        Box::new(T::clone(self))
    }
}
/// Lets a boxed forge trait object be duplicated with the ordinary `.clone()`.
impl Clone for Box<dyn SCForge> {
    fn clone(&self) -> Self {
        // Reach through both the reference and the box so the call is
        // dispatched on the trait object itself.
        let forge = &**self;
        forge.clone_db()
    }
}

View file

@ -1,173 +0,0 @@
/*
* ForgeFlux StarChart - A federated software forge spider
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::collections::HashMap;
use db_core::AddRepository;
use serde::{Deserialize, Serialize};
/// Top-level payload that a Gitea repository-search response is deserialized
/// into (see the schema test at the bottom of this module).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchResults {
/// Value of the `ok` field of the response.
pub ok: bool,
/// Repositories carried in the `data` field of the response.
pub data: Vec<Repository>,
}
/// A user record as it appears in Gitea JSON.
///
/// Used as the `owner` of a [`Repository`] and as the `doer`/`recipient` of a
/// [`RepoTransfer`]. Every field is mandatory for deserialization: none of
/// them is an `Option`.
#[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)]
pub struct User {
pub id: usize,
pub login: String,
pub full_name: String,
pub email: String,
pub avatar_url: String,
pub language: String,
pub is_admin: bool,
// Timestamps are kept as the raw strings found in the JSON; no date parsing
// happens in this type.
pub last_login: String,
pub created: String,
pub restricted: bool,
pub active: bool,
pub prohibit_login: bool,
pub location: String,
pub website: String,
pub description: String,
pub visibility: String,
pub followers_count: usize,
pub following_count: usize,
pub starred_repos_count: usize,
pub username: String,
}
/// A repository record as it appears inside [`SearchResults::data`].
///
/// Only `parent` and `repo_transfer` are optional; every other field must be
/// present in the JSON for deserialization to succeed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Repository {
pub name: String,
pub full_name: String,
pub description: String,
pub empty: bool,
pub private: bool,
pub fork: bool,
pub template: bool,
// Boxed because the type refers to itself; `None` when the field is null or
// absent.
pub parent: Option<Box<Repository>>,
pub mirror: bool,
pub size: usize,
pub html_url: String,
pub ssh_url: String,
pub clone_url: String,
pub original_url: String,
pub owner: User,
pub website: String,
pub stars_count: usize,
pub forks_count: usize,
pub watchers_count: usize,
pub open_issues_count: usize,
pub open_pr_counter: usize,
pub release_counter: usize,
pub default_branch: String,
pub archived: bool,
// Timestamps are kept as raw strings; no date parsing happens in this type.
pub created_at: String,
pub updated_at: String,
pub internal_tracker: InternalIssueTracker,
pub has_issues: bool,
pub has_wiki: bool,
pub has_pull_requests: bool,
pub has_projects: bool,
pub ignore_whitespace_conflicts: bool,
pub allow_merge_commits: bool,
pub allow_rebase: bool,
pub allow_rebase_explicit: bool,
pub allow_squash_merge: bool,
pub default_merge_style: String,
pub avatar_url: String,
pub internal: bool,
pub mirror_interval: String,
pub mirror_updated: String,
// NOTE(review): typed as `Option<Team>` although this module also declares a
// `RepoTransfer` struct that nothing references; confirm which type the
// `repo_transfer` JSON field actually carries.
pub repo_transfer: Option<Team>,
}
/// Issue-tracker flags nested under [`Repository::internal_tracker`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InternalIssueTracker {
pub enable_time_tracker: bool,
pub allow_only_contributors_to_track_time: bool,
pub enable_issue_dependencies: bool,
}
/// Describes a repository transfer: the acting user, the receiving user and
/// an optional team.
///
/// NOTE(review): no other type in the visible part of this module refers to
/// this struct (`Repository::repo_transfer` is an `Option<Team>`); confirm
/// whether that is intentional.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepoTransfer {
pub doer: User,
pub recipient: User,
// A single optional `Team`, not a list.
pub teams: Option<Team>,
}
/// An organization record; embedded in [`Team::organization`].
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
pub struct Organization {
pub avatar_url: String,
pub description: String,
pub full_name: String,
pub id: u64,
pub location: String,
pub repo_admin_change_team_access: bool,
pub username: String,
pub visibility: String,
pub website: String,
}
/// Permission level attached to a [`Team`].
///
/// Because of `rename_all = "lowercase"`, the variants are (de)serialized as
/// `"none"`, `"read"`, `"write"`, `"admin"` and `"owner"`.
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Permission {
None,
Read,
Write,
Admin,
Owner,
}
/// A team record, including the [`Organization`] it belongs to and its
/// [`Permission`] level.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Team {
pub can_create_org_repo: bool,
pub description: String,
pub id: u64,
pub includes_all_repositories: bool,
pub name: String,
pub organization: Organization,
pub permission: Permission,
pub units: Vec<String>,
// String-to-string map taken verbatim from the `units_map` JSON object.
pub units_map: HashMap<String, String>,
}
/// Payload holding the list of topic names of a repository, read from the
/// `topics` field of the JSON.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Topics {
pub topics: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Checks that recorded Gitea responses deserialize into the structs
    /// defined in this module without panicking.
    ///
    /// Go has no way to describe nullable values, and my (@realaravinth)
    /// knowledge of the Gitea codebase is very limited, so the schema is
    /// validated against real responses instead. Every line of a fixture
    /// file is parsed as one `SearchResults` document.
    #[test]
    fn schema_doesnt_panic() {
        const FIXTURES: [&str; 1] = ["./tests/schema/gitea/git.batsense.net.json"];

        for path in FIXTURES {
            let body = fs::read_to_string(path).unwrap();
            body.lines().for_each(|record| {
                let _: SearchResults =
                    serde_json::from_str(record).expect("Gitea schema paniced");
            });
        }
    }
}

View file

@ -17,7 +17,7 @@
*/ */
pub mod data; pub mod data;
pub mod db; pub mod db;
pub mod gitea; pub mod forge;
pub mod settings; pub mod settings;
pub mod spider; pub mod spider;
#[cfg(test)] #[cfg(test)]

View file

@ -102,42 +102,6 @@ impl DBType {
} }
} }
/// Individual components of a database connection, as split out of a
/// database URL by `DatabaseBuilder::extract_database_url`.
#[derive(Debug, Clone, Deserialize)]
struct DatabaseBuilder {
// Set to 0 by `extract_database_url` when the database type is Sqlite.
pub port: u32,
pub hostname: String,
pub username: String,
pub password: String,
// Database name, taken from the URL path.
pub name: String,
pub database_type: DBType,
}
impl DatabaseBuilder {
    /// Splits a database URL into its individual connection components.
    ///
    /// * `name` is the second element obtained by splitting the URL path on
    ///   `'/'` (the element before the first `'/'` is discarded).
    /// * `database_type` is derived once from the URL via `DBType::from_url`.
    /// * `port` is `0` for `DBType::Sqlite`; for every other type the URL
    ///   must carry an explicit port.
    ///
    /// # Panics
    ///
    /// Panics when the path holds no database name, when
    /// `DBType::from_url` fails, when a non-Sqlite URL has no port, or when
    /// the URL has no host or no password.
    #[cfg(not(tarpaulin_include))]
    fn extract_database_url(url: &Url) -> Self {
        log::debug!("Databse name: {}", url.path());

        // Skip whatever precedes the first '/', then take the next segment.
        let name = url
            .path()
            .split('/')
            .nth(1)
            .expect("no database name")
            .to_string();

        // Resolve the type a single time and reuse it below instead of
        // parsing (and unwrapping) the URL a second time.
        let database_type = DBType::from_url(url).unwrap();
        let port = if database_type == DBType::Sqlite {
            0
        } else {
            url.port().expect("Enter database port").into()
        };

        DatabaseBuilder {
            port,
            hostname: url.host().expect("Enter database host").to_string(),
            username: url.username().into(),
            password: url.password().expect("Enter database password").into(),
            name,
            database_type,
        }
    }
}
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct Database { pub struct Database {
pub url: String, pub url: String,
@ -247,48 +211,6 @@ impl Settings {
} }
} }
//#[cfg(not(tarpaulin_include))]
//fn set_from_database_url(s: &mut Config, database_conf: &DatabaseBuilder) {
// s.set("database.username", database_conf.username.clone())
// .expect("Couldn't set database username");
// s.set("database.password", database_conf.password.clone())
// .expect("Couldn't access database password");
// s.set("database.hostname", database_conf.hostname.clone())
// .expect("Couldn't access database hostname");
// s.set("database.port", database_conf.port as i64)
// .expect("Couldn't access database port");
// s.set("database.name", database_conf.name.clone())
// .expect("Couldn't access database name");
// s.set(
// "database.database_type",
// format!("{}", database_conf.database_type),
// )
// .expect("Couldn't access database type");
//}
//#[cfg(not(tarpaulin_include))]
//fn set_database_url(s: &mut Config) {
// s.set(
// "database.url",
// format!(
// r"{}://{}:{}@{}:{}/{}",
// s.get::<String>("database.database_type")
// .expect("Couldn't access database database_type"),
// s.get::<String>("database.username")
// .expect("Couldn't access database username"),
// s.get::<String>("database.password")
// .expect("Couldn't access database password"),
// s.get::<String>("database.hostname")
// .expect("Couldn't access database hostname"),
// s.get::<String>("database.port")
// .expect("Couldn't access database port"),
// s.get::<String>("database.name")
// .expect("Couldn't access database name")
// ),
// )
// .expect("Couldn't set databse url");
//}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -21,54 +21,32 @@ use tokio::time;
use url::Url; use url::Url;
use db_core::prelude::*; use db_core::prelude::*;
use forge_core::prelude::*;
use gitea::Gitea;
use crate::data::Data; use crate::data::Data;
use crate::db::BoxDB; use crate::db::BoxDB;
use crate::gitea::SearchResults;
use crate::gitea::Topics;
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
impl Data { impl Data {
pub async fn crawl(&self, hostname: &str, db: &BoxDB) -> Vec<SearchResults> { pub async fn crawl(&self, instance_url: &str, db: &BoxDB) {
fn empty_is_none(s: &str) -> Option<&str> { let gitea = Gitea::new(Url::parse(instance_url).unwrap(), self.client.clone());
if s.trim().is_empty() {
None
} else {
Some(s)
}
}
let mut page = 1; let mut page = 1;
let instance_url = Url::parse(hostname).unwrap(); let hostname = gitea.get_hostname();
let hostname = get_hostname(&instance_url); if !db.forge_exists(hostname).await.unwrap() {
if !db.forge_exists(&hostname).await.unwrap() {
let msg = CreateForge { let msg = CreateForge {
hostname: &hostname, hostname,
forge_type: ForgeImplementation::Gitea, forge_type: gitea.forge_type(),
}; };
db.create_forge_isntance(&msg).await.unwrap(); db.create_forge_isntance(&msg).await.unwrap();
} }
let mut url = instance_url.clone();
url.set_path(REPO_SEARCH_PATH);
let mut repos = Vec::new();
loop { loop {
let mut url = url.clone(); let res = gitea
url.set_query(Some(&format!( .crawl(self.settings.crawler.items_per_api_call, page)
"page={page}&limit={}", .await;
self.settings.crawler.items_per_api_call if res.repos.is_empty() {
))); break;
let res: SearchResults = self }
.client
.get(url)
.send()
.await
.unwrap()
.json()
.await
.unwrap();
let sleep_fut = time::sleep(Duration::new( let sleep_fut = time::sleep(Duration::new(
self.settings.crawler.wait_before_next_api_call, self.settings.crawler.wait_before_next_api_call,
@ -76,85 +54,26 @@ impl Data {
)); ));
let sleep_fut = tokio::spawn(sleep_fut); let sleep_fut = tokio::spawn(sleep_fut);
for repo in res.data.iter() { for (username, u) in res.users.iter() {
if !db if !db
.user_exists(&repo.owner.username, Some(&hostname)) .user_exists(&username, Some(&gitea.get_hostname()))
.await .await
.unwrap() .unwrap()
{ {
let mut profile_url = instance_url.clone(); let msg = u.as_ref().into();
profile_url.set_path(&repo.owner.username);
let msg = AddUser {
hostname: &hostname,
username: &repo.owner.username,
html_link: profile_url.as_str(),
profile_photo: Some(&repo.owner.avatar_url),
};
db.add_user(&msg).await.unwrap(); db.add_user(&msg).await.unwrap();
} }
}
let mut url = instance_url.clone(); for r in res.repos.iter() {
url.set_path(&format!( let msg = r.into();
"/api/v1/repos/{}/{}/topics", db.create_repository(&msg).await.unwrap();
repo.owner.username, repo.name
));
let topics: Topics = self
.client
.get(url)
.send()
.await
.unwrap()
.json()
.await
.unwrap();
let add_repo_msg = AddRepository {
tags: Some(topics.topics),
name: &repo.name,
website: empty_is_none(&repo.website),
description: empty_is_none(&repo.description),
owner: &repo.owner.username,
html_link: &repo.html_url,
hostname: &hostname,
};
db.create_repository(&add_repo_msg).await.unwrap();
} }
sleep_fut.await.unwrap(); sleep_fut.await.unwrap();
if res.data.is_empty() {
return repos;
}
repos.push(res);
page += 1; page += 1;
} }
} }
/// purpose: interact with instance running on provided hostname and verify if the instance is a
/// Gitea instance.
///
/// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
pub async fn is_gitea(&self, hostname: &str) -> bool {
const GITEA_IDENTIFIER: &str = "gitea";
let mut url = Url::parse(hostname).unwrap();
url.set_path(GITEA_NODEINFO);
let res: serde_json::Value = self
.client
.get(url)
.send()
.await
.unwrap()
.json()
.await
.unwrap();
if let serde_json::Value::String(software) = &res["software"]["name"] {
software == GITEA_IDENTIFIER
} else {
false
}
}
} }
#[cfg(test)] #[cfg(test)]
@ -165,25 +84,26 @@ mod tests {
use url::Url; use url::Url;
pub const GITEA_HOST: &str = "http://localhost:8080"; pub const GITEA_HOST: &str = "http://localhost:8080";
pub const GITEA_USERNAME: &str = "bot";
#[actix_rt::test]
async fn is_gitea_works() {
let (_db, data) = sqlx_sqlite::get_data().await;
assert!(data.is_gitea(GITEA_HOST).await);
}
#[actix_rt::test] #[actix_rt::test]
async fn crawl_gitea() { async fn crawl_gitea() {
let (db, data) = sqlx_sqlite::get_data().await; let (db, data) = sqlx_sqlite::get_data().await;
let res = data.crawl(GITEA_HOST, &db).await; let res = data.crawl(GITEA_HOST, &db).await;
let mut elements = 0;
let username = &res.get(0).unwrap().data.get(0).unwrap().owner.username;
let hostname = get_hostname(&Url::parse(GITEA_HOST).unwrap()); let hostname = get_hostname(&Url::parse(GITEA_HOST).unwrap());
assert!(db.forge_exists(&hostname).await.unwrap()); assert!(db.forge_exists(&hostname).await.unwrap());
assert!(db.user_exists(username, Some(&hostname)).await.unwrap()); assert!(db
res.iter().for_each(|r| elements += r.data.len()); .user_exists(GITEA_USERNAME, Some(&hostname))
.await
assert_eq!(res.len(), 5); .unwrap());
assert_eq!(elements, 100); assert!(db.user_exists(GITEA_USERNAME, None).await.unwrap());
for i in 0..100 {
let repo = format!("reopsitory_{i}");
assert!(db
.repository_exists(&repo, GITEA_USERNAME, hostname.as_str())
.await
.unwrap())
}
assert!(db.forge_exists(&hostname).await.unwrap());
} }
} }

File diff suppressed because one or more lines are too long