feat: migrate gitea interfaces to use forge/gitea interfaces
This commit is contained in:
parent
d2f5f25c87
commit
aefc691033
6 changed files with 82 additions and 369 deletions
46
src/forge.rs
Normal file
46
src/forge.rs
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use async_trait::async_trait;
|
||||
use db_core::prelude::*;
|
||||
|
||||
#[async_trait]
|
||||
pub trait SCForge: std::marker::Send + std::marker::Sync + CloneSPForge {
|
||||
async fn is_forge(&self) -> bool;
|
||||
async fn get_repositories(&self, limit: usize, page: usize) -> Vec<AddRepository>;
|
||||
}
|
||||
|
||||
/// Trait to clone SCForge
|
||||
pub trait CloneSPForge {
|
||||
/// clone DB
|
||||
fn clone_db(&self) -> Box<dyn SCForge>;
|
||||
}
|
||||
|
||||
impl<T> CloneSPForge for T
|
||||
where
|
||||
T: SCForge + Clone + 'static,
|
||||
{
|
||||
fn clone_db(&self) -> Box<dyn SCForge> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a boxed forge trait object be duplicated with `.clone()`.
impl Clone for Box<dyn SCForge> {
    /// Delegates to [`CloneSPForge::clone_db`] on the boxed trait object.
    fn clone(&self) -> Self {
        // Reborrow through the box explicitly so the call dispatches on the
        // trait object rather than on the `Box` itself.
        let inner: &dyn SCForge = &**self;
        inner.clone_db()
    }
}
|
173
src/gitea.rs
173
src/gitea.rs
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::collections::HashMap;
|
||||
|
||||
use db_core::AddRepository;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SearchResults {
|
||||
pub ok: bool,
|
||||
pub data: Vec<Repository>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)]
|
||||
pub struct User {
|
||||
pub id: usize,
|
||||
pub login: String,
|
||||
pub full_name: String,
|
||||
pub email: String,
|
||||
pub avatar_url: String,
|
||||
pub language: String,
|
||||
pub is_admin: bool,
|
||||
pub last_login: String,
|
||||
pub created: String,
|
||||
pub restricted: bool,
|
||||
pub active: bool,
|
||||
pub prohibit_login: bool,
|
||||
pub location: String,
|
||||
pub website: String,
|
||||
pub description: String,
|
||||
pub visibility: String,
|
||||
pub followers_count: usize,
|
||||
pub following_count: usize,
|
||||
pub starred_repos_count: usize,
|
||||
pub username: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Repository {
|
||||
pub name: String,
|
||||
pub full_name: String,
|
||||
pub description: String,
|
||||
pub empty: bool,
|
||||
pub private: bool,
|
||||
pub fork: bool,
|
||||
pub template: bool,
|
||||
pub parent: Option<Box<Repository>>,
|
||||
pub mirror: bool,
|
||||
pub size: usize,
|
||||
pub html_url: String,
|
||||
pub ssh_url: String,
|
||||
pub clone_url: String,
|
||||
pub original_url: String,
|
||||
pub owner: User,
|
||||
pub website: String,
|
||||
pub stars_count: usize,
|
||||
pub forks_count: usize,
|
||||
pub watchers_count: usize,
|
||||
pub open_issues_count: usize,
|
||||
pub open_pr_counter: usize,
|
||||
pub release_counter: usize,
|
||||
pub default_branch: String,
|
||||
pub archived: bool,
|
||||
pub created_at: String,
|
||||
pub updated_at: String,
|
||||
pub internal_tracker: InternalIssueTracker,
|
||||
pub has_issues: bool,
|
||||
pub has_wiki: bool,
|
||||
pub has_pull_requests: bool,
|
||||
pub has_projects: bool,
|
||||
pub ignore_whitespace_conflicts: bool,
|
||||
pub allow_merge_commits: bool,
|
||||
pub allow_rebase: bool,
|
||||
pub allow_rebase_explicit: bool,
|
||||
pub allow_squash_merge: bool,
|
||||
pub default_merge_style: String,
|
||||
pub avatar_url: String,
|
||||
pub internal: bool,
|
||||
pub mirror_interval: String,
|
||||
pub mirror_updated: String,
|
||||
pub repo_transfer: Option<Team>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InternalIssueTracker {
|
||||
pub enable_time_tracker: bool,
|
||||
pub allow_only_contributors_to_track_time: bool,
|
||||
pub enable_issue_dependencies: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RepoTransfer {
|
||||
pub doer: User,
|
||||
pub recipient: User,
|
||||
pub teams: Option<Team>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
|
||||
pub struct Organization {
|
||||
pub avatar_url: String,
|
||||
pub description: String,
|
||||
pub full_name: String,
|
||||
pub id: u64,
|
||||
pub location: String,
|
||||
pub repo_admin_change_team_access: bool,
|
||||
pub username: String,
|
||||
pub visibility: String,
|
||||
pub website: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Permission {
|
||||
None,
|
||||
Read,
|
||||
Write,
|
||||
Admin,
|
||||
Owner,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Team {
|
||||
pub can_create_org_repo: bool,
|
||||
pub description: String,
|
||||
pub id: u64,
|
||||
pub includes_all_repositories: bool,
|
||||
pub name: String,
|
||||
pub organization: Organization,
|
||||
pub permission: Permission,
|
||||
pub units: Vec<String>,
|
||||
pub units_map: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Topics {
|
||||
pub topics: Vec<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs;

    /// Checks that recorded Gitea responses deserialize into the structs
    /// defined in this module without panicking.
    ///
    /// Every line of each fixture file is parsed as one [`SearchResults`]
    /// document. The original author (@realaravinth) added this check because
    /// Go cannot express nullable values in its types and their knowledge of
    /// the Gitea codebase is limited, so the schema is validated against real
    /// data instead.
    #[test]
    fn schema_doesnt_panic() {
        const FIXTURES: [&str; 1] = ["./tests/schema/gitea/git.batsense.net.json"];

        FIXTURES.iter().for_each(|path| {
            let raw = fs::read_to_string(path).unwrap();
            raw.lines().for_each(|document| {
                let _: SearchResults =
                    serde_json::from_str(document).expect("Gitea schema paniced");
            });
        });
    }
}
|
|
@ -17,7 +17,7 @@
|
|||
*/
|
||||
pub mod data;
|
||||
pub mod db;
|
||||
pub mod gitea;
|
||||
pub mod forge;
|
||||
pub mod settings;
|
||||
pub mod spider;
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -102,42 +102,6 @@ impl DBType {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct DatabaseBuilder {
|
||||
pub port: u32,
|
||||
pub hostname: String,
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
pub name: String,
|
||||
pub database_type: DBType,
|
||||
}
|
||||
|
||||
impl DatabaseBuilder {
|
||||
#[cfg(not(tarpaulin_include))]
|
||||
fn extract_database_url(url: &Url) -> Self {
|
||||
log::debug!("Databse name: {}", url.path());
|
||||
let mut path = url.path().split('/');
|
||||
path.next();
|
||||
let name = path.next().expect("no database name").to_string();
|
||||
|
||||
let database_type = DBType::from_url(url).unwrap();
|
||||
let port = if database_type == DBType::Sqlite {
|
||||
0
|
||||
} else {
|
||||
url.port().expect("Enter database port").into()
|
||||
};
|
||||
|
||||
DatabaseBuilder {
|
||||
port,
|
||||
hostname: url.host().expect("Enter database host").to_string(),
|
||||
username: url.username().into(),
|
||||
password: url.password().expect("Enter database password").into(),
|
||||
name,
|
||||
database_type: DBType::from_url(url).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Database {
|
||||
pub url: String,
|
||||
|
@ -247,48 +211,6 @@ impl Settings {
|
|||
}
|
||||
}
|
||||
|
||||
//#[cfg(not(tarpaulin_include))]
|
||||
//fn set_from_database_url(s: &mut Config, database_conf: &DatabaseBuilder) {
|
||||
// s.set("database.username", database_conf.username.clone())
|
||||
// .expect("Couldn't set database username");
|
||||
// s.set("database.password", database_conf.password.clone())
|
||||
// .expect("Couldn't access database password");
|
||||
// s.set("database.hostname", database_conf.hostname.clone())
|
||||
// .expect("Couldn't access database hostname");
|
||||
// s.set("database.port", database_conf.port as i64)
|
||||
// .expect("Couldn't access database port");
|
||||
// s.set("database.name", database_conf.name.clone())
|
||||
// .expect("Couldn't access database name");
|
||||
// s.set(
|
||||
// "database.database_type",
|
||||
// format!("{}", database_conf.database_type),
|
||||
// )
|
||||
// .expect("Couldn't access database type");
|
||||
//}
|
||||
|
||||
//#[cfg(not(tarpaulin_include))]
|
||||
//fn set_database_url(s: &mut Config) {
|
||||
// s.set(
|
||||
// "database.url",
|
||||
// format!(
|
||||
// r"{}://{}:{}@{}:{}/{}",
|
||||
// s.get::<String>("database.database_type")
|
||||
// .expect("Couldn't access database database_type"),
|
||||
// s.get::<String>("database.username")
|
||||
// .expect("Couldn't access database username"),
|
||||
// s.get::<String>("database.password")
|
||||
// .expect("Couldn't access database password"),
|
||||
// s.get::<String>("database.hostname")
|
||||
// .expect("Couldn't access database hostname"),
|
||||
// s.get::<String>("database.port")
|
||||
// .expect("Couldn't access database port"),
|
||||
// s.get::<String>("database.name")
|
||||
// .expect("Couldn't access database name")
|
||||
// ),
|
||||
// )
|
||||
// .expect("Couldn't set databse url");
|
||||
//}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
150
src/spider.rs
150
src/spider.rs
|
@ -21,54 +21,32 @@ use tokio::time;
|
|||
use url::Url;
|
||||
|
||||
use db_core::prelude::*;
|
||||
use forge_core::prelude::*;
|
||||
use gitea::Gitea;
|
||||
|
||||
use crate::data::Data;
|
||||
use crate::db::BoxDB;
|
||||
use crate::gitea::SearchResults;
|
||||
use crate::gitea::Topics;
|
||||
|
||||
const REPO_SEARCH_PATH: &str = "/api/v1/repos/search";
|
||||
const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
|
||||
|
||||
impl Data {
|
||||
pub async fn crawl(&self, hostname: &str, db: &BoxDB) -> Vec<SearchResults> {
|
||||
fn empty_is_none(s: &str) -> Option<&str> {
|
||||
if s.trim().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn crawl(&self, instance_url: &str, db: &BoxDB) {
|
||||
let gitea = Gitea::new(Url::parse(instance_url).unwrap(), self.client.clone());
|
||||
let mut page = 1;
|
||||
let instance_url = Url::parse(hostname).unwrap();
|
||||
let hostname = get_hostname(&instance_url);
|
||||
if !db.forge_exists(&hostname).await.unwrap() {
|
||||
let hostname = gitea.get_hostname();
|
||||
if !db.forge_exists(hostname).await.unwrap() {
|
||||
let msg = CreateForge {
|
||||
hostname: &hostname,
|
||||
forge_type: ForgeImplementation::Gitea,
|
||||
hostname,
|
||||
forge_type: gitea.forge_type(),
|
||||
};
|
||||
db.create_forge_isntance(&msg).await.unwrap();
|
||||
}
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(REPO_SEARCH_PATH);
|
||||
let mut repos = Vec::new();
|
||||
loop {
|
||||
let mut url = url.clone();
|
||||
url.set_query(Some(&format!(
|
||||
"page={page}&limit={}",
|
||||
self.settings.crawler.items_per_api_call
|
||||
)));
|
||||
let res: SearchResults = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
let res = gitea
|
||||
.crawl(self.settings.crawler.items_per_api_call, page)
|
||||
.await;
|
||||
if res.repos.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
let sleep_fut = time::sleep(Duration::new(
|
||||
self.settings.crawler.wait_before_next_api_call,
|
||||
|
@ -76,85 +54,26 @@ impl Data {
|
|||
));
|
||||
let sleep_fut = tokio::spawn(sleep_fut);
|
||||
|
||||
for repo in res.data.iter() {
|
||||
for (username, u) in res.users.iter() {
|
||||
if !db
|
||||
.user_exists(&repo.owner.username, Some(&hostname))
|
||||
.user_exists(&username, Some(&gitea.get_hostname()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
let mut profile_url = instance_url.clone();
|
||||
profile_url.set_path(&repo.owner.username);
|
||||
let msg = AddUser {
|
||||
hostname: &hostname,
|
||||
username: &repo.owner.username,
|
||||
html_link: profile_url.as_str(),
|
||||
profile_photo: Some(&repo.owner.avatar_url),
|
||||
};
|
||||
let msg = u.as_ref().into();
|
||||
db.add_user(&msg).await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(&format!(
|
||||
"/api/v1/repos/{}/{}/topics",
|
||||
repo.owner.username, repo.name
|
||||
));
|
||||
|
||||
let topics: Topics = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let add_repo_msg = AddRepository {
|
||||
tags: Some(topics.topics),
|
||||
name: &repo.name,
|
||||
website: empty_is_none(&repo.website),
|
||||
description: empty_is_none(&repo.description),
|
||||
owner: &repo.owner.username,
|
||||
html_link: &repo.html_url,
|
||||
hostname: &hostname,
|
||||
};
|
||||
|
||||
db.create_repository(&add_repo_msg).await.unwrap();
|
||||
for r in res.repos.iter() {
|
||||
let msg = r.into();
|
||||
db.create_repository(&msg).await.unwrap();
|
||||
}
|
||||
|
||||
sleep_fut.await.unwrap();
|
||||
if res.data.is_empty() {
|
||||
return repos;
|
||||
}
|
||||
repos.push(res);
|
||||
page += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// purpose: interact with instance running on provided hostname and verify if the instance is a
|
||||
/// Gitea instance.
|
||||
///
|
||||
/// will get nodeinfo information, which contains an identifier to uniquely identify Gitea
|
||||
pub async fn is_gitea(&self, hostname: &str) -> bool {
|
||||
const GITEA_IDENTIFIER: &str = "gitea";
|
||||
let mut url = Url::parse(hostname).unwrap();
|
||||
url.set_path(GITEA_NODEINFO);
|
||||
|
||||
let res: serde_json::Value = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
if let serde_json::Value::String(software) = &res["software"]["name"] {
|
||||
software == GITEA_IDENTIFIER
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -165,25 +84,26 @@ mod tests {
|
|||
use url::Url;
|
||||
|
||||
pub const GITEA_HOST: &str = "http://localhost:8080";
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn is_gitea_works() {
|
||||
let (_db, data) = sqlx_sqlite::get_data().await;
|
||||
assert!(data.is_gitea(GITEA_HOST).await);
|
||||
}
|
||||
pub const GITEA_USERNAME: &str = "bot";
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn crawl_gitea() {
|
||||
let (db, data) = sqlx_sqlite::get_data().await;
|
||||
let res = data.crawl(GITEA_HOST, &db).await;
|
||||
let mut elements = 0;
|
||||
let username = &res.get(0).unwrap().data.get(0).unwrap().owner.username;
|
||||
let hostname = get_hostname(&Url::parse(GITEA_HOST).unwrap());
|
||||
assert!(db.forge_exists(&hostname).await.unwrap());
|
||||
assert!(db.user_exists(username, Some(&hostname)).await.unwrap());
|
||||
res.iter().for_each(|r| elements += r.data.len());
|
||||
|
||||
assert_eq!(res.len(), 5);
|
||||
assert_eq!(elements, 100);
|
||||
assert!(db
|
||||
.user_exists(GITEA_USERNAME, Some(&hostname))
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(db.user_exists(GITEA_USERNAME, None).await.unwrap());
|
||||
for i in 0..100 {
|
||||
let repo = format!("reopsitory_{i}");
|
||||
assert!(db
|
||||
.repository_exists(&repo, GITEA_USERNAME, hostname.as_str())
|
||||
.await
|
||||
.unwrap())
|
||||
}
|
||||
assert!(db.forge_exists(&hostname).await.unwrap());
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue