Merge pull request 'feat: crawl forgeflux adapters' (#3) from crawl-forgeflux into master
Reviewed-on: #3
This commit is contained in:
commit
e486e6d6a8
10 changed files with 390 additions and 9 deletions
|
@ -3,3 +3,4 @@ export SQLITE_TMP="$(pwd)/db/db-sqlx-sqlite/tmp"
|
|||
export SQLITE_DATABASE_URL="sqlite://$SQLITE_TMP/admin.db"
|
||||
export STARCHART__CRAWLER__WAIT_BEFORE_NEXT_API_CALL=0
|
||||
export FORGEJO_HOST=http://localhost:3000
|
||||
export FORGEFLUX_HOST=http://localhost:7000
|
||||
|
|
|
@ -7,9 +7,11 @@ steps:
|
|||
POSTGRES_DATABASE_URL: postgres://postgres:password@database:5432/postgres
|
||||
commands:
|
||||
- pip install requests
|
||||
- sed -i 's/localhost/forgejo/' scripts/gitea.py
|
||||
- sed -i 's/localhost\:3000/forgejo/' scripts/gitea.py
|
||||
- python ./scripts/gitea.py
|
||||
|
||||
# - curl -vv http://forgejo/api/v1/repos/bot/repository_58
|
||||
# - curl -vv http://forge_forgeflux/forgejo/bot/repository_58
|
||||
# - curl -vv http://forge_forgeflux/forgejo/bot/repository_01
|
||||
|
||||
test:
|
||||
image: rust
|
||||
|
@ -18,7 +20,8 @@ steps:
|
|||
environment:
|
||||
POSTGRES_DATABASE_URL: postgres://postgres:password@database:5432/postgres
|
||||
SQLITE_DATABASE_URL: sqlite:///tmp/admin.db
|
||||
FORGEJO_HOST: http://forgejo:3000
|
||||
FORGEJO_HOST: http://forgejo
|
||||
FORGEFLUX_HOST: http://forge_forgeflux
|
||||
commands:
|
||||
# - curl -fsSL https://deb.nodesource.com/setup_16.x | bash - &&\
|
||||
# - apt update && apt-get -y --no-install-recommends install nodejs tar gpg curl wget
|
||||
|
@ -32,7 +35,6 @@ steps:
|
|||
# - make lint
|
||||
- make test.workspaces
|
||||
|
||||
|
||||
build_docker_img:
|
||||
image: plugins/docker
|
||||
when:
|
||||
|
@ -42,6 +44,20 @@ steps:
|
|||
repo: forgeflux/starchart
|
||||
tags: latest
|
||||
|
||||
build_and_publish_docker_img:
|
||||
image: plugins/docker
|
||||
when:
|
||||
event: [push, tag, deployment]
|
||||
branch: master
|
||||
settings:
|
||||
username: forgeflux
|
||||
password:
|
||||
from_secret: DOCKER_TOKEN
|
||||
repo: forgeflux/starchart
|
||||
tags:
|
||||
latest
|
||||
|
||||
|
||||
services:
|
||||
forgejo:
|
||||
image: codeberg.org/forgejo/forgejo:9
|
||||
|
@ -49,9 +65,47 @@ services:
|
|||
FORGEJO__security__INSTALL_LOCK: true
|
||||
FORGEJO__federation__ENABLED: true
|
||||
FORGEJO__server__ROOT_URL: http://forgejo
|
||||
FORGEJO__server__HTTP_PORT: 3000
|
||||
FORGEJO__server__HTTP_PORT: 80
|
||||
|
||||
database:
|
||||
image: postgres
|
||||
environment:
|
||||
POSTGRES_PASSWORD: password
|
||||
|
||||
forge_forgeflux_database:
|
||||
image: postgres
|
||||
environment:
|
||||
POSTGRES_PASSWORD: password
|
||||
|
||||
forge_forgeflux:
|
||||
image: forgeflux/forgeflux:latest
|
||||
depends_on:
|
||||
- forgeflux_postgres
|
||||
environment:
|
||||
FORGEFLUX_server_PUBLIC_URL_HAS_HTTPS: false
|
||||
FORGEFLUX_debug: true
|
||||
FORGEFLUX_source_code: https://git.batsense.net/ForgeFlux/ForgeFlux
|
||||
FORGEFLUX_allow_registration: true
|
||||
FORGEFLUX_database_POOL: 2
|
||||
FORGEFLUX_forges_FORGEJO_url: http://forgejo
|
||||
FORGEFLUX_forges_FORGEJO_client_id: foo
|
||||
FORGEFLUX_forges_FORGEJO_client_secret: foo
|
||||
FORGEFLUX_forges_FORGEJO_user_USERNAME: foo
|
||||
FORGEFLUX_forges_FORGEJO_user_API_TOKEN: foo
|
||||
DATABASE_URL: postgres://postgres:password@forgeflux_postgres:5432/postgres
|
||||
PORT: 80
|
||||
FORGEFLUX_server_DOMAIN: forge_forgeflux
|
||||
FORGEFLUX_server_COOKIE_SECRET: 995cde0721b6e41602dd111438cc7c1b2506dc14bad31d2653fb9a4adce1f84e
|
||||
FORGEFLUX_server_IP: 0.0.0.0
|
||||
FORGEFLUX_forges_GITHUB_url: https://github.com
|
||||
FORGEFLUX_forges_GITHUB_api_url: https://api.github.com
|
||||
FORGEFLUX_forges_GITHUB_client_id: foo
|
||||
FORGEFLUX_forges_GITHUB_client_secret: foo
|
||||
FORGEFLUX_forges_GITHUB_user_USERNAME: foo
|
||||
FORGEFLUX_forges_GITHUB_user_PERSONAL_ACCESS_TOKEN: foo
|
||||
FORGEFLUX_starchart_enable: true
|
||||
|
||||
forgeflux_postgres:
|
||||
image: postgres:17.2
|
||||
environment:
|
||||
POSTGRES_PASSWORD: password # change password
|
||||
|
|
17
Cargo.lock
generated
17
Cargo.lock
generated
|
@ -1,6 +1,6 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "actix"
|
||||
|
@ -1147,6 +1147,20 @@ dependencies = [
|
|||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "forgeflux"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"actix-rt",
|
||||
"async-trait",
|
||||
"forge-core",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.0.1"
|
||||
|
@ -2922,6 +2936,7 @@ dependencies = [
|
|||
"derive_more",
|
||||
"federate-core",
|
||||
"forge-core",
|
||||
"forgeflux",
|
||||
"gitea",
|
||||
"lazy_static",
|
||||
"log",
|
||||
|
|
|
@ -16,6 +16,7 @@ members = [
|
|||
"db/db-core",
|
||||
"db/db-sqlx-sqlite",
|
||||
"forge/forge-core",
|
||||
"forge/forgeflux",
|
||||
"forge/gitea",
|
||||
"federate/federate-core",
|
||||
"federate/publiccodeyml"
|
||||
|
@ -76,6 +77,9 @@ path = "./db/db-sqlx-sqlite"
|
|||
[dependencies.gitea]
|
||||
path = "./forge/gitea"
|
||||
|
||||
[dependencies.forgeflux]
|
||||
path = "./forge/forgeflux"
|
||||
|
||||
[dependencies.forge-core]
|
||||
path = "./forge/forge-core"
|
||||
|
||||
|
|
|
@ -309,6 +309,8 @@ impl Clone for Box<dyn SCDatabase> {
|
|||
pub enum ForgeImplementation {
|
||||
/// [Gitea](https://gitea.io) software forge
|
||||
Gitea,
|
||||
/// [ForgeFlux](https://net.forgeflux.net)
|
||||
ForgeFlux,
|
||||
}
|
||||
|
||||
impl ForgeImplementation {
|
||||
|
@ -316,6 +318,7 @@ impl ForgeImplementation {
|
|||
pub const fn to_str(&self) -> &'static str {
|
||||
match self {
|
||||
ForgeImplementation::Gitea => "gitea",
|
||||
ForgeImplementation::ForgeFlux => "forgeflux",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -326,9 +329,11 @@ impl FromStr for ForgeImplementation {
|
|||
/// Convert [str] to [ForgeImplementation]
|
||||
fn from_str(s: &str) -> DBResult<Self> {
|
||||
const GITEA: &str = ForgeImplementation::Gitea.to_str();
|
||||
const FORGEFLUX: &str = ForgeImplementation::ForgeFlux.to_str();
|
||||
let s = s.trim();
|
||||
match s {
|
||||
GITEA => Ok(Self::Gitea),
|
||||
FORGEFLUX => Ok(Self::ForgeFlux),
|
||||
_ => Err(DBError::UnknownForgeType(s.to_owned())),
|
||||
}
|
||||
}
|
||||
|
|
37
forge/forgeflux/Cargo.toml
Normal file
37
forge/forgeflux/Cargo.toml
Normal file
|
@ -0,0 +1,37 @@
|
|||
[package]
|
||||
name = "forgeflux"
|
||||
version = "0.1.0"
|
||||
authors = ["realaravinth <realaravinth@batsense.net>"]
|
||||
description = "ForgeFlux StarChart - Federated forge spider"
|
||||
documentation = "https://forgeflux.org/"
|
||||
edition = "2021"
|
||||
license = "AGPLv3 or later version"
|
||||
|
||||
|
||||
[lib]
|
||||
name = "forgeflux"
|
||||
path = "src/lib.rs"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.51"
|
||||
url = { version = "2.2.2", features = ["serde"] }
|
||||
tokio = { version = "1.17", features = ["time"] }
|
||||
|
||||
[dependencies.forge-core]
|
||||
path = "../forge-core"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["rustls-tls-native-roots", "gzip", "deflate", "brotli", "json"]
|
||||
version = "0.11.10"
|
||||
|
||||
[dependencies.serde]
|
||||
features = ["derive"]
|
||||
version = "1"
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.7"
|
187
forge/forgeflux/src/lib.rs
Normal file
187
forge/forgeflux/src/lib.rs
Normal file
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::task::JoinHandle;
|
||||
use url::Url;
|
||||
|
||||
use db_core::ForgeImplementation;
|
||||
use forge_core::dev::*;
|
||||
use forge_core::Repository;
|
||||
|
||||
pub mod schema;
|
||||
|
||||
const REPO_SEARCH_PATH: &str = "/search/repositories";
|
||||
const FORGEFLUX_NODEINFO: &str = "/nodeinfo/2.0";
|
||||
const FORGEFLUX_IDENTIFIER: &str = "forgeflux";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ForgeFlux {
|
||||
pub instance_url: Url,
|
||||
pub client: Client,
|
||||
url: Url,
|
||||
}
|
||||
|
||||
impl ForgeFlux {
|
||||
pub fn new(instance_url: Url, client: Client) -> Self {
|
||||
let url = Url::parse(&db_core::clean_url(&instance_url)).unwrap();
|
||||
|
||||
Self {
|
||||
instance_url,
|
||||
client,
|
||||
url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for ForgeFlux {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.url == other.url && self.instance_url == other.instance_url
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SCForge for ForgeFlux {
|
||||
async fn is_forge(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn get_url(&self) -> &Url {
|
||||
&self.url
|
||||
}
|
||||
|
||||
fn forge_type(&self) -> ForgeImplementation {
|
||||
ForgeImplementation::ForgeFlux
|
||||
}
|
||||
|
||||
async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp {
|
||||
fn empty_is_none(s: &str) -> Option<String> {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
let mut tags = Tags::default();
|
||||
let mut users = UserMap::default();
|
||||
let mut internal_users = UserMap::default();
|
||||
let mut repos = Repositories::default();
|
||||
|
||||
let instance_url = self.instance_url.clone();
|
||||
|
||||
let mut url = instance_url.clone();
|
||||
url.set_path(REPO_SEARCH_PATH);
|
||||
url.set_query(Some(&format!("page={page}&limit={limit}")));
|
||||
let mut res: Vec<schema::Repository> = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut sleep_fut: Option<JoinHandle<()>> = None;
|
||||
for repo in res.drain(0..) {
|
||||
let user = if internal_users.contains_key(&repo.attributed_to.to_string()) {
|
||||
if let Some(sleep_fut) = sleep_fut {
|
||||
sleep_fut.await.unwrap();
|
||||
}
|
||||
|
||||
let user: schema::User = self
|
||||
.client
|
||||
.get(repo.attributed_to.clone())
|
||||
.send()
|
||||
.await
|
||||
.unwrap()
|
||||
.json()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::new(
|
||||
rate_limit, 0,
|
||||
))));
|
||||
|
||||
let profile_photo = if let Some(profile_photo) = user.icon {
|
||||
Some(profile_photo.url.to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let f_user = Arc::new(User {
|
||||
url: user.id.clone(),
|
||||
username: Arc::new(user.preferred_username),
|
||||
html_link: user.id.to_string(),
|
||||
profile_photo,
|
||||
});
|
||||
|
||||
users.insert(f_user.username.clone(), f_user.clone());
|
||||
users.insert(Arc::new(f_user.url.to_string()), f_user.clone());
|
||||
f_user
|
||||
} else {
|
||||
internal_users
|
||||
.get(&repo.attributed_to.to_string())
|
||||
.unwrap()
|
||||
.clone()
|
||||
};
|
||||
|
||||
let frepo = Repository {
|
||||
url: self.url.clone(),
|
||||
website: None,
|
||||
name: repo.name,
|
||||
owner: user,
|
||||
html_link: repo.id.to_string(),
|
||||
tags: None,
|
||||
description: Some(repo.summary),
|
||||
};
|
||||
|
||||
repos.push(frepo);
|
||||
}
|
||||
CrawlResp { repos, tags, users }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use url::Url;

    /// Number of repositories the test instance is expected to expose.
    pub const NET_REPOSITORIES: u64 = 0;
    /// Page size requested on every crawl.
    pub const PER_CRAWL: u64 = 10;

    /// Crawls the instance named by the `FORGEFLUX_HOST` environment variable
    /// page by page and expects every page to be full.
    ///
    /// NOTE(review): with `NET_REPOSITORIES` set to 0 the page range below is
    /// empty, so `crawl` is never actually invoked by this test.
    #[actix_rt::test]
    async fn forgeflux_works() {
        let host = std::env::var("FORGEFLUX_HOST").unwrap();
        let ctx = ForgeFlux::new(Url::parse(&host).unwrap(), Client::new());

        assert!(ctx.is_forge().await);

        let pages = NET_REPOSITORIES / PER_CRAWL;
        for page in 0..pages {
            let crawled = ctx.crawl(PER_CRAWL, page, 0).await;
            assert_eq!(crawled.repos.len() as u64, PER_CRAWL);
        }
    }
}
|
69
forge/forgeflux/src/schema.rs
Normal file
69
forge/forgeflux/src/schema.rs
Normal file
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* ForgeFlux StarChart - A federated software forge spider
|
||||
* Copyright © 2022 Aravinth Manivannan <realaravinth@batsense.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ProfilePhoto {
|
||||
pub url: Url,
|
||||
pub media_type: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct User {
|
||||
pub name: String,
|
||||
pub preferred_username: String,
|
||||
pub id: Url,
|
||||
pub url: Option<Url>,
|
||||
pub icon: Option<ProfilePhoto>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Repository {
|
||||
pub id: Url,
|
||||
pub clone_uri: Url,
|
||||
pub inbox: Url,
|
||||
pub name: String,
|
||||
pub attributed_to: Url,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs;

    /// Checks that recorded ForgeFlux search responses deserialize into the
    /// structs defined in this module.
    ///
    /// Every line of each fixture file is parsed on its own as a JSON array
    /// of [`Repository`]; any line that fails to parse fails the test.
    #[test]
    fn schema_doesnt_panic() {
        let fixtures = ["./tests/schema/forgeflux/net.forgeflux.org.json"];

        fixtures.iter().for_each(|path| {
            let contents = fs::read_to_string(path).unwrap();
            contents.lines().for_each(|line| {
                let _: Vec<Repository> =
                    serde_json::from_str(line).expect("Forgeflux schema paniced");
            });
        });
    }
}
|
File diff suppressed because one or more lines are too long
|
@ -25,6 +25,7 @@ use url::Url;
|
|||
|
||||
use db_core::prelude::*;
|
||||
use forge_core::prelude::*;
|
||||
use forgeflux::ForgeFlux;
|
||||
use gitea::Gitea;
|
||||
|
||||
use crate::ctx::Ctx;
|
||||
|
@ -37,9 +38,16 @@ impl Ctx {
|
|||
info!("[crawl][{instance_url}] Init crawling");
|
||||
let forge: Box<dyn SCForge> =
|
||||
Box::new(Gitea::new(instance_url.clone(), self.client.clone()));
|
||||
if !forge.is_forge().await {
|
||||
unimplemented!("Forge type unimplemented");
|
||||
}
|
||||
let forge = if forge.is_forge().await {
|
||||
forge
|
||||
} else {
|
||||
let forgeflux = Box::new(ForgeFlux::new(instance_url.clone(), self.client.clone()));
|
||||
if forgeflux.is_forge().await {
|
||||
forgeflux
|
||||
} else {
|
||||
unimplemented!("Forge type unimplemented");
|
||||
}
|
||||
};
|
||||
|
||||
let mut page = 1;
|
||||
let url = forge.get_url();
|
||||
|
|
Loading…
Reference in a new issue