From b3d9dc38d95d0b24cb8468be5a5d989fccb44a5e Mon Sep 17 00:00:00 2001 From: realaravinth Date: Sat, 4 Jun 2022 20:35:06 +0530 Subject: [PATCH] feat: impl crawl accepts rate-limit configuration for gitea --- Cargo.lock | 1 + forge/gitea/Cargo.toml | 1 + forge/gitea/src/lib.rs | 15 +++++++++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3723cf8..0449068 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1150,6 +1150,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "tokio", "url", ] diff --git a/forge/gitea/Cargo.toml b/forge/gitea/Cargo.toml index 1e1adb6..0c4a12a 100644 --- a/forge/gitea/Cargo.toml +++ b/forge/gitea/Cargo.toml @@ -17,6 +17,7 @@ path = "src/lib.rs" [dependencies] async-trait = "0.1.51" url = { version = "2.2.2", features = ["serde"] } +tokio = { version = "1.17", features = ["time"] } [dependencies.forge-core] path = "../forge-core" diff --git a/forge/gitea/src/lib.rs b/forge/gitea/src/lib.rs index 4496ed3..9433dbb 100644 --- a/forge/gitea/src/lib.rs +++ b/forge/gitea/src/lib.rs @@ -16,8 +16,10 @@ * along with this program. If not, see . */ use std::sync::Arc; +use std::time::Duration; use reqwest::Client; +use tokio::task::JoinHandle; use url::Url; use db_core::ForgeImplementation; @@ -86,7 +88,7 @@ impl SCForge for Gitea { ForgeImplementation::Gitea } - async fn crawl(&self, limit: u64, page: u64) -> CrawlResp { + async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp { fn empty_is_none(s: &str) -> Option { let s = s.trim(); if s.is_empty() { @@ -127,6 +129,8 @@ impl SCForge for Gitea { }) } + let mut sleep_fut: Option> = None; + for repo in res.data.drain(0..) { let user = if !users.contains_key(&repo.owner.username) { let u = to_user(repo.owner, self); @@ -143,6 +147,10 @@ impl SCForge for Gitea { &user.username, repo.name )); + if let Some(sleep_fut) = sleep_fut { + sleep_fut.await.unwrap(); + } + let mut topics: schema::Topics = self .client .get(url) @@ -152,6 +160,9 @@ impl SCForge for Gitea { .json() .await .unwrap(); + sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::new( + rate_limit, 0, + )))); let mut rtopics = Vec::with_capacity(topics.topics.len()); for t in topics.topics.drain(0..) { @@ -194,7 +205,7 @@ mod tests { let steps = NET_REPOSITORIES / PER_CRAWL; for i in 0..steps { - let res = ctx.crawl(PER_CRAWL, i).await; + let res = ctx.crawl(PER_CRAWL, i, 1).await; assert_eq!(res.repos.len() as u64, PER_CRAWL); } }