feat: Gitea crawl implementation accepts a rate-limit configuration

This commit is contained in:
Aravinth Manivannan 2022-06-04 20:35:06 +05:30
parent 7d60189b25
commit b3d9dc38d9
Signed by: realaravinth
GPG key ID: AD9F0F08E855ED88
3 changed files with 15 additions and 2 deletions

1
Cargo.lock generated
View file

@ -1150,6 +1150,7 @@ dependencies = [
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
"tokio",
"url", "url",
] ]

View file

@ -17,6 +17,7 @@ path = "src/lib.rs"
[dependencies] [dependencies]
async-trait = "0.1.51" async-trait = "0.1.51"
url = { version = "2.2.2", features = ["serde"] } url = { version = "2.2.2", features = ["serde"] }
tokio = { version = "1.17", features = ["time"] }
[dependencies.forge-core] [dependencies.forge-core]
path = "../forge-core" path = "../forge-core"

View file

@ -16,8 +16,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use reqwest::Client; use reqwest::Client;
use tokio::task::JoinHandle;
use url::Url; use url::Url;
use db_core::ForgeImplementation; use db_core::ForgeImplementation;
@ -86,7 +88,7 @@ impl SCForge for Gitea {
ForgeImplementation::Gitea ForgeImplementation::Gitea
} }
async fn crawl(&self, limit: u64, page: u64) -> CrawlResp { async fn crawl(&self, limit: u64, page: u64, rate_limit: u64) -> CrawlResp {
fn empty_is_none(s: &str) -> Option<String> { fn empty_is_none(s: &str) -> Option<String> {
let s = s.trim(); let s = s.trim();
if s.is_empty() { if s.is_empty() {
@ -127,6 +129,8 @@ impl SCForge for Gitea {
}) })
} }
let mut sleep_fut: Option<JoinHandle<()>> = None;
for repo in res.data.drain(0..) { for repo in res.data.drain(0..) {
let user = if !users.contains_key(&repo.owner.username) { let user = if !users.contains_key(&repo.owner.username) {
let u = to_user(repo.owner, self); let u = to_user(repo.owner, self);
@ -143,6 +147,10 @@ impl SCForge for Gitea {
&user.username, repo.name &user.username, repo.name
)); ));
if let Some(sleep_fut) = sleep_fut {
sleep_fut.await.unwrap();
}
let mut topics: schema::Topics = self let mut topics: schema::Topics = self
.client .client
.get(url) .get(url)
@ -152,6 +160,9 @@ impl SCForge for Gitea {
.json() .json()
.await .await
.unwrap(); .unwrap();
sleep_fut = Some(tokio::spawn(tokio::time::sleep(Duration::new(
rate_limit, 0,
))));
let mut rtopics = Vec::with_capacity(topics.topics.len()); let mut rtopics = Vec::with_capacity(topics.topics.len());
for t in topics.topics.drain(0..) { for t in topics.topics.drain(0..) {
@ -194,7 +205,7 @@ mod tests {
let steps = NET_REPOSITORIES / PER_CRAWL; let steps = NET_REPOSITORIES / PER_CRAWL;
for i in 0..steps { for i in 0..steps {
let res = ctx.crawl(PER_CRAWL, i).await; let res = ctx.crawl(PER_CRAWL, i, 1).await;
assert_eq!(res.repos.len() as u64, PER_CRAWL); assert_eq!(res.repos.len() as u64, PER_CRAWL);
} }
} }