diff --git a/Cargo.lock b/Cargo.lock
index 73d5bf2..33fd19a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -379,6 +379,7 @@ dependencies = [
"serde 1.0.136",
"serde_json",
"thiserror",
+ "url",
]
[[package]]
diff --git a/src/spider.rs b/src/spider.rs
index 7ef72e6..c1ee7f2 100644
--- a/src/spider.rs
+++ b/src/spider.rs
@@ -16,6 +16,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use std::time::Duration;
+
use tokio::time;
use url::Url;
@@ -31,8 +32,8 @@ const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
impl Data {
pub async fn crawl(&self, hostname: &str, db: &BoxDB) -> Vec {
let mut page = 1;
- let mut url = Url::parse(hostname).unwrap();
- let hostname = url.host().as_ref().unwrap().to_string();
+ let mut instance_url = Url::parse(hostname).unwrap();
+ let hostname = get_hostname(&instance_url);
if !db.forge_exists(&hostname).await.unwrap() {
let msg = CreateForge {
hostname: &hostname,
@@ -41,6 +42,7 @@ impl Data {
db.create_forge_isntance(&msg).await.unwrap();
}
+ let mut url = instance_url.clone();
url.set_path(REPO_SEARCH_PATH);
let mut repos = Vec::new();
loop {
@@ -59,11 +61,31 @@ impl Data {
.await
.unwrap();
- time::sleep(Duration::new(
+ let sleep_fut = time::sleep(Duration::new(
self.settings.crawler.wait_before_next_api_call,
0,
- ))
- .await;
+ ));
+ let sleep_fut = tokio::spawn(sleep_fut);
+
+ for repo in res.data.iter() {
+ if !db
+ .user_exists(&repo.owner.username, Some(&hostname))
+ .await
+ .unwrap()
+ {
+ let mut profile_url = instance_url.clone();
+ profile_url.set_path(&repo.owner.username);
+ let msg = AddUser {
+ hostname: &hostname,
+ username: &repo.owner.username,
+ html_link: profile_url.as_str(),
+ profile_photo: Some(&repo.owner.avatar_url),
+ };
+ db.add_user(&msg).await.unwrap();
+ }
+ }
+
+ sleep_fut.await.unwrap();
if res.data.is_empty() {
return repos;
}
@@ -101,6 +123,9 @@ impl Data {
#[cfg(test)]
mod tests {
use crate::tests::sqlx_sqlite;
+ use db_core::prelude::*;
+
+ use url::Url;
pub const GITEA_HOST: &str = "http://localhost:8080";
@@ -115,7 +140,12 @@ mod tests {
let (db, data) = sqlx_sqlite::get_data().await;
let res = data.crawl(GITEA_HOST, &db).await;
let mut elements = 0;
+ let username = &res.get(0).unwrap().data.get(0).unwrap().owner.username;
+ let hostname = get_hostname(&Url::parse(GITEA_HOST).unwrap());
+ assert!(db.forge_exists(&hostname).await.unwrap());
+ assert!(db.user_exists(username, Some(&hostname)).await.unwrap());
res.iter().for_each(|r| elements += r.data.len());
+
assert_eq!(res.len(), 5);
assert_eq!(elements, 100);
}