feat: save repository owners' usernames during spider::crawl

This commit is contained in:
Aravinth Manivannan 2022-04-30 23:51:38 +05:30
parent cb9b8d6d0d
commit 3d98149244
Signed by: realaravinth
GPG key ID: AD9F0F08E855ED88
2 changed files with 36 additions and 5 deletions

1
Cargo.lock generated
View file

@ -379,6 +379,7 @@ dependencies = [
"serde 1.0.136",
"serde_json",
"thiserror",
"url",
]
[[package]]

View file

@ -16,6 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::time::Duration;
use tokio::time;
use url::Url;
@ -31,8 +32,8 @@ const GITEA_NODEINFO: &str = "/api/v1/nodeinfo";
impl Data {
pub async fn crawl(&self, hostname: &str, db: &BoxDB) -> Vec<SearchResults> {
let mut page = 1;
let mut url = Url::parse(hostname).unwrap();
let hostname = url.host().as_ref().unwrap().to_string();
let mut instance_url = Url::parse(hostname).unwrap();
let hostname = get_hostname(&instance_url);
if !db.forge_exists(&hostname).await.unwrap() {
let msg = CreateForge {
hostname: &hostname,
@ -41,6 +42,7 @@ impl Data {
db.create_forge_isntance(&msg).await.unwrap();
}
let mut url = instance_url.clone();
url.set_path(REPO_SEARCH_PATH);
let mut repos = Vec::new();
loop {
@ -59,11 +61,31 @@ impl Data {
.await
.unwrap();
time::sleep(Duration::new(
let sleep_fut = time::sleep(Duration::new(
self.settings.crawler.wait_before_next_api_call,
0,
))
.await;
));
let sleep_fut = tokio::spawn(sleep_fut);
for repo in res.data.iter() {
if !db
.user_exists(&repo.owner.username, Some(&hostname))
.await
.unwrap()
{
let mut profile_url = instance_url.clone();
profile_url.set_path(&repo.owner.username);
let msg = AddUser {
hostname: &hostname,
username: &repo.owner.username,
html_link: profile_url.as_str(),
profile_photo: Some(&repo.owner.avatar_url),
};
db.add_user(&msg).await.unwrap();
}
}
sleep_fut.await.unwrap();
if res.data.is_empty() {
return repos;
}
@ -101,6 +123,9 @@ impl Data {
#[cfg(test)]
mod tests {
use crate::tests::sqlx_sqlite;
use db_core::prelude::*;
use url::Url;
pub const GITEA_HOST: &str = "http://localhost:8080";
@ -115,7 +140,12 @@ mod tests {
let (db, data) = sqlx_sqlite::get_data().await;
let res = data.crawl(GITEA_HOST, &db).await;
let mut elements = 0;
let username = &res.get(0).unwrap().data.get(0).unwrap().owner.username;
let hostname = get_hostname(&Url::parse(GITEA_HOST).unwrap());
assert!(db.forge_exists(&hostname).await.unwrap());
assert!(db.user_exists(username, Some(&hostname)).await.unwrap());
res.iter().for_each(|r| elements += r.data.len());
assert_eq!(res.len(), 5);
assert_eq!(elements, 100);
}