From d3fde704ff89f4df9754f6dad29dc33e49697971 Mon Sep 17 00:00:00 2001 From: Aravinth Manivannan Date: Wed, 22 Feb 2023 18:10:34 +0530 Subject: [PATCH] fix: archive after each crawl --- src/spider.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/spider.rs b/src/spider.rs index 05ac171..2586dfb 100644 --- a/src/spider.rs +++ b/src/spider.rs @@ -48,6 +48,7 @@ impl Ctx { let msg = CreateForge { url: url.clone(), forge_type: forge.forge_type(), + import: false, }; db.create_forge_instance(&msg).await.unwrap(); @@ -56,6 +57,7 @@ impl Ctx { let msg = CreateForge { url: url.clone(), forge_type: forge.forge_type, + import: false, }; federate.create_forge_instance(&msg).await.unwrap(); } @@ -189,9 +191,10 @@ impl Crawler { let forges = c.db.get_all_forges(offset, LIMIT).await.unwrap(); if forges.is_empty() { + c.federate.tar().await.unwrap(); + page = 0; tokio::time::sleep(std::time::Duration::new(c.ctx.settings.crawler.ttl, 0)) .await; - c.federate.tar().await.unwrap(); if c.shutdown() { info!("Stopping crawling job"); break; @@ -209,6 +212,7 @@ impl Crawler { .await; page += 1; } + if c.shutdown() { info!("Stopping crawling job"); break;