feat: job runner to execute download requests from mCaptcha/mCaptcha
This commit is contained in:
parent
c0a125d5f1
commit
c8ecd29e94
3 changed files with 130 additions and 10 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -3301,9 +3301,21 @@ dependencies = [
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"signal-hook-registry",
|
"signal-hook-registry",
|
||||||
"socket2",
|
"socket2",
|
||||||
|
"tokio-macros",
|
||||||
"windows-sys 0.45.0",
|
"windows-sys 0.45.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-macros"
|
||||||
|
version = "1.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-native-tls"
|
name = "tokio-native-tls"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
|
|
|
@ -70,7 +70,7 @@ mime = "0.3.16"
|
||||||
#sailfish = "0.3.2"
|
#sailfish = "0.3.2"
|
||||||
tracing = { version = "0.1.37", features = ["log"] }
|
tracing = { version = "0.1.37", features = ["log"] }
|
||||||
tera = { version="1.17.1", features=["builtins"]}
|
tera = { version="1.17.1", features=["builtins"]}
|
||||||
tokio = { version = "1.25.0", features = ["fs"] }
|
tokio = { version = "1.25.0", features = ["fs", "macros"] }
|
||||||
csv-async = { version = "1.2.5", features = ["serde", "tokio"] }
|
csv-async = { version = "1.2.5", features = ["serde", "tokio"] }
|
||||||
async-trait = "0.1.68"
|
async-trait = "0.1.68"
|
||||||
reqwest = { version = "0.11.18", features = ["json", "gzip"] }
|
reqwest = { version = "0.11.18", features = ["json", "gzip"] }
|
||||||
|
|
126
src/mcaptcha.rs
126
src/mcaptcha.rs
|
@ -14,12 +14,17 @@
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::oneshot;
|
||||||
|
use tokio::task::JoinHandle;
|
||||||
|
use tokio::time::sleep;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::errors::*;
|
use crate::{api::v1::mcaptcha::db::SchedulerJob, errors::*, AppData};
|
||||||
|
|
||||||
/* TODO:
|
/* TODO:
|
||||||
* 1. Define traits to interact with mCaptcha
|
* 1. Define traits to interact with mCaptcha
|
||||||
|
@ -79,11 +84,6 @@ impl Clone for Box<dyn MCaptchaClient> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
|
|
||||||
pub struct Secret {
|
|
||||||
pub secret: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct MCaptchaClientReqwest {
|
pub struct MCaptchaClientReqwest {
|
||||||
client: Client,
|
client: Client,
|
||||||
|
@ -122,7 +122,8 @@ impl MCaptchaClient for MCaptchaClientReqwest {
|
||||||
campaign_id: &str,
|
campaign_id: &str,
|
||||||
page: usize,
|
page: usize,
|
||||||
) -> ServiceResult<Vec<PerformanceAnalytics>> {
|
) -> ServiceResult<Vec<PerformanceAnalytics>> {
|
||||||
mcaptcha.set_path(&format!("/api/v1/survey/{campaign_id}/get?page={page}"));
|
mcaptcha.set_path(&format!("/api/v1/survey/takeout/{campaign_id}/get"));
|
||||||
|
mcaptcha.set_query(Some(&format!("page={page}")));
|
||||||
let res = self
|
let res = self
|
||||||
.client
|
.client
|
||||||
.get(mcaptcha)
|
.get(mcaptcha)
|
||||||
|
@ -136,6 +137,114 @@ impl MCaptchaClient for MCaptchaClientReqwest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct MCaptchaDownloader {
|
||||||
|
data: AppData,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MCaptchaDownloader {
|
||||||
|
pub fn new(data: AppData) -> Self {
|
||||||
|
Self { data }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn can_run(rx: &mut oneshot::Receiver<()>) -> bool {
|
||||||
|
matches!(rx.try_recv(), Err(oneshot::error::TryRecvError::Empty))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn start_job(
|
||||||
|
&self,
|
||||||
|
) -> ServiceResult<(oneshot::Sender<()>, JoinHandle<()>)> {
|
||||||
|
let (tx, mut rx) = oneshot::channel();
|
||||||
|
let this = self.clone();
|
||||||
|
let fut = async move {
|
||||||
|
loop {
|
||||||
|
if !Self::can_run(&mut rx) {
|
||||||
|
log::info!("stopping survey uploads");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let task = this.data.get_next_job_to_run().await.unwrap();
|
||||||
|
if task.is_none() {
|
||||||
|
for _ in 0..5 {
|
||||||
|
if !Self::can_run(&mut rx) {
|
||||||
|
log::info!("Stopping survey uploads");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(Duration::new(1, 0)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let task = task.unwrap();
|
||||||
|
this.data.mark_job_scheduled(&task).await.unwrap();
|
||||||
|
this.exec_job(&task, &mut rx).await.unwrap();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let handle = tokio::spawn(fut);
|
||||||
|
Ok((tx, handle))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn exec_job(
|
||||||
|
&self,
|
||||||
|
job: &SchedulerJob,
|
||||||
|
rx: &mut oneshot::Receiver<()>,
|
||||||
|
) -> ServiceResult<()> {
|
||||||
|
let checkpoint = self.data.mcaptcha_get_checkpoint(&job.campaign_id).await?;
|
||||||
|
const LIMIT: usize = 50;
|
||||||
|
let mut page = 1 + (checkpoint / LIMIT);
|
||||||
|
let campaign_str = job.campaign_id.to_string();
|
||||||
|
log::info!("getting page {page} from {campaign_str}");
|
||||||
|
loop {
|
||||||
|
if !Self::can_run(rx) {
|
||||||
|
log::info!("Stopping survey downloads");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut res = self
|
||||||
|
.data
|
||||||
|
.mcaptcha
|
||||||
|
.download_benchmarks(job.url.clone(), &campaign_str, page)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !Self::can_run(rx) {
|
||||||
|
log::info!("Stopping survey downloads");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let skip = checkpoint - ((page - 1) * LIMIT);
|
||||||
|
let new_records = res.len() - skip as usize;
|
||||||
|
let mut skip = skip as isize;
|
||||||
|
for r in res.drain(0..) {
|
||||||
|
if skip > 0 {
|
||||||
|
skip -= 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
self.data
|
||||||
|
.mcaptcha_insert_analytics(&job.campaign_id, &r)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
self.data
|
||||||
|
.mcaptcha_set_checkpoint(&job.campaign_id, new_records)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !Self::can_run(rx) {
|
||||||
|
log::info!("Stopping survey downloads");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
page += 1;
|
||||||
|
if res.len() < LIMIT {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.data.mark_job_finished(job).await.unwrap();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -196,8 +305,7 @@ pub mod tests {
|
||||||
) -> ServiceResult<Vec<PerformanceAnalytics>> {
|
) -> ServiceResult<Vec<PerformanceAnalytics>> {
|
||||||
println!(
|
println!(
|
||||||
"mcaptcha_url {}, campaign_id {}, page: {page}",
|
"mcaptcha_url {}, campaign_id {}, page: {page}",
|
||||||
mcaptcha.to_string(),
|
mcaptcha, campaign_id
|
||||||
campaign_id
|
|
||||||
);
|
);
|
||||||
let res = BENCHMARK.clone();
|
let res = BENCHMARK.clone();
|
||||||
Ok(res)
|
Ok(res)
|
||||||
|
|
Loading…
Reference in a new issue