From bd401d888e59382138f1c3ddf71b1a5ee768ae73 Mon Sep 17 00:00:00 2001 From: realaravinth Date: Sun, 21 Aug 2022 23:10:40 +0530 Subject: [PATCH] fix: flatten content and then apply markup rules --- src/main.rs | 1 + src/post.rs | 370 ++++++++++++++++++++++++++++++++++++++++++++ src/proxy.rs | 8 + templates/main.css | 42 ++--- templates/post.html | 59 +------ 5 files changed, 403 insertions(+), 77 deletions(-) create mode 100644 src/post.rs diff --git a/src/main.rs b/src/main.rs index ae95476..b282197 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,7 @@ use log::info; mod data; mod meta; +mod post; mod proxy; mod render_html; mod routes; diff --git a/src/post.rs b/src/post.rs new file mode 100644 index 0000000..b5df08d --- /dev/null +++ b/src/post.rs @@ -0,0 +1,370 @@ +/* + * Copyright (C) 2021 Aravinth Manivannan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +use std::{collections::HashMap, hash::Hash}; + +use crate::data::*; +use crate::proxy::StringUtils; +use get_post::*; + +#[derive(Eq, PartialEq)] +enum PostitionType { + Start, + End, +} + +struct Markup<'a, 'b> { + markup: &'a GetPostPostContentBodyModelParagraphsMarkups, + p: &'a GetPostPostContentBodyModelParagraphs, + pos_type: PostitionType, + gists: &'b Option>, +} + +impl<'a, 'b> Markup<'a, 'b> { + fn start( + p: &GetPostPostContentBodyModelParagraphs, + gists: &'b Option>, + pindex: usize, + in_oli: &mut bool, + ) -> String { + if p.type_ == "IMG" { + let metadata = p.metadata.as_ref().unwrap(); + format!( + r#"
"#, + metadata.original_width.as_ref().unwrap(), + crate::V1_API_ROUTES.proxy.get_medium_asset(&metadata.id) + ) + } else if p.type_ == "P" { + "

".into() + } else if p.type_ == "PRE" { + "

".into()
+        } else if p.type_ == "BQ" {
+            "
".into() + } else if p.type_ == "H1" { + "

".into() + } else if p.type_ == "H2" { + "

".into() + } else if p.type_ == "H3" { + if pindex == 0 { + log::debug!("caught heading"); + "".into() + } else { + "

".into() + } + } else if p.type_ == "H4" { + "

".into() + } else if p.type_ == "H5" { + "

".into() + } else if p.type_ == "H6" { + "
".into() + } else if p.type_ == "IFRAME" { + let src = &p + .iframe + .as_ref() + .unwrap() + .media_resource + .as_ref() + .unwrap() + .href; + if src.contains("gist.github.com") { + let gist_id = crate::data::Data::get_gist_id(src); + let (_, gist) = gists + .as_ref() + .unwrap() + .iter() + .find(|(id, _)| id == gist_id) + .as_ref() + .unwrap(); + + let mut gists = String::default(); + for file in &gist.files { + gists += &format!( + r#"
{}
"#, + file.get_html_content() + ); + } + format!( + r#"
{gists} + See gist on GitHub"#, + &gist.html_url + ) + } else { + format!(r#"".into() + } + } else if p.type_ == "OLI" { + "".into() + } else { + "".into() + }; + if *in_oli { + if p.type_ != "OLL" { + *in_oli = false; + format!("{resp}") + } else { + resp + } + } else { + resp + } + } + + fn apply_markup(&self, pindex: usize) -> String { + if self.markup.type_ == "A" { + if let Some(anchor_type) = &self.markup.anchor_type { + if anchor_type == "LINK" { + if self.pos_type == PostitionType::Start { + format!( + r#""#, + self.markup.href.as_ref().unwrap() + ) + } else { + "".into() + } + } else if anchor_type == "USER" { + if self.pos_type == PostitionType::Start { + format!( + r#""#, + self.markup.user_id.as_ref().unwrap() + ) + } else { + "".into() + } + } else { + // log::error!("unknown markup.anchor_type: {:?} post id {}", anchor_type, id); + if self.pos_type == PostitionType::Start { + "".into() + } else { + "".into() + } + } + } else { + // log::error!("unknown markup.anchor_type: {:?} post id {}", anchor_type, id); + if self.pos_type == PostitionType::Start { + "".into() + } else { + "".into() + } + } + } else if self.markup.type_ == "PRE" { + if self.pos_type == PostitionType::Start { + "
".into()
+            } else {
+                "
".into() + } + } else if self.markup.type_ == "EM" { + if self.pos_type == PostitionType::Start { + "".into() + } else { + "".into() + } + } else if self.markup.type_ == "STRONG" { + if self.pos_type == PostitionType::Start { + "".into() + } else { + "".into() + } + } else if self.markup.type_ == "CODE" { + if self.pos_type == PostitionType::Start { + "".into() + } else { + "".into() + } + } else { + // log::error!("unknown markup.type_: {:?} post id {}", markup.type_, id); + if self.pos_type == PostitionType::Start { + log::info!("Unknown type"); + r#" +

+ From LibMedium: LibMedium is built by reverse + engineering the Meduim's internal API. This post contains + markup(formatting rules) that we are unaware of. + Please report this URL on our bug tracker so that we can + improve page rendering. +
+ Alternatively, you can also email me at realaravinth at batsense dot net! +

+ "# + .into() + } else { + "".into() + } + } + } +} + +#[derive(Default)] +struct PositionMap<'a, 'b> { + map: HashMap>>, + arr: Vec, +} + +impl<'a, 'b> PositionMap<'a, 'b> { + fn insert_if_not_exists(&mut self, pos: i64, m: Markup<'a, 'b>) { + if let Some(markups) = self.map.get_mut(&pos) { + markups.push(m); + } else { + self.map.insert(pos, vec![m]); + self.arr.push(pos); + } + } +} + +pub fn apply_markup<'b>( + data: &PostResp, + gists: &'b Option>, +) -> Vec { + let mut paragraphs: Vec = Vec::with_capacity(data.content.body_model.paragraphs.len()); + for (pindex, p) in data.content.body_model.paragraphs.iter().enumerate() { + let mut pos = PositionMap::default(); + if p.type_ == "H3" && pindex == 0 { + log::debug!("FOUND TOP LEVEL H3. Breaking"); + continue; + } + for m in p.markups.iter() { + let start_markup = Markup { + markup: &m, + p, + gists, + pos_type: PostitionType::Start, + }; + pos.insert_if_not_exists(m.start, start_markup); + let end_markup = Markup { + markup: &m, + p, + gists, + pos_type: PostitionType::End, + }; + + pos.insert_if_not_exists(m.end, end_markup); + } + + let mut cur = 0; + + fn incr_cur(cur: usize, point: i64) -> usize { + let incr = point as usize - cur; + let post_incr = cur + incr; + log::debug!( + "cur before incr: {cur}, incr by: {}, post incr: {}", + incr, + post_incr + ); + post_incr + } + + let mut content = String::with_capacity(p.text.len()); + let mut in_oli = false; + content += &Markup::start(&p, &gists, pindex, &mut in_oli); + pos.arr.sort(); + if let Some(first) = pos.arr.get(0) { + //content += p.text.substring(cur, *first as usize); + content += p.text.slice(cur..*first as usize); + cur = incr_cur(cur, *first); + for point in pos.arr.iter() { + //content.push(p.text.substring(start, start + point); + // if *point != 0 { + + if cur != *point as usize { + // content += p.text.substring(cur, *point as usize); + content += p.text.slice(cur..*point as usize); + } + // } + let pos_markups = pos.map.get(point).unwrap(); + for m in pos_markups.iter() { + // println!("{}", &m.apply_markup(pindex)); + content += &m.apply_markup(pindex); + } + cur = incr_cur(cur, *point); + } + log::debug!("LAST"); + content += p.text.slice(cur..); + content += &Markup::end(&p, pindex, &mut in_oli); + } else { + log::debug!("LAST WITH NO MARKUP"); + content += p.text.slice(cur..); + content += &Markup::end(&p, pindex, &mut in_oli); + } + paragraphs.push(content); + } + paragraphs +} diff --git a/src/proxy.rs b/src/proxy.rs index eb64ec4..5a8e546 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -23,6 +23,7 @@ use reqwest::header::CONTENT_TYPE; use sailfish::TemplateOnce; use crate::data::PostResp; +use crate::post::apply_markup; use crate::AppData; const CACHE_AGE: u32 = 60 * 60 * 24; @@ -99,11 +100,14 @@ impl StringUtils for str { Bound::Included(bound) | Bound::Excluded(bound) => *bound, Bound::Unbounded => 0, }; + log::debug!("{}", self); + log::debug!("start: {start}"); let len = match range.end_bound() { Bound::Included(bound) => *bound + 1, Bound::Excluded(bound) => *bound, Bound::Unbounded => self.len(), } - start; + log::debug!("len {len}"); self.substring(start, len) } } @@ -118,6 +122,7 @@ pub struct Post { pub reading_time: usize, pub id: String, pub gists: Option>, + pub paragraphs: Vec, } const INDEX: &str = include_str!("../templates/index.html"); @@ -211,6 +216,8 @@ async fn page(path: web::Path<(String, String)>, data: AppData) -> impl Responde .unwrap(); let preview_img = crate::V1_API_ROUTES.proxy.get_medium_asset(preview_img); + let paragraphs = apply_markup(&post_data, &gists); + let page = Post { id: id.to_owned(), data: post_data, @@ -218,6 +225,7 @@ async fn page(path: web::Path<(String, String)>, data: AppData) -> impl Responde gists, reading_time, preview_img, + paragraphs, }; let page = page.render_once().unwrap(); diff --git a/templates/main.css b/templates/main.css index c2e4e49..45a046d 100644 --- a/templates/main.css +++ b/templates/main.css @@ -3,13 +3,29 @@ padding: 0; } + +body { + width: 100%; + display: flex; + flex-direction: column; +} + +main { + width: 35em; + margin: auto; + display: flex; + flex-direction: column; +} + + + h1, h2, h3, h4, h5, h6 { - font-family: "Times New Roman", Times, serif; + font-family: sohne, "Helvetica Neue", Helvetica, Arial, sans-serif !important; } a { @@ -27,24 +43,10 @@ a:hover { html { color: #333; - font-family: Georgia, "Times New Roman", Times, serif; + font-family: charter, Georgia, Cambria, "Times New Roman", Times, serif; font-size: 26px; line-height: 1.55rem; } - -body { - width: 100%; - display: flex; - flex-direction: column; -} - -main { - width: 35em; - margin: auto; - display: flex; - flex-direction: column; -} - p { margin: 20px 0; } @@ -72,19 +74,19 @@ code { } */ -/* pre { - font-family: monospace; + font-family: Menlo, Monaco, "Courier New", Courier, monospace; font-size: 15px; white-space: pre-wrap; + /* font-weight: 600; + */ line-height: 1rem; - padding: 5px; + padding: 20px; border-radius: 6px; background-color: rgba(175, 184, 193, 0.2); } -*/ .code-block { display: block; diff --git a/templates/post.html b/templates/post.html index 0824773..ae2a498 100644 --- a/templates/post.html +++ b/templates/post.html @@ -23,63 +23,8 @@

- <. let paragraphs = &data.content.body_model.paragraphs; .> - <. for (pindex, p) in paragraphs.iter().enumerate() {.> - <. if open_list && p.type_ != "OLI" { .> - - <. } .> - <. if pindex == 0 && p.type_ == "H3" {.> - <. continue; .> - <.}.> - <. if p.type_ == "IMG" {.> - <. include!("./img.html"); .> - <.} else if p.type_ == "P" {.> -

<. include!("./_markup.html"); .>

- <.} else if p.type_ == "BQ" {.> -
<. include!("./_markup.html"); .>
- <.} else if p.type_ == "H2" {.> -

<.= p.text .>

- <.} else if p.type_ == "H3" {.> -

<.= p.text .>

- <.} else if p.type_ == "H4" {.> -

<.= p.text .>

- <.} else if p.type_ == "H5" {.> -
<.= p.text .>
- <.} else if p.type_ == "H6" {.> -
<.= p.text .>
- <.} else if p.type_ == "IFRAME" {.> - <. let src = &p.iframe.as_ref().unwrap().media_resource.as_ref().unwrap().href; .> - <. if src.contains("gist.github.com"){.> - <. include!("./gist_insert.html"); .> - - <.} else {.> - - <.}.> - <.} else if p.type_ == "OLI" {.> - <. if !open_list { .> - <. open_list = true;.> -
    - <. } .> -
  1. <.= p.text .>
  2. - <.} else {.> -

    - <. include!("./_markup.html"); .> -

    -

    - From LibMedium: LibMedium is built by reverse - engineering the Meduim's internal API. This post contains - markup(formatting rules) that we are unaware of. - Please report this URL on our bug tracker so that we can - improve page rendering. -
    - Alternatively, you can also email me at realaravinth at batsense dot net! -

    - <.}.> + <. for (_pindex, p) in paragraphs.iter().enumerate() {.> + <.- p .> <.}.>