From bd401d888e59382138f1c3ddf71b1a5ee768ae73 Mon Sep 17 00:00:00 2001
From: realaravinth
Date: Sun, 21 Aug 2022 23:10:40 +0530
Subject: [PATCH] fix: flatten content and then apply markup rules
---
src/main.rs | 1 +
src/post.rs | 370 ++++++++++++++++++++++++++++++++++++++++++++
src/proxy.rs | 8 +
templates/main.css | 42 ++---
templates/post.html | 59 +------
5 files changed, 403 insertions(+), 77 deletions(-)
create mode 100644 src/post.rs
diff --git a/src/main.rs b/src/main.rs
index ae95476..b282197 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,7 @@ use log::info;
mod data;
mod meta;
+mod post;
mod proxy;
mod render_html;
mod routes;
diff --git a/src/post.rs b/src/post.rs
new file mode 100644
index 0000000..b5df08d
--- /dev/null
+++ b/src/post.rs
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2021 Aravinth Manivannan
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+use std::{collections::HashMap, hash::Hash};
+
+use crate::data::*;
+use crate::proxy::StringUtils;
+use get_post::*;
+
+#[derive(Eq, PartialEq)] // `PartialEq` lets `Markup::apply_markup` compare `pos_type` with `==`.
+enum PostitionType { // Which edge of a markup range a `Markup` entry stands for.
+ Start, // Entry registered at the markup's `start` offset.
+ End, // Entry registered at the markup's `end` offset.
+}
+
+struct Markup<'a, 'b> { // One edge (start or end) of a markup range inside a paragraph.
+ markup: &'a GetPostPostContentBodyModelParagraphsMarkups, // Rule being applied; `apply_markup` reads `type_`, `anchor_type`, `href` and `user_id`.
+ p: &'a GetPostPostContentBodyModelParagraphs, // Paragraph the markup was taken from.
+ pos_type: PostitionType, // Whether this entry is the opening or the closing edge.
+ gists: &'b Option>, // NOTE(review): generic arguments look lost in this copy of the patch; confirm the type against the repository.
+}
+
+impl<'a, 'b> Markup<'a, 'b> {
+ fn start( // Builds the string that `apply_markup` places in front of a paragraph's text, chosen by `p.type_`.
+ p: &GetPostPostContentBodyModelParagraphs,
+ gists: &'b Option>,
+ pindex: usize, // Index of `p` within the post's paragraph list.
+ in_oli: &mut bool, // Caller-owned flag; read and cleared near the end of this body.
+ ) -> String { // NOTE(review): literals look emptied and this body appears to run into `Markup::end` in this copy; confirm against the repository.
+ if p.type_ == "IMG" {
+ let metadata = p.metadata.as_ref().unwrap(); // Panics if an IMG paragraph has no `metadata`.
+ format!(
+ r#" "#,
+ metadata.original_width.as_ref().unwrap(), // Panics if `original_width` is `None`.
+ crate::V1_API_ROUTES.proxy.get_medium_asset(&metadata.id)
+ )
+ } else if p.type_ == "P" {
+ "".into()
+ } else if p.type_ == "PRE" {
+ "
".into()
+ } else if p.type_ == "BQ" {
+ "".into()
+ } else if p.type_ == "H1" {
+ "".into()
+ } else if p.type_ == "H2" {
+ "".into()
+ } else if p.type_ == "H3" {
+ if pindex == 0 { // `apply_markup` skips an H3 at index 0 before calling `start`, so that caller never takes this arm.
+ log::debug!("caught heading");
+ "".into()
+ } else {
+ "".into()
+ }
+ } else if p.type_ == "H4" {
+ "".into()
+ } else if p.type_ == "H5" {
+ "".into()
+ } else if p.type_ == "H6" {
+ "".into()
+ } else if p.type_ == "IFRAME" {
+ let src = &p // Panics if `iframe` or `media_resource` is `None`.
+ .iframe
+ .as_ref()
+ .unwrap()
+ .media_resource
+ .as_ref()
+ .unwrap()
+ .href;
+ if src.contains("gist.github.com") { // Gist embeds are rendered from the matching entry in `gists`.
+ let gist_id = crate::data::Data::get_gist_id(src);
+ let (_, gist) = gists // Panics if `gists` is `None` or holds no entry whose id equals `gist_id`.
+ .as_ref()
+ .unwrap()
+ .iter()
+ .find(|(id, _)| id == gist_id)
+ .as_ref()
+ .unwrap();
+
+ let mut gists = String::default(); // Shadows the `gists` parameter with the output buffer.
+ for file in &gist.files {
+ gists += &format!(
+ r#" {}
"#,
+ file.get_html_content()
+ );
+ }
+ format!(
+ r#"".into()
+ } else {
+ "".into()
+ }
+ } else if p.type_ == "OLI" {
+ "".into()
+ } else {
+ "".into()
+ };
+ if *in_oli {
+ if p.type_ != "OLL" {
+ *in_oli = false;
+ format!("{resp}")
+ } else {
+ resp
+ }
+ } else {
+ resp
+ }
+ }
+
+ fn apply_markup(&self, pindex: usize) -> String { // Returns the string emitted for this edge, chosen by `markup.type_` and `pos_type`; `pindex` is not read.
+ if self.markup.type_ == "A" { // Anchors are further split by `anchor_type`.
+ if let Some(anchor_type) = &self.markup.anchor_type {
+ if anchor_type == "LINK" {
+ if self.pos_type == PostitionType::Start { // NOTE(review): most literals in this method look emptied in this copy of the patch; confirm against the repository.
+ format!(
+ r#""#,
+ self.markup.href.as_ref().unwrap() // Panics if a LINK anchor has no `href`.
+ )
+ } else {
+ " ".into()
+ }
+ } else if anchor_type == "USER" {
+ if self.pos_type == PostitionType::Start {
+ format!(
+ r#""#,
+ self.markup.user_id.as_ref().unwrap() // Panics if a USER anchor has no `user_id`.
+ )
+ } else {
+ " ".into()
+ }
+ } else { // Any other `anchor_type` value.
+ // log::error!("unknown markup.anchor_type: {:?} post id {}", anchor_type, id);
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ " ".into()
+ }
+ }
+ } else { // `anchor_type` is `None`.
+ // log::error!("unknown markup.anchor_type: {:?} post id {}", anchor_type, id);
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ " ".into()
+ }
+ }
+ } else if self.markup.type_ == "PRE" {
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ " ".into()
+ }
+ } else if self.markup.type_ == "EM" {
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ " ".into()
+ }
+ } else if self.markup.type_ == "STRONG" {
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ " ".into()
+ }
+ } else if self.markup.type_ == "CODE" {
+ if self.pos_type == PostitionType::Start {
+ "".into()
+ } else {
+ "
".into()
+ }
+ } else { // Unrecognised `markup.type_`.
+ // log::error!("unknown markup.type_: {:?} post id {}", markup.type_, id);
+ if self.pos_type == PostitionType::Start { // Only the start edge logs and emits the notice text.
+ log::info!("Unknown type");
+ r#"
+
+ From LibMedium: LibMedium is built by reverse
+ engineering the Meduim's internal API. This post contains
+ markup(formatting rules) that we are unaware of.
+ Please report this URL on our bug tracker so that we can
+ improve page rendering.
+
+ Alternatively, you can also email me at realaravinth at batsense dot net!
+
+ "#
+ .into()
+ } else {
+ " ".into()
+ }
+ }
+ }
+}
+
+#[derive(Default)] // `apply_markup` creates one empty map per paragraph via `PositionMap::default()`.
+struct PositionMap<'a, 'b> { // Groups a paragraph's `Markup` edges by text offset.
+ map: HashMap>>, // Offset -> edges recorded at that offset, in insertion order. NOTE(review): generic arguments look lost in this copy; confirm.
+ arr: Vec, // Every distinct offset once; `apply_markup` sorts it before walking the text.
+}
+
+impl<'a, 'b> PositionMap<'a, 'b> {
+    /// Records markup edge `m` at text offset `pos`.
+    ///
+    /// Despite the name, the edge is always stored: it is appended to the
+    /// edges already recorded at `pos`, or it starts a new list there. `pos`
+    /// is pushed onto `arr` only the first time it is seen, so `arr` holds
+    /// every distinct offset exactly once.
+    fn insert_if_not_exists(&mut self, pos: i64, m: Markup<'a, 'b>) {
+        use std::collections::hash_map::Entry;
+
+        // The entry API hashes `pos` once, instead of `get_mut` followed by `insert`.
+        match self.map.entry(pos) {
+            Entry::Occupied(mut slot) => slot.get_mut().push(m),
+            Entry::Vacant(slot) => {
+                slot.insert(vec![m]);
+                self.arr.push(pos);
+            }
+        }
+    }
+}
+
+pub fn apply_markup<'b>( // Renders each paragraph of `data` into one string: `Markup::start` output, text interleaved with markup edges, then `Markup::end` output.
+ data: &PostResp,
+ gists: &'b Option>, // Forwarded to `Markup::start`. NOTE(review): generic arguments look lost in this copy of the patch; confirm.
+) -> Vec {
+ let mut paragraphs: Vec = Vec::with_capacity(data.content.body_model.paragraphs.len());
+ for (pindex, p) in data.content.body_model.paragraphs.iter().enumerate() {
+ let mut pos = PositionMap::default();
+ if p.type_ == "H3" && pindex == 0 { // A leading H3 contributes no entry to the result.
+ log::debug!("FOUND TOP LEVEL H3. Breaking");
+ continue;
+ }
+ for m in p.markups.iter() { // Register both edges of every markup under their offsets.
+ let start_markup = Markup {
+ markup: &m,
+ p,
+ gists,
+ pos_type: PostitionType::Start,
+ };
+ pos.insert_if_not_exists(m.start, start_markup);
+ let end_markup = Markup {
+ markup: &m,
+ p,
+ gists,
+ pos_type: PostitionType::End,
+ };
+
+ pos.insert_if_not_exists(m.end, end_markup);
+ }
+
+ let mut cur = 0; // Offset in `p.text` up to which text has already been copied.
+
+ fn incr_cur(cur: usize, point: i64) -> usize { // Evaluates to `point as usize` (`cur + (point - cur)`) and logs the step; the `usize` subtraction overflows if `point as usize < cur`.
+ let incr = point as usize - cur;
+ let post_incr = cur + incr;
+ log::debug!(
+ "cur before incr: {cur}, incr by: {}, post incr: {}",
+ incr,
+ post_incr
+ );
+ post_incr
+ }
+
+ let mut content = String::with_capacity(p.text.len());
+ let mut in_oli = false; // NOTE(review): re-created for every paragraph, so `Markup::start` always sees `false` here; confirm list state is not meant to persist across paragraphs.
+ content += &Markup::start(&p, &gists, pindex, &mut in_oli);
+ pos.arr.sort(); // Offsets are visited in ascending order.
+ if let Some(first) = pos.arr.get(0) { // At least one markup edge exists.
+ //content += p.text.substring(cur, *first as usize);
+ content += p.text.slice(cur..*first as usize); // Plain text in front of the first edge.
+ cur = incr_cur(cur, *first);
+ for point in pos.arr.iter() {
+ //content.push(p.text.substring(start, start + point);
+ // if *point != 0 {
+
+ if cur != *point as usize { // Copy the plain text between the previous offset and this one.
+ // content += p.text.substring(cur, *point as usize);
+ content += p.text.slice(cur..*point as usize);
+ }
+ // }
+ let pos_markups = pos.map.get(point).unwrap(); // Every offset in `arr` was added to `map` by `insert_if_not_exists`.
+ for m in pos_markups.iter() {
+ // println!("{}", &m.apply_markup(pindex));
+ content += &m.apply_markup(pindex);
+ }
+ cur = incr_cur(cur, *point);
+ }
+ log::debug!("LAST");
+ content += p.text.slice(cur..); // Remaining text after the last edge.
+ content += &Markup::end(&p, pindex, &mut in_oli);
+ } else { // No markups: the whole text is appended as-is.
+ log::debug!("LAST WITH NO MARKUP");
+ content += p.text.slice(cur..);
+ content += &Markup::end(&p, pindex, &mut in_oli);
+ }
+ paragraphs.push(content);
+ }
+ paragraphs
+}
diff --git a/src/proxy.rs b/src/proxy.rs
index eb64ec4..5a8e546 100644
--- a/src/proxy.rs
+++ b/src/proxy.rs
@@ -23,6 +23,7 @@ use reqwest::header::CONTENT_TYPE;
use sailfish::TemplateOnce;
use crate::data::PostResp;
+use crate::post::apply_markup;
use crate::AppData;
const CACHE_AGE: u32 = 60 * 60 * 24;
@@ -99,11 +100,14 @@ impl StringUtils for str {
Bound::Included(bound) | Bound::Excluded(bound) => *bound,
Bound::Unbounded => 0,
};
+ log::debug!("{}", self);
+ log::debug!("start: {start}");
let len = match range.end_bound() {
Bound::Included(bound) => *bound + 1,
Bound::Excluded(bound) => *bound,
Bound::Unbounded => self.len(),
} - start;
+ log::debug!("len {len}");
self.substring(start, len)
}
}
@@ -118,6 +122,7 @@ pub struct Post {
pub reading_time: usize,
pub id: String,
pub gists: Option>,
+ pub paragraphs: Vec,
}
const INDEX: &str = include_str!("../templates/index.html");
@@ -211,6 +216,8 @@ async fn page(path: web::Path<(String, String)>, data: AppData) -> impl Responde
.unwrap();
let preview_img = crate::V1_API_ROUTES.proxy.get_medium_asset(preview_img);
+ let paragraphs = apply_markup(&post_data, &gists);
+
let page = Post {
id: id.to_owned(),
data: post_data,
@@ -218,6 +225,7 @@ async fn page(path: web::Path<(String, String)>, data: AppData) -> impl Responde
gists,
reading_time,
preview_img,
+ paragraphs,
};
let page = page.render_once().unwrap();
diff --git a/templates/main.css b/templates/main.css
index c2e4e49..45a046d 100644
--- a/templates/main.css
+++ b/templates/main.css
@@ -3,13 +3,29 @@
padding: 0;
}
+
+body {
+ width: 100%;
+ display: flex;
+ flex-direction: column;
+}
+
+main {
+ width: 35em;
+ margin: auto;
+ display: flex;
+ flex-direction: column;
+}
+
+
+
h1,
h2,
h3,
h4,
h5,
h6 {
- font-family: "Times New Roman", Times, serif;
+ font-family: sohne, "Helvetica Neue", Helvetica, Arial, sans-serif !important;
}
a {
@@ -27,24 +43,10 @@ a:hover {
html {
color: #333;
- font-family: Georgia, "Times New Roman", Times, serif;
+ font-family: charter, Georgia, Cambria, "Times New Roman", Times, serif;
font-size: 26px;
line-height: 1.55rem;
}
-
-body {
- width: 100%;
- display: flex;
- flex-direction: column;
-}
-
-main {
- width: 35em;
- margin: auto;
- display: flex;
- flex-direction: column;
-}
-
p {
margin: 20px 0;
}
@@ -72,19 +74,19 @@ code {
}
*/
-/*
pre {
- font-family: monospace;
+ font-family: Menlo, Monaco, "Courier New", Courier, monospace;
font-size: 15px;
white-space: pre-wrap;
+ /*
font-weight: 600;
+ */
line-height: 1rem;
- padding: 5px;
+ padding: 20px;
border-radius: 6px;
background-color: rgba(175, 184, 193, 0.2);
}
-*/
.code-block {
display: block;
diff --git a/templates/post.html b/templates/post.html
index 0824773..ae2a498 100644
--- a/templates/post.html
+++ b/templates/post.html
@@ -23,63 +23,8 @@
- <. let paragraphs = &data.content.body_model.paragraphs; .>
- <. for (pindex, p) in paragraphs.iter().enumerate() {.>
- <. if open_list && p.type_ != "OLI" { .>
-
- <. } .>
- <. if pindex == 0 && p.type_ == "H3" {.>
- <. continue; .>
- <.}.>
- <. if p.type_ == "IMG" {.>
- <. include!("./img.html"); .>
- <.} else if p.type_ == "P" {.>
- <. include!("./_markup.html"); .>
- <.} else if p.type_ == "BQ" {.>
- <. include!("./_markup.html"); .>
- <.} else if p.type_ == "H2" {.>
- <.= p.text .>
- <.} else if p.type_ == "H3" {.>
- <.= p.text .>
- <.} else if p.type_ == "H4" {.>
- <.= p.text .>
- <.} else if p.type_ == "H5" {.>
- <.= p.text .>
- <.} else if p.type_ == "H6" {.>
- <.= p.text .>
- <.} else if p.type_ == "IFRAME" {.>
- <. let src = &p.iframe.as_ref().unwrap().media_resource.as_ref().unwrap().href; .>
- <. if src.contains("gist.github.com"){.>
- <. include!("./gist_insert.html"); .>
-
- <.} else {.>
-
- <.}.>
- <.} else if p.type_ == "OLI" {.>
- <. if !open_list { .>
- <. open_list = true;.>
-
- <. } .>
- <.= p.text .>
- <.} else {.>
-
- <. include!("./_markup.html"); .>
-
-
- From LibMedium: LibMedium is built by reverse
- engineering the Meduim's internal API. This post contains
- markup(formatting rules) that we are unaware of.
- Please report this URL on our bug tracker so that we can
- improve page rendering.
-
- Alternatively, you can also email me at realaravinth at batsense dot net!
-
- <.}.>
+ <. for (_pindex, p) in paragraphs.iter().enumerate() {.>
+ <.- p .>
<.}.>