From 46940309462d6939db1974e3923ff4d7a3903c0c Mon Sep 17 00:00:00 2001 From: realaravinth Date: Fri, 18 Mar 2022 20:45:01 +0530 Subject: [PATCH] feat: syntax highlighting for github gists --- Cargo.lock | 106 +++++++++++++++++++++++ Cargo.toml | 57 +++++++------ src/data.rs | 17 +++- src/main.rs | 3 +- src/render_html.rs | 170 +++++++++++++++++++++++++++++++++++++ templates/gist_insert.html | 2 +- templates/main.css | 30 ++++++- 7 files changed, 352 insertions(+), 33 deletions(-) create mode 100644 src/render_html.rs diff --git a/Cargo.lock b/Cargo.lock index 9d8e04c..d4015c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1069,6 +1069,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "lexical-core" version = "0.7.6" @@ -1111,9 +1117,19 @@ dependencies = [ "serde 1.0.136", "serde_json", "sled", + "syntect", "url", ] +[[package]] +name = "line-wrap" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" +dependencies = [ + "safemem", +] + [[package]] name = "linked-hash-map" version = "0.5.4" @@ -1315,6 +1331,28 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +[[package]] +name = "onig" +version = "6.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ddfe2c93bb389eea6e6d713306880c7f6dcc99a75b659ce145d962c861b225" +dependencies = [ + "bitflags", + "lazy_static", + "libc", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd3eee045c84695b53b20255bb7317063df090b68e18bfac0abb6c39cf7f33e" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.38" @@ -1426,6 +1464,20 @@ version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" +[[package]] +name = "plist" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd39bc6cdc9355ad1dc5eeedefee696bb35c34caf21768741e81826c0bbd7225" +dependencies = [ + "base64", + "indexmap", + "line-wrap", + "serde 1.0.136", + "time 0.3.7", + "xml-rs", +] + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -1594,6 +1646,12 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + [[package]] name = "sailfish" version = "0.3.4" @@ -1631,6 +1689,15 @@ dependencies = [ "sailfish-compiler", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.19" @@ -1824,6 +1891,28 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "syntect" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b20815bbe80ee0be06e6957450a841185fcf690fe0178f14d77a05ce2caa031" +dependencies = [ + "bincode", + "bitflags", + "flate2", + "fnv", + "lazy_static", + "lazycell", + "onig", + "plist", + "regex-syntax", + "serde 1.0.136", + "serde_derive", + "serde_json", + 
"walkdir", + "yaml-rust", +] + [[package]] name = "tempfile" version = "3.3.0" @@ -2060,6 +2149,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -2235,6 +2335,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "xml-rs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 304e1d8..c5c8b4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,43 +1,46 @@ [package] -name = "libmedium" -version = "0.1.0" -edition = "2021" -build = "build.rs" -homepage = "https://github.com/realaravinth/libmedium" -repository = "https://github.com/realaravinth/libmedium" -documentation = "https://github.con/realaravinth/libmedium" -readme = "https://github.com/realaravinth/libmedium/blob/master/README.md" -license = "AGPLv3 or later version" authors = ["realaravinth "] - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +build = "build.rs" +documentation = "https://github.con/realaravinth/libmedium" +edition = "2021" +homepage = "https://github.com/realaravinth/libmedium" +license = "AGPLv3 or later version" +name = "libmedium" +readme = "https://github.com/realaravinth/libmedium/blob/master/README.md" +repository = "https://github.com/realaravinth/libmedium" +version = "0.1.0" [dependencies] -actix-web = "4.0.0-beta.9" actix-http = "3.0.0-beta.8" actix-rt = "2" -my-codegen = {package = "actix-web-codegen", git 
="https://github.com/realaravinth/actix-web"} - +actix-web = "4.0.0-beta.9" +bincode = "1.3.3" +chrono = "0.4.19" config = "0.11" - +derive_more = "0.99" +futures = "0.3.17" +lazy_static = "1.4" +log = "0.4" +num_cpus = "1.13" +pretty_env_logger = "0.4" +sailfish = "0.3.2" serde = "1" serde_json = "1" - -pretty_env_logger = "0.4" -log = "0.4" - -lazy_static = "1.4" - +sled = "0.34.7" +syntect = "4.6.0" url = "2.2" -derive_more = "0.99" -sailfish = "0.3.2" +[dependencies.graphql_client] +features = ["reqwest"] +version = "0.10.0" -num_cpus = "1.13" -reqwest = { version = "0.11.6", features = ["json"] } -graphql_client = { version = "0.10.0", features = ["reqwest"]} +[dependencies.my-codegen] +git = "https://github.com/realaravinth/actix-web" +package = "actix-web-codegen" -chrono = "0.4.19" +[dependencies.reqwest] +features = ["json"] +version = "0.11.6" sled = "0.34.7" bincode = "1.3.3" diff --git a/src/data.rs b/src/data.rs index 516b8b4..e178bcf 100644 --- a/src/data.rs +++ b/src/data.rs @@ -24,6 +24,7 @@ use serde::{Deserialize, Serialize}; use sled::{Db, Tree}; use crate::proxy::StringUtils; +use crate::render_html; use crate::SETTINGS; const POST_CACHE_VERSION: usize = 3; @@ -204,7 +205,7 @@ impl Data { None }; - let gist = match self.gists.get(&id) { + let mut gist = match self.gists.get(&id) { Ok(Some(v)) => bincode::deserialize(&v[..]).unwrap(), _ => { const URL: &str = "https://api.github.com/gists/"; @@ -268,18 +269,30 @@ impl Data { let gist = if let Some(file_name) = file_name { let mut files: Vec = Vec::with_capacity(1); - let file = gist + let mut file = gist .files .iter() .find(|f| f.file_name == file_name) .unwrap() .to_owned(); + let highlight = render_html::SourcegraphQuery { + filepath: &file.file_name, + code: &file.content, + }; + file.content = highlight.syntax_highlight(); files.push(file); GistContent { files, html_url: gist_url, } } else { + gist.files.iter_mut().for_each(|f| { + let highlight = render_html::SourcegraphQuery { + filepath: 
&f.file_name, + code: &f.content, + }; + f.content = highlight.syntax_highlight(); + }); gist }; diff --git a/src/main.rs b/src/main.rs index 4d55ae9..ae95476 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,7 @@ use log::info; mod data; mod meta; mod proxy; +mod render_html; mod routes; mod settings; @@ -69,7 +70,7 @@ async fn main() -> std::io::Result<()> { .app_data(get_json_err()) .wrap( actix_middleware::DefaultHeaders::new() - .header("Permissions-Policy", "interest-cohort=()"), + .add(("Permissions-Policy", "interest-cohort=()")), ) .wrap(actix_middleware::NormalizePath::new( actix_middleware::TrailingSlash::Trim, diff --git a/src/render_html.rs b/src/render_html.rs new file mode 100644 index 0000000..3af1f08 --- /dev/null +++ b/src/render_html.rs @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2022 Aravinth Manivannan + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +use std::path::Path; + +use syntect::highlighting::{Color, ThemeSet}; +use syntect::html::highlighted_html_for_string; +use syntect::parsing::{SyntaxReference, SyntaxSet}; + +pub trait GenerateHTML { + fn generate(&mut self); +} + +#[allow(dead_code)] +pub const STYLE: &str = " + "; + +thread_local! 
{
+    pub(crate) static SYNTAX_SET: SyntaxSet = SyntaxSet::load_defaults_newlines();
+    // Cached per thread so the default themes are loaded once instead of on
+    // every `syntax_highlight` call.
+    static THEME_SET: ThemeSet = ThemeSet::load_defaults();
+}
+
+/// A request to highlight one piece of source text.
+pub struct SourcegraphQuery<'a> {
+    /// File name or path used to pick a syntax; may be empty, in which case
+    /// only the first line of `code` is consulted.
+    pub filepath: &'a str,
+    /// The source text to highlight.
+    pub code: &'a str,
+}
+
+impl<'a> SourcegraphQuery<'a> {
+    /// Renders `self.code` as highlighted HTML using the "InspiredGitHub" theme.
+    ///
+    /// The first line produced by syntect is appended unchanged. Every later
+    /// non-blank line is passed through the per-line template together with a
+    /// counter that starts at 1. Blank lines are skipped and do not advance
+    /// the counter.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the default theme set has no "InspiredGitHub" entry.
+    pub fn syntax_highlight(&self) -> String {
+        THEME_SET.with(|ts| {
+            let theme = &ts.themes["InspiredGitHub"];
+            let c = theme.settings.background.unwrap_or(Color::WHITE);
+            let mut num = 1;
+            // NOTE(review): this literal is empty in the copy under review even
+            // though three arguments are passed; presumably its markup was lost
+            // in transit. Confirm against upstream before applying.
+            let mut output = format!(
+                "",
+                c.r, c.g, c.b
+            );
+
+            let html = SYNTAX_SET.with(|ss| {
+                let language = self.determine_language(ss);
+                highlighted_html_for_string(self.code, ss, &language, theme)
+            });
+            for (line_num, line) in html.lines().enumerate() {
+                if line.trim().is_empty() {
+                    continue;
+                }
+                if line_num == 0 {
+                    // The opening line of syntect's output is kept as-is and
+                    // is not numbered.
+                    output.push_str(line);
+                } else {
+                    output.push_str(&format!("
{num}{line}
"
+                    ));
+                    num += 1;
+                }
+            }
+            output
+        })
+    }
+
+    // Adapted from
+    // https://github.com/sourcegraph/sourcegraph/blob/9fe138ae75fd64dce06b621572b252a9c9c8da70/docker-images/syntax-highlighter/crates/sg-syntax/src/lib.rs#L81
+    // with minimum modifications. Crate was MIT licensed at the time(2022-03-12 11:11)
+    /// Picks the syntax to highlight `self.code` with.
+    ///
+    /// Lookup order:
+    /// 1. Empty `filepath`: match on the first line of `code` only.
+    /// 2. Extension listed in the override table: choose by the prefix of `code`.
+    /// 3. Otherwise: whole file name, then extension, then first line of `code`.
+    ///
+    /// Every path falls back to the plain-text syntax, so this never panics on
+    /// unrecognised input.
+    fn determine_language(&self, syntax_set: &SyntaxSet) -> SyntaxReference {
+        if self.filepath.is_empty() {
+            // Legacy codepath, kept for backwards-compatibility with old clients.
+            // An unrecognised first line degrades to plain text instead of
+            // aborting the request.
+            return syntax_set
+                .find_syntax_by_first_line(self.code)
+                .unwrap_or_else(|| syntax_set.find_syntax_plain_text())
+                .to_owned();
+        }
+
+        // Split the input path ("foo/myfile.go") into file name
+        // ("myfile.go") and extension ("go").
+        let path = Path::new(self.filepath);
+        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
+        let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
+
+        // Override syntect's language detection for conflicting file extensions because
+        // it's impossible to express this logic in a syntax definition.
+        struct Override {
+            extension: &'static str,
+            prefix_langs: &'static [(&'static str, &'static str)],
+            default: &'static str,
+        }
+        // A constant table: nothing is allocated per call.
+        const OVERRIDES: &[Override] = &[Override {
+            extension: "cls",
+            prefix_langs: &[("%", "TeX"), ("\\", "TeX")],
+            default: "Apex",
+        }];
+
+        if let Some(rule) = OVERRIDES.iter().find(|o| o.extension == extension) {
+            let name = rule
+                .prefix_langs
+                .iter()
+                .find(|(prefix, _)| self.code.starts_with(*prefix))
+                .map_or(rule.default, |&(_, lang)| lang);
+            return syntax_set
+                .find_syntax_by_name(name)
+                .unwrap_or_else(|| syntax_set.find_syntax_plain_text())
+                .to_owned();
+        }
+
+        syntax_set
+            // First try to find a syntax whose "extension" matches our file
+            // name. This is done due to some syntaxes matching an "extension"
+            // that is actually a whole file name (e.g. "Dockerfile" or "CMakeLists.txt")
+            // see https://github.com/trishume/syntect/pull/170
+            .find_syntax_by_extension(file_name)
+            .or_else(|| syntax_set.find_syntax_by_extension(extension))
+            .or_else(|| syntax_set.find_syntax_by_first_line(self.code))
+            .unwrap_or_else(|| syntax_set.find_syntax_plain_text())
+            .to_owned()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SourcegraphQuery;
+
+    use syntect::parsing::SyntaxSet;
+
+    #[test]
+    fn cls_tex() {
+        let syntax_set = SyntaxSet::load_defaults_newlines();
+        let query = SourcegraphQuery {
+            filepath: "foo.cls",
+            code: "%",
+        };
+        let result = query.determine_language(&syntax_set);
+        assert_eq!(result.name, "TeX");
+        let _result = query.syntax_highlight();
+    }
+
+    #[test]
+    fn cls_without_tex_prefix_is_not_tex() {
+        let syntax_set = SyntaxSet::load_defaults_newlines();
+        let query = SourcegraphQuery {
+            filepath: "foo.cls",
+            code: "/**",
+        };
+        let result = query.determine_language(&syntax_set);
+        assert_ne!(result.name, "TeX");
+    }
+
+    #[test]
+    fn empty_filepath_without_first_line_match_is_plain_text() {
+        let syntax_set = SyntaxSet::load_defaults_newlines();
+        let query = SourcegraphQuery {
+            filepath: "",
+            code: "just some words",
+        };
+        let result = query.determine_language(&syntax_set);
+        assert_eq!(result.name, "Plain Text");
+    }
+}
diff --git a/templates/gist_insert.html b/templates/gist_insert.html index b897e91..a539d0e 100644 --- a/templates/gist_insert.html +++ b/templates/gist_insert.html @@ -2,7 +2,7 @@ <. let (_, gist)= gists.as_ref().unwrap().iter().find(|(id, _)| id == gist_id).as_ref().unwrap(); .>
<. for file in &gist.files {.> - <.= file.get_html_content() .> +
<.- file.get_html_content() .>
<.}.> See gist on GitHub