feat: syntax highlighting for github gists

This commit is contained in:
Aravinth Manivannan 2022-03-18 20:45:01 +05:30
parent 8e6fd2bfae
commit 4694030946
Signed by: realaravinth
GPG Key ID: AD9F0F08E855ED88
7 changed files with 352 additions and 33 deletions

106
Cargo.lock generated
View File

@ -1069,6 +1069,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "lexical-core"
version = "0.7.6"
@ -1111,9 +1117,19 @@ dependencies = [
"serde 1.0.136",
"serde_json",
"sled",
"syntect",
"url",
]
[[package]]
name = "line-wrap"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9"
dependencies = [
"safemem",
]
[[package]]
name = "linked-hash-map"
version = "0.5.4"
@ -1315,6 +1331,28 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "onig"
version = "6.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ddfe2c93bb389eea6e6d713306880c7f6dcc99a75b659ce145d962c861b225"
dependencies = [
"bitflags",
"lazy_static",
"libc",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd3eee045c84695b53b20255bb7317063df090b68e18bfac0abb6c39cf7f33e"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "openssl"
version = "0.10.38"
@ -1426,6 +1464,20 @@ version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
[[package]]
name = "plist"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd39bc6cdc9355ad1dc5eeedefee696bb35c34caf21768741e81826c0bbd7225"
dependencies = [
"base64",
"indexmap",
"line-wrap",
"serde 1.0.136",
"time 0.3.7",
"xml-rs",
]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
@ -1594,6 +1646,12 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "safemem"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072"
[[package]]
name = "sailfish"
version = "0.3.4"
@ -1631,6 +1689,15 @@ dependencies = [
"sailfish-compiler",
]
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.19"
@ -1824,6 +1891,28 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "syntect"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b20815bbe80ee0be06e6957450a841185fcf690fe0178f14d77a05ce2caa031"
dependencies = [
"bincode",
"bitflags",
"flate2",
"fnv",
"lazy_static",
"lazycell",
"onig",
"plist",
"regex-syntax",
"serde 1.0.136",
"serde_derive",
"serde_json",
"walkdir",
"yaml-rust",
]
[[package]]
name = "tempfile"
version = "3.3.0"
@ -2060,6 +2149,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.0"
@ -2235,6 +2335,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "xml-rs"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3"
[[package]]
name = "yaml-rust"
version = "0.4.5"

View File

@ -1,43 +1,46 @@
[package]
name = "libmedium"
version = "0.1.0"
edition = "2021"
build = "build.rs"
homepage = "https://github.com/realaravinth/libmedium"
repository = "https://github.com/realaravinth/libmedium"
documentation = "https://github.con/realaravinth/libmedium"
readme = "https://github.com/realaravinth/libmedium/blob/master/README.md"
license = "AGPLv3 or later version"
authors = ["realaravinth <realaravinth@batsense.net>"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
build = "build.rs"
documentation = "https://github.con/realaravinth/libmedium"
edition = "2021"
homepage = "https://github.com/realaravinth/libmedium"
license = "AGPLv3 or later version"
name = "libmedium"
readme = "https://github.com/realaravinth/libmedium/blob/master/README.md"
repository = "https://github.com/realaravinth/libmedium"
version = "0.1.0"
[dependencies]
actix-web = "4.0.0-beta.9"
actix-http = "3.0.0-beta.8"
actix-rt = "2"
my-codegen = {package = "actix-web-codegen", git ="https://github.com/realaravinth/actix-web"}
actix-web = "4.0.0-beta.9"
bincode = "1.3.3"
chrono = "0.4.19"
config = "0.11"
derive_more = "0.99"
futures = "0.3.17"
lazy_static = "1.4"
log = "0.4"
num_cpus = "1.13"
pretty_env_logger = "0.4"
sailfish = "0.3.2"
serde = "1"
serde_json = "1"
pretty_env_logger = "0.4"
log = "0.4"
lazy_static = "1.4"
sled = "0.34.7"
syntect = "4.6.0"
url = "2.2"
derive_more = "0.99"
sailfish = "0.3.2"
[dependencies.graphql_client]
features = ["reqwest"]
version = "0.10.0"
num_cpus = "1.13"
reqwest = { version = "0.11.6", features = ["json"] }
graphql_client = { version = "0.10.0", features = ["reqwest"]}
[dependencies.my-codegen]
git = "https://github.com/realaravinth/actix-web"
package = "actix-web-codegen"
chrono = "0.4.19"
[dependencies.reqwest]
features = ["json"]
version = "0.11.6"
sled = "0.34.7"
bincode = "1.3.3"

View File

@ -24,6 +24,7 @@ use serde::{Deserialize, Serialize};
use sled::{Db, Tree};
use crate::proxy::StringUtils;
use crate::render_html;
use crate::SETTINGS;
const POST_CACHE_VERSION: usize = 3;
@ -204,7 +205,7 @@ impl Data {
None
};
let gist = match self.gists.get(&id) {
let mut gist = match self.gists.get(&id) {
Ok(Some(v)) => bincode::deserialize(&v[..]).unwrap(),
_ => {
const URL: &str = "https://api.github.com/gists/";
@ -268,18 +269,30 @@ impl Data {
let gist = if let Some(file_name) = file_name {
let mut files: Vec<GistFile> = Vec::with_capacity(1);
let file = gist
let mut file = gist
.files
.iter()
.find(|f| f.file_name == file_name)
.unwrap()
.to_owned();
let highlight = render_html::SourcegraphQuery {
filepath: &file.file_name,
code: &file.content,
};
file.content = highlight.syntax_highlight();
files.push(file);
GistContent {
files,
html_url: gist_url,
}
} else {
gist.files.iter_mut().for_each(|f| {
let highlight = render_html::SourcegraphQuery {
filepath: &f.file_name,
code: &f.content,
};
f.content = highlight.syntax_highlight();
});
gist
};

View File

@ -26,6 +26,7 @@ use log::info;
mod data;
mod meta;
mod proxy;
mod render_html;
mod routes;
mod settings;
@ -69,7 +70,7 @@ async fn main() -> std::io::Result<()> {
.app_data(get_json_err())
.wrap(
actix_middleware::DefaultHeaders::new()
.header("Permissions-Policy", "interest-cohort=()"),
.add(("Permissions-Policy", "interest-cohort=()")),
)
.wrap(actix_middleware::NormalizePath::new(
actix_middleware::TrailingSlash::Trim,

170
src/render_html.rs Normal file
View File

@ -0,0 +1,170 @@
/*
* Copyright (C) 2022 Aravinth Manivannan <realaravinth@batsense.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use std::path::Path;
use syntect::highlighting::{Color, ThemeSet};
use syntect::html::highlighted_html_for_string;
use syntect::parsing::{SyntaxReference, SyntaxSet};
/// Hook for types that produce HTML by mutating themselves in place.
///
/// `generate` takes `&mut self` and returns nothing, so any output has to be
/// stored on the implementor. No implementation is visible in this module.
// NOTE(review): presumably implemented by the post/gist data types — confirm against callers.
pub trait GenerateHTML {
/// Generates the HTML for `self`, storing the result on the receiver.
fn generate(&mut self);
}
/// Placeholder for a shared stylesheet snippet.
///
/// As written the string holds nothing but a line break, and nothing in this
/// module reads it — hence the `dead_code` allowance.
#[allow(dead_code)]
pub const STYLE: &str = "
";
// Per-thread copy of syntect's bundled syntax definitions (the
// "newlines" variant). Being a `thread_local!`, it is initialised lazily on
// first access in each thread; `SourcegraphQuery::syntax_highlight` borrows it
// through `SYNTAX_SET.with(..)`.
thread_local! {
pub(crate) static SYNTAX_SET: SyntaxSet = SyntaxSet::load_defaults_newlines();
}
/// A borrowed request to syntax-highlight one piece of source code.
///
/// Both fields are string slices owned by the caller; the query itself
/// allocates nothing.
pub struct SourcegraphQuery<'a> {
/// Path or file name of the source. Its file name and extension drive
/// language detection; when empty, detection falls back to the first line
/// of `code`.
pub filepath: &'a str,
/// The source text to highlight.
pub code: &'a str,
}
impl<'a> SourcegraphQuery<'a> {
pub fn syntax_highlight(&self) -> String {
// let ss = SYNTAX_SET;
let ts = ThemeSet::load_defaults();
let theme = &ts.themes["InspiredGitHub"];
let c = theme.settings.background.unwrap_or(Color::WHITE);
let mut num = 1;
let mut output = format!(
"<style>
.gist_file {{
background-color:#{:02x}{:02x}{:02x};
}}</style>",
c.r, c.g, c.b
);
// highlighted_html_for_string(&q.code, syntax_set, syntax_def, theme),
let html = SYNTAX_SET.with(|ss| {
let language = self.determine_language(ss);
highlighted_html_for_string(self.code, ss, &language, theme)
});
for (line_num, line) in html.lines().enumerate() {
if !line.trim().is_empty() {
if line_num == 0 {
//|| line_num == total_lines - 1 {
output.push_str(line);
} else {
output.push_str(&format!("<div id=\"line-{num}\"class=\"line\"><a href=\"#line-{num}\"<span class=\"line-number\">{num}</span></a>{line}</div>"
));
num += 1;
}
}
}
output
}
// adopted from
// https://github.com/sourcegraph/sourcegraph/blob/9fe138ae75fd64dce06b621572b252a9c9c8da70/docker-images/syntax-highlighter/crates/sg-syntax/src/lib.rs#L81
// with minimum modifications. Crate was MIT licensed at the time(2022-03-12 11:11)
fn determine_language(&self, syntax_set: &SyntaxSet) -> SyntaxReference {
if self.filepath.is_empty() {
// Legacy codepath, kept for backwards-compatability with old clients.
match syntax_set.find_syntax_by_first_line(self.code) {
Some(v) => {
return v.to_owned();
}
None => unimplemented!(), //Err(json!({"error": "invalid extension"})),
};
}
// Split the input path ("foo/myfile.go") into file name
// ("myfile.go") and extension ("go").
let path = Path::new(&self.filepath);
let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
// Override syntect's language detection for conflicting file extensions because
// it's impossible to express this logic in a syntax definition.
struct Override {
extension: &'static str,
prefix_langs: Vec<(&'static str, &'static str)>,
default: &'static str,
}
let overrides = vec![Override {
extension: "cls",
prefix_langs: vec![("%", "TeX"), ("\\", "TeX")],
default: "Apex",
}];
if let Some(Override {
prefix_langs,
default,
..
}) = overrides.iter().find(|o| o.extension == extension)
{
let name = match prefix_langs
.iter()
.find(|(prefix, _)| self.code.starts_with(prefix))
{
Some((_, lang)) => lang,
None => default,
};
return syntax_set
.find_syntax_by_name(name)
.unwrap_or_else(|| syntax_set.find_syntax_plain_text())
.to_owned();
}
syntax_set
// First try to find a syntax whose "extension" matches our file
// name. This is done due to some syntaxes matching an "extension"
// that is actually a whole file name (e.g. "Dockerfile" or "CMakeLists.txt")
// see https://github.com/trishume/syntect/pull/170
.find_syntax_by_extension(file_name)
.or_else(|| syntax_set.find_syntax_by_extension(extension))
.or_else(|| syntax_set.find_syntax_by_first_line(self.code))
.unwrap_or_else(|| syntax_set.find_syntax_plain_text())
.to_owned()
}
}
#[cfg(test)]
mod tests {
    use super::SourcegraphQuery;
    use syntect::parsing::SyntaxSet;

    /// A `.cls` file whose content starts with `%` must resolve to TeX via
    /// the extension override, and highlighting it must not panic.
    #[test]
    fn cls_tex() {
        let ss = SyntaxSet::load_defaults_newlines();
        let q = SourcegraphQuery {
            code: "%",
            filepath: "foo.cls",
        };

        let detected = q.determine_language(&ss);
        assert_eq!(detected.name, "TeX");

        // Smoke test only: the rendered HTML is not inspected.
        let _rendered = q.syntax_highlight();
    }

    // Disabled upstream test, kept for reference. It targets the original
    // Sourcegraph API (owned `String` fields, an `extension` field, a free
    // `determine_language` returning a `Result`) and would need porting.
    // NOTE(review): presumably disabled because the default syntax set has
    // no "Apex" definition — confirm before re-enabling.
    //#[test]
    //fn cls_apex() {
    //    let syntax_set = SyntaxSet::load_defaults_newlines();
    //    let query = SourcegraphQuery {
    //        filepath: "foo.cls".to_string(),
    //        code: "/**".to_string(),
    //        extension: String::new(),
    //    };
    //    let result = determine_language(&query, &syntax_set);
    //    assert_eq!(result.unwrap().name, "Apex");
    //}
}

View File

@ -2,7 +2,7 @@
<. let (_, gist)= gists.as_ref().unwrap().iter().find(|(id, _)| id == gist_id).as_ref().unwrap(); .>
<div class="gist_container">
<. for file in &gist.files {.>
<code class="code-block"> <.= file.get_html_content() .> </code>
<div class="code-block gist-block"> <.- file.get_html_content() .> </div>
<.}.>
<a class="gist_link" href="<.= &gist.html_url .>" target="_blank"
>See gist on GitHub</a

View File

@ -59,6 +59,7 @@ figcaption {
text-align: center;
}
/*
code {
font-family: monospace;
font-size: 15px;
@ -69,6 +70,21 @@ code {
border-radius: 6px;
background-color: rgba(175, 184, 193, 0.2);
}
*/
/*
pre {
font-family: monospace;
font-size: 15px;
white-space: pre-wrap;
font-weight: 600;
line-height: 1rem;
padding: 5px;
border-radius: 6px;
background-color: rgba(175, 184, 193, 0.2);
}
*/
.code-block {
display: block;
@ -121,7 +137,6 @@ iframe {
font-style: italic;
}
.author {
display: inline-flex;
flex-direction: row;
@ -153,6 +168,17 @@ blockquote {
font-style: italic;
}
ol, ul {
ol,
ul {
margin-left: 40px;
}
.line-number {
margin-right: 20px;
display: inline-block;
}
.gist-block {
overflow-x: scroll;
display: block;
}