From 4bf676419d10cf34d6e4dd9e9c949763b5618a42 Mon Sep 17 00:00:00 2001 From: Peter Cai Date: Wed, 15 Apr 2020 19:28:52 +0800 Subject: [PATCH] blog: use RegExp from js_sys to handle headings and code blocks --- src/blog.rs | 82 ++++++++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 54 deletions(-) diff --git a/src/blog.rs b/src/blog.rs index d9bf530..ab66bd9 100644 --- a/src/blog.rs +++ b/src/blog.rs @@ -6,9 +6,12 @@ // unnecessary from KV. use crate::store; use crate::utils::*; +use js_sys::{JsString, RegExp}; use pulldown_cmark::*; use serde::{Serialize, Deserialize}; use std::vec::Vec; +use wasm_bindgen::JsCast; +use wasm_bindgen::closure::Closure; // A list of the UUIDs of all published blog posts // This should be SORTED with the newest posts at lower indices (closer to 0) @@ -227,62 +230,33 @@ impl PostContentCache { // Do some HTML-level transformations to the compiled result // Because the Markdown parser doesn't always allow us to do // everything, like adding `id` attributes to tags - fn transform_html(html: &mut String) { - // Generate IDs for all headings in article - // This allows navigation via the hash part of the URL - let mut last_idx: usize = 0; - loop { - let len = html.len(); - - let idx = match (&html[last_idx..len]).find("<h") { - Some(i) => i + last_idx, - None => break, - }; - - if idx >= len - 4 { - break; - } - - last_idx = idx + 3; - - if &html[idx + 3..idx + 4] != ">" { - continue; - } - - // Now we have found a tag - let htype = &html[idx + 1..idx + 3]; - - // Find the closing tag for this one - // Since it's generated by the Markdown engine, - // we can assume it's correct HTML - let end_idx = match (&html[idx + 3..len]).find(&format!("</{}>", htype)) { - Some(i) => i + idx + 3, - None => continue, - }; - - if end_idx >= len - 4 { - break; - } - - let heading = &html[idx + 4..end_idx]; - // We also assume there should be no other HTML tags in the heading - // This should be fine for me but I don't know about others - //
However it's really tedious to do anything better... - let heading_anchor = filter_non_ascii_alphanumeric( - &heading.to_lowercase()).replace(" ", "-"); - let inserted_id = format!(" id=\"{}\"", heading_anchor); - - html.insert_str(idx + 3, &inserted_id); - - last_idx = idx + 3 + inserted_id.len(); - } + fn transform_html(html: String) -> String { + let js_html: JsString = html.into(); + + // Add `id="xxx"` to all headings for anchoring + // Replacing is done in a Closure in order to generate + // the proper anchor string for each heading + // This matches only a single line, which is good because + // we only want it to match a single heading tag + // If it matched multiple lines, then it may match the + // ending tag of another heading. + let regex_heading = RegExp::new(r"<h(\d)>([^<]*)<\/h\1>", "ig"); + let closure = Closure::wrap(Box::new(|_m: String, p1: String, p2: String| { + let anchor = filter_non_ascii_alphanumeric( + &p2.to_lowercase()).replace(" ", "-"); + format!("<h{} id=\"{}\">{}</h{}>", p1, anchor, p2, p1) + }) as Box<dyn Fn(String, String, String) -> String>); + let js_html = js_html.replace_by_pattern_with_function(&regex_heading, closure.as_ref().unchecked_ref()); // Transform all
 to 

         // For syntax highlighting
-        // Note that though the Markdown engine may also insert classes,
-        // because we insert our class before that class, ours will
-        // always take precedence
-        *html = html.replace("
", "ig");
+        let js_html = js_html.replace_by_pattern(&regex_code, "
");
+
+        js_html.into()
     }
 
     // Only renders the content and spits out a cache object
@@ -329,7 +303,7 @@ impl PostContentCache {
         }
         let mut html_output = String::new();
         html::push_html(&mut html_output, parser.into_iter());
-        Self::transform_html(&mut html_output);
+        html_output = Self::transform_html(html_output);
         PostContentCache {
             uuid: post.uuid.clone(),
             version: CACHE_VERSION.to_owned(),