blog: use RegExp from js_sys to handle headings and code blocks

2020-04-15 19:28:52 +08:00 · 2020-04-15 19:28:52 +08:00 · 4bf676419d
parent e1033ff0b3
commit 4bf676419d
1 changed files with 28 additions and 54 deletions
--- a/src/blog.rs
+++ b/src/blog.rs
@ -6,9 +6,12 @@
 // unnecessary from KV.
 use crate::store;
 use crate::utils::*;
+use js_sys::{JsString, RegExp};
 use pulldown_cmark::*;
 use serde::{Serialize, Deserialize};
 use std::vec::Vec;
+use wasm_bindgen::JsCast;
+use wasm_bindgen::closure::Closure;

 // A list of the UUIDs of all published blog posts
 // This should be SORTED with the newest posts at lower indices (closer to 0)
@ -227,62 +230,33 @@ impl PostContentCache {
    // Do some HTML-level transformations to the compiled result
    // Because the Markdown parser doesn't always allow us to do
    // everything, like adding `id` attributes to tags
-    fn transform_html(html: &mut String) {
-        // Generate IDs for all headings in article
-        // This allows navigation via the hash part of the URL
-        let mut last_idx: usize = 0;
-        loop {
-            let len = html.len();
-            
-            let idx = match (&html[last_idx..len]).find("<h") {
-                Some(i) => i + last_idx,
-                None => break,
-            };
-    
-            if idx >= len - 4 {
-                break;
-            }
-    
-            last_idx = idx + 3;
-    
-            if &html[idx + 3..idx + 4] != ">" {
-                continue;
-            }
-    
-            // Now we have found a <h*> tag
-            let htype = &html[idx + 1..idx + 3];
-    
-            // Find the closing tag for this one
-            // Since it's generated by the Markdown engine,
-            // we can assume it's correct HTML
-            let end_idx = match (&html[idx + 3..len]).find(&format!("</{}>", htype)) {
-                Some(i) => i + idx + 3,
-                None => continue,
-            };
-    
-            if end_idx >= len - 4 {
-                break;
-            }
-    
-            let heading = &html[idx + 4..end_idx];
-            // We also assume there should be no other HTML tags in the heading
-            // This should be fine for me but I don't know about others
-            // However it's really tedious to do anything better...
-            let heading_anchor = filter_non_ascii_alphanumeric(
-                &heading.to_lowercase()).replace(" ", "-");
-            let inserted_id = format!(" id=\"{}\"", heading_anchor);
-    
-            html.insert_str(idx + 3, &inserted_id);
-            
-            last_idx = idx + 3 + inserted_id.len();
-        }
+    fn transform_html(html: String) -> String {
+        let js_html: JsString = html.into();
+
+        // Add `id="xxx"` to all headings for anchoring
+        // Replacing is done in a Closure in order to generate
+        // the proper anchor string for each heading
+        // The text group is `[^<]*`, so a match can never run past
+        // another tag; we only want it to match a single heading tag.
+        // If it could cross into other tags, then it may match the
+        // ending tag of another heading.
+        let regex_heading = RegExp::new(r"<h(\d)>([^<]*)<\/h\1>", "ig");
+        let closure = Closure::wrap(Box::new(|_m: String, p1: String, p2: String| {
+            let anchor = filter_non_ascii_alphanumeric(
+                &p2.to_lowercase()).replace(" ", "-");
+            format!("<h{} id=\"{}\">{}</h{}>", p1, anchor, p2, p1)
+        }) as Box<dyn Fn(String, String, String) -> String>);
+        let js_html = js_html.replace_by_pattern_with_function(&regex_heading, closure.as_ref().unchecked_ref());

        // Transform all <pre><code> to <pre><code class="hljs">
        // For syntax highlighting
-        // Note that though the Markdown engine may also insert classes,
-        // because we insert our class before that class, ours will
-        // always take precedence
-        *html = html.replace("<pre><code", "<pre><code class=\"hljs\" ");
+        // We don't match the end tag because a code block may span multiple
+        // lines; trying to match the end tag could result in accidentally
+        // matching the end tag of another code block.
+        let regex_code = RegExp::new("<pre><code class=\"language-([^\"]*)\">", "ig");
+        let js_html = js_html.replace_by_pattern(&regex_code, "<pre><code class=\"hljs\">");
+
+        js_html.into()
    }

    // Only renders the content and spits out a cache object
@ -329,7 +303,7 @@ impl PostContentCache {
        }
        let mut html_output = String::new();
        html::push_html(&mut html_output, parser.into_iter());
-        Self::transform_html(&mut html_output);
+        html_output = Self::transform_html(html_output);
        PostContentCache {
            uuid: post.uuid.clone(),
            version: CACHE_VERSION.to_owned(),