// Utility functions and structs for the blogging system // Due to limitations of the Cloudflare Workers KV, we do not // store the entire state in one record; instead, different // parts are stroed in different records. This also increases // efficiency, since the program won't need to load anything // unnecessary from KV. use crate::store; use crate::utils::*; use js_sys::{JsString, RegExp}; use pulldown_cmark::*; use serde::{Serialize, Deserialize}; use std::vec::Vec; use wasm_bindgen::{JsCast, JsValue}; use wasm_bindgen::closure::Closure; use wasm_bindgen_futures::future_to_promise; // A list of the UUIDs of all published blog posts // This should be SORTED with the newest posts at lower indices (closer to 0) // The user may edit this via KV UI to change ordering and such // by default new posts are always added to the top #[derive(Serialize, Deserialize)] pub struct PostsList(pub Vec); impl PostsList { pub async fn load() -> PostsList { match store::get_obj("posts_list").await { Ok(v) => PostsList(v), // Don't panic on empty // TODO: What if the user messed up when editing? 
// That would cause a decode failure and all data will be gone // if a new post is added or updated (overwriting the KV value) // under this logic // (if no new post is added then nothing bad would happen; // the user would probably notice when trying to visit the blog home page) Err(_) => PostsList(vec![]) } } pub fn has_post(&self, uuid: &str) -> bool { self.0.contains(&uuid.into()) } // Add a post to the list and then update the record in KV // Also consumes self, as this should normally be the last action // in an API call pub async fn add_post(mut self, uuid: &str) -> MyResult<()> { if self.has_post(uuid) { return Ok(()); } self.0.insert(0, uuid.into()); store::put_obj_pretty("posts_list", self.0).await } // Remove a post from published list // may be used when deleting / unpublishing a post // Does nothing if uuid not found in list pub async fn remove_post(mut self, uuid: &str) -> MyResult<()> { self.0.remove_item(&uuid); store::put_obj_pretty("posts_list", self.0).await } } #[derive(Serialize, Deserialize)] pub struct Post { // The UUID of the post (a Standard Notes UUID) pub uuid: String, // The UNIX timestamp (in seconds) for the post pub timestamp: u64, // URL of the post (relative to the root of the site) pub url: String, // Title of the post pub title: String, // The Markdown content of the post // We keep the original content here // so that we could make changes to the Markdown parser // in the future; we won't be stuck with a parsed version pub content: String, // Some arbitrary data that could be used by the theme pub theme_config: Option } impl Post { fn uuid_to_post_key(uuid: &str) -> String { format!("post_by_uuid_{}", uuid) } fn url_to_mapping_key(url: &str) -> String { format!("url_mapping_{}", url) } async fn create_url_mapping(url: &str, uuid: &str) -> MyResult<()> { store::put_str(&Self::url_to_mapping_key(url), uuid).await } // Returns Err(InternalError) if the post is not found // Note that the existence status of a post here must // be 
synchronized with the PostsList; that is, if a // post is not found in PostsList, it must not be found // here either; if a post is found in PostsList, then // this method should not return any error. // (except for hidden posts, in which case they won't be // present in PostsList) pub async fn find_by_uuid(uuid: &str) -> MyResult { store::get_obj(&Self::uuid_to_post_key(uuid)).await } pub async fn find_by_url(url: &str) -> MyResult { let uuid = store::get_str(&Self::url_to_mapping_key(url)).await?; Self::find_by_uuid(&uuid).await } // Write the Post to KV storage; this can be a new post or // update to an existing post; either way, the CALLER is // responsible for making sure PostsList is updated with the // latest set of posts sorted in order. // This function will also create a mapping from URL to UUID in the KV pub async fn write_to_kv(self) -> MyResult<()> { Self::create_url_mapping(&self.url, &self.uuid).await?; store::put_obj(&Self::uuid_to_post_key(&self.uuid), self).await } pub async fn delete_by_uuid(uuid: &str) -> MyResult<()> { store::delete(&Self::uuid_to_post_key(uuid)).await } } lazy_static! { // Whenever this is changed, all cache will be invalided // Use build timestamp string static ref CACHE_VERSION: String = { format!("{}", BUILD_TIMESTAMP) }; } // The prefix path used for caching remote images pub const IMG_CACHE_PREFIX: &'static str = "/imgcache/"; // The divider for summary // Insert this into the article as a standalone line to // make everything above it the summary. DO NOT insert // it within paragraph or anything else otherwise // the layout may break. const SUMMARY_DIVIDER: &'static str = ""; // Cached version of rendered blog content HTMLs // compiled from Markdown // This is needed because // (1) we have to extract some information from // the Markdown source code before anyone // visits, e.g. 
pictures it refers to ( // for whitelisting the cache URL) // (2) Markdown parsing is slower than filling in // HTML templates of the entire page. If these // Markdown compilation results are cached, // page generation can be very fast while still // keeping some dynamic features available to us #[derive(Serialize, Deserialize)] pub struct PostContentCache { // UUID of the original post uuid: String, // If version != CACHE_VERSION, the cache is invalidated version: String, // Digest of the original content orig_digest: String, // Summary can be defined by inserting SUMMARY_DIVIDER // into the article. Everything before this tag will be // the summary. Becuase it's an HTML comment, it won't // show up in the rendered result. pub summary: String, // Compiled content in HTML pub content: String } impl PostContentCache { fn uuid_to_cache_key(uuid: &str) -> String { format!("content_cache_{}", uuid) } fn url_to_cache_whitelist_key(url: &str) -> String { format!("cache_whitelist_{}", url) } pub async fn is_external_url_whitelisted_for_cache(url: &str) -> bool { if let Some(list) = &crate::CONFIG.extra_remote_proxy_whitelist { if list.contains(&url.into()) { return true; } } match store::get_str(&Self::url_to_cache_whitelist_key(url)).await { Ok(s) => s == "Y", Err(_) => false } } async fn find_by_uuid(uuid: &str) -> MyResult { store::get_obj(&Self::uuid_to_cache_key(uuid)).await } pub async fn find_by_post(post: &Post) -> Option { let cache = match Self::find_by_uuid(&post.uuid).await { Ok(cache) => cache, Err(_) => return None }; if cache.version != *CACHE_VERSION { return None; } if cache.orig_digest != crate::utils::sha1(&post.content).await { return None; } Some(cache) } fn transform_tag<'a>(tag: &mut Tag<'a>) { match tag { Tag::Image(_, url, _) => { // Convert all external image to our cached URL // to protect users and speed up page loading let url_encoded: String = js_sys::encode_uri_component(url).into(); // Also write this URL to whitelist // (just throw the 
task onto the JS ev loop, // because to make this function async we MUST need to // allocate Vec later in the render function) // we don't care about if this write succeeds or not, // because even if it breaks we still can recover by a simple refresh // and once it's written, it's permanent, so we expect the write // to succeed as soon as the article is submitted let url_cache_key = Self::url_to_cache_whitelist_key(url); crate::EVENT.with(move |ev| { ev.wait_until(&future_to_promise(async move { let _ = store::put_str(&url_cache_key, "Y").await; Ok(JsValue::TRUE) })).unwrap(); }); // Now we can overwrite the tag URL *url = format!("{}{}", IMG_CACHE_PREFIX, url_encoded).into(); }, _ => () } } fn transform_tags<'ev>( parser: impl Iterator> ) -> impl Iterator> { parser.map(|mut ev| { match ev { Event::Start(ref mut tag) | Event::End(ref mut tag) => { Self::transform_tag(tag); ev }, _ => ev } }) } fn transform_code_block_highlight<'ev>( parser: impl Iterator> ) -> impl Iterator> { let mut in_code_block = false; let mut code_block_lang = None; parser.map(move |ev| { match &ev { Event::Start(Tag::CodeBlock(block)) => { in_code_block = true; match block { CodeBlockKind::Fenced(lang) => code_block_lang = Some(lang.to_string()), CodeBlockKind::Indented => code_block_lang = None } }, Event::End(Tag::CodeBlock(_)) => { in_code_block = false; code_block_lang = None; }, Event::Text(text) => { if in_code_block { let highlighted = if let Some(ref code_block_lang) = code_block_lang { crate::hljs::highlight(&code_block_lang, text) } else { crate::hljs::highlight_auto(text) }; return Event::Html( highlighted.into()); } } _ => () } ev }) } // Do some HTML-level transformations to the compiled result // Because the Markdown parser doesn't always allow us to do // everything, like adding `id` attributes to tags fn transform_html(html: String) -> String { let js_html: JsString = html.into(); // Add `id="xxx"` to all headings for anchoring // Replacing is done in a Closure in order to 
generate // the proper anchor string for each heading // This matches only a single line, which is good because // we only want it to match a single heading tag // If it matched multiple lines, then it may match the // ending tag of another heading. let regex_heading = RegExp::new(r"([^<]*)<\/h\1>", "ig"); let closure = Closure::wrap(Box::new(|_m: String, p1: String, p2: String| { let anchor = filter_non_ascii_alphanumeric( &p2.to_lowercase()).replace(" ", "-"); format!("{}", p1, anchor, p2, p1) }) as Box String>); let js_html = js_html.replace_by_pattern_with_function(®ex_heading, closure.as_ref().unchecked_ref()); // Transform all

        // For syntax highlighting
        // We don't match the end tag because it may span multiple lines;
        // trying to match the end tag could result in accidentally matching
        // the end tag of another code block.
        let regex_code = RegExp::new("
", "ig");
        let js_html = js_html.replace_by_pattern(®ex_code, "

        // Transform all non-self-reference links (does not start with "#") to target="_blank"
        let regex_links = RegExp::new("", "ig");
        let js_html = js_html.replace_by_pattern(®ex_links, "");


    // Only renders the content and spits out a cache object
    // can be used to display the page or to write to cache
    // Despite the signature, this function BLOCKS
    // async only comes from digesting via SubtleCrypto
    pub async fn render(post: &Post) -> PostContentCache {
        let parser = Parser::new_ext(&post.content, Options::all());
        // Apply code highlighting via Highlight.js
        let parser = Self::transform_code_block_highlight(parser);
        // Apply tag transform
        let parser = Self::transform_tags(parser);

        let mut html_output = String::new();
        html::push_html(&mut html_output, parser);
        html_output = Self::transform_html(html_output);
        PostContentCache {
            uuid: post.uuid.clone(),
            version: CACHE_VERSION.to_owned(),
            orig_digest: crate::utils::sha1(&post.content).await,
            summary: match html_output.find(SUMMARY_DIVIDER) {
                None => html_output.clone(),
                Some(x) => (&html_output[0..x]).to_owned()
            content: html_output

    // Tries to find the rendered content cache of post
    // if a valid cache cannot be found, this method
    // will render the content, write that into cache
    // and return this newly-rendered one
    // This will block if it tries to render; if that's a
    // concern, use find_by_post
    pub async fn find_or_render(post: &Post) -> PostContentCache {
        match Self::find_by_post(post).await {
            Some(cache) => cache,
            None => {
                let ret = Self::render(post).await;
                // Ignore save error since if save failed, it can be regenerated anyway
                let _ = ret.save().await;

    // Save the current cache object to KV
    pub async fn save(&self) -> MyResult<()> {
        store::put_obj(&Self::uuid_to_cache_key(&self.uuid), self).await

    pub async fn delete_by_uuid(uuid: &str) -> MyResult<()> {