diff --git a/Cargo.toml b/Cargo.toml
index 6b2faed..75b2d73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,8 @@ web-sys = { version = "0.3", features = [
   "Crypto",
   "Headers",
   "Request",
+  "RequestInit",
+  "RequestRedirect",
   "Response",
   "ResponseInit",
   "SubtleCrypto",
diff --git a/src/blog.rs b/src/blog.rs
index ba8abbe..788f070 100644
--- a/src/blog.rs
+++ b/src/blog.rs
@@ -120,7 +120,10 @@ impl Post {
 // library updates. Updaing this value invalidates all
 // existing cache and they will be recompiled when someone
 // visits.
-const CACHE_VERSION: &'static str = "0001";
+const CACHE_VERSION: &'static str = "0003";
+
+// The prefix path used for caching remote images
+pub const IMG_CACHE_PREFIX: &'static str = "/imgcache/";
 
 // Cached version of rendered blog content HTMLs
 // compiled from Markdown
@@ -151,6 +154,22 @@ impl PostContentCache {
         format!("content_cache_{}", uuid)
     }
 
+    // Builds the storage key under which `url` is marked as
+    // allowed to be fetched through the image cache proxy
+    fn url_to_cache_whitelist_key(url: &str) -> String {
+        format!("cache_whitelist_{}", url)
+    }
+
+    // Returns true only if `url` has been whitelisted (i.e. it was
+    // seen as an image while rendering a post); any lookup failure
+    // is treated as "not whitelisted"
+    pub async fn is_external_url_whitelisted_for_cache(url: &str) -> bool {
+        store::get_str(&Self::url_to_cache_whitelist_key(url))
+            .await
+            .map(|s| s == "Y")
+            .unwrap_or(false)
+    }
+
     async fn find_by_uuid(uuid: &str) -> MyResult {
         store::get_obj(&Self::uuid_to_cache_key(uuid)).await
     }
@@ -172,17 +191,49 @@ impl PostContentCache {
         Some(cache)
     }
 
+    // Rewrites the URL of an external (http/https) image tag so that
+    // it is served through our caching proxy under IMG_CACHE_PREFIX,
+    // and records the original URL in the whitelist consulted by the proxy.
+    // All other tags, and images with relative or non-HTTP URLs
+    // (which the proxy presumably could not fetch), are left untouched
+    async fn transform_tag<'a>(tag: &mut Tag<'a>) {
+        if let Tag::Image(_, url, _) = tag {
+            if !(url.starts_with("http://") || url.starts_with("https://")) {
+                return;
+            }
+            // Convert all external image to our cached URL
+            // to protect users and speed up page loading
+            let url_encoded: String = js_sys::encode_uri_component(url).into();
+            // Also write this URL to whitelist
+            // we don't care about if this write succeeds or not,
+            // because even if it breaks we still can recover by a simple refresh
+            let _ = store::put_str(&Self::url_to_cache_whitelist_key(url), "Y").await;
+            // Now we can overwrite the tag URL
+            *url = format!("{}{}", IMG_CACHE_PREFIX, url_encoded).into();
+        }
+    }
+
     // Only renders the content and spits out a cache object
     // can be used to display the page or to write to cache
     // Despite the signature, this function BLOCKS
     // async only comes from digesting via SubtleCrypto
     pub async fn render(post: &Post) -> PostContentCache {
-        // TODO: enable some options; pre-process posts to enable
-        // inline image caching; also generate a summary (?)
+        // TODO: enable some options; also generate a summary (?)
         // from first few paragraphs
-        let parser = Parser::new(&post.content);
+        // We have to first collect all events into a vector
+        // because we need to asynchronously transform the events
+        // which could not be done through mapping on iterators
+        let mut events: Vec<Event> = Parser::new(&post.content).collect();
+        for ev in events.iter_mut() {
+            // Only Start tags are rewritten: the HTML renderer takes the
+            // image URL from the Start event, so transforming the matching
+            // End tag would only repeat the whitelist write for the same key
+            if let Event::Start(tag) = ev {
+                Self::transform_tag(tag).await;
+            }
+        }
         let mut html_output = String::new();
-        html::push_html(&mut html_output, parser);
+        html::push_html(&mut html_output, events.into_iter());
         PostContentCache {
             uuid: post.uuid.clone(),
             version: CACHE_VERSION.to_owned(),
diff --git a/src/lib.rs b/src/lib.rs
index 507b93a..73ab40b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -11,8 +11,10 @@ mod blog;
 mod sn;
 
 use cfg_if::cfg_if;
+use js_sys::Promise;
 use utils::*;
 use wasm_bindgen::prelude::*;
+use wasm_bindgen_futures::JsFuture;
 use web_sys::*;
 
 cfg_if! {
@@ -38,10 +40,48 @@ lazy_static! {
 fn build_routes() -> router::Router {
     let mut router = router::Router::new(&default_route);
     router.add_route("/hello", &hello_world);
+    router.add_route(blog::IMG_CACHE_PREFIX, &proxy_remote_image);
     sn::build_routes(&mut router);
     return router;
 }
 
+#[wasm_bindgen]
+extern "C" {
+    // Binding to the JS global `fetch`; the returned Promise
+    // is awaited through JsFuture by the caller
+    fn fetch(req: &Request) -> Promise;
+}
+
+// A caching proxy for images inserted into articles
+// to protect user's privacy and accelerate page load.
+// Only GET requests for URLs that were whitelisted while
+// rendering a post are forwarded; anything else is rejected
+async fn proxy_remote_image(req: Request, url: Url) -> MyResult<Response> {
+    if req.method() != "GET" {
+        return Err(Error::BadRequest("Unsupported method".into()));
+    }
+
+    // Everything after the route prefix is the percent-encoded remote URL.
+    // Use strip_prefix instead of slicing by length so that a path
+    // which does not actually start with the prefix yields an error
+    // rather than a panic
+    let path = url.pathname();
+    let encoded_url = path.strip_prefix(blog::IMG_CACHE_PREFIX)
+        .ok_or_else(|| Error::BadRequest("Malformed image cache path".into()))?;
+    let remote_url: String = js_sys::decode_uri_component(encoded_url)
+        .internal_err()?.into();
+
+    if !blog::PostContentCache::is_external_url_whitelisted_for_cache(&remote_url).await {
+        return Err(Error::Unauthorized("This URL is not whitelisted".into()));
+    }
+
+    let new_req = Request::new_with_str_and_init(&remote_url,
+                        RequestInit::new()
+                            .method("GET")
+                            .redirect(RequestRedirect::Follow)).internal_err()?;
+    Ok(JsFuture::from(fetch(&new_req)).await.internal_err()?.into())
+}
+
 async fn default_route(_req: Request, url: Url) -> MyResult {
     // We assume that anything that falls into this catch-all handler
     // would be either posts or 404