proxy remote media inserted in posts

To protect user privacy and speed up page loading
This commit is contained in:
Peter Cai 2020-04-09 19:15:45 +08:00
parent 1b42c65294
commit 4474769004
No known key found for this signature in database
GPG Key ID: 71F5FB4E4F3FD54F
3 changed files with 79 additions and 5 deletions

View File

@ -24,6 +24,8 @@ web-sys = { version = "0.3", features = [
"Crypto",
"Headers",
"Request",
"RequestInit",
"RequestRedirect",
"Response",
"ResponseInit",
"SubtleCrypto",

View File

@ -120,7 +120,10 @@ impl Post {
// library updates. Updating this value invalidates all
// existing caches, and they will be recompiled when someone
// visits.
const CACHE_VERSION: &'static str = "0001";
const CACHE_VERSION: &'static str = "0003";
// The prefix path used for caching remote images
pub const IMG_CACHE_PREFIX: &'static str = "/imgcache/";
// Cached version of rendered blog content HTMLs
// compiled from Markdown
@ -151,6 +154,17 @@ impl PostContentCache {
format!("content_cache_{}", uuid)
}
// Builds the store key under which the whitelist flag for `url`
// is kept: the fixed "cache_whitelist_" prefix followed by the
// URL exactly as given
fn url_to_cache_whitelist_key(url: &str) -> String {
    const KEY_PREFIX: &str = "cache_whitelist_";
    let mut key = String::with_capacity(KEY_PREFIX.len() + url.len());
    key.push_str(KEY_PREFIX);
    key.push_str(url);
    key
}
// Returns true only when the store holds the value "Y" under the
// whitelist key of `url`; a failed lookup counts as "not whitelisted"
pub async fn is_external_url_whitelisted_for_cache(url: &str) -> bool {
    let key = Self::url_to_cache_whitelist_key(url);
    store::get_str(&key)
        .await
        .map(|flag| flag == "Y")
        .unwrap_or(false)
}
// Loads the cache object stored under the cache key derived
// from `uuid`; any store error is passed back to the caller
async fn find_by_uuid(uuid: &str) -> MyResult<PostContentCache> {
    let cache_key = Self::uuid_to_cache_key(uuid);
    store::get_obj(&cache_key).await
}
@ -172,17 +186,44 @@ impl PostContentCache {
Some(cache)
}
// Rewrites the URL of an image tag so that the image is loaded
// through our caching proxy instead of directly from the remote host.
// Tags other than images are left untouched.
//
// Only absolute http(s) URLs are rewritten. Relative paths,
// protocol-relative URLs and `data:` URIs are kept as they are:
// the proxy could not fetch them anyway (it has no base URL to
// resolve them against), so rewriting them would only break the image.
async fn transform_tag<'a>(tag: &mut Tag<'a>) {
    if let Tag::Image(_, url, _) = tag {
        // URL schemes are case-insensitive, so compare ignoring ASCII case.
        // `get` yields None when the URL is shorter than the scheme or
        // the cut would fall inside a multi-byte character
        let is_remote = ["http://", "https://"].iter().any(|scheme| {
            url.get(..scheme.len())
                .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
        });
        if !is_remote {
            return;
        }

        // Convert the external image to our cached URL
        // to protect users and speed up page loading
        let url_encoded: String = js_sys::encode_uri_component(url).into();
        // Also write this URL to whitelist
        // we don't care about if this write succeeds or not,
        // because even if it breaks we still can recover by a simple refresh
        let _ = store::put_str(&Self::url_to_cache_whitelist_key(url), "Y").await;
        // Now we can overwrite the tag URL
        *url = format!("{}{}", IMG_CACHE_PREFIX, url_encoded).into();
    }
}
// Only renders the content and spits out a cache object
// can be used to display the page or to write to cache
// Despite the signature, the rendering itself BLOCKS;
// async only comes from digesting via SubtleCrypto and from
// writing image URLs to the cache whitelist
pub async fn render(post: &Post) -> PostContentCache {
// TODO: enable some options; pre-process posts to enable
// inline image caching; also generate a summary (?)
// TODO: enable some options; also generate a summary (?)
// from first few paragraphs
let parser = Parser::new(&post.content);
// We have to first collect all events into a vector
// because we need to asynchronously transform the events
// which could not be done through mapping on iterators
let mut parser: Vec<Event> = Parser::new(&post.content).collect();
for ev in parser.iter_mut() {
match ev {
Event::Start(tag) | Event::End(tag) => {
Self::transform_tag(tag).await;
}
_ => ()
};
}
let mut html_output = String::new();
html::push_html(&mut html_output, parser);
html::push_html(&mut html_output, parser.into_iter());
PostContentCache {
uuid: post.uuid.clone(),
version: CACHE_VERSION.to_owned(),

View File

@ -11,8 +11,10 @@ mod blog;
mod sn;
use cfg_if::cfg_if;
use js_sys::{Promise};
use utils::*;
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::JsFuture;
use web_sys::*;
cfg_if! {
@ -38,10 +40,39 @@ lazy_static! {
// Assembles the router: `default_route` is the fallback handler,
// followed by the hello-world route, the image proxy route and
// whatever routes the `sn` module registers
fn build_routes() -> router::Router {
    let mut routes = router::Router::new(&default_route);

    routes.add_route("/hello", &hello_world);
    routes.add_route(blog::IMG_CACHE_PREFIX, &proxy_remote_image);
    sn::build_routes(&mut routes);

    routes
}
// Import of the JavaScript `fetch` function through wasm-bindgen.
// It takes a `Request` and hands back a JS `Promise`; the value the
// promise resolves to is converted into a `Response` by
// `proxy_remote_image` (presumably the runtime's global `fetch` --
// confirm against the worker environment)
#[wasm_bindgen]
extern "C" {
fn fetch(req: &Request) -> Promise;
}
// A caching proxy for images inserted into articles
// to protect user's privacy and accelerate page load
async fn proxy_remote_image(req: Request, url: Url) -> MyResult<Response> {
if req.method() != "GET" {
return Err(Error::BadRequest("Unsupported method".into()));
}
let path = url.pathname();
let remote_url: String = js_sys::decode_uri_component(
&path[blog::IMG_CACHE_PREFIX.len()..path.len()]
).internal_err()?.into();
if !blog::PostContentCache::is_external_url_whitelisted_for_cache(&remote_url).await {
return Err(Error::Unauthorized("This URL is not whitelisted".into()));
}
let new_req = Request::new_with_str_and_init(&remote_url,
RequestInit::new()
.method("GET")
.redirect(RequestRedirect::Follow)).internal_err()?;
Ok(JsFuture::from(fetch(&new_req)).await.internal_err()?.into())
}
async fn default_route(_req: Request, url: Url) -> MyResult<Response> {
// We assume that anything that falls into this catch-all handler
// would be either posts or 404