#[macro_use] extern crate serde_derive; #[macro_use] extern crate log; #[macro_use] extern crate num_derive; use crate::config::Config; use clappconfig::{anyhow, AppConfig}; use parking_lot::Mutex; use rand::rngs::OsRng; use rand::Rng; use rouille::{Request, Response, ResponseBody}; use std::borrow::Cow; use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::io::Read; use std::time::Duration; use chrono::{Utc, DateTime}; use std::fs::OpenOptions; use std::fmt::Display; use serde::export::Formatter; use std::fmt; use serde::{Serialize, Serializer, Deserialize, Deserializer}; use serde::de::{DeserializeOwned, Visitor}; use crate::well_known_mime::Mime; mod config; mod well_known_mime; const HDR_EXPIRES : &str = "X-Expires"; const HDR_SECRET : &str = "X-Secret"; const FAVICON : &[u8] = include_bytes!("favicon.ico"); /// Post ID (represented as a 16-digit hex string) type PostId = u64; /// Write token (represented as a 16-digit hex string) type Secret = u64; /// Hash of a data record type DataHash = u64; /// Post stored in the repository #[derive(Debug,Serialize,Deserialize)] struct Post { /// Content-Type mime: Mime, /// Data hash hash: DataHash, /// Secret key for editing or deleting secret: Secret, /// Expiration timestamp #[serde(with = "serde_chrono_datetime_as_unix")] expires: DateTime, } impl Post { /// Check if the post expired pub fn is_expired(&self) -> bool { self.expires < Utc::now() } } fn main() -> anyhow::Result<()> { let config = Config::init("postit", "postit.json", None)?; let serve_at = format!("{}:{}", config.host, config.port); let store = Mutex::new({ let mut store = Repository::new(config); if store.config.persistence { if let Err(e) = store.load() { error!("Load failed: {}", e); } } store }); rouille::start_server(serve_at, move |req| { let mut store_w = store.lock(); let method = req.method(); info!("{} {}", method, req.raw_url()); if req.url() == "/favicon.ico" { return Response::from_data("image/vnd.microsoft.icon", FAVICON); } store_w.gc_expired_posts_if_needed(); let resp = match method { "POST" | "PUT" => store_w.serve_post_put(req), "GET" | "HEAD" => store_w.serve_get_head(req), "DELETE" => store_w.serve_delete(req), _ => rouille::Response::empty_400(), }; if store_w.config.persistence { if let Err(e) = store_w.persist_if_needed() { error!("Store failed: {}", e); } } if resp.is_error() { warn!("Error resp: {}", resp.status_code); } resp }); } type PostsMap = HashMap; type DataMap = HashMap)>; #[derive(Debug,Serialize,Deserialize)] struct Repository { #[serde(skip)] config: Config, /// Flag that the repository needs saving #[serde(skip)] dirty: bool, /// Stored posts posts: PostsMap, /// Post data - (use_count, data) data: DataMap, /// Time of last expired posts GC #[serde(with = "serde_chrono_datetime_as_unix")] last_gc_time: DateTime, } impl Repository { /// New instance fn new(config: Config) -> Self { Repository { config, dirty: false, posts: Default::default(), data: Default::default(), last_gc_time: Utc::now(), } } fn persist_if_needed(&mut self) -> anyhow::Result<()> { if self.dirty { self.persist() } else { Ok(()) } } /// Store to a file fn persist(&mut self) -> anyhow::Result<()> { debug!("Persist to file: {}", self.config.persist_file); self.dirty = false; let file = OpenOptions::new() .truncate(true) .write(true) .create(true) .open(&self.config.persist_file)?; if self.config.compression { let flate = flate2::write::DeflateEncoder::new(file, flate2::Compression::best()); bincode::serialize_into(flate, self)?; } else { bincode::serialize_into(file, self)?; } Ok(()) } /// Load from a file fn load(&mut self) -> anyhow::Result<()> { debug!("Load from file: {}", self.config.persist_file); let file = OpenOptions::new() .read(true) .open(&self.config.persist_file)?; let result : Repository = if self.config.compression { let flate = flate2::read::DeflateDecoder::new(file); bincode::deserialize_from(flate)? } else { bincode::deserialize_from(file)? }; let old_config = self.config.clone(); std::mem::replace(self, result); self.config = old_config; self.dirty = false; Ok(()) } /// Serve a DELETE request fn serve_delete(&mut self, req: &Request) -> Response { let post_id = match self.request_to_post_id(req, true) { Ok(Some(pid)) => pid, Ok(None) => return error_with_text(400, "Post ID required."), Err(resp) => return resp, }; self.delete_post(post_id); Response::text("Deleted.") } /// Serve a POST or PUT request /// /// POST inserts a new record /// PUT updates a record fn serve_post_put(&mut self, req: &Request) -> Response { // Post ID is empty for POST, set for PUT let post_id = match self.request_to_post_id(req, true) { Ok(pid) => { if req.method() == "PUT" && pid.is_none() { warn!("PUT without ID!"); return error_with_text(400, "PUT requires a file ID!"); } else if req.method() == "POST" && pid.is_some() { warn!("POST with ID!"); return error_with_text(400, "Use PUT to update a file!"); } pid } Err(resp) => return resp, }; debug!("Submit new data, post ID: {:?}", post_id); let mut data = vec![]; if let Some(body) = req.data() { // Read up to 1 byte past the limit to catch too large uploads. // We can't reply on the "Length" field, which is not present with chunked encoding. body.take(self.config.max_file_size as u64 + 1) .read_to_end(&mut data) .unwrap(); if data.len() > self.config.max_file_size { return empty_error(413); } } else { return error_with_text(400, "Empty body!"); } // Convert "application/x-www-form-urlencoded" to text/plain (CURL uses this) // NOTE: rouille does NOT parse urlencoded, we will serve the encoded format back if really used. let mime = match req.header("Content-Type") { None => None, Some("application/x-www-form-urlencoded") => None, Some(v) => Some(v), }; let expiry = match req.header(HDR_EXPIRES) { Some(text) => match text.parse() { Ok(v) => { let dur = Duration::from_secs(v); if dur > self.config.max_expiry { return error_with_text( 400, format!( "Expiration time {} out of allowed range 0-{} s", v, self.config.max_expiry.as_secs() ), ); } Some(dur) } Err(_) => { return error_with_text( 400, "Malformed \"X-Expires\", use relative time in seconds.", ); } }, None => None, }; if let Some(id) = post_id { // UPDATE self.update(id, data, mime, expiry); Response::text("Updated OK.") } else { // INSERT let (id, token) = self.insert(data, mime, expiry.unwrap_or(self.config.default_expiry)); Response::text(format!("{:016x}", id)) .with_additional_header("X-Secret", format!("{:016x}", token)) } } /// Serve a GET or HEAD request fn serve_get_head(&mut self, req: &Request) -> Response { let post_id = match self.request_to_post_id(req, false) { Ok(Some(pid)) => pid, Ok(None) => return error_with_text(400, "Post ID required."), Err(resp) => return resp, }; if let Some(post) = self.posts.get(&post_id) { if post.is_expired() { warn!("GET of expired post!"); Response::empty_404() } else { let data = self.data.get(&post.hash); if data.is_none() { error!("No matching data!"); return error_with_text(500, "File data lost."); } Response { status_code: 200, headers: vec![( "Content-Type".into(), format!("{}; charset=utf8", post.mime).into(), )], data: if req.method() == "HEAD" { ResponseBody::empty() } else { ResponseBody::from_data(data.unwrap().1.clone()) }, upgrade: None, } } } else { warn!("No such post!"); Response::empty_404() } } /// Extract post ID from a request. /// /// if `check_secret` is true, ensure a `X-Secret` header contains a valid write token /// for the post ID. fn request_to_post_id( &self, req: &Request, check_secret: bool, ) -> Result, Response> { let url = req.url(); let stripped = url.trim_matches('/'); if stripped.is_empty() { debug!("No ID given"); return Ok(None); } if stripped.len() != 16 { warn!("Bad ID len!"); return Err(Response::empty_404()); } let id = match u64::from_str_radix(stripped, 16) { Ok(bytes) => bytes, Err(_) => { warn!("ID parsing error: {}", stripped); return Err(Response::empty_404()); } }; if check_secret { // Check the write token match self.posts.get(&id) { None => { warn!("ID {} does not exist!", id); return Err(error_with_text(404, "No file with this ID!")); }, Some(post) => { if post.is_expired() { warn!("Access of expired file {}!", id); return Err(error_with_text(404, "No file with this ID!")); } let secret: u64 = match req.header(HDR_SECRET).map(|v| u64::from_str_radix(v, 16)) { Some(Ok(bytes)) => bytes, None => { warn!("Missing secret token!"); return Err(error_with_text(400, "Secret token required!")); } Some(Err(e)) => { warn!("Token parse error: {:?}", e); return Err(error_with_text(400, "Bad secret token format!")); }, }; if post.secret != secret { warn!("Secret token mismatch"); return Err(error_with_text(401, "Invalid secret token!")); } }, } } // secret is now validated and we got an ID Ok(Some(id)) } /// Drop expired posts, if cleaning is due fn gc_expired_posts_if_needed(&mut self) { if Utc::now().signed_duration_since(self.last_gc_time).to_std().unwrap_or_default() > self.config.expired_gc_interval { self.gc_expired_posts(); self.last_gc_time = Utc::now(); } } /// Drop expired posts fn gc_expired_posts(&mut self) { debug!("GC expired uploads"); let mut to_rm = vec![]; for post in &self.posts { if post.1.is_expired() { to_rm.push(*post.0); } } if !to_rm.is_empty() { self.dirty = true; } for id in to_rm { debug!("Drop post ID {:016x}", id); if let Some(post) = self.posts.remove(&id) { Self::drop_data_or_decrement_rc(&mut self.data, post.hash); } } } /// Get hash of a byte vector (for deduplication) fn hash_data(data: &Vec) -> DataHash { let mut hasher = siphasher::sip::SipHasher::new(); data.hash(&mut hasher); hasher.finish() } /// Store a data buffer under a given hash. /// If the buffer is already present in the repository, increment its use count. fn store_data_or_increment_rc(data_map: &mut DataMap, hash: u64, data: Vec) { match data_map.get_mut(&hash) { None => { debug!("Store new data hash #{:016x}", hash); data_map.insert(hash, (1, data)); } Some(entry) => { debug!("Link new use of data hash #{:016x}", hash); entry.0 += 1; // increment use counter } } } /// Drop a data record with the given hash, or decrement its use count if there are other uses fn drop_data_or_decrement_rc(data_map: &mut DataMap, hash: u64) { if let Some(old_data) = data_map.get_mut(&hash) { if old_data.0 > 1 { old_data.0 -= 1; debug!( "Unlink use of data hash #{:016x} ({} remain)", hash, old_data.0 ); } else { debug!("Drop data hash #{:016x}", hash); data_map.remove(&hash); } } } /// Insert a post fn insert(&mut self, data: Vec, mime: Option<&str>, expires: Duration) -> (PostId, Secret) { info!( "Insert post with data of len {} bytes, mime {}, expiry {:?}", data.len(), mime.unwrap_or("unspecified"), expires ); let hash = Self::hash_data(&data); let mime = match mime { None => { Mime::from(tree_magic::from_u8(&data)) }, Some(explicit) => { Mime::from(explicit) }, }; Self::store_data_or_increment_rc(&mut self.data, hash, data); let post_id = loop { let id = OsRng.gen(); if !self.posts.contains_key(&id) { break id; } }; let secret = OsRng.gen(); debug!("Post ID = #{:016x}", post_id); debug!("Data hash = #{:016x}, mime {}", hash, mime); debug!("Secret = #{:016x}", secret); self.posts.insert( post_id, Post { mime, hash, secret, expires: Utc::now() + chrono::Duration::from_std(expires).unwrap(), // this is safe unless mis-configured }, ); self.dirty = true; (post_id, secret) } /// Update a post by ID fn update(&mut self, id: PostId, data: Vec, mime: Option<&str>, expires: Option) { info!( "Update post id #{:016x} with data of len {} bytes, mime {}, expiry {}", id, data.len(), mime.unwrap_or("unchanged"), expires .map(|v| Cow::Owned(format!("{:?}", v))) .unwrap_or("unchanged".into()) ); let hash = Self::hash_data(&data); let post = self.posts.get_mut(&id).unwrap(); // post existence was checked before if hash != post.hash { debug!("Data hash = #{:016x} (content changed)", hash); Self::drop_data_or_decrement_rc(&mut self.data, post.hash); Self::store_data_or_increment_rc(&mut self.data, hash, data); post.hash = hash; self.dirty = true; } else { debug!("Data hash = #{:016x} (no change)", hash); } if let Some(mime) = mime { let new_mime = Mime::from(mime); if post.mime != new_mime { debug!("Content type changed to {}", mime); post.mime = new_mime; self.dirty = true; } } if let Some(exp) = expires { debug!("Expiration changed to {:?} from now", exp); post.expires = Utc::now() + chrono::Duration::from_std(exp).unwrap(); // this is safe unless mis-configured; self.dirty = true; } } /// Delete a post by ID fn delete_post(&mut self, id: PostId) { info!("Delete post id #{:016x}", id); let post = self.posts.remove(&id).unwrap(); // post existence was checked before Self::drop_data_or_decrement_rc(&mut self.data, post.hash); self.dirty = true; } } /// Serialize chrono unix timestamp as seconds mod serde_chrono_datetime_as_unix { use serde::{self, Deserialize, Deserializer, Serializer}; use chrono::{DateTime, Utc, NaiveDateTime}; pub fn serialize(value: &DateTime, se: S) -> Result where S: Serializer, { se.serialize_i64(value.naive_utc().timestamp()) } pub fn deserialize<'de, D>(de: D) -> Result, D::Error> where D: Deserializer<'de>, { let ts: i64 = i64::deserialize(de)?; Ok(DateTime::from_utc(NaiveDateTime::from_timestamp(ts, 0), Utc)) } } fn error_with_text(code: u16, text: impl Into) -> Response { Response { status_code: code, headers: vec![("Content-Type".into(), "text/plain; charset=utf8".into())], data: rouille::ResponseBody::from_string(text), upgrade: None, } } fn empty_error(code: u16) -> Response { Response { status_code: code, headers: vec![("Content-Type".into(), "text/plain; charset=utf8".into())], data: rouille::ResponseBody::empty(), upgrade: None, } }