File sharing server for small files

main.rs 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. #[macro_use]
  2. extern crate serde_derive;
  3. #[macro_use]
  4. extern crate log;
  5. use crate::config::Config;
  6. use crate::well_known_mime::Mime;
  7. use chrono::{DateTime, Utc};
  8. use clappconfig::{anyhow, AppConfig};
  9. use parking_lot::Mutex;
  10. use rand::rngs::OsRng;
  11. use rand::Rng;
  12. use rouille::{Request, Response, ResponseBody};
  13. use std::borrow::Cow;
  14. use std::collections::HashMap;
  15. use std::fs::OpenOptions;
  16. use std::hash::{Hash, Hasher};
  17. use std::io::Read;
  18. use std::time::Duration;
  19. use siphasher::sip::SipHasher;
  20. mod config;
  21. mod well_known_mime;
/// Header to set expiry (seconds)
const HDR_EXPIRY: &str = "X-Expire";
/// Header to pass secret token for update/delete
const HDR_SECRET: &str = "X-Secret";
/// GET param to set expiry in seconds (as a substitute for the `X-Expire` header)
const GET_EXPIRY: &str = "expire";
/// GET param to pass secret token (as a substitute for the `X-Secret` header)
const GET_SECRET: &str = "secret";
/// Favicon bytes embedded at compile time; served for `/favicon.ico`
const FAVICON: &[u8] = include_bytes!("favicon.ico");
/// Post ID (represented as a 16-digit hex string)
type PostId = u64;
/// Write token (represented as a 16-digit hex string)
type Secret = u64;
/// Hash of a data record
type DataHash = u64;
/// Post stored in the repository.
///
/// Only metadata lives here; the payload bytes are kept in the repository's
/// data map and are looked up through `hash`.
#[derive(Debug, Serialize, Deserialize)]
struct Post {
    /// Content-Type
    mime: Mime,
    /// Data hash (key of the payload in the data map)
    hash: DataHash,
    /// Secret key for editing or deleting
    secret: Secret,
    /// Expiration timestamp (serialized as whole unix seconds)
    #[serde(with = "serde_chrono_datetime_as_unix")]
    expires: DateTime<Utc>,
}
  50. impl Post {
  51. /// Check if the post expired
  52. pub fn is_expired(&self) -> bool {
  53. self.expires < Utc::now()
  54. }
  55. /// Get remaining lifetime
  56. pub fn time_remains(&self) -> Duration {
  57. let seconds_remains = self.expires.signed_duration_since(Utc::now())
  58. .num_seconds();
  59. if seconds_remains < 0 {
  60. Duration::from_secs(0)
  61. } else {
  62. Duration::from_secs(seconds_remains as u64)
  63. }
  64. }
  65. }
  66. fn main() -> anyhow::Result<()> {
  67. let config = Config::init("postit", "postit.json", None)?;
  68. let serve_at = format!("{}:{}", config.host, config.port);
  69. let store = Mutex::new({
  70. let mut store = Repository::new(config);
  71. if store.config.persistence {
  72. if let Err(e) = store.load() {
  73. error!("Load failed: {}", e);
  74. }
  75. }
  76. store.gc_expired_posts();
  77. store
  78. });
  79. rouille::start_server(serve_at, move |req| {
  80. let mut store_w = store.lock();
  81. let method = req.method();
  82. info!("{} {}", method, req.raw_url());
  83. if req.url() == "/favicon.ico" {
  84. return decorate_response(Response::from_data("image/vnd.microsoft.icon", FAVICON));
  85. }
  86. store_w.gc_expired_posts_if_needed();
  87. let resp = match method {
  88. "POST" | "PUT" => store_w.serve_post_put(req),
  89. "GET" | "HEAD" => store_w.serve_get_head(req),
  90. "DELETE" => store_w.serve_delete(req),
  91. _ => rouille::Response::empty_400(),
  92. };
  93. if store_w.config.persistence {
  94. if let Err(e) = store_w.persist_if_needed() {
  95. error!("Store failed: {}", e);
  96. }
  97. }
  98. if resp.is_error() {
  99. warn!("Error resp: {}", resp.status_code);
  100. }
  101. decorate_response(resp)
  102. });
  103. }
  104. fn decorate_response(resp : Response) -> Response {
  105. resp.without_header("Server")
  106. .with_additional_header("Server", "postit.rs")
  107. .with_additional_header("Access-Control-Allow-Origin", "*")
  108. .with_additional_header("X-Version", env!("CARGO_PKG_VERSION"))
  109. }
/// Post metadata, keyed by post ID
type PostsMap = HashMap<PostId, Post>;
/// Deduplicated payloads, keyed by data hash; the value is (use_count, data)
type DataMap = HashMap<DataHash, (usize, Vec<u8>)>;
/// In-memory store of all posts and their data.
///
/// `posts`, `data` and `last_gc_time` are serialized when the repository is
/// persisted; `config` and `dirty` are skipped.
#[derive(Debug, Serialize, Deserialize)]
struct Repository {
    /// Application configuration (not persisted)
    #[serde(skip)]
    config: Config,
    /// Flag that the repository needs saving
    #[serde(skip)]
    dirty: bool,
    /// Stored posts
    posts: PostsMap,
    /// Post data - (use_count, data)
    data: DataMap,
    /// Time of last expired posts GC
    #[serde(with = "serde_chrono_datetime_as_unix")]
    last_gc_time: DateTime<Utc>,
}
  127. impl Repository {
  128. /// New instance
  129. fn new(config: Config) -> Self {
  130. Repository {
  131. config,
  132. dirty: false,
  133. posts: Default::default(),
  134. data: Default::default(),
  135. last_gc_time: Utc::now(),
  136. }
  137. }
  138. fn persist_if_needed(&mut self) -> anyhow::Result<()> {
  139. if self.dirty {
  140. self.persist()
  141. } else {
  142. Ok(())
  143. }
  144. }
  145. /// Store to a file
  146. fn persist(&mut self) -> anyhow::Result<()> {
  147. debug!("Persist to file: {}", self.config.persist_file);
  148. self.dirty = false;
  149. let file = OpenOptions::new()
  150. .truncate(true)
  151. .write(true)
  152. .create(true)
  153. .open(&self.config.persist_file)?;
  154. if self.config.compression {
  155. let flate = flate2::write::DeflateEncoder::new(file, flate2::Compression::best());
  156. bincode::serialize_into(flate, self)?;
  157. } else {
  158. bincode::serialize_into(file, self)?;
  159. }
  160. Ok(())
  161. }
  162. /// Load from a file
  163. fn load(&mut self) -> anyhow::Result<()> {
  164. debug!("Load from file: {}", self.config.persist_file);
  165. let file = OpenOptions::new()
  166. .read(true)
  167. .open(&self.config.persist_file)?;
  168. let result: Repository = if self.config.compression {
  169. let flate = flate2::read::DeflateDecoder::new(file);
  170. bincode::deserialize_from(flate)?
  171. } else {
  172. bincode::deserialize_from(file)?
  173. };
  174. let old_config = self.config.clone();
  175. std::mem::replace(self, result);
  176. self.config = old_config;
  177. self.dirty = false;
  178. Ok(())
  179. }
  180. /// Serve a DELETE request
  181. fn serve_delete(&mut self, req: &Request) -> Response {
  182. let post_id = match self.request_to_post_id(req, true) {
  183. Ok(Some(pid)) => pid,
  184. Ok(None) => return error_with_text(400, "File ID required."),
  185. Err(resp) => return resp,
  186. };
  187. self.delete_post(post_id);
  188. Response::text("Deleted.")
  189. }
  190. /// Serve a POST or PUT request
  191. ///
  192. /// POST inserts a new record
  193. /// PUT updates a record
  194. fn serve_post_put(&mut self, req: &Request) -> Response {
  195. let is_post = req.method() == "POST";
  196. let is_put = req.method() == "PUT";
  197. // Post ID is empty for POST, set for PUT
  198. let post_id = match self.request_to_post_id(req, true) {
  199. Ok(pid) => {
  200. if is_put && pid.is_none() {
  201. warn!("PUT without ID!");
  202. return error_with_text(400, "PUT requires a file ID!");
  203. } else if is_post && pid.is_some() {
  204. warn!("POST with ID!");
  205. return error_with_text(400, "Use PUT to update a file!");
  206. }
  207. pid
  208. }
  209. Err(resp) => return resp,
  210. };
  211. debug!("Submit new data, post ID: {:?}", post_id);
  212. let mut data = vec![];
  213. if let Some(body) = req.data() {
  214. // Read up to 1 byte past the limit to catch too large uploads.
  215. // We can't reply on the "Length" field, which is not present with chunked encoding.
  216. body.take(self.config.max_file_size as u64 + 1)
  217. .read_to_end(&mut data)
  218. .unwrap();
  219. if is_post && data.len() == 0 {
  220. warn!("Empty body!");
  221. return error_with_text(400, "Empty body!");
  222. } else if data.len() > self.config.max_file_size {
  223. warn!("Upload too large!");
  224. return empty_error(413);
  225. }
  226. } else {
  227. // Should not be possible
  228. panic!("Req data None!");
  229. }
  230. // Convert "application/x-www-form-urlencoded" to text/plain (CURL uses this)
  231. // NOTE: rouille does NOT parse urlencoded, we will serve the encoded format back if really used.
  232. let mime = match req.header("Content-Type") {
  233. None => None,
  234. Some("application/x-www-form-urlencoded") => None,
  235. Some(v) => Some(v),
  236. };
  237. let expiry = req.get_param(GET_EXPIRY);
  238. let mut expiry_s = expiry.as_ref().map(|s| s.as_str());
  239. if expiry_s.is_none() {
  240. expiry_s = req.header(HDR_EXPIRY);
  241. }
  242. let expiry = match expiry_s {
  243. Some(text) => match text.parse() {
  244. Ok(v) => {
  245. let dur = Duration::from_secs(v);
  246. if dur > self.config.max_expiry {
  247. return error_with_text(
  248. 400,
  249. format!(
  250. "Expiration time {} out of allowed range 0-{} s",
  251. v,
  252. self.config.max_expiry.as_secs()
  253. ),
  254. );
  255. }
  256. Some(dur)
  257. }
  258. Err(_) => {
  259. return error_with_text(
  260. 400,
  261. "Malformed expiration, use relative time in seconds.",
  262. );
  263. }
  264. },
  265. None => None,
  266. };
  267. let the_id;
  268. let resp = if let Some(id) = post_id {
  269. // UPDATE
  270. self.update(id, data, mime, expiry);
  271. the_id = id;
  272. Response::text("Updated OK.")
  273. } else {
  274. // INSERT
  275. the_id = self.insert(data, mime, expiry.unwrap_or(self.config.default_expiry));
  276. Response::text(format!("{:016x}", the_id))
  277. };
  278. let post = self.posts.get(&the_id).unwrap();
  279. resp
  280. .with_additional_header(HDR_SECRET, format!("{:016x}", post.secret))
  281. .with_additional_header(HDR_EXPIRY, post.time_remains().as_secs().to_string())
  282. }
  283. /// Serve a GET or HEAD request
  284. fn serve_get_head(&mut self, req: &Request) -> Response {
  285. let post_id = match self.request_to_post_id(req, false) {
  286. Ok(Some(pid)) => pid,
  287. Ok(None) => return error_with_text(400, "File ID required."),
  288. Err(resp) => return resp,
  289. };
  290. if let Some(post) = self.posts.get(&post_id) {
  291. if post.is_expired() {
  292. warn!("GET of expired post!");
  293. Response::empty_404()
  294. } else {
  295. let data = self.data.get(&post.hash);
  296. if data.is_none() {
  297. error!("No matching data!");
  298. return error_with_text(500, "File data lost.");
  299. }
  300. let seconds_remains = post.expires.signed_duration_since(Utc::now())
  301. .num_seconds();
  302. Response {
  303. status_code: 200,
  304. headers: vec![
  305. (
  306. "Content-Type".into(),
  307. format!("{}; charset=utf8", post.mime).into(),
  308. ),
  309. (
  310. "Cache-Control".into(),
  311. format!("public, max-age={}", seconds_remains).into()
  312. ),
  313. (
  314. HDR_EXPIRY.into(),
  315. seconds_remains.to_string().into()
  316. )
  317. ],
  318. data: if req.method() == "HEAD" {
  319. ResponseBody::empty()
  320. } else {
  321. ResponseBody::from_data(data.unwrap().1.clone())
  322. },
  323. upgrade: None,
  324. }
  325. }
  326. } else {
  327. warn!("No such post!");
  328. Response::empty_404()
  329. }
  330. }
  331. /// Extract post ID from a request.
  332. ///
  333. /// if `check_secret` is true, ensure a `X-Secret` header contains a valid write token
  334. /// for the post ID.
  335. fn request_to_post_id(
  336. &self,
  337. req: &Request,
  338. check_secret: bool,
  339. ) -> Result<Option<PostId>, Response> {
  340. let url = req.url();
  341. let stripped = url.trim_matches('/');
  342. if stripped.is_empty() {
  343. debug!("No ID given");
  344. return Ok(None);
  345. }
  346. if stripped.len() != 16 {
  347. warn!("Bad ID len!");
  348. return Err(Response::empty_404());
  349. }
  350. let id = match u64::from_str_radix(stripped, 16) {
  351. Ok(bytes) => bytes,
  352. Err(_) => {
  353. warn!("ID parsing error: {}", stripped);
  354. return Err(Response::empty_404());
  355. }
  356. };
  357. if check_secret {
  358. // Check the write token
  359. match self.posts.get(&id) {
  360. None => {
  361. warn!("ID {} does not exist!", id);
  362. return Err(error_with_text(404, "No file with this ID!"));
  363. }
  364. Some(post) => {
  365. if post.is_expired() {
  366. warn!("Access of expired file {}!", id);
  367. return Err(error_with_text(404, "No file with this ID!"));
  368. }
  369. let secret = req.get_param(GET_SECRET);
  370. let mut secret_str = secret.as_ref().map(|s| s.as_str());
  371. if secret_str.is_none() {
  372. secret_str = req.header(HDR_SECRET);
  373. }
  374. let secret: u64 =
  375. match secret_str.map(|v| u64::from_str_radix(v, 16)) {
  376. Some(Ok(bytes)) => bytes,
  377. None => {
  378. warn!("Missing secret token!");
  379. return Err(error_with_text(400, "Secret token required!"));
  380. }
  381. Some(Err(e)) => {
  382. warn!("Token parse error: {:?}", e);
  383. return Err(error_with_text(400, "Bad secret token format!"));
  384. }
  385. };
  386. if post.secret != secret {
  387. warn!("Secret token mismatch");
  388. return Err(error_with_text(401, "Invalid secret token!"));
  389. }
  390. }
  391. }
  392. }
  393. // secret is now validated and we got an ID
  394. Ok(Some(id))
  395. }
  396. /// Drop expired posts, if cleaning is due
  397. fn gc_expired_posts_if_needed(&mut self) {
  398. if Utc::now()
  399. .signed_duration_since(self.last_gc_time)
  400. .to_std()
  401. .unwrap_or_default()
  402. > self.config.expired_gc_interval
  403. {
  404. self.gc_expired_posts();
  405. self.last_gc_time = Utc::now();
  406. }
  407. }
  408. /// Drop expired posts
  409. fn gc_expired_posts(&mut self) {
  410. debug!("GC expired uploads");
  411. let mut to_rm = vec![];
  412. for post in &self.posts {
  413. if post.1.is_expired() {
  414. to_rm.push(*post.0);
  415. }
  416. }
  417. if !to_rm.is_empty() {
  418. self.dirty = true;
  419. }
  420. for id in to_rm {
  421. debug!("Drop post ID {:016x}", id);
  422. if let Some(post) = self.posts.remove(&id) {
  423. Self::drop_data_or_decrement_rc(&mut self.data, post.hash);
  424. }
  425. }
  426. }
  427. /// Get hash of a byte vector (for deduplication)
  428. fn hash_data(data: &Vec<u8>) -> DataHash {
  429. let mut hasher = SipHasher::new();
  430. data.hash(&mut hasher);
  431. hasher.finish()
  432. }
  433. /// Store a data buffer under a given hash.
  434. /// If the buffer is already present in the repository, increment its use count.
  435. fn store_data_or_increment_rc(data_map: &mut DataMap, hash: u64, data: Vec<u8>) {
  436. match data_map.get_mut(&hash) {
  437. None => {
  438. debug!("Store new data hash #{:016x}", hash);
  439. data_map.insert(hash, (1, data));
  440. }
  441. Some(entry) => {
  442. debug!("Link new use of data hash #{:016x}", hash);
  443. entry.0 += 1; // increment use counter
  444. }
  445. }
  446. }
  447. /// Drop a data record with the given hash, or decrement its use count if there are other uses
  448. fn drop_data_or_decrement_rc(data_map: &mut DataMap, hash: u64) {
  449. if let Some(old_data) = data_map.get_mut(&hash) {
  450. if old_data.0 > 1 {
  451. old_data.0 -= 1;
  452. debug!(
  453. "Unlink use of data hash #{:016x} ({} remain)",
  454. hash, old_data.0
  455. );
  456. } else {
  457. debug!("Drop data hash #{:016x}", hash);
  458. data_map.remove(&hash);
  459. }
  460. }
  461. }
  462. /// Insert a post
  463. fn insert(&mut self, data: Vec<u8>, mime: Option<&str>, expires: Duration) -> PostId {
  464. info!(
  465. "Insert post with data of len {} bytes, mime {}, expiry {:?}",
  466. data.len(),
  467. mime.unwrap_or("unspecified"),
  468. expires
  469. );
  470. let hash = Self::hash_data(&data);
  471. let mime = match mime {
  472. None => Mime::from(tree_magic::from_u8(&data)),
  473. Some(explicit) => Mime::from(explicit),
  474. };
  475. Self::store_data_or_increment_rc(&mut self.data, hash, data);
  476. let post_id = loop {
  477. let id = OsRng.gen();
  478. if !self.posts.contains_key(&id) {
  479. break id;
  480. }
  481. };
  482. let secret = OsRng.gen();
  483. debug!("File ID = #{:016x} (http://{}:{}/{:016x})", post_id, self.config.host, self.config.port, post_id);
  484. debug!("Data hash = #{:016x}, mime {}", hash, mime);
  485. debug!("Secret = #{:016x}", secret);
  486. self.posts.insert(
  487. post_id,
  488. Post {
  489. mime,
  490. hash,
  491. secret,
  492. expires: Utc::now() + chrono::Duration::from_std(expires).unwrap(), // this is safe unless mis-configured
  493. },
  494. );
  495. self.dirty = true;
  496. post_id
  497. }
  498. /// Update a post by ID
  499. fn update(&mut self, id: PostId, data: Vec<u8>, mime: Option<&str>, expires: Option<Duration>) {
  500. info!(
  501. "Update post id #{:016x} with data of len {} bytes, mime {}, expiry {}",
  502. id,
  503. data.len(),
  504. mime.unwrap_or("unchanged"),
  505. expires
  506. .map(|v| Cow::Owned(format!("{:?}", v)))
  507. .unwrap_or("unchanged".into())
  508. );
  509. let post = self.posts.get_mut(&id).unwrap(); // post existence was checked before
  510. if !data.is_empty() {
  511. let hash = Self::hash_data(&data);
  512. if hash != post.hash {
  513. debug!("Data hash = #{:016x} (content changed)", hash);
  514. Self::drop_data_or_decrement_rc(&mut self.data, post.hash);
  515. Self::store_data_or_increment_rc(&mut self.data, hash, data);
  516. post.hash = hash;
  517. self.dirty = true;
  518. } else {
  519. debug!("Data hash = #{:016x} (no change)", hash);
  520. }
  521. }
  522. if let Some(mime) = mime {
  523. let new_mime = Mime::from(mime);
  524. if post.mime != new_mime {
  525. debug!("Content type changed to {}", mime);
  526. post.mime = new_mime;
  527. self.dirty = true;
  528. }
  529. }
  530. if let Some(exp) = expires {
  531. debug!("Expiration changed to {:?} from now", exp);
  532. post.expires = Utc::now() + chrono::Duration::from_std(exp).unwrap(); // this is safe unless mis-configured;
  533. self.dirty = true;
  534. }
  535. }
  536. /// Delete a post by ID
  537. fn delete_post(&mut self, id: PostId) {
  538. info!("Delete post id #{:016x}", id);
  539. let post = self.posts.remove(&id).unwrap(); // post existence was checked before
  540. Self::drop_data_or_decrement_rc(&mut self.data, post.hash);
  541. self.dirty = true;
  542. }
  543. }
  544. /// Serialize chrono unix timestamp as seconds
  545. mod serde_chrono_datetime_as_unix {
  546. use chrono::{DateTime, NaiveDateTime, Utc};
  547. use serde::{self, Deserialize, Deserializer, Serializer};
  548. pub fn serialize<S>(value: &DateTime<Utc>, se: S) -> Result<S::Ok, S::Error>
  549. where
  550. S: Serializer,
  551. {
  552. se.serialize_i64(value.naive_utc().timestamp())
  553. }
  554. pub fn deserialize<'de, D>(de: D) -> Result<DateTime<Utc>, D::Error>
  555. where
  556. D: Deserializer<'de>,
  557. {
  558. let ts: i64 = i64::deserialize(de)?;
  559. Ok(DateTime::from_utc(
  560. NaiveDateTime::from_timestamp(ts, 0),
  561. Utc,
  562. ))
  563. }
  564. }
  565. fn error_with_text(code: u16, text: impl Into<String>) -> Response {
  566. Response {
  567. status_code: code,
  568. headers: vec![("Content-Type".into(), "text/plain; charset=utf8".into())],
  569. data: rouille::ResponseBody::from_string(text),
  570. upgrade: None,
  571. }
  572. }
  573. fn empty_error(code: u16) -> Response {
  574. Response {
  575. status_code: code,
  576. headers: vec![("Content-Type".into(), "text/plain; charset=utf8".into())],
  577. data: rouille::ResponseBody::empty(),
  578. upgrade: None,
  579. }
  580. }