From f5059c8510bdcf5f729eca86068bea5b75cd3d27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Wed, 25 Mar 2020 19:29:24 +0100 Subject: [PATCH] v1 many fixes, new api --- Cargo.toml | 4 +- README.md | 142 ++++--- src/lib.rs | 1048 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 730 insertions(+), 464 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3a555f4..6058f0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json_dotpath" -version = "0.1.2" +version = "1.0.0" authors = ["Ondřej Hruška "] edition = "2018" license = "MIT" @@ -16,3 +16,5 @@ categories = [ serde = "1" serde_derive = "1" serde_json = "1" +failure = "0.1.7" +failure_derive = "0.1.7" diff --git a/README.md b/README.md index 6d1bc68..b41af02 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,23 @@ Access members of nested JSON arrays and objects using "dotted paths". +## Changes + +### 1.0.0 + +The API changed to return `Result>` instead of panicking internally on error. +The library is now much safer to use. + +Further, all logic has been adjusted to be more robust and consistent. + +Array append and prepend operators now use `<<` and `>>` instead of overloading `<` and `>`, +which now work the same way in all array accesses (getting the first and last element). + +## Dotted path + +Dotted path represents a path from the root of a JSON object to one of its nodes. +Such path is represented by joining the object and array keys by dots: + Consider this example JSON: ```json @@ -14,90 +31,97 @@ Consider this example JSON: } ``` -The following can be used to access its parts: -- `obj.dot_get("fruit")` ... get the fruits array -- `obj.dot_get("fruit.0.name")` ... 0th fruit name, "lemon" -- `obj.dot_get("fruit.>.color")` ... last fruit's color, "red" +The following paths represent its parts: -The JSON can also be manipulated: +- `""` ... the whole object +- `"fruit"` ... the fruits array +- `"fruit.0"` ... 
the first fruit object, `{"name": "lemon", "color": "yellow"}` +- `"fruit.1.name"` ... the second (index is 0-based) fruit's name, `"apple"` -- `obj.dot_take("fruit.1")` ... extract the "apple" object, removing it from the JSON -- `obj.dot_set("fruit.<1", json!({"name":"plum","color":"blue"})` ... insert before the 1st element, shifting the rest -- `obj.dot_set("fruit.>1", json!({"name":"plum","color":"blue"})` ... insert after the 1st element, shifting the rest -- `obj.dot_set("fruit.>.name", "tangerine")` ... set the last fruit's name -- `obj.dot_set("fruit.>", Value::Null)` ... append a JSON null -- `obj.dot_set("fruit.<", true)` ... prepend a JSON true -- `obj.dot_set("vegetables.onion.>", "aaa")` ... add `"vegetables": {"onion":["aaa"]}` to the outer object - (the parent map and array are created automatically) +Special patterns may be used for object manipulation as well (see below). -Any serializable type or `serde_json::Value` can be stored to or retrieved from -the nested object (`Value::Object`, `Value::Array`, `Value::Null`). - -Any value stored in the object can also be modified in place, without deserialization, -by getting a mutable reference (`dot_get_mut(path)`). +## Object operations -This crate is useful for tasks such as working with dynamic JSON API payloads, -parsing config files, or building a polymorphic data store. +Five principal methods are added by the `DotPaths` trait to `serde_json::Value`, +`Vec` and `serde_json::Map` (the inner structs of `Value::Object` and `Value::Array`). 
-## Supported Operations +- `dot_get(path)` - get a value by path +- `dot_get_mut(path)` - get a mutable reference to an element of the JSON object +- `dot_set(path, value)` - set a new value, dropping the original (if any) +- `dot_replace(path, value)` - set a new value, returning the original (if any) +- `dot_take(path)` - remove a value by path, returning it (if any) -### Object and Array -- Set (dropping the original value, if any) -- Remove (remove and drop a value) -- Take (remove a value and deserialize it) -- Replace (take and set) -- Get (find & deserialize) -- Get a mutable reference to a Value +`dot_set()` supports array manipulation syntax not found in the other methods, namely the +`>n` and `` (aliased to `json_dotpath::Result`), +either as `json_dotpath::Result<()>`, or `json_dotpath::Result<Option<T>>` when a value is expected. -Examples: +These results should be handled carefully, as they report structural errors (meaning the requested operation +could not be performed), or that the path given was invalid. -- `abc` -- `_123` -- `key with spaces` +### Dynamic object building -If a numeric key or a key nonconforming in other way must be used, prefix it with `#`. -It will be taken literally as a string, excluding the prefix. +When a path does not exist but could (e.g. an appended array element, a new object key), and one of the assignment +methods or `dot_get_mut()` is used, this element will be created automatically, including its parent elements as needed. -e.g. to get 456 from `{"foo":{"123":456}}`, use `foo.#123` instead of `foo.123` +This is well illustrated in one of the unit tests: + +```rust +let mut obj = Value::Null; +let _ = obj.dot_get_mut("foo.0").unwrap(); // get mut, here only for side effects +assert_eq!(json!({"foo": [null]}), obj); +``` + +Null can flexibly become an array or object in such situations (see "Special handling of Null" below). + +## Dotted Path Syntax + +Path is simply a sequence of path segments joined by periods (`.`). 
+ +Some symbols are ascribed special meaning by the library, depending on the method they're used in. +All symbols (including `.`) may be escaped using a backslash if their literal value is needed as part of the path. ### Array Patterns Array keys must be numeric (integer), or one of the special patterns listed below. -- `-` ... prepend -- `<` ... prepend (or get first) -- `+` ... append -- `>` ... append (or get last) +- `<` ... first element +- `>` ... last element +- `-` or `<<` ... prepend +- `+` or `>>` ... append - `<n`, e.g. `<5` ... insert before the n-th element - `>n`, e.g. `>5` ... insert after the n-th element -### Path Examples - -- Empty path ... access the root element -- `5` ... get the element `"five"` from `[0,1,2,3,4,"five"]` -- `a.b.c` ... get `1` from `{ "a": { "b": { "c": 1 } } }` -- `a.0.x` ... get `1` from `{ "a": [ { "x": 1 } ] }` - It's possible to create nested arrays or objects by setting a non-existent path, provided the key syntax rules are maintained. See unit tests for more examples. + +### Special handling of Null + +JSON null in an object can transparently become an array or object by setting its members (even nested), +as if it was an empty array or object. Whether it should become an array or object depends on the key used to index into it. + +- numeric key turns null into an array (only `0` and the special array operators are allowed, + as other numbers are out of range for an empty array) +- any other key turns it into a map +- any key starting with an escape creates a map as well (e.g. `\0.aaa` turns `null` into `{"0": {"aaa": …} }` ) + +JSON null is considered an empty value and is transformed into `Ok(None)` when retrieved, as it cannot be deserialized. + +Setting a value to `Value::Null` works as expected and places a JSON null in the object. 
diff --git a/src/lib.rs b/src/lib.rs index 177798f..8f30821 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,550 +1,731 @@ use serde::de::DeserializeOwned; use serde::Serialize; use serde_json::{Map, Value}; -use std::cmp::Ordering; use std::mem; #[cfg(test)] -#[macro_use] extern crate serde_derive; +#[macro_use] +extern crate serde_derive; +#[macro_use] +extern crate failure; + +/// Errors from dot_path methods +#[derive(Debug, Fail)] +pub enum Error { + /// Path hit a value in the JSON object that is not array or map + /// and could not continue the traversal. + /// + /// (e.g. `foo.bar` in `{"foo": 123}`) + #[fail(display = "Unexpected value reached while traversing path")] + BadPathElement, + + /// Array index out of range + #[fail(display = "Invalid array index: {}", _0)] + BadIndex(usize), + + /// Invalid (usually empty) key used in Map or Array. + /// If the key is valid but out of bounds, `BadIndex` will be used. + #[fail(display = "Invalid key: {}", _0)] + InvalidKey(String), + + /// Error serializing or deserializing a value + #[fail(display = "Invalid array or map key")] + SerdeError(#[fail(cause)] serde_json::Error), +} + +impl From for Error { + fn from(e: serde_json::Error) -> Self { + Error::SerdeError(e) + } +} + +use crate::Error::{BadPathElement, InvalidKey, BadIndex}; + +pub type Result = std::result::Result; + +/// Convert Some(Value::Null) to None. +trait NullToNone { + fn null_to_none(self) -> Option; +} + +impl NullToNone for Option { + fn null_to_none(self) -> Option { + match self { + None | Some(Value::Null) => None, + Some(v) => Some(v), + } + } +} + +impl<'a> NullToNone<&'a Value> for Option<&'a Value> { + fn null_to_none(self) -> Option<&'a Value> { + match self { + None | Some(&Value::Null) => None, + Some(v) => Some(v), + } + } +} /// Access and mutate nested JSON elements by dotted paths /// /// The path is composed of keys separated by dots, e.g. `foo.bar.1`. 
/// +/// All symbols in a path may be escaped by backslash (`\`) to have them treated literally, +/// e.g. to access a key containing a period. +/// /// Arrays are indexed by numeric strings or special keys (see `dot_get()` and `dot_set()`). /// /// This trait is implemented for `serde_json::Value`, specifically the /// `Map`, `Array`, and `Null` variants. Empty path can also be used to access a scalar. +/// +/// Methods on this trait do not panic, errors are passed to the caller. pub trait DotPaths { /// Get an item by path, if present. /// - /// JSON `null` becomes `None`, same as unpopulated path. + /// If the element does not exist or is `null`, None is returned. + /// Accessing array index out of range raises `Err(BadIndex)`. /// - /// # Special keys - /// Arrays can be indexed by special keys for reading: - /// - `>` ... last element - /// - `#123` ... map keys may be prefixed by `#` to use numeric strings - /// or other unusual forms (excluding dot `.`, which is still illegal in keys) + /// The path does not need to reach a leaf node, i.e. it is possible to extract a subtree + /// of a JSON object this way. /// - /// # Panics - /// - If the path attempts to index into a scalar (e.g. `"foo.bar"` in `{"foo": 123}`) - /// - If the path uses invalid key in an array or map - fn dot_get(&self, path: &str) -> Option - where - T: DeserializeOwned; - - /// Get an item, or a default value. + /// # Special symbols + /// - `>` ... last element of an array + /// - `<` ... first element of an array (same as `0`) + fn dot_get(&self, path: &str) -> Result> + where + T: DeserializeOwned; + + /// Get an item by path, or a default value if it does not exist. /// - /// # Special keys - /// see `dot_get()` + /// This method is best suited for JSON objects (`Map`) or nullable fields. /// - /// # Panics - /// see `dot_get()` - fn dot_get_or(&self, path: &str, def: T) -> T - where - T: DeserializeOwned, + /// See `dot_get()` for more details. 
+ fn dot_get_or(&self, path: &str, def: T) -> Result + where + T: DeserializeOwned, { - self.dot_get(path).unwrap_or(def) + self.dot_get(path) + .map(|o| o.unwrap_or(def)) } /// Get an item, or a default value using the Default trait /// - /// # Special keys - /// see `dot_get()` + /// This method is best suited for JSON objects (`Map`) or nullable fields. /// - /// # Panics - /// see `dot_get()` - fn dot_get_or_default(&self, path: &str) -> T - where - T: DeserializeOwned + Default, + /// See `dot_get()` for more details. + fn dot_get_or_default(&self, path: &str) -> Result + where + T: DeserializeOwned + Default, { - self.dot_get(path).unwrap_or_default() + self.dot_get_or(path, T::default()) } /// Get a mutable reference to an item /// - /// # Special keys - /// see `dot_get()` + /// If the path does not exist but a value on the path can be created (i.e. because the path + /// reaches `null`, array or object), a `null` value is inserted in that location (creating + /// its parent nodes as needed) and a mutable reference to this new `null` node is returned. /// - /// # Panics - /// see `dot_get()` - fn dot_get_mut(&mut self, path: &str) -> Option<&mut Value>; - - /// Insert an item by path. + /// The path does not need to reach a leaf node, i.e. it is possible to extract a subtree + /// of a JSON object this way. /// /// # Special keys - /// Arrays can be indexed by special keys: - /// - `+` or `>` ... append - /// - `-` or `<` ... prepend + /// - `>` ... last element of an array + /// - `<` ... first element of an array (same as `0`) + fn dot_get_mut(&mut self, path: &str) -> Result<&mut Value>; + + /// Insert an item by path. The original value is dropped, if any. + /// + /// # Special symbols + /// Arrays can be modified using special keys in the path: + /// - `+` or `>>` ... append + /// - `-` or `<<` ... prepend /// - `>n` ... insert after an index `n` /// - `(&mut self, path: &str, value: T) - where - T: Serialize; + /// - `>` ... 
last element of an array + /// - `<` ... first element of an array (same as `0`) + fn dot_set(&mut self, path: &str, value: T) -> Result<()> + where + T: Serialize { + + // This is a default implementation. + // Vec uses a custom implementation to support the special syntax. + + let _ = self.dot_replace::(path, value)?; // Original value is dropped + Ok(()) + } /// Replace a value by path with a new value. /// The value types do not have to match. /// - /// # Panics - /// see `dot_get()` - fn dot_replace(&mut self, path: &str, value: T) -> Option - where - T: Serialize, - U: DeserializeOwned; - - /// Get an item using a path, removing it from the store. - /// If no item was stored under this path, then None is returned. + /// Returns `Ok(None)` if the path was previously empty or `null`. /// - /// # Panics - /// see `dot_get()` - fn dot_take(&mut self, path: &str) -> Option - where - T: DeserializeOwned; + /// # Special keys + /// - `>` ... last element of an array + /// - `<` ... first element of an array (same as `0`) + fn dot_replace(&mut self, path: &str, value: NEW) -> Result> + where + NEW: Serialize, + OLD: DeserializeOwned; + + /// Get an item using a path, removing it from the object. + /// + /// Value becomes `null` when taken by an empty path, map entry is removed, + /// and array item is extracted, shifting the remainder forward. + /// + /// Returns `Ok(None)` if the path was previously empty or `null`. + /// + /// # Special keys + /// - `>` ... last element of an array + /// - `<` ... first element of an array (same as `0`) + fn dot_take(&mut self, path: &str) -> Result> + where + T: DeserializeOwned; - /// Remove an item matching a key. + /// Remove and drop an item matching a key. /// Returns true if any item was removed. /// - /// # Panics - /// see `dot_get()` - fn dot_remove(&mut self, path: &str) -> bool { - self.dot_take::(path).is_some() + /// # Special keys + /// - `>` ... last element of an array + /// - `<` ... 
first element of an array (same as `0`) + fn dot_remove(&mut self, path: &str) -> Result<()> { + let _ = self.dot_take::(path)?; // Original value is dropped + Ok(()) } } /// Split the path string by dot, if present. /// -/// Returns a tuple of (before_dot, after_dot) -fn path_split(path: &str) -> (&str, Option<&str>) { - let dot = path.find('.'); - match dot { - None => (path, None), - Some(pos) => (&path[0..pos], Some(&path[pos + 1..])), +/// Returns a tuple of (before_dot, after_dot), removing escapes +fn path_split(path: &str) -> (String, Option<&str>) { + let mut buf = String::new(); + let mut escaped = false; + for (n, c) in path.char_indices() { + match c { + _ if escaped => { + buf.push(c); + escaped = false; + } + '\\' => { + escaped = true; + } + '.' => { + return (buf, Some(&path[n + 1..])); + } + _ => { + buf.push(c); + } + } } + + // trailing slash is discarded + (buf, None) } impl DotPaths for serde_json::Value { - fn dot_get(&self, path: &str) -> Option - where - T: DeserializeOwned, + fn dot_get(&self, path: &str) -> Result> + where + T: DeserializeOwned, { match self { Value::Array(vec) => vec.dot_get(path), Value::Object(map) => map.dot_get(path), - Value::Null => None, + Value::Null => Ok(None), _ => { if path.is_empty() { - serde_json::from_value(self.to_owned()).ok() + // Get the whole value. 
+ // We know it's not null - checked above + Ok(Some(serde_json::from_value(self.to_owned())?)) } else { - panic!("Node is not array or object!"); + // Path continues, but we can't traverse into a scalar + Err(BadPathElement) } } } } - fn dot_get_mut(&mut self, path: &str) -> Option<&mut Value> { + fn dot_get_mut(&mut self, path: &str) -> Result<&mut Value> { match self { Value::Array(vec) => vec.dot_get_mut(path), Value::Object(map) => map.dot_get_mut(path), - Value::Null => None, _ => { if path.is_empty() { - Some(self) + Ok(self) } else { - panic!("Node is not array or object!"); - } - } - } - } + if self.is_null() { + // Spawn parents + self.dot_set(path, Value::Null)?; + // Now it will succeed + return self.dot_get_mut(path); + } - fn dot_set(&mut self, path: &str, value: T) - where - T: Serialize, - { - match self { - Value::Array(vec) => { - vec.dot_set(path, value); - } - Value::Object(map) => { - map.dot_set(path, value); - } - Value::Null => { - mem::replace(self, new_by_path_root(path, value)); - } - _ => { - if path.is_empty() { - mem::replace(self, serde_json::to_value(value).expect("Serialize error")); - } else { - panic!("Node is not an array, object, or null!"); + // Path continues, but we can't traverse into a scalar + Err(BadPathElement) } } } } - fn dot_replace(&mut self, path: &str, value: T) -> Option - where - T: Serialize, - U: DeserializeOwned, + fn dot_replace(&mut self, path: &str, value: NEW) -> Result> + where + NEW: Serialize, + OLD: DeserializeOwned, { match self { Value::Array(vec) => vec.dot_replace(path, value), Value::Object(map) => map.dot_replace(path, value), Value::Null => { - self.dot_set(path, value); - None + // spawn new + mem::replace(self, new_by_path_root(path, value)?); + Ok(None) } _ => { if path.is_empty() { - let new = serde_json::to_value(value).expect("Serialize error"); + let new = serde_json::to_value(value)?; let old = mem::replace(self, new); - Some(serde_json::from_value(old).expect("Unserialize error")) + 
Ok(serde_json::from_value(old)?) } else { - panic!("Node is not an array, object, or null!") + // Path continues, but we can't traverse into a scalar + Err(BadPathElement) } } } } - fn dot_take(&mut self, path: &str) -> Option - where - T: DeserializeOwned, + fn dot_take(&mut self, path: &str) -> Result> + where + T: DeserializeOwned, { match self { Value::Array(vec) => vec.dot_take(path), Value::Object(map) => map.dot_take(path), - Value::Null => None, + Value::Null => Ok(None), _ => { if path.is_empty() { - let old = mem::replace(self, Value::Null); - Some(serde_json::from_value(old).expect("Unserialize error")) + // This won't happen with array or object, they really remove the value. + // Value is replaced with NULL only when dot_take() is called + // with an empty path. + let old = mem::replace(self, Value::Null); // we know it's not null, checked above + Ok(Some(serde_json::from_value(old)?)) } else { - panic!("Node is not an array, object, or null!") + // Path continues, but we can't traverse into a scalar + Err(BadPathElement) } } } } + + fn dot_set(&mut self, path: &str, value: T) -> Result<()> + where + T: Serialize { + match self { + // Special case for Vec, which implements additional path symbols + Value::Array(a) => { + a.dot_set(path, value) + } + _ => { + let _ = self.dot_replace::(path, value)?; // Original value is dropped + Ok(()) + } + } + } } /// Create a Value::Object or Value::Array based on a nested path. /// /// Builds the parent path to a non-existent key in set-type operations. 
-fn new_by_path_root(path: &str, value: T) -> Value -where - T: Serialize, +#[must_use] +fn new_by_path_root(path: &str, value: T) -> Result + where + T: Serialize, { if path.is_empty() { - return serde_json::to_value(value).expect("Serialize error"); + return Ok(serde_json::to_value(value)?); } + let escaped = path.starts_with('\\'); let (sub1, _) = path_split(path); - if sub1 == "0" || sub1 == "+" || sub1 == "<" || sub1 == ">" { + if !escaped && ["0", "+", "-", "<", ">", "<<", ">>"].contains(&sub1.as_str()) { // new vec let mut new_vec = vec![]; - new_vec.dot_set(path, value); - Value::Array(new_vec) + new_vec.dot_set(path, value)?; + Ok(Value::Array(new_vec)) } else { // new map let mut new_map = Map::new(); - new_map.dot_set(path, value); - Value::Object(new_map) + new_map.dot_set(path, value)?; + Ok(Value::Object(new_map)) } } -/// Check if a key is valid to use by dot paths in Value::Object. -/// The key must start with an alpha character or underscore and must not contain period. -#[must_use] -fn validate_map_key(key: &str) -> &str { - if key.contains('.') { - // this shouldn't happen due to the way the splitting works - panic!("Invalid map key: {}", key); - } - - // 'literal modifier', e.g. for numeric map keys - if key.starts_with('#') { - return &key[1..]; - } - - if !key.starts_with(|p: char| p.is_ascii_alphabetic() || p == '_') { - panic!("Invalid map key: {}", key); - } - - key -} - impl DotPaths for serde_json::Map { - fn dot_get(&self, path: &str) -> Option - where - T: DeserializeOwned, + fn dot_get(&self, path: &str) -> Result> + where + T: DeserializeOwned, { let (my, sub) = path_split(path); - let my = validate_map_key(my); - if let Some(sub_path) = sub { - self.get(my) - .map(|child| child.dot_get(sub_path)) // this produces Option> - .unwrap_or_default() - } else { - self.get(my) - .map(ToOwned::to_owned) - .map(serde_json::from_value) - .transpose() // Option to Result