<?php
namespace MightyPork\Utils;

use MightyPork\Exceptions\FormatException;
use Illuminate\Support\Collection;
use Log;

/**
 * String utilities, layered on top of Laravel's Str helper.
 *
 * All methods are static. Unless a method says otherwise, offsets and lengths
 * are byte-based (strlen/strpos/substr), not multibyte-aware.
 */
class Str extends \Illuminate\Support\Str
{
    /**
     * Split and trim
     *
     * TODO unit test
     *
     * @param string          $haystack   string to split
     * @param string|string[] $delimiters delimiter(s); any one of them splits the string
     * @return array pieces, trimmed. Empty array if the trimmed haystack is empty.
     */
    public static function splitTrim($haystack, $delimiters = [',', ';', '|'])
    {
        $haystack = trim($haystack);

        if (strlen($haystack) == 0) {
            return [];
        }

        return array_map('trim', self::split($haystack, $delimiters));
    }

    /**
     * Split a string using one or more delimiters
     *
     * A string delimiter is passed to explode(); an array of delimiters is
     * turned into a regex alternation and passed to preg_split().
     *
     * TODO unit test
     *
     * @param string       $haystack
     * @param string|array $delimiters
     * @return array
     */
    public static function split($haystack, $delimiters)
    {
        if (is_string($delimiters)) {
            return explode($delimiters, $haystack);
        }

        // make sure it's array
        if (!is_array($delimiters)) {
            $delimiters = [$delimiters];
        }

        // helper - escape each delimiter for use inside a /-delimited regex
        $regex_escape = function ($x) {
            return preg_quote($x, '/');
        };

        // compose splitting regex
        $reg = "/" . implode('|', array_map($regex_escape, $delimiters)) . "/";

        return preg_split($reg, $haystack);
    }

    /**
     * Split "CSV" string to items, trim each item.
     * Commas at the very start and end are stripped before splitting,
     * so empty values at start and end are discarded.
     *
     * TODO unit test
     *
     * @param string $str
     * @return array items
     */
    public static function splitCsv($str)
    {
        return array_map('trim', explode(',', trim($str, ',')));
    }

    /**
     * Pad an integer to 2 digit
     *
     * TODO unit test
     *
     * @param int $int the number
     * @return string padded with zero
     */
    public static function pad2($int)
    {
        return sprintf("%02d", $int);
    }

    /**
     * Remove diacritics from a string
     *
     * Delegates to iconv() with //TRANSLIT, so the exact result (and whether
     * false is returned on failure) depends on the platform's iconv.
     *
     * TODO unit test
     *
     * @param string $str
     * @return string ascii
     */
    public static function asciify($str)
    {
        return iconv('UTF-8', 'US-ASCII//TRANSLIT', $str);
    }

    /**
     * Public for unit tests
     *
     * Convert mask to regex
     *
     * Steps: quote the mask, wrap bare digit runs into {N} quantifiers,
     * then un-quote the supported meta characters and expand the letter
     * shortcuts (d, F, D, a).
     *
     * @param $mask
     * @return mixed|string
     */
    public static function _pregMaskPrepare($mask)
    {
        $mask = preg_quote($mask);

        // number = repeat
        // (digit runs not at the start and not preceded by . * + , { a digit or a backslash)
        $mask = preg_replace('#(?<![.*+,{\d\\\\]|^)(\d+)#', '{$1}', $mask);

        // can repeat ?
        $mask = strtr($mask, [
            '\?' => '?',
            '\*' => '*',
            '\+' => '+',
            '\{' => '{',
            '\}' => '}',
            '\(' => '(',
            '\)' => ')',
            'd' => '\d',
            'F' => '-?\d+(\.\d+)?',
            'D' => '-?\d+',
            'a' => '[[:alpha:]]',
            '\\\\' => '\\',
        ]);

        // second pass collapses any doubled backslashes that remain
        $mask = strtr($mask, [
            '\\\\' => '\\',
        ]);

        return $mask;
    }

    /**
     * Match a string against a mask.
     *
     * Special symbols:
     * - `*` repeat previous any number of times
     * - `?` previous is optional
     * - `()` grouping
     * - `a` alpha
     * - `d` digit
     * - `D` integer with optional leading -
     * - `F` float value d, d.ddd with optional leading -
     * - `+` repeat previous symbol any number of times
     * - Number - repeat N times
     * - {N}, {M,N} - repeat n times, like in regex
     *
     * Any other characters are matched literally.
     *
     * @param string $mask   mask to match against
     * @param string $string tested string
     * @return bool matches
     */
    public static function maskMatch($mask, $string)
    {
        $mask = self::_pregMaskPrepare($mask);

        return 1 === preg_match('|^' . $mask . '$|u', $string);
    }

    /**
     * Format a string with {0} {foo} or {}
     *
     * Each bare {} is numbered in order of appearance ({0}, {1}, ...), then
     * every {key} is replaced by the substitution stored under that key.
     * Replacement is done in a single pass, so placeholder-like text inside
     * a substituted value is not substituted again.
     *
     * TODO unit test
     *
     * @param string $format
     * @param array  ...$args substitutions. Can also be an explicit array.
     * @return string
     */
    public static function format($format, ...$args)
    {
        // explicit array given (guarded, so calling with no substitutions is fine)
        if (isset($args[0]) && is_array($args[0])) {
            $args = $args[0];
        }

        // number the anonymous placeholders
        $counter = 0;
        $format = preg_replace_callback('#\{\}#', function () use (&$counter) {
            return '{' . ($counter++) . '}';
        }, $format);

        $pairs = [];
        foreach ($args as $key => $value) {
            $pairs['{' . $key . '}'] = $value;
        }

        if (!$pairs) {
            return $format;
        }

        return strtr($format, $pairs);
    }

    /**
     * Remove whitespace on the left side of a block.
     * This function is quite expensive, so it's recommended
     * to cache the result, if possible.
     *
     * <b>NOTE:</b> The current implementation converts all leading tabs to spaces.
     *
     * @param string $txt          text to trim
     * @param int    $tab_size     number of spaces per tab, default 4
     * @param bool   $ltrim_nl     remove a leading newline
     * @param bool   $ign_noindent ignore lines with zero indentation
     * @return string left-trimmed block.
     */
    public static function unindentBlock($txt, $tab_size = 4, $ltrim_nl = true, $ign_noindent = true)
    {
        $pad = 1024; // max indent size
        $tabsp = str_repeat(' ', $tab_size);

        if ($ltrim_nl) {
            // also make sure first line is not blank
            $txt = ltrim($txt, "\n");
        }

        // First pass: normalize leading whitespace of every line and
        // find the smallest indentation used.
        $txt = preg_replace_callback(
            '/^([ \t]*)(?=[^ \t\n]|$)/m',
            function ($m) use (&$pad, $tabsp, $ign_noindent) {
                $indent = $m[1];

                if ($indent == '' && $ign_noindent) {
                    // no indentation, perhaps stripped by editor?
                    return '';
                }

                // normalize tabs to $tab_size spaces
                $normalized = strtr($indent, ["\t" => $tabsp]);
                $pad = min($pad, strlen($normalized));

                return $normalized;
            },
            $txt
        );

        // Second pass: strip the common indentation from each line.
        return preg_replace('/^ {' . $pad . '}/m', '', $txt);
    }

    /**
     * Get a simple string representation of an array, similar to json_encode(),
     * except without the obnoxious quotes and escapes.
     *
     * Collections and associative arrays are rendered as `key:json` pairs;
     * plain lists are rendered with values cast to string (nested arrays as JSON).
     *
     * @param array $array
     * @return string [a, b, c]
     */
    public static function arr($array)
    {
        if ($array instanceof Collection || Utils::isAssoc($array)) {
            $x = '[';
            foreach ($array as $k => $v) {
                $x .= "$k:" . json_encode($v) . ', ';
            }
            $x = rtrim($x, ', ') . ']';

            return $x;
        } else {
            return '[' . implode(', ', collect($array)->map(function ($x) {
                return is_array($x) ? json_encode($x, JSON_UNESCAPED_UNICODE) : (string) $x;
            })->toArray()) . ']';
        }
    }

    /**
     * Get substring from last occurrence of token
     *
     * TODO unit test
     *
     * @param string $token token delimiting the last chunk from left. Not included.
     * @param string $haystack
     * @return string part of haystack after token; the whole haystack if token is not present.
     */
    public static function fromLast($token, $haystack)
    {
        $rpos = strrpos($haystack, $token);

        if ($rpos === false) {
            return $haystack;
        }

        return substr($haystack, $rpos + strlen($token));
    }

    /**
     * Pad a string on the right to the given length (counted in characters).
     *
     * @param string $str
     * @param int    $len  target length
     * @param string $fill fill character; only its first character is used
     * @return string
     */
    public static function rpad($str, $len, $fill = ' ')
    {
        $filln = max(0, $len - mb_strlen($str));

        return $str . str_repeat(mb_substr($fill, 0, 1), $filln);
    }

    /**
     * Pad a string on the left to the given length (counted in characters).
     *
     * @param string $str
     * @param int    $len  target length
     * @param string $fill fill character; only its first character is used
     * @return string
     */
    public static function lpad($str, $len, $fill = ' ')
    {
        $filln = max(0, $len - mb_strlen($str));

        return str_repeat(mb_substr($fill, 0, 1), $filln) . $str;
    }

    /**
     * Remove wrapping quotes from a string.
     * C-slashes will also be removed.
     *
     * Quotes are only stripped when the string starts and ends with the
     * same quote character (" or '). Falsy and non-string values are
     * returned untouched.
     *
     * @param $str
     * @return string
     */
    public static function unquote($str)
    {
        if (!$str || !is_string($str)) {
            return $str;
        }

        $len = strlen($str);

        // need at least two characters to have a pair of quotes
        if ($len >= 2) {
            $first = $str[0];
            $last = $str[$len - 1];

            if ($first === $last && ($first === '"' || $first === "'")) {
                $str = substr($str, 1, $len - 2);
            }
        }

        return stripcslashes($str);
    }

    /**
     * Apply a rewrite.
     *
     * - Simple foo|bar rewrites 0|1 (or false|true)
     * - Key-value rewrite is possible with 7=foo|9=bar
     * - '*' (asterisk) matches everything (9=foo|*=other)
     * - '\*' - match literal asterisk
     * - starts with % - format using sprintf
     * - Compare funcs can also be used: lt, gt, le, ge, range, in
     *   example: lt(100)=Foo|range(100,200)=Bar|gt(200)=Baz
     *
     * An empty rewrite leaves the value unchanged.
     *
     * @param mixed  $value   value from expression
     * @param string $rewrite rewrite patterns, | separated
     * @return mixed result to show
     */
    public static function rewrite($value, $rewrite)
    {
        // TODO rewrite this to use a simpler notation

        $rewrite = (string) $rewrite;

        // nothing to apply
        if (trim($rewrite) === '') {
            return $value;
        }

        if ($rewrite[0] == '%') {
            return sprintf($rewrite, $value);
        }

        $rewrite_map = [];
        foreach (explode('|', trim($rewrite)) as $i => $rw) {
            // split at '=' not preceded by a backslash
            $ar = preg_split('/(?<![\\\\])=/', $rw);

            if (count($ar) == 2) {
                // key value pair
                $key = trim(str_replace('\=', '=', $ar[0]));
                $rewrite_map[$key] = trim(str_replace('\=', '=', $ar[1]));
            } elseif (count($ar) == 1) {
                // literal rewrite - keyed by its position in the list
                $rewrite_map[$i] = $rw;
            } else {
                Log::warning("Invalid rewrite format: $rw");

                return $value; // don't rewrite it
            }
        }

        // apply the rewrite if any
        foreach ($rewrite_map as $k => $replacement) {
            // NOTE(review): the (int) comparison truncates, so key 7 also matches
            // value 7.9 and key 0 matches non-numeric strings - confirm this is intended.
            if (is_numeric($k) && (((int)$k == (int)$value) || abs((float)$k - (float)$value) < 0.00001)) {
                // exact match
                return $replacement;
            } else if (($k === 'true' || $k === 'false') && (Utils::parseBool($k) == Utils::parseBool($value))) {
                // bool match
                return $replacement;
            } else if (preg_match('/([a-z]+)\(([^)]+)\)/i', $k, $mm)) {
                // we have a comparing function
                if (static::testCompareFunc($value, $mm[1], $mm[2])) {
                    return $replacement;
                }
            } else if ($k === '*') {
                return $replacement; // catch-all
            } else if ($k === '\\*') {
                if ($value === '*') {
                    return $replacement; // literal asterisk
                }
            }
        }

        return $value;
    }

    /**
     * Check if compare function matches value, copied from FB2
     *
     * Supported functions: lt, gt, le, ge, eq (numeric, compared as floats),
     * range(a,b) (a inclusive, b exclusive) and in(x,y,...).
     * An unknown function or malformed range is logged and does not match.
     *
     * @param mixed  $value    value to format
     * @param string $func     function name
     * @param mixed  $argument extra argument for the func
     * @return bool
     */
    private static function testCompareFunc($value, $func, $argument)
    {
        $value_f = floatval($value);
        $arg_f = floatval($argument);

        $fun = trim(strtolower($func));

        switch ($fun) {
            case 'lt':
                return $value_f < $arg_f;

            case 'gt':
                return $value_f > $arg_f;

            case 'le':
                return $value_f <= $arg_f;

            case 'ge':
                return $value_f >= $arg_f;

            case 'eq':
                return $value_f == $arg_f; // this is kinda useless, but to make the set complete

            case 'range': // range(-10,0) = winter
                $bounds = array_map(function ($x) {
                    return floatval(trim($x));
                }, explode(',', $argument));

                if (count($bounds) != 2) {
                    Log::error("Invalid range bounds: $argument");

                    return false;
                }

                return $value_f >= $bounds[0] && $value_f < $bounds[1];

            case 'in': // in(10,20,30) = 10, 20 or 30; also works for strings: in(ZAP,VYP) = ZAP or VYP
                $bounds = array_map('trim', explode(',', $argument));

                return in_array(trim($value), $bounds);

            default:
                Log::error("Invalid rewrite function: $func");

                return false;
        }
    }

    /**
     * Apply multiple sets of substitutions to a format and get all the results.
     *
     * @param string $format      Format same as for Str::format(). Fields are marked {}, {0} or {key}
     * @param array  $subs_arrays array of arrays of substitutions - eg. [[a1, b1], [a2, b2], ...]
     *                            A non-array item is treated as a single substitution.
     * @return array one formatted string per substitution set
     */
    public static function mapFormat($format, $subs_arrays)
    {
        $gather = [];

        foreach ($subs_arrays as $subs) {
            if (!is_array($subs)) {
                $subs = [$subs];
            }

            $gather[] = Str::format($format, $subs);
        }

        return $gather;
    }

    /**
     * Find all needle positions within a haystack
     *
     * Matches do not overlap: the search resumes right after each match.
     * An empty needle yields no positions.
     *
     * @param string $haystack
     * @param string $needle
     * @return array byte offsets of the matches
     */
    public static function findPositions($haystack, $needle)
    {
        $step = strlen($needle);

        // an empty needle would never advance the cursor
        if ($step === 0) {
            return [];
        }

        $positions = [];
        $cursor = 0;

        while (($cursor = strpos($haystack, $needle, $cursor)) !== false) {
            $positions[] = $cursor;
            $cursor += $step;
        }

        return $positions;
    }

    /**
     * Discard positions in a string that are preceded by an unescaped backslash.
     *
     * A position counts as escaped when the run of backslashes directly
     * before it has an odd length (an even run is made of escaped backslashes).
     *
     * @param string $str
     * @param int[]  $positions
     * @return int[] positions that are not escaped
     */
    public static function discardEscapedPositions($str, array $positions)
    {
        $actualPos = [];

        foreach ($positions as $pos) {
            // count the backslashes immediately preceding this position
            $backslashes = 0;
            for ($i = $pos - 1; $i >= 0 && $str[$i] === '\\'; $i--) {
                $backslashes++;
            }

            if ($backslashes % 2 === 1) {
                // this position is escaped
                continue;
            }

            $actualPos[] = $pos;
        }

        return $actualPos;
    }

    /**
     * Split a string at given positions
     *
     * The single character at each position is dropped from the output.
     *
     * @param string $string
     * @param int[]  $positions
     * @return string[]
     */
    public static function splitAt($string, $positions)
    {
        $chunks = [];

        // sentinels: virtual split points before the start and at the end
        array_push($positions, strlen($string));
        array_unshift($positions, -1);

        foreach ($positions as $i => $position) {
            if ($position >= strlen($string)) {
                break;
            }

            $chunks[] = substr($string, $position + 1, $positions[$i + 1] - $position - 1);
        }

        return $chunks;
    }

    /**
     * Split a string to pieces by commas, ignoring commas within strings delimited by double quotes.
     * A double quote can be escaped using a backslash.
     *
     * @param string $str
     * @param string $delimiter
     * @return mixed
     * @throws FormatException when the number of unescaped quotes is odd
     */
    public static function splitCommandArgs($str, $delimiter = ',')
    {
        if ($str === '') {
            return [];
        }

        // Find unescaped quotes
        $quotes = Str::findPositions($str, '"');
        $quotes = Str::discardEscapedPositions($str, $quotes);

        if (count($quotes) % 2 != 0) {
            throw new FormatException("Unmatched quote in command arguments: $str");
        }

        // Delimiters outside of quoted sections are the split points
        $commas = Str::findPositions($str, $delimiter);
        $commas = Utils::discardPositionsWithinPairs($commas, $quotes);

        $chunks = Str::splitAt($str, $commas);

        $arr = collect($chunks)->trim()->toArray();

        return $arr;
    }

    /**
     * Get substring to first match of a token
     *
     * @param string $token    token delimiting the parts
     * @param string $haystack full string
     * @return string portion of the string until the first token; or the whole string if token is not present.
     */
    public static function toFirst($token, $haystack)
    {
        $lpos = strpos($haystack, $token);

        if ($lpos === false) {
            return $haystack;
        }

        return substr($haystack, 0, $lpos);
    }

    /**
     * Get substring from first match of a token
     *
     * @param string $token     token delimiting the parts
     * @param string $haystack  full string
     * @param bool   $exclusive when true, the token itself is left out of the result
     * @return string portion of the string starting at the first token; or empty string if token is not present.
     */
    public static function fromFirst($token, $haystack, $exclusive = false)
    {
        $lpos = strpos($haystack, $token);

        if ($lpos === false) {
            return '';
        }

        // skip the whole token, not just its first byte
        $skip = $exclusive ? strlen($token) : 0;

        return substr($haystack, $lpos + $skip);
    }

    /**
     * Get substring to last match of a token
     *
     * @param string $token    token delimiting the parts
     * @param string $haystack full string
     * @return string portion of the string before the last token; or the whole string if token is not present.
     */
    public static function toLast($token, $haystack)
    {
        $rpos = strrpos($haystack, $token);

        if ($rpos === false) {
            return $haystack;
        }

        // $rpos is a byte offset, so it must be paired with the byte-wise substr()
        return substr($haystack, 0, $rpos);
    }

    /**
     * Check if translation exists
     *
     * The key is considered missing when trans() gives back something
     * that still looks like a dotted translation key.
     *
     * @param string $descrKey tested transl key
     * @return bool exists
     */
    public static function translationExists($descrKey)
    {
        $tr = trans($descrKey);

        return !preg_match('/^(\w+\.)+(\w+)$/', $tr);
    }

    /**
     * Expand array of strings using bash-style repeat patterns {a,b,c}, {from..to}
     *
     * ie. to produce ad1 to ad4, use ad{1..4}. Supports multiple patterns, producing all permutations.
     * A braced group that is neither a list nor a from..to range is left as is.
     *
     * @param string|string[] $sourceStrings
     * @return string[]
     */
    public static function expandBashRepeat($sourceStrings)
    {
        if (is_string($sourceStrings)) {
            $sourceStrings = [$sourceStrings];
        }

        $outputs = [];

        foreach ($sourceStrings as $str) {
            $i = 0;
            $arrays = [];

            // Replace every pattern with a numbered {{n}} marker
            // and remember the values it expands to.
            $str2 = preg_replace_callback('/\{([^{}]+)\}/', function ($m) use (&$i, &$arrays) {
                $seq = explode(',', $m[1]);

                if (count($seq) >= 2) {
                    $arrays[$i] = $seq;
                } else {
                    $ab = explode('..', $m[1]);

                    if (count($ab) !== 2) {
                        // not a range - keep the original text
                        return $m[0];
                    }

                    $arrays[$i] = range(intval($ab[0]), intval($ab[1]));
                }

                return '{{' . ($i++) . '}}';
            }, $str);

            // Substitute the markers one by one, multiplying the results.
            $strs = [$str2];
            for ($n = count($arrays) - 1; $n >= 0; $n--) {
                $tmpstrs = [];

                foreach ($arrays[$n] as $subs) {
                    foreach ($strs as $ss) {
                        $tmpstrs[] = str_replace('{{' . $n . '}}', $subs, $ss);
                    }
                }

                $strs = $tmpstrs;
            }

            $outputs = array_merge($outputs, $strs);
        }

        return $outputs;
    }

    /**
     * Remove trailing commas from JSON string
     *
     * Works on the raw text: any comma followed (after optional whitespace)
     * by } or ] is dropped, wherever it occurs.
     *
     * @param string $str
     * @return string
     */
    public static function cleanJson($str)
    {
        return preg_replace('/,\s*([}\]])/s', '\1', $str);
    }

    /**
     * Shorten a string to at most $maxlen characters and append an ellipsis.
     *
     * If a space is found within the last $margin characters of the cut,
     * the string is cut at that space instead of mid-word.
     *
     * @param string $str
     * @param int    $maxlen maximal length before the ellipsis, in characters
     * @param int    $margin how far back from the cut a space is looked for
     * @return string the original string if it fits, otherwise the shortened string with '…' appended
     */
    public static function ellipsis($str, $maxlen, $margin = 10)
    {
        if (mb_strlen($str) <= $maxlen) {
            return $str;
        }

        $hard = mb_substr($str, 0, $maxlen);
        $lastspace = mb_strrpos($hard, ' ');

        if ($lastspace !== false && $lastspace > $maxlen - $margin) {
            $hard = mb_substr($hard, 0, $lastspace);
        }

        return $hard . '…';
    }
}