datatable.directory codebase https://datatable.directory/
datatable.directory/porklib/Utils/Str.php

674 lines
16 KiB

7 years ago
<?php
namespace MightyPork\Utils;
use MightyPork\Exceptions\FormatException;
use Illuminate\Support\Collection;
use Log;
class Str extends \Illuminate\Support\Str
{
/**
 * Split a string on any of the given delimiters and trim every piece.
 *
 * The input itself is trimmed first; if nothing is left, an empty array
 * is returned instead of an array holding one empty string.
 *
 * TODO unit test
 *
 * @param string $haystack string to split
 * @param string|string[] $delimiters a single delimiter, or a list of alternatives
 * @return array pieces, trimmed.
 */
public static function splitTrim($haystack, $delimiters=[',', ';', '|'])
{
    $input = trim($haystack);

    if ($input === '') {
        return [];
    }

    $pieces = self::split($input, $delimiters);

    return array_map(function ($piece) {
        return trim($piece);
    }, $pieces);
}
/**
 * Split a string using one or more delimiters.
 *
 * A single string delimiter is applied literally with explode(). Any other
 * value is treated as a list of alternative delimiters, which are escaped
 * and joined into one regular expression for preg_split().
 *
 * Empty delimiters are ignored because they cannot split anything:
 * explode() rejects an empty delimiter, and an empty regex alternative
 * matches between every character. When no usable delimiter remains, the
 * haystack is returned as the only piece.
 *
 * TODO unit test
 *
 * @param string $haystack
 * @param string|array $delimiters
 * @return array
 */
public static function split($haystack, $delimiters)
{
    if (is_string($delimiters)) {
        if ($delimiters === '') {
            return [$haystack];
        }
        return explode($delimiters, $haystack);
    }

    // make sure it's array
    if (!is_array($delimiters)) {
        $delimiters = [$delimiters];
    }

    // escape each delimiter so it is matched literally inside the regex
    $alternatives = [];
    foreach ($delimiters as $delimiter) {
        $delimiter = (string) $delimiter;
        if ($delimiter !== '') {
            $alternatives[] = preg_quote($delimiter, '/');
        }
    }

    if (!$alternatives) {
        return [$haystack];
    }

    // compose splitting regex
    return preg_split('/' . implode('|', $alternatives) . '/', $haystack);
}
/**
 * Split "CSV" string to items, trim each item.
 * Empty values at start and end are discarded; empty values in the
 * middle are kept.
 *
 * An input that contains nothing but commas and whitespace yields an
 * empty array.
 *
 * TODO unit test
 *
 * @param string $str
 * @return array items
 */
public static function splitCsv($str)
{
    // Strip commas together with the whitespace around them, so padded input
    // such as " ,a,b, " does not leave empty items at either end.
    $str = trim($str, ", \t\n\r\0\x0B");

    if ($str === '') {
        return [];
    }

    return array_map('trim', explode(',', $str));
}
/**
 * Format an integer with at least two digits, zero-padded on the left.
 *
 * TODO unit test
 *
 * @param int $int the number
 * @return string the number as a string, e.g. 5 gives "05"
 */
public static function pad2($int)
{
    $padded = sprintf('%02d', $int);

    return $padded;
}
/**
 * Remove diacritics from a string.
 *
 * Delegates to iconv(), converting from UTF-8 to US-ASCII with the
 * //TRANSLIT option. Whatever iconv() returns is passed through as is.
 *
 * TODO unit test
 *
 * @param string $str UTF-8 text
 * @return string ascii
 */
public static function asciify($str)
{
    $sourceCharset = 'UTF-8';
    $targetCharset = 'US-ASCII//TRANSLIT';

    return iconv($sourceCharset, $targetCharset, $str);
}
/**
 * Convert a mask (see maskMatch()) into a regular expression fragment.
 *
 * Public for unit tests.
 *
 * The mask is escaped with preg_quote() first. Afterwards the escapes of
 * `?`, `*`, `+`, `{`, `}`, `(` and `)` are undone so these keep their
 * regex meaning, and every occurrence of a placeholder letter is expanded:
 * - `d` becomes `\d`
 * - `D` becomes `-?\d+`
 * - `F` becomes `-?\d+(\.\d+)?`
 * - `a` becomes `[[:alpha:]]`
 *
 * The result carries no delimiters and no anchors; the caller adds them.
 *
 * @param string $mask mask to convert
 * @return mixed|string regex fragment
 */
public static function _pregMaskPrepare($mask)
{
// escape everything that is special in a regex; the wanted metacharacters are restored below
$mask = preg_quote($mask);
// A run of digits becomes a repeat count {N}. The lookbehind skips digits at the very
// start of the mask and digits that directly follow . * + , { another digit or a backslash.
$mask = preg_replace('#(?<![.*+,{\d\\\\]|^)(\d+)#', '{$1}', $mask);
// un-escape the supported metacharacters and expand the placeholder letters
$mask = strtr($mask, [
'\?' => '?',
'\*' => '*',
'\+' => '+',
'\{' => '{',
'\}' => '}',
'\(' => '(',
'\)' => ')',
'd' => '\d',
'F' => '-?\d+(\.\d+)?',
'D' => '-?\d+',
'a' => '[[:alpha:]]',
// two consecutive backslashes collapse into one
'\\\\' => '\\',
]);
// second pass: collapse any doubled backslashes that are still present
$mask = strtr($mask, [
'\\\\' => '\\',
]);
return $mask;
}
/**
 * Test whether a whole string matches a mask.
 *
 * Special symbols:
 * - `a` alpha
 * - `d` digit
 * - `D` integer with optional leading -
 * - `F` float value d, d.ddd with optional leading -
 * - `()` grouping
 * - `?` previous is optional
 * - `*` previous is repeated zero or more times
 * - `+` previous is repeated one or more times
 * - Number - repeat N times
 * - {N}, {M,N} - repeat n times, like in regex
 *
 * Any other characters are matched literally. The mask is translated by
 * _pregMaskPrepare() and anchored at both ends.
 *
 * @param string $mask mask to match against
 * @param string $string tested string
 * @return bool matches
 */
public static function maskMatch($mask, $string)
{
    $pattern = '|^' . self::_pregMaskPrepare($mask) . '$|u';

    return preg_match($pattern, $string) === 1;
}
/**
 * Format a string with {0} {foo} or {}
 *
 * Anonymous `{}` fields are numbered in order of appearance (the first
 * becomes `{0}`, the second `{1}`, ...). Each `{key}` is then replaced by
 * the substitution stored under that key. Fields without a matching
 * substitution are left in the output.
 *
 * All fields are substituted in a single pass, so a substituted value
 * that happens to look like a field (e.g. "{1}") is not expanded again.
 *
 * TODO unit test
 *
 * @param string $format
 * @param array ...$args substitutions. Can also be an explicit array.
 * @return string
 */
public static function format($format, ...$args)
{
    // explicit array given; isset() also covers a call without substitutions
    if (isset($args[0]) && is_array($args[0])) {
        $args = $args[0];
    }

    // number the anonymous {} fields
    $next = 0;
    $format = preg_replace_callback('#\{\}#', function () use (&$next) {
        return '{' . ($next++) . '}';
    }, $format);

    if (!$args) {
        return $format;
    }

    $pairs = [];
    foreach ($args as $key => $value) {
        $pairs['{' . $key . '}'] = $value;
    }

    // strtr() never rescans text it has already replaced
    return strtr($format, $pairs);
}
/**
 * Strip the common left indentation from a block of text.
 * This function is quite expensive, so it's recommended
 * to cache the result, if possible.
 *
 * The smallest indentation found on any line is removed from every line.
 * Lines consisting of whitespace only take part in that minimum. Indents
 * larger than 1024 spaces are not considered.
 *
 * <b>NOTE:</b> The current implementation converts all leading tabs to spaces.
 *
 * @param string $txt text to trim
 * @param int $tab_size number of spaces per tab, default 4
 * @param bool $ltrim_nl remove leading newlines
 * @param bool $ign_noindent ignore lines with zero indentation
 * @return string left-trimmed block.
 */
public static function unindentBlock($txt, $tab_size = 4, $ltrim_nl = true, $ign_noindent = true)
{
    $smallest = 1024; // max indent size
    $tabAsSpaces = str_repeat(' ', $tab_size);

    if ($ltrim_nl) {
        // also make sure first line is not blank
        $txt = ltrim($txt, "\n");
    }

    // Rewrites the indent of one line to spaces and tracks the smallest width seen.
    $expandIndent = function ($match) use (&$smallest, $tabAsSpaces, $ign_noindent) {
        $leading = $match[1];

        if ($ign_noindent && $leading == '') {
            // no indentation, perhaps stripped by editor?
            return '';
        }

        $spaces = strtr($leading, ["\t" => $tabAsSpaces]);
        $smallest = min($smallest, strlen($spaces));

        return $spaces;
    };

    $txt = preg_replace_callback('/^([ \t]*)(?=[^ \t\n]|$)/m', $expandIndent, $txt);

    // cut the common indent off every line
    return preg_replace('/^ {' . $smallest . '}/m', '', $txt);
}
/**
 * Get a simple string representation of an array, similar to json_encode(),
 * except without the obnoxious quotes and escapes.
 *
 * A Collection, or an array that Utils::isAssoc() reports as associative,
 * is rendered as `[key:value, ...]` with each value JSON-encoded. Anything
 * else is rendered as `[a, b, c]`, where nested arrays are JSON-encoded
 * and all other items are cast to string.
 *
 * @param array $array
 * @return string [a, b, c]
 */
public static function arr($array)
{
    $keyed = $array instanceof Collection || Utils::isAssoc($array);

    if (!$keyed) {
        $items = collect($array)->map(function ($item) {
            if (is_array($item)) {
                return json_encode($item, JSON_UNESCAPED_UNICODE);
            }

            return (string) $item;
        })->toArray();

        return '[' . implode(', ', $items) . ']';
    }

    $pairs = [];
    foreach ($array as $key => $value) {
        $pairs[] = $key . ':' . json_encode($value);
    }

    return '[' . implode(', ', $pairs) . ']';
}
/**
 * Get the part of a string that follows the last occurrence of a token.
 *
 * TODO unit test
 *
 * @param string $token token delimiting the last chunk from left. Not included.
 * @param string $haystack
 * @return string part of haystack after token; the whole haystack if the token is not present.
 */
public static function fromLast($token, $haystack)
{
    $at = strrpos($haystack, $token);

    return $at === false
        ? $haystack
        : substr($haystack, $at + strlen($token));
}
/**
 * Pad a string on the right up to the given length, counted in characters.
 *
 * Only the first character of $fill is used. Strings that already have
 * $len or more characters are returned unchanged.
 *
 * @param string $str string to pad
 * @param int $len wanted length in characters
 * @param string $fill padding character
 * @return string padded string
 */
public static function rpad($str, $len, $fill = ' ')
{
    $missing = $len - mb_strlen($str);
    if ($missing < 0) {
        $missing = 0;
    }

    $padChar = mb_substr($fill, 0, 1);

    return $str . str_repeat($padChar, $missing);
}
/**
 * Pad a string on the left up to the given length, counted in characters.
 *
 * Only the first character of $fill is used. Strings that already have
 * $len or more characters are returned unchanged.
 *
 * @param string $str string to pad
 * @param int $len wanted length in characters
 * @param string $fill padding character
 * @return string padded string
 */
public static function lpad($str, $len, $fill = ' ')
{
    $missing = $len - mb_strlen($str);
    if ($missing < 0) {
        $missing = 0;
    }

    $padChar = mb_substr($fill, 0, 1);

    return str_repeat($padChar, $missing) . $str;
}
/**
 * Remove wrapping quotes from a string.
 * C-slashes will also be removed.
 *
 * The quotes are removed only when the string both starts and ends with
 * the same quote character (" or '). stripcslashes() is applied in every
 * case. Empty and non-string values are returned untouched.
 *
 * @param mixed $str
 * @return mixed unquoted string, or the input itself if it is empty or not a string
 */
public static function unquote($str)
{
    if (!$str || !is_string($str)) {
        return $str;
    }

    $length = strlen($str);

    // a lone quote character cannot be a wrapping pair
    if ($length >= 2) {
        $first = $str[0];
        $last = $str[$length - 1];

        if ($first === $last && ($first === '"' || $first === "'")) {
            $str = substr($str, 1, $length - 2);
        }
    }

    return stripcslashes($str);
}
/**
 * Apply a rewrite.
 *
 * - Simple foo|bar rewrites 0|1 (or false|true)
 * - Key-value rewrite is possible with 7=foo|9=bar
 * - '*' (asterisk) matches everything (9=foo|*=other)
 * - '\*' - match literal asterisk
 * - starts with % - format using sprintf
 * - Compare funcs can also be used: lt, gt, le, ge, range, in
 *   example: lt(100)=Foo|range(100,200)=Bar|gt(200)=Baz
 *
 * A literal '=' inside a key or a replacement is written as '\='.
 * Patterns are tried in the order given and the first match wins. Numeric
 * keys match when the integer parts are equal or when the float values
 * differ by less than 0.00001.
 *
 * The value is returned unchanged when nothing matches, or when one of the
 * patterns contains more than one unescaped '=' (a warning is logged).
 *
 * @param mixed $value value from expression
 * @param string $rewrite rewrite patterns, | separated
 * @return mixed result to show
 */
public static function rewrite($value, $rewrite)
{
    // TODO rework this into a simpler notation

    // isset() keeps an empty pattern from triggering an invalid string offset access
    if (isset($rewrite[0]) && $rewrite[0] == '%') return sprintf($rewrite, $value);

    $rewrite_map = [];
    foreach (explode('|', trim($rewrite)) as $i => $rw) {
        // split at '=' unless it is escaped as '\='
        $ar = preg_split('/(?<![\\\\])=/', $rw);
        if (count($ar) == 2) {
            // key value pair
            $key = trim(str_replace('\=', '=', $ar[0]));
            $rewrite_map[$key] = trim(str_replace('\=', '=', $ar[1]));
        } elseif (count($ar) == 1) {
            // literal rewrite, keyed by its position in the list
            $rewrite_map[$i] = $rw;
        } else {
            Log::warning("Invalid rewrite format: $rw");
            return $value; // don't rewrite it
        }
    }

    // apply the rewrite if any
    foreach ($rewrite_map as $k => $replacement) {
        if (is_numeric($k) && (((int)$k == (int)$value) || abs((float)$k - (float)$value)<0.00001)) {
            // exact match
            return $replacement;
        }
        else if (($k==='true'||$k==='false') && (Utils::parseBool($k) == Utils::parseBool($value))) {
            // bool match
            return $replacement;
        }
        else if (preg_match('/([a-z]+)\(([^)]+)\)/i', $k, $mm)) {
            // we have a comparing function
            if (static::testCompareFunc($value, $mm[1], $mm[2])) {
                return $replacement;
            }
        }
        else if ($k === '*') {
            return $replacement; // catch-all
        }
        else if ($k === '\\*') {
            if ($value === '*') {
                return $replacement; // literal asterisk
            }
        }
    }

    return $value;
}
/**
 * Evaluate one compare function of a rewrite pattern against a value.
 * Copied from FB2.
 *
 * Supported functions (case-insensitive): lt, gt, le, ge, eq compare the
 * value and the argument as floats; range(a,b) tests a <= value < b;
 * in(a,b,...) tests membership in the comma separated list. An unknown
 * function or a malformed range is logged as an error and never matches.
 *
 * @param mixed $value value to format
 * @param string $func function name
 * @param mixed $argument extra argument for the func
 * @return bool
 */
private static function testCompareFunc($value, $func, $argument)
{
    $number = floatval($value);
    $operand = floatval($argument);

    switch (trim(strtolower($func))) {
        case 'lt':
            return $number < $operand;

        case 'gt':
            return $number > $operand;

        case 'le':
            return $number <= $operand;

        case 'ge':
            return $number >= $operand;

        case 'eq':
            // this is kinda useless, but to make the set complete
            return $number == $operand;

        case 'range':
            // e.g. range(-10,0) = winter; the upper bound is exclusive
            $bounds = [];
            foreach (explode(',', $argument) as $bound) {
                $bounds[] = floatval(trim($bound));
            }

            if (count($bounds) != 2) {
                Log::error("Invalid range bounds: $argument");
                return false;
            }

            list($low, $high) = $bounds;

            return $low <= $number && $number < $high;

        case 'in':
            // in(10,20,30) = 10, 20 or 30; also works for strings: in(ZAP,VYP) = ZAP or VYP
            $candidates = array_map('trim', explode(',', $argument));

            return in_array(trim($value), $candidates);
    }

    Log::error("Invalid rewrite function: $func");

    return false;
}
/**
 * Render one format once per set of substitutions and collect the results.
 *
 * A set that is not an array is treated as a single positional substitution.
 *
 * @param string $format Format same as for Str::format(). Fields are marked {}, {0} or {key}
 * @param array $subs_arrays array of arrays of substitutions - eg. [[a1, b1], [a2, b2], ...]
 * @return array formatted strings, one per substitution set, in input order
 */
public static function mapFormat($format, $subs_arrays)
{
    $results = [];

    foreach ($subs_arrays as $subs) {
        $results[] = Str::format($format, is_array($subs) ? $subs : [$subs]);
    }

    return $results;
}
/**
 * Find all needle positions within a haystack
 *
 * Positions are byte offsets. Matches do not overlap: the search resumes
 * right after the end of the previous match. An empty needle has no
 * positions; searching for one would otherwise never advance.
 *
 * @param string $haystack
 * @param string $needle
 * @return array byte offsets in ascending order
 */
public static function findPositions($haystack, $needle)
{
    $step = strlen($needle);
    if ($step === 0) {
        return [];
    }

    $positions = [];
    $offset = 0;
    while (($found = strpos($haystack, $needle, $offset)) !== false) {
        $positions[] = $found;
        $offset = $found + $step;
    }

    return $positions;
}
/**
 * Discard positions in a string that are preceded by an unescaped backslash.
 *
 * The uninterrupted run of backslashes right before each position is
 * counted. Backslashes escape each other in pairs, so the character at
 * the position is escaped exactly when that run has an odd length.
 *
 * @param string $str
 * @param int[] $positions
 * @return int[] the positions that are not escaped, in their original order
 */
public static function discardEscapedPositions($str, array $positions)
{
    $actualPos = [];

    foreach ($positions as $pos) {
        $slashes = 0;
        for ($i = $pos - 1; $i >= 0 && $str[$i] === '\\'; $i--) {
            $slashes++;
        }

        if ($slashes % 2 === 1) {
            // the last backslash of the run escapes this character
            continue;
        }

        $actualPos[] = $pos;
    }

    return $actualPos;
}
/**
 * Cut a string into chunks at the given positions.
 *
 * The single character sitting at each position is dropped; the chunks are
 * the stretches of text between consecutive positions. Positions at or
 * beyond the end of the string end the processing.
 *
 * @param string $string
 * @param int[] $positions ascending byte offsets of the separator characters
 * @return string[]
 */
public static function splitAt($string, $positions)
{
    $length = strlen($string);

    // virtual separators just before the start and just after the end
    $cuts = array_merge([-1], $positions, [$length]);

    $pieces = [];
    for ($i = 0, $last = count($cuts) - 1; $i < $last; $i++) {
        $from = $cuts[$i];
        if ($from >= $length) {
            break;
        }

        $pieces[] = substr($string, $from + 1, $cuts[$i + 1] - $from - 1);
    }

    return $pieces;
}
/**
 * Split a string into pieces at the delimiter, ignoring delimiters that lie
 * within strings delimited by double quotes.
 * A double quote can be escaped using a backslash.
 *
 * NOTE(review): splitAt() drops exactly one character per position, so
 * this presumably expects a single-character delimiter - confirm.
 *
 * @param string $str
 * @param string $delimiter
 * @return mixed the pieces; an empty array for an empty input string
 * @throws FormatException if the number of unescaped double quotes is odd
 */
public static function splitCommandArgs($str, $delimiter = ',')
{
    if ($str === '') {
        return [];
    }

    // Find unescaped quotes
    $quotePositions = Str::discardEscapedPositions($str, Str::findPositions($str, '"'));

    if (count($quotePositions) % 2 != 0) {
        throw new FormatException("Unmatched quote in command arguments: $str");
    }

    // keep only the delimiters that are outside of quoted sections
    $cutPositions = Utils::discardPositionsWithinPairs(
        Str::findPositions($str, $delimiter),
        $quotePositions
    );

    return collect(Str::splitAt($str, $cutPositions))->trim()->toArray();
}
/**
 * Get the part of a string that precedes the first occurrence of a token.
 *
 * @param string $token token delimiting the parts
 * @param string $haystack full string
 * @return string portion of the string until the first token; or the whole string if token is not present.
 */
public static function toFirst($token, $haystack)
{
    $at = strpos($haystack, $token);

    if ($at !== false) {
        return substr($haystack, 0, $at);
    }

    return $haystack;
}
/**
 * Get substring from first match of a token
 *
 * @param string $token token delimiting the parts
 * @param string $haystack full string
 * @param bool $exclusive if true, the token itself is left out of the result
 * @return string portion of the string starting at the first token (or right after it when exclusive); or empty string if token is not present.
 */
public static function fromFirst($token, $haystack, $exclusive = false)
{
    $lpos = strpos($haystack, $token);
    if ($lpos === false) return '';

    // skip the whole token, not just its first byte
    $skip = $exclusive ? strlen($token) : 0;

    return substr($haystack, $lpos + $skip);
}
/**
 * Get substring to last match of a token
 *
 * @param string $token token delimiting the parts
 * @param string $haystack full string
 * @return string portion of the string until the last token; or the whole string if token is not present.
 */
public static function toLast($token, $haystack)
{
    $rpos = strrpos($haystack, $token);
    if ($rpos === false) return $haystack;

    // strrpos() reports a byte offset, so cut with the byte-based substr();
    // the inherited Str::substr() counts characters and would cut multibyte
    // text at the wrong place.
    return substr($haystack, 0, $rpos);
}
/**
 * Check if translation exists
 *
 * The key is passed through trans(); the translation counts as missing
 * when the result still has the shape of a dotted key (word.word...).
 * NOTE(review): presumably relies on trans() returning the key itself for
 * unknown keys - confirm.
 *
 * @param string $descrKey tested transl key
 * @return bool exists
 */
public static function translationExists($descrKey)
{
    $translated = trans($descrKey);
    $looksLikeKey = preg_match('/^(\w+\.)+(\w+)$/', $translated);

    return !$looksLikeKey;
}
/**
 * Expand array of strings using bash-style repeat patterns {a,b,c}, {from..to}
 *
 * ie. to produce ad1 to ad4, use ad{1..4}. Supports multiple patterns, producing all permutations.
 *
 * Range bounds are read as integers. A braced group that is neither a
 * comma list nor a from..to range is left in the string untouched.
 *
 * @param string|string[] $sourceStrings
 * @return string[]
 */
public static function expandBashRepeat($sourceStrings)
{
    if (is_string($sourceStrings)) $sourceStrings = [$sourceStrings];

    $outputs = [];
    foreach ($sourceStrings as $str) {
        $i = 0;
        $arrays = [];

        // swap every pattern for a numbered {{n}} marker and remember its values
        $str2 = preg_replace_callback('/\{([^{}]+)\}/', function ($m) use (&$i, &$arrays) {
            $seq = explode(',', $m[1]);
            if (count($seq) >= 2) {
                $arrays[$i] = $seq;
            } else {
                $ab = explode('..', $m[1]);
                if (count($ab) != 2) {
                    // not a pattern, keep the text as it is
                    return $m[0];
                }
                $arrays[$i] = range(intval($ab[0]), intval($ab[1]));
            }
            return '{{' . ($i++) . '}}';
        }, $str);

        // fill in the markers one by one, from the last to the first
        $strs = [$str2];
        for ($n = count($arrays) - 1; $n >= 0; $n--) {
            $tmpstrs = [];
            foreach ($arrays[$n] as $subs) {
                foreach ($strs as $ss) {
                    $tmpstrs[] = str_replace('{{' . $n . '}}', (string) $subs, $ss);
                }
            }
            $strs = $tmpstrs;
        }

        $outputs = array_merge($outputs, $strs);
    }

    return $outputs;
}
/**
 * Remove trailing commas from JSON string
 *
 * Every comma that is followed, after optional whitespace, by a closing
 * `}` or `]` is dropped together with that whitespace.
 *
 * @param string $str
 * @return string
 */
public static function cleanJson($str)
{
    $trailingComma = '/,\s*([}\]])/s';
    $bracketOnly = '\1';

    return preg_replace($trailingComma, $bracketOnly, $str);
}
/**
 * Shorten a string and mark the cut with an ellipsis.
 *
 * Strings of up to $maxlen characters are returned unchanged. Longer ones
 * are cut to $maxlen characters; if the last space of the cut text lies
 * fewer than $margin characters before the cut, the text is cut at that
 * space instead. The ellipsis character is appended after the cut.
 *
 * @param string $str text to shorten
 * @param int $maxlen maximum number of characters kept from the text
 * @param int $margin how far back from the cut a space is still used as the break point
 * @return string the original string, or the shortened text followed by '…'
 */
public static function ellipsis($str, $maxlen, $margin=10)
{
    $len = mb_strlen($str);
    if ($len > $maxlen) {
        $hard = mb_substr($str, 0, $maxlen);
        $lastspace = mb_strrpos($hard, ' ');
        // mb_strrpos() gives false when the cut text contains no space
        if ($lastspace !== false && $lastspace > $maxlen - $margin) {
            $hard = mb_substr($hard, 0, $lastspace);
        }
        return $hard . '…';
    }
    return $str;
}
}