BaseStringHelper.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. <?php
  2. /**
  3. * @link https://www.yiiframework.com/
  4. * @copyright Copyright (c) 2008 Yii Software LLC
  5. * @license https://www.yiiframework.com/license/
  6. */
  7. namespace yii\helpers;
  8. use Yii;
  9. /**
  10. * BaseStringHelper provides concrete implementation for [[StringHelper]].
  11. *
  12. * Do not use BaseStringHelper. Use [[StringHelper]] instead.
  13. *
  14. * @author Qiang Xue <qiang.xue@gmail.com>
  15. * @author Alex Makarov <sam@rmcreative.ru>
  16. * @since 2.0
  17. */
  18. class BaseStringHelper
  19. {
  /**
   * Measures a string in bytes rather than in characters.
   *
   * The value is cast to string and measured with `mb_strlen()` using the `8bit`
   * encoding, so every byte counts as one unit regardless of multibyte content.
   *
   * @param string $string the string to measure.
   * @return int the length of the string in bytes.
   */
  public static function byteLength($string)
  {
    $bytes = (string)$string;

    return mb_strlen($bytes, '8bit');
  }
  /**
   * Extracts a slice of a string, addressing it byte by byte.
   *
   * The slice is taken with `mb_substr()` using the `8bit` encoding, so `$start` and
   * `$length` are byte offsets/counts, not character ones.
   *
   * @param string $string the input string.
   * @param int $start the starting byte position.
   * @param int|null $length the number of bytes to extract. When omitted or `null`, the byte
   * length of the whole string is used, i.e. the slice runs to the end of the string.
   * @return string the extracted part of the string (possibly empty).
   * @see https://www.php.net/manual/en/function.substr.php
   */
  public static function byteSubstr($string, $start, $length = null)
  {
    // A null length means "no limit"; the full byte length is always large enough.
    $byteCount = $length === null ? static::byteLength($string) : $length;

    return mb_substr((string)$string, $start, $byteCount, '8bit');
  }
  /**
   * Returns the last name component of a path.
   *
   * Works like PHP's `basename()`, but both `\` and `/` are always treated as directory
   * separators, whatever the operating system. It was mainly created to work on PHP
   * namespaces; for real file paths PHP's `basename()` should work fine.
   * Note: the method does not look at the filesystem and gives no special meaning to
   * path components such as "..".
   *
   * @param string $path A path string.
   * @param string $suffix If the path ends with this suffix, the suffix is cut off as well.
   * The suffix is removed before trailing separators are trimmed.
   * @return string the trailing name component of the given path.
   * @see https://www.php.net/manual/en/function.basename.php
   */
  public static function basename($path, $suffix = '')
  {
    $name = (string)$path;

    $suffixLength = mb_strlen($suffix);
    if ($suffixLength > 0) {
      $tail = mb_substr($name, -$suffixLength);
      if ($tail === $suffix) {
        $name = mb_substr($name, 0, -$suffixLength);
      }
    }

    // Unify separators, then drop any trailing ones.
    $name = str_replace('\\', '/', $name);
    $name = rtrim($name, '/');

    $lastSlash = mb_strrpos($name, '/');

    return $lastSlash === false ? $name : mb_substr($name, $lastSlash + 1);
  }
  /**
   * Returns the path of the parent directory.
   *
   * Works like PHP's `dirname()`, but both `\` and `/` are always treated as directory
   * separators, whatever the operating system. Trailing separators are ignored when
   * locating the last separator.
   *
   * @param string $path A path string.
   * @return string the parent directory's path, or an empty string when the path
   * contains no separator.
   * @see https://www.php.net/manual/en/function.dirname.php
   */
  public static function dirname($path)
  {
    $unified = str_replace('\\', '/', (string)$path);
    $trimmed = rtrim($unified, '/');

    $cut = mb_strrpos($trimmed, '/');
    if ($cut === false) {
      return '';
    }

    // Slice the path as given by the caller, so its own separators are kept in the result.
    return mb_substr($path, 0, $cut);
  }
  /**
   * Cuts a string down to a given number of characters.
   *
   * The suffix is appended on top of `$length`: to obtain a result that is exactly 255
   * characters long with the 3-character suffix `...`, call
   * `StringHelper::truncate($string, 252, '...')`.
   * Trailing whitespace left at the cut point is removed before the suffix is appended.
   *
   * @param string $string The string to truncate.
   * @param int $length How many characters from original string to include into truncated string.
   * @param string $suffix String to append to the end of truncated string.
   * @param string|null $encoding The charset to use, defaults to charset currently used by application
   * (or `UTF-8` when no application is available).
   * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags.
   * This parameter is available since version 2.0.1.
   * @return string the truncated string, or the unchanged string when it is not longer than `$length`.
   */
  public static function truncate($string, $length, $suffix = '...', $encoding = null, $asHtml = false)
  {
    $text = (string)$string;

    if ($encoding === null) {
      $encoding = Yii::$app ? Yii::$app->charset : 'UTF-8';
    }

    if ($asHtml) {
      return static::truncateHtml($text, $length, $suffix, $encoding);
    }

    $needsCut = mb_strlen($text, $encoding) > $length;
    if (!$needsCut) {
      return $text;
    }

    $head = mb_substr($text, 0, $length, $encoding);

    return rtrim($head) . $suffix;
  }
  /**
   * Truncates a string to the number of words specified.
   *
   * Words are runs of non-whitespace characters; the whitespace between the kept words is
   * preserved as it was in the (trimmed) input. When the string has no more than `$count`
   * words it is returned as is, without the suffix.
   *
   * @param string $string The string to truncate.
   * @param int $count How many words from original string to include into truncated string.
   * A value of zero or less keeps no words at all.
   * @param string $suffix String to append to the end of truncated string.
   * @param bool $asHtml Whether to treat the string being truncated as HTML and preserve proper HTML tags.
   * This parameter is available since version 2.0.1.
   * @return string the truncated string.
   */
  public static function truncateWords($string, $count, $suffix = '...', $asHtml = false)
  {
    // Cast up front, like the sibling helpers do, so null/scalar input is handled uniformly.
    $string = (string)$string;

    if ($asHtml) {
      return static::truncateHtml($string, $count, $suffix);
    }

    // The capturing group keeps the separators, so the result alternates word, space, word, ...
    $words = preg_split('/(\s+)/u', trim($string), 0, PREG_SPLIT_DELIM_CAPTURE);
    if ($words === false) {
      // preg_split() failed (e.g. the input is not valid UTF-8): leave the string untouched.
      return $string;
    }

    if (count($words) / 2 > $count) {
      // N words occupy 2N - 1 slots. Clamp at zero: a negative length would make
      // array_slice() drop elements from the end instead of keeping none.
      $keep = max(0, ($count * 2) - 1);

      return implode('', array_slice($words, 0, $keep)) . $suffix;
    }

    return $string;
  }
  /**
   * Truncate a string while preserving the HTML.
   *
   * The markup is tokenized with HTMLPurifier and the tokens are copied until the text seen
   * so far reaches `$count`. Tags that are still open at that point are closed, innermost
   * first, and the token list is turned back into markup.
   *
   * @param string $string The string to truncate
   * @param int $count The limit: a number of words when `$encoding` is `false`, otherwise
   * a number of characters.
   * @param string $suffix String to append to the end of the truncated string. It is only
   * appended when the limit was reached.
   * @param string|bool $encoding `false` to count words, or the charset to use for counting characters.
   * @return string
   * @since 2.0.1
   */
  protected static function truncateHtml($string, $count, $suffix, $encoding = false)
  {
    $config = \HTMLPurifier_Config::create(null);
    if (Yii::$app !== null) {
      $config->set('Cache.SerializerPath', Yii::$app->getRuntimePath());
    }
    $lexer = \HTMLPurifier_Lexer::create($config);
    $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context());

    $openTokens = []; // names of currently open tags, indexed by nesting depth
    $totalCount = 0;  // words or characters copied so far
    $depth = 0;
    $truncated = [];

    foreach ($tokens as $token) {
      if ($token instanceof \HTMLPurifier_Token_Start) { //Tag begins
        $openTokens[$depth] = $token->name;
        $truncated[] = $token;
        ++$depth;
      } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) { //Text
        if (false === $encoding) {
          // Word mode: keep the leading whitespace, then truncate the rest by words.
          preg_match('/^(\s*)/um', $token->data, $prefixSpace) ?: $prefixSpace = ['', ''];
          $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, '');
          $currentCount = self::countWords($token->data);
        } else {
          // Character mode: truncate to whatever budget is left.
          $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding);
          $currentCount = mb_strlen($token->data, $encoding);
        }
        $totalCount += $currentCount;
        $truncated[] = $token;
      } elseif ($token instanceof \HTMLPurifier_Token_End) { //Tag ends
        // Only an end tag matching the innermost open tag is copied. The depth check avoids
        // reading a non-existent index when an end tag arrives while nothing is open.
        if ($depth > 0 && $token->name === $openTokens[$depth - 1]) {
          --$depth;
          unset($openTokens[$depth]);
          $truncated[] = $token;
        }
      } elseif ($token instanceof \HTMLPurifier_Token_Empty) { //Self contained tags, i.e. <img/> etc.
        $truncated[] = $token;
      }
      // Tokens of any other type are not copied.

      if ($totalCount >= $count) {
        if (0 < count($openTokens)) {
          // Close the remaining tags from the deepest one outwards.
          krsort($openTokens);
          foreach ($openTokens as $name) {
            $truncated[] = new \HTMLPurifier_Token_End($name);
          }
        }
        break;
      }
    }

    $context = new \HTMLPurifier_Context();
    $generator = new \HTMLPurifier_Generator($config, $context);

    return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
  }
  /**
   * Tells whether a string begins with the given substring. Binary and multibyte safe.
   *
   * An empty `$with` always matches.
   *
   * @param string $string Input string
   * @param string $with Part to search inside the $string
   * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, `$with` must
   * exactly match the starting of the string in order to get a true value. When disabled, the
   * leading bytes of `$string` and `$with` are compared after lower-casing both in the application charset.
   * @return bool Returns true if first input starts with second input, false otherwise
   */
  public static function startsWith($string, $with, $caseSensitive = true)
  {
    $haystack = (string)$string;
    $needle = (string)$with;

    $needleBytes = static::byteLength($needle);
    if (!$needleBytes) {
      return true;
    }

    if (!$caseSensitive) {
      $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
      $prefix = static::byteSubstr($haystack, 0, $needleBytes);

      return mb_strtolower($prefix, $charset) === mb_strtolower($needle, $charset);
    }

    return strncmp($haystack, $needle, $needleBytes) === 0;
  }
  /**
   * Tells whether a string ends with the given substring. Binary and multibyte safe.
   *
   * An empty `$with` always matches.
   *
   * @param string $string Input string to check
   * @param string $with Part to search inside of the `$string`.
   * @param bool $caseSensitive Case sensitive search. Default is true. When case sensitive is enabled, `$with` must
   * exactly match the ending of the string in order to get a true value. When disabled, the
   * trailing bytes of `$string` and `$with` are compared after lower-casing both in the application charset.
   * @return bool Returns true if first input ends with second input, false otherwise
   */
  public static function endsWith($string, $with, $caseSensitive = true)
  {
    $haystack = (string)$string;
    $needle = (string)$with;

    $needleBytes = static::byteLength($needle);
    if (!$needleBytes) {
      return true;
    }

    if (!$caseSensitive) {
      $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
      $suffix = static::byteSubstr($haystack, -$needleBytes);

      return mb_strtolower($suffix, $charset) === mb_strtolower($needle, $charset);
    }

    // Length is checked first to avoid the warning described at
    // https://php.net/substr-compare#refsect1-function.substr-compare-returnvalues
    return static::byteLength($haystack) >= $needleBytes
      && substr_compare($haystack, $needle, -$needleBytes, $needleBytes) === 0;
  }
  /**
   * Explodes string into array, optionally trims values and skips empty ones.
   *
   * @param string $string String to be exploded. It is cast to string first, so `null`
   * is treated as an empty string.
   * @param string $delimiter Delimiter. Default is ','.
   * @param mixed $trim Whether to trim each element. Can be:
   * - boolean - to trim normally;
   * - string - custom characters to trim. Will be passed as a second argument to `trim()` function.
   * - callable - will be called for each value instead of trim. Takes the only argument - value.
   *
   * Note that a string which happens to be a callable name is treated as a callable,
   * not as a character list.
   * @param bool $skipEmpty Whether to skip empty strings between delimiters. Default is false.
   * @return array
   * @since 2.0.4
   */
  public static function explode($string, $delimiter = ',', $trim = true, $skipEmpty = false)
  {
    // Cast like the sibling helpers do: passing null to the native explode() is deprecated since PHP 8.1.
    $result = explode($delimiter, (string)$string);

    if ($trim !== false) {
      if ($trim === true) {
        $trim = 'trim';
      } elseif (!is_callable($trim)) {
        // A non-callable value is taken as the list of characters to strip.
        $trim = function ($v) use ($trim) {
          return trim($v, $trim);
        };
      }
      $result = array_map($trim, $result);
    }

    if ($skipEmpty) {
      // Wrapped with array_values to make array keys sequential after empty values removing
      $result = array_values(array_filter($result, function ($value) {
        return $value !== '';
      }));
    }

    return $result;
  }
  /**
   * Counts the words in a string.
   *
   * The string is split on runs of whitespace and the non-empty pieces are counted.
   *
   * @param string $string the text to calculate
   * @return int the number of whitespace-separated words.
   * @since 2.0.8
   */
  public static function countWords($string)
  {
    $pieces = preg_split('/\s+/u', $string, 0, PREG_SPLIT_NO_EMPTY);

    return count($pieces);
  }
  /**
   * Returns the string form of a number with the locale's decimal point replaced by a dot.
   *
   * The decimal point is taken from `localeconv()`. When it is already `.` (or is not
   * reported at all) the string form of the value is returned unchanged.
   *
   * @param int|float|string $value the value to normalize.
   * @return string
   * @since 2.0.11
   */
  public static function normalizeNumber($value)
  {
    $text = (string) $value;

    $locale = localeconv();
    if (!isset($locale['decimal_point'])) {
      return $text;
    }

    $point = $locale['decimal_point'];
    if ($point === '.') {
      return $text;
    }

    return str_replace($point, '.', $text);
  }
  /**
   * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
   *
   * The input is base64-encoded and then `+` is replaced by `-` and `/` by `_`.
   *
   * > Note: Base 64 padding `=` may be at the end of the returned string.
   * > `=` is not transparent to URL encoding.
   *
   * @param string $input the string to encode.
   * @return string encoded string.
   * @see https://tools.ietf.org/html/rfc4648#page-7
   * @since 2.0.12
   */
  public static function base64UrlEncode($input)
  {
    $encoded = base64_encode($input);

    return strtr($encoded, ['+' => '-', '/' => '_']);
  }
  /**
   * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
   *
   * `-` is mapped back to `+` and `_` back to `/` before the regular base64 decoding is applied.
   *
   * @param string $input encoded string.
   * @return string decoded string.
   * @see https://tools.ietf.org/html/rfc4648#page-7
   * @since 2.0.12
   */
  public static function base64UrlDecode($input)
  {
    $standard = strtr($input, ['-' => '+', '_' => '/']);

    return base64_decode($standard);
  }
  /**
   * Casts a number to string so that the decimal separator is always `.`,
   * independent of the current locale.
   *
   * @param float|int $number a floating point number or integer.
   * @return string the string representation of the number.
   * @since 2.0.13
   */
  public static function floatToString($number)
  {
    $text = (string) $number;

    // . and , are the only decimal separators known in ICU data,
    // so it is safe to simply swap every comma for a dot here
    return strtr($text, ',', '.');
  }
  /**
   * Checks if the passed string would match the given shell wildcard pattern.
   * This function emulates [[fnmatch()]], which may be unavailable at certain environment, using PCRE.
   *
   * The pattern is quoted with `preg_quote()` and the quoted wildcard tokens are then
   * translated to their regular-expression equivalents.
   *
   * @param string $pattern the shell wildcard pattern.
   * @param string $string the tested string.
   * @param array $options options for matching. Valid options are:
   *
   * - caseSensitive: bool, whether pattern should be case sensitive. Defaults to `true`.
   * - escape: bool, whether backslash escaping is enabled. Defaults to `true`.
   * - filePath: bool, whether slashes in string only matches slashes in the given pattern. Defaults to `false`.
   *
   * @return bool whether the string matches pattern or not.
   * @since 2.0.14
   */
  public static function matchWildcard($pattern, $string, $options = [])
  {
    $filePath = !empty($options['filePath']);

    // Shortcut: a lone "*" matches anything unless slashes need special treatment.
    if ($pattern === '*' && !$filePath) {
      return true;
    }

    $escape = !isset($options['escape']) || $options['escape'];
    $caseSensitive = !isset($options['caseSensitive']) || $options['caseSensitive'];

    // Keys are wildcard tokens as they look after preg_quote(); values are their regex form.
    // In file path mode "*" and "?" must not match a slash or a backslash.
    $map = [
      '\*' => $filePath ? '[^/\\\\]*' : '.*',
      '\?' => $filePath ? '[^/\\\\]' : '.',
      '\[\!' => '[^',
      '\[' => '[',
      '\]' => ']',
      '\-' => '-',
    ];

    if ($escape) {
      // Backslash-escaped "\", "*" and "?" stand for the literal characters.
      $map['\\\\\\\\'] = '\\\\';
      $map['\\\\\\*'] = '[*]';
      $map['\\\\\\?'] = '[?]';
    }

    $regex = '#^' . strtr(preg_quote($pattern, '#'), $map) . '$#us';
    if (!$caseSensitive) {
      $regex .= 'i';
    }

    return preg_match($regex, (string)$string) === 1;
  }
  /**
   * This method provides a unicode-safe implementation of built-in PHP function `ucfirst()`.
   *
   * Only the first character is upper-cased; the remainder of the string is returned untouched.
   *
   * @param string $string the string to be processed
   * @param string $encoding Optional, defaults to "UTF-8"
   * @return string
   * @see https://www.php.net/manual/en/function.ucfirst.php
   * @since 2.0.16
   * @phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
   */
  public static function mb_ucfirst($string, $encoding = 'UTF-8')
  {
    $text = (string)$string;

    $head = mb_strtoupper(mb_substr($text, 0, 1, $encoding), $encoding);
    $tail = mb_substr($text, 1, null, $encoding);

    return $head . $tail;
  }
  /**
   * This method provides a unicode-safe implementation of built-in PHP function `ucwords()`.
   *
   * The string is split into chunks with the separators kept, every second chunk gets its
   * first character upper-cased, and the chunks are glued back together.
   *
   * @param string $string the string to be processed
   * @param string $encoding Optional, defaults to "UTF-8"
   * @return string
   * @see https://www.php.net/manual/en/function.ucwords
   * @since 2.0.16
   * @phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
   */
  public static function mb_ucwords($string, $encoding = 'UTF-8')
  {
    $text = (string) $string;
    if (empty($text)) {
      return $text;
    }

    $chunks = preg_split('/(\s+\W+\s+|^\W+\s+|\s+)/u', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

    // If the first chunk ends with whitespace it is a separator, so the chunks to
    // capitalize sit at odd indexes; otherwise they sit at even indexes.
    $startsWithSeparator = trim(mb_substr($chunks[0], -1, 1, $encoding)) === '';

    foreach ($chunks as $index => $chunk) {
      $isOddIndex = (bool)($index % 2);
      if ($isOddIndex === $startsWithSeparator) {
        $chunks[$index] = static::mb_ucfirst($chunk, $encoding);
      }
    }

    return implode('', $chunks);
  }
  /**
   * Masks a portion of a string with a repeated character.
   * This method is multibyte-safe.
   *
   * @param string $string The input string. It is cast to string first.
   * @param int $start The starting position from where to begin masking.
   * This can be a positive or negative integer.
   * Positive values count from the beginning,
   * negative values count from the end of the string.
   * If the position lies outside the string, the string is returned unchanged.
   * @param int $length The length of the section to be masked.
   * The masking will start from the $start position
   * and continue for $length characters. The absolute value is used.
   * @param string $mask The character to use for masking. The default is '*'.
   * It is repeated `abs($length)` times.
   * @return string The masked string.
   */
  public static function mask($string, $start, $length, $mask = '*')
  {
    // Cast like the sibling helpers do, so scalar/null input is handled uniformly.
    $string = (string)$string;
    $strLength = mb_strlen($string, 'UTF-8');

    // Return original string if start position is out of bounds
    if ($start >= $strLength || $start < -$strLength) {
      return $string;
    }

    // Turn a negative start into the equivalent offset from the beginning. Otherwise
    // "$start + length" below could land on zero or stay negative, and the tail would be
    // taken from the wrong place (e.g. the whole string would be appended again).
    if ($start < 0) {
      $start += $strLength;
    }

    $maskLength = abs($length);

    $masked = mb_substr($string, 0, $start, 'UTF-8');
    $masked .= str_repeat($mask, $maskLength);
    $masked .= mb_substr($string, $start + $maskLength, null, 'UTF-8');

    return $masked;
  }
  /**
   * Returns the portion of the string that lies between the first occurrence of the start string
   * and the last occurrence of the end string after that.
   *
   * @param string $string The input string.
   * @param string $start The string marking the start of the portion to extract.
   * @param string $end The string marking the end of the portion to extract.
   * @return string|null The portion of the string between the first occurrence of
   * start and the last occurrence of end, or null if either start or end cannot be found.
   */
  public static function findBetween($string, $start, $end)
  {
    $from = mb_strpos($string, $start);
    if ($from === false) {
      return null;
    }

    // Move past the start marker; the end marker is only searched for from here on.
    $from += mb_strlen($start);

    $to = mb_strrpos($string, $end, $from);

    return $to === false ? null : mb_substr($string, $from, $to - $from);
  }
  501. }