1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15:
16: class Strings
17: {
18:
19: 20: 21:
/**
 * Static class - instantiation is forbidden.
 * @throws Nette\StaticClassException always
 */
final public function __construct()
{
	throw new Nette\StaticClassException();
}
26:
27:
28: 29: 30: 31: 32:
/**
 * Tells whether fixEncoding() would leave the string untouched,
 * i.e. whether it is already a valid UTF-8 byte stream.
 * @param  string  byte stream to check
 * @return bool
 */
public static function checkEncoding($s)
{
	$fixed = self::fixEncoding($s);
	return $fixed === $s;
}
37:
38:
39: 40: 41: 42: 43:
/**
 * Returns the string with invalid UTF-8 byte sequences removed.
 * @param  string  byte stream to fix
 * @return string
 */
public static function fixEncoding($s)
{
	if (PHP_VERSION_ID >= 50400) {
		// ENT_IGNORE makes htmlspecialchars() silently discard invalid sequences;
		// decoding afterwards turns the escaped &, < and > back into the originals
		$escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
		return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
	}

	// older PHP: round-trip through UTF-16 and let iconv drop whatever it cannot convert
	return @iconv('UTF-16', 'UTF-8//IGNORE', iconv('UTF-8', 'UTF-16//IGNORE', $s));
}
53:
54:
55: 56: 57: 58: 59: 60:
/**
 * Returns the UTF-8 encoded character for a Unicode code point.
 * @param  int  code point (0x0..0xD7FF or 0xE000..0x10FFFF)
 * @return string
 * @throws Nette\InvalidArgumentException when the code point is negative, a surrogate or above 0x10FFFF
 */
public static function chr($code)
{
	$isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
	if ($code < 0 || $isSurrogate || $code > 0x10FFFF) {
		throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
	}

	// pack('N') produces the 32-bit big-endian form, which is exactly one UTF-32BE code unit
	$utf32 = pack('N', $code);
	return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
}
68:
69:
70: 71: 72: 73: 74: 75:
/**
 * Does $haystack begin with $needle? The comparison is byte-wise and case sensitive.
 * @param  string
 * @param  string
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
	$prefixLength = strlen($needle);
	return 0 === strncmp($haystack, $needle, $prefixLength);
}
80:
81:
82: 83: 84: 85: 86: 87:
/**
 * Does $haystack end with $needle? The comparison is byte-wise and case sensitive.
 * An empty needle always matches.
 * @param  string
 * @param  string
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
	$suffixLength = strlen($needle);
	if ($suffixLength === 0) {
		return TRUE;
	}
	return substr($haystack, -$suffixLength) === $needle;
}
92:
93:
94: 95: 96: 97: 98: 99:
/**
 * Does $haystack contain $needle? The search is byte-wise and case sensitive.
 * @param  string
 * @param  string
 * @return bool
 */
public static function contains($haystack, $needle)
{
	$position = strpos($haystack, $needle);
	return FALSE !== $position;
}
104:
105:
106: 107: 108: 109: 110: 111: 112:
/**
 * Returns a part of a UTF-8 string, counted in characters rather than bytes.
 * @param  string
 * @param  int     start position in characters (negative counts from the end)
 * @param  int     length in characters; NULL means "up to the end of the string"
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
	$toTheEnd = $length === NULL;

	if (function_exists('mb_substr')) {
		if ($toTheEnd && PHP_VERSION_ID < 50408) {
			// mb_substr() understands a NULL length only since PHP 5.4.8
			$length = self::length($s);
		}
		return mb_substr($s, $start, $length, 'UTF-8');
	}

	// iconv fallback
	if ($toTheEnd) {
		$length = self::length($s);

	} elseif ($start < 0 && $length < 0) {
		// turn the negative start into an absolute one - presumably to align
		// iconv_substr() with mb_substr() for this combination of arguments
		$start += self::length($s);
	}
	return iconv_substr($s, $start, $length, 'UTF-8');
}
127:
128:
129: 130: 131: 132: 133:
/**
 * Normalizes text: unifies line endings to \n, removes control characters,
 * strips trailing whitespace from every line and drops leading/trailing blank lines.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function normalize($s)
{
	// standardize line endings to unix-like
	$s = str_replace(array("\r\n", "\r"), "\n", $s);

	// remove control characters; \t and \n are outside the listed ranges and stay
	$s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $s);

	// right-trim every line (multi-line mode)
	$s = preg_replace('#[\t ]+$#m', '', $s);

	// leading and trailing blank lines
	return trim($s, "\n");
}
149:
150:
151: 152: 153: 154: 155:
/**
 * Converts Windows (\r\n) and old Mac (\r) line endings to unix-like \n.
 * @param  string
 * @return string
 */
public static function normalizeNewLines($s)
{
	// "\r\n" must come first so that it is not turned into two line breaks
	$lineBreaks = array("\r\n", "\r");
	return str_replace($lineBreaks, "\n", $s);
}
160:
161:
162: 163: 164: 165: 166:
/**
 * Converts a UTF-8 string to ASCII by transliteration.
 *
 * The characters ` ' " ^ ~ ? are temporarily swapped for the placeholders \x01-\x06,
 * because every occurrence of them that exists after the conversion is deleted
 * before the placeholders are swapped back.
 *
 * @param  string  UTF-8 encoding
 * @return string  ASCII
 */
public static function toAscii($s)
{
	// drop everything outside the whitelisted ranges (control characters, U+0300-U+036F, ...)
	$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
	// hide the original ` ' " ^ ~ ? behind placeholders so the cleanup below spares them
	$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
	// typographic double quotes, single quotes and the degree sign share the
	// placeholders of " (\x03), ' (\x02) and ^ (\x04), so they come out as those characters
	$s = str_replace(
		array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"),
		array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"), $s
	);
	// use the Transliterator class when it exists and the transliterator can be created
	if (class_exists('Transliterator') && $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII')) {
		$s = $transliterator->transliterate($s);
	}
	if (ICONV_IMPL === 'glibc') {
		// spell out a few symbols (guillemets, ellipsis, TM, (c), (R)) before the charset conversion
		$s = str_replace(
			array("\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"),
			array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s
		);
		// glibc path: convert to single-byte WINDOWS-1250 first, then map the
		// remaining high bytes to plain letters/symbols with the table below
		$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
		$s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
			. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
			. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
			. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
			. "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
			'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.');
		// whatever is still non-ASCII is thrown away
		$s = preg_replace('#[^\x00-\x7F]++#', '', $s);
	} else {
		// other iconv implementations: transliterate straight to ASCII
		$s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
	}
	// remove ` ' " ^ ~ ? that exist at this point - presumably artifacts of the
	// transliteration, since the original ones are still hidden behind placeholders
	$s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
	// bring the original characters back
	return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
}
197:
198:
199: 200: 201: 202: 203: 204: 205:
/**
 * Converts a string to a web-safe form: ASCII letters and digits (plus the characters
 * from $charlist), with every run of other characters collapsed to a single dash.
 * @param  string  UTF-8 encoding
 * @param  string  additional allowed characters
 * @param  bool    convert to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
	$ascii = self::toAscii($s);
	if ($lower) {
		$ascii = strtolower($ascii);
	}

	$allowed = 'a-z0-9' . preg_quote($charlist, '#');
	$dashed = preg_replace('#[^' . $allowed . ']+#i', '-', $ascii);

	// no dashes at the edges
	return trim($dashed, '-');
}
216:
217:
218: 219: 220: 221: 222: 223: 224:
/**
 * Shortens a UTF-8 string to at most $maxLen characters, including the appended suffix.
 * The cut is made in front of a whitespace or ASCII punctuation character when one is
 * available within the limit; otherwise the string is cut hard.
 * @param  string  UTF-8 encoding
 * @param  int     maximal length in characters
 * @param  string  suffix appended when the string is shortened (default: ellipsis)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
	if (!(self::length($s) > $maxLen)) {
		return $s; // short enough already
	}

	// room left for the text itself once the suffix is accounted for
	$budget = $maxLen - self::length($append);
	if ($budget < 1) {
		return $append;
	}

	// longest prefix that is followed by whitespace or an ASCII non-alphanumeric character
	$head = self::match($s, '#^.{1,'.$budget.'}(?=[\s\x00-/:-@\[-`{-~])#us');
	if ($head) {
		return $head[0] . $append;
	}

	return self::substring($s, 0, $budget) . $append;
}
241:
242:
243: 244: 245: 246: 247: 248: 249:
/**
 * Indents every non-empty line of a multi-line text.
 * @param  string
 * @param  int     number of repetitions of $chars; values below 1 leave the text untouched
 * @param  string  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
	if (!($level > 0)) {
		return $s;
	}

	$indentation = str_repeat($chars, $level);
	// match the start of the text or a run of line breaks, but only in front of a
	// character that is not a line break - empty lines therefore stay empty
	return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $indentation);
}
257:
258:
259: 260: 261: 262: 263:
/**
 * Converts a UTF-8 string to lower case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function lower($s)
{
	$encoding = 'UTF-8';
	return mb_strtolower($s, $encoding);
}
268:
269:
270: 271: 272: 273: 274:
/**
 * Converts the first character of a UTF-8 string to lower case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function firstLower($s)
{
	$head = self::substring($s, 0, 1);
	$tail = self::substring($s, 1);
	return self::lower($head) . $tail;
}
279:
280:
281: 282: 283: 284: 285:
/**
 * Converts a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function upper($s)
{
	$encoding = 'UTF-8';
	return mb_strtoupper($s, $encoding);
}
290:
291:
292: 293: 294: 295: 296:
/**
 * Converts the first character of a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function firstUpper($s)
{
	$head = self::substring($s, 0, 1);
	$tail = self::substring($s, 1);
	return self::upper($head) . $tail;
}
301:
302:
303: 304: 305: 306: 307:
/**
 * Converts a UTF-8 string to title case (MB_CASE_TITLE).
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function capitalize($s)
{
	$encoding = 'UTF-8';
	return mb_convert_case($s, MB_CASE_TITLE, $encoding);
}
312:
313:
314: 315: 316: 317: 318: 319: 320:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 * @param  string
 * @param  string
 * @param  int     NULL compares whole strings, a positive value the first $len
 *                 characters, a negative value the last |$len| characters
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
	if ($len !== NULL) {
		if ($len < 0) {
			// tail: start |$len| characters before the end and take |$len| characters
			$from = $len;
			$count = -$len;
		} else {
			$from = 0;
			$count = $len;
		}
		$left = self::substring($left, $from, $count);
		$right = self::substring($right, $from, $count);
	}

	return self::lower($left) === self::lower($right);
}
332:
333:
334: 335: 336: 337: 338:
/**
 * Finds the longest common prefix of the given strings.
 *
 * The comparison is byte-wise, but the result never ends in the middle of a
 * multi-byte UTF-8 character. An empty list yields an empty string.
 *
 * @param  string|array  list of strings, or the first of several string arguments
 * @return string
 */
public static function findPrefix($strings)
{
	if (!is_array($strings)) {
		$strings = func_get_args();
	}

	// the cast turns the NULL produced by an empty list into '' and lets
	// scalar items be indexed byte by byte like any other string
	$first = (string) array_shift($strings);
	$firstLength = strlen($first);

	for ($i = 0; $i < $firstLength; $i++) {
		foreach ($strings as $s) {
			if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
				// the difference may sit inside a multi-byte character: step back over
				// UTF-8 continuation bytes (0x80-0xBF) so the prefix ends on a character boundary
				while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
					$i--;
				}
				return substr($first, 0, $i);
			}
		}
	}

	// every other string starts with the whole first one
	return $first;
}
357:
358:
359: 360: 361: 362: 363: 364:
/**
 * Returns the length of a UTF-8 string in characters.
 * @param  string  UTF-8 encoding
 * @return int
 */
public static function length($s)
{
	if (function_exists('mb_strlen')) {
		return mb_strlen($s, 'UTF-8');
	}

	// fallback without mbstring: utf8_decode() produces one byte per character,
	// so the byte length of its result is the character count
	return strlen(utf8_decode($s));
}
369:
370:
371: 372: 373: 374: 375: 376:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 * @param  string  UTF-8 encoding
 * @param  string  characters to strip (default: ASCII whitespace, NUL and U+00A0)
 * @return string
 */
public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
{
	// one or more of the listed characters, as a regular expression character class
	$run = '[' . preg_quote($charlist, '#') . ']+';
	return self::replace($s, '#^' . $run . '|' . $run . '\z#u', '');
}
382:
383:
384: 385: 386: 387: 388: 389: 390:
/**
 * Pads a UTF-8 string on the left to the given length, counted in characters.
 * The padding is $pad repeated as often as it fits, followed by a partial copy of it.
 * @param  string  UTF-8 encoding
 * @param  int     desired total length in characters
 * @param  string  padding; an empty padding leaves the string unchanged
 * @return string
 */
public static function padLeft($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if ($padLen === 0) {
		return (string) $s; // nothing to pad with; also avoids a division by zero
	}

	// number of characters still missing
	$length = max(0, $length - self::length($s));

	// explicit cast: str_repeat() expects an integer and a fractional float
	// triggers the implicit-conversion deprecation on PHP 8.1+
	$repeats = (int) ($length / $padLen);
	return str_repeat($pad, $repeats) . self::substring($pad, 0, $length % $padLen) . $s;
}
397:
398:
399: 400: 401: 402: 403: 404: 405:
/**
 * Pads a UTF-8 string on the right to the given length, counted in characters.
 * The padding is $pad repeated as often as it fits, followed by a partial copy of it.
 * @param  string  UTF-8 encoding
 * @param  int     desired total length in characters
 * @param  string  padding; an empty padding leaves the string unchanged
 * @return string
 */
public static function padRight($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if ($padLen === 0) {
		return (string) $s; // nothing to pad with; also avoids a division by zero
	}

	// number of characters still missing
	$length = max(0, $length - self::length($s));

	// explicit cast: str_repeat() expects an integer and a fractional float
	// triggers the implicit-conversion deprecation on PHP 8.1+
	$repeats = (int) ($length / $padLen);
	return $s . str_repeat($pad, $repeats) . self::substring($pad, 0, $length % $padLen);
}
412:
413:
414: 415: 416: 417: 418:
/**
 * Reverses a UTF-8 string character by character.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function reverse($s)
{
	// four bytes per character, big-endian
	$bigEndian = @iconv('UTF-8', 'UTF-32BE', $s);

	// reversing the bytes reverses the character order and at the same time
	// flips every 4-byte unit to little-endian
	$reversed = strrev($bigEndian);

	return @iconv('UTF-32LE', 'UTF-8', $reversed);
}
423:
424:
425: 426: 427: 428:
/**
 * Generates a random string; a thin wrapper that delegates to Random::generate().
 * @param  int
 * @param  string  passed through to Random::generate() unchanged
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
	$generated = Random::generate($length, $charlist);
	return $generated;
}
433:
434:
435: 436: 437: 438: 439: 440: 441:
/**
 * Returns the part of $haystack in front of the $nth occurrence of $needle.
 * @param  string
 * @param  string
 * @param  int     occurrence number; negative values count from the end
 * @return string|FALSE  FALSE when the occurrence does not exist
 */
public static function before($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === FALSE) {
		return FALSE;
	}
	return substr($haystack, 0, $pos);
}
449:
450:
451: 452: 453: 454: 455: 456: 457:
/**
 * Returns the part of $haystack behind the $nth occurrence of $needle.
 * @param  string
 * @param  string
 * @param  int     occurrence number; negative values count from the end
 * @return string|FALSE  FALSE when the occurrence does not exist
 */
public static function after($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === FALSE) {
		return FALSE;
	}

	$tailStart = $pos + strlen($needle);
	// the cast keeps the result a string even when substr() itself reports nothing left
	return (string) substr($haystack, $tailStart);
}
465:
466:
467: 468: 469: 470:
/**
 * Returns the byte position of the $nth occurrence of $needle in $haystack.
 *
 * A positive $nth counts occurrences from the start, a negative one from the end.
 * An empty needle matches at the very start (positive $nth) or the very end (negative $nth).
 *
 * @param  string
 * @param  string
 * @param  int
 * @return int|FALSE  FALSE when $nth is 0 or there is no such occurrence
 */
private static function pos($haystack, $needle, $nth = 1)
{
	if (!$nth) {
		return FALSE;
	}

	$len = strlen($haystack);
	if (strlen($needle) === 0) {
		return $nth > 0 ? 0 : $len;
	}
	if ($len === 0) {
		// nothing to search in; also keeps strrpos() away from an offset outside the string
		return FALSE;
	}

	if ($nth > 0) {
		$pos = 0;
		while (FALSE !== ($pos = strpos($haystack, $needle, $pos)) && --$nth) {
			$pos++; // continue right behind the start of the last hit
		}

	} else {
		$pos = $len - 1;
		// the negative offset ($pos - $len) restricts strrpos() to hits starting at or before $pos
		while (FALSE !== ($pos = strrpos($haystack, $needle, $pos - $len)) && ++$nth) {
			if ($pos === 0) {
				// a hit at the very start cannot be preceded by another one; going on
				// would hand strrpos() an offset that lies outside the haystack
				return FALSE;
			}
			$pos--;
		}
	}
	return $pos;
}
495:
496:
497: 498: 499: 500: 501: 502: 503:
/**
 * Splits a string by a regular expression. Captured delimiters are part of the
 * result because PREG_SPLIT_DELIM_CAPTURE is always added to the flags.
 * @param  string
 * @param  string  regular expression
 * @param  int     PREG_SPLIT_* flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
	$arguments = array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
	return self::pcre('preg_split', $arguments);
}
508:
509:
510: 511: 512: 513: 514: 515: 516: 517:
/**
 * Performs a regular expression match.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match() flags
 * @param  int     offset in bytes
 * @return array|NULL  the matches, or NULL when nothing matched or the offset lies behind the subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return NULL;
	}

	// $m is handed over by reference and filled in by preg_match()
	$matched = self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset));
	return $matched ? $m : NULL;
}
527:
528:
529: 530: 531: 532: 533: 534: 535: 536:
/**
 * Performs a global regular expression match.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match_all() flags; PREG_SET_ORDER is added unless PREG_PATTERN_ORDER is given
 * @param  int     offset in bytes
 * @return array   empty when the offset lies behind the subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return array();
	}

	if (!($flags & PREG_PATTERN_ORDER)) {
		$flags = $flags | PREG_SET_ORDER;
	}

	// $m is handed over by reference and filled in by preg_match_all()
	self::pcre('preg_match_all', array($pattern, $subject, & $m, $flags, $offset));
	return $m;
}
549:
550:
551: 552: 553: 554: 555: 556: 557: 558:
/**
 * Performs a regular expression search and replace.
 *
 * The replacement can be a string, a callback (object or array form, or Nette\Callback),
 * or be omitted when $pattern is an array of pattern => replacement pairs.
 *
 * @param  string
 * @param  string|array  regular expression, or array of pattern => replacement
 * @param  string|callable
 * @param  int           maximal number of replacements, -1 for no limit
 * @return string
 * @throws Nette\InvalidStateException when an object/array replacement is not callable
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
	$callbackGiven = is_object($replacement) || is_array($replacement);

	if (!$callbackGiven) {
		if ($replacement === NULL && is_array($pattern)) {
			// associative form: keys are the patterns, values their replacements
			$replacement = array_values($pattern);
			$pattern = array_keys($pattern);
		}
		return self::pcre('preg_replace', array($pattern, $replacement, $subject, $limit));
	}

	if ($replacement instanceof Nette\Callback) {
		$replacement = $replacement->getNative();
	}
	if (!is_callable($replacement, FALSE, $textual)) {
		throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
	}

	return self::pcre('preg_replace_callback', array($pattern, $replacement, $subject, $limit));
}
578:
579:
580:
/**
 * Calls a PCRE function and converts its failures to RegexpException.
 * @param  string  name of the preg_* function
 * @param  array   its arguments; the first one is the pattern (or an array of patterns)
 * @return mixed   result of the called function
 * @throws RegexpException on a pattern error or a run-time PCRE error
 * @internal
 */
public static function pcre($func, $args)
{
	static $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
		6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR; literal because the constant exists only since PHP 7
	);
	// the handler is handed to Callback::invokeSafe() - presumably it receives the
	// message of a PHP error raised by the call, e.g. a pattern that fails to compile
	$res = Callback::invokeSafe($func, $args, function ($message) use ($args) {
		throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
	});

	// run-time errors: for the replacing functions only a NULL result counts as a failure
	if (($code = preg_last_error())
		&& ($res === NULL || !in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace')))
	) {
		throw new RegexpException((isset($messages[$code]) ? $messages[$code] : 'Unknown error')
			. ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
	}
	return $res;
}
603:
604: }
605: