1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Nette\Utils;
13:
14: use Nette,
15: Nette\Diagnostics\Debugger;
16:
17:
18:
19: 20: 21: 22: 23:
/**
 * String tools library. All methods are static; text-oriented helpers
 * (substring, length, lower, upper, ...) operate on UTF-8 strings.
 */
class Strings
{

    /**
     * Static class - cannot be instantiated.
     *
     * @throws Nette\StaticClassException always
     */
    final public function __construct()
    {
        throw new Nette\StaticClassException;
    }


    /**
     * Checks if the string is valid for the specified encoding, i.e. whether
     * fixEncoding() would leave it untouched.
     *
     * @param  string  byte stream to check
     * @param  string  expected encoding
     * @return bool
     */
    public static function checkEncoding($s, $encoding = 'UTF-8')
    {
        return $s === self::fixEncoding($s, $encoding);
    }


    /**
     * Returns the string with sequences that are invalid in the given
     * encoding removed. For UTF-8 every byte order mark is removed as well.
     *
     * @param  string  byte stream to fix
     * @param  string  encoding
     * @return string
     */
    public static function fixEncoding($s, $encoding = 'UTF-8')
    {
        if (strcasecmp($encoding, 'UTF-8') === 0) {
            $s = str_replace("\xEF\xBB\xBF", '', $s); // UTF-8 encoded U+FEFF (BOM)
        }
        if (PHP_VERSION_ID >= 50400) {
            // Drop invalid sequences instead of substituting them, but put the
            // caller's substitute character back afterwards so that this method
            // does not leak a changed global mbstring setting.
            $previous = mb_substitute_character();
            mb_substitute_character('none');
            $s = mb_convert_encoding($s, $encoding, $encoding);
            mb_substitute_character($previous);
            return $s;
        }
        // Older PHP: round-trip through UTF-16 and let iconv ignore invalid input.
        return @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s));
    }


    /**
     * Returns a specific character.
     *
     * @param  int     code point
     * @param  string  target encoding
     * @return string
     */
    public static function chr($code, $encoding = 'UTF-8')
    {
        // pack('N') yields the code point as a big-endian 32-bit value, i.e. UTF-32BE.
        return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
    }


    /**
     * Starts the $haystack string with the prefix $needle? (byte-wise comparison)
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }


    /**
     * Ends the $haystack string with the suffix $needle? (byte-wise comparison)
     * An empty $needle always matches.
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
    }


    /**
     * Does $haystack contain $needle?
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function contains($haystack, $needle)
    {
        return strpos($haystack, $needle) !== FALSE;
    }


    /**
     * Returns a part of UTF-8 string.
     *
     * @param  string
     * @param  int       start position in characters
     * @param  int|NULL  length in characters; NULL means "up to the end"
     * @return string
     */
    public static function substring($s, $start, $length = NULL)
    {
        if ($length === NULL) {
            $length = self::length($s);
        }
        if (function_exists('mb_substr')) {
            return mb_substr($s, $start, $length, 'UTF-8');
        }
        return iconv_substr($s, $start, $length, 'UTF-8');
    }


    /**
     * Removes special control characters and normalizes line endings and spaces.
     *
     * @param  string
     * @return string
     */
    public static function normalize($s)
    {
        // standardize line endings to unix-like
        $s = str_replace("\r\n", "\n", $s);
        $s = strtr($s, "\r", "\n");

        // remove control characters; \t (0x09) and \n (0x0A) are preserved
        $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

        // strip trailing tabs and spaces on every line
        $s = preg_replace('#[\t ]+$#m', '', $s);

        // strip leading and trailing blank lines
        return trim($s, "\n");
    }


    /**
     * Converts a UTF-8 string to ASCII.
     *
     * @param  string  UTF-8 encoding
     * @return string  ASCII
     */
    public static function toAscii($s)
    {
        // Keep tab, LF, CR, printable ASCII and code points from U+00A0 up, except
        // the U+0300..U+036F range; everything else is removed.
        $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);

        // Hide the characters ` ' " ^ ~ that are part of the input behind placeholders,
        // so that the same characters showing up after transliteration can be told
        // apart and stripped below.
        $s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");

        if (ICONV_IMPL === 'glibc') {
            $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
            // Map the remaining non-ASCII WINDOWS-1250 bytes to ASCII letters.
            $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
                . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
                . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
                . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
                "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
        } else {
            $s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);
        }

        // Whatever ` ' " ^ ~ exist now were introduced by the transliteration: drop
        // them, then restore the ones that were hidden behind placeholders.
        $s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
        return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
    }


    /**
     * Converts to web safe characters [a-z0-9-] text.
     *
     * @param  string       UTF-8 encoding
     * @param  string|NULL  additional characters that are allowed to stay
     * @param  bool         convert the result to lower case?
     * @return string
     */
    public static function webalize($s, $charlist = NULL, $lower = TRUE)
    {
        $s = self::toAscii($s);
        if ($lower) {
            $s = strtolower($s);
        }
        // The cast keeps preg_quote() away from NULL, which it no longer accepts silently.
        $allowed = preg_quote((string) $charlist, '#');
        $s = preg_replace('#[^a-z0-9' . $allowed . ']+#i', '-', $s);
        return trim($s, '-');
    }


    /**
     * Truncates a UTF-8 string to a maximal length (in characters, including
     * $append), preferably at a word boundary.
     *
     * @param  string
     * @param  int     maximal length of the result
     * @param  string  text appended when truncation happens (default is an ellipsis)
     * @return string
     */
    public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
    {
        if (self::length($s) <= $maxLen) {
            return $s;
        }

        $maxLen = $maxLen - self::length($append);
        if ($maxLen < 1) {
            return $append;
        }

        // Longest prefix that is followed by whitespace or ASCII punctuation.
        $matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
        if ($matches) {
            return $matches[0] . $append;
        }

        // No suitable boundary: cut hard.
        return self::substring($s, 0, $maxLen) . $append;
    }


    /**
     * Indents every non-empty line of the content.
     *
     * @param  string
     * @param  int     indentation level; values below 1 leave the string untouched
     * @param  string  characters used for one indentation level
     * @return string
     */
    public static function indent($s, $level = 1, $chars = "\t")
    {
        if ($level < 1) {
            return $s;
        }

        $prefix = str_repeat($chars, $level);
        // A callback inserts $prefix verbatim. Building a replacement string such as
        // '$0' . $prefix would let a leading digit, '$' or '\' inside $chars be
        // interpreted as part of a back-reference.
        return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', function($m) use ($prefix) {
            return $m[0] . $prefix;
        });
    }


    /**
     * Converts a UTF-8 string to lower case.
     *
     * @param  string
     * @return string
     */
    public static function lower($s)
    {
        return mb_strtolower($s, 'UTF-8');
    }


    /**
     * Converts a UTF-8 string to upper case.
     *
     * @param  string
     * @return string
     */
    public static function upper($s)
    {
        return mb_strtoupper($s, 'UTF-8');
    }


    /**
     * Converts the first character of a UTF-8 string to upper case.
     *
     * @param  string
     * @return string
     */
    public static function firstUpper($s)
    {
        return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
    }


    /**
     * Capitalizes every word of a UTF-8 string (title case).
     *
     * @param  string
     * @return string
     */
    public static function capitalize($s)
    {
        return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    }


    /**
     * Case-insensitive comparison of two UTF-8 strings or of their parts.
     *
     * @param  string
     * @param  string
     * @param  int|NULL  positive: compare the first $len characters;
     *                   negative: compare the last |$len| characters;
     *                   NULL: compare the whole strings
     * @return bool
     */
    public static function compare($left, $right, $len = NULL)
    {
        if ($len < 0) {
            $left = self::substring($left, $len, -$len);
            $right = self::substring($right, $len, -$len);
        } elseif ($len !== NULL) {
            $left = self::substring($left, 0, $len);
            $right = self::substring($right, 0, $len);
        }
        return self::lower($left) === self::lower($right);
    }


    /**
     * Returns the length of a UTF-8 string in characters.
     *
     * @param  string
     * @return int
     */
    public static function length($s)
    {
        // Prefer mbstring (as substring() does); utf8_decode() is kept only as a
        // fallback because it is deprecated in recent PHP versions.
        if (function_exists('mb_strlen')) {
            return mb_strlen($s, 'UTF-8');
        }
        return strlen(utf8_decode($s));
    }


    /**
     * Strips the given characters (whitespace by default, including the
     * non-breaking space) from the beginning and end of a UTF-8 string.
     *
     * @param  string
     * @param  string  characters to strip
     * @return string
     * @throws RegexpException
     */
    public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
    {
        $class = '[' . preg_quote($charlist, '#') . ']+';
        return self::replace($s, '#^' . $class . '|' . $class . '\z#u', '');
    }


    /**
     * Pads a UTF-8 string on the left to the given length (in characters).
     * An empty $pad leaves the string unchanged.
     *
     * @param  string
     * @param  int     target length
     * @param  string  padding characters, repeated and cut as needed
     * @return string
     */
    public static function padLeft($s, $length, $pad = ' ')
    {
        return self::padding($s, $length, $pad) . $s;
    }


    /**
     * Pads a UTF-8 string on the right to the given length (in characters).
     * An empty $pad leaves the string unchanged.
     *
     * @param  string
     * @param  int     target length
     * @param  string  padding characters, repeated and cut as needed
     * @return string
     */
    public static function padRight($s, $length, $pad = ' ')
    {
        return $s . self::padding($s, $length, $pad);
    }


    /**
     * Builds the filler that padLeft()/padRight() attach to $s.
     *
     * @param  string
     * @param  int
     * @param  string
     * @return string
     */
    private static function padding($s, $length, $pad)
    {
        $missing = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        if ($padLen === 0) {
            return ''; // nothing to pad with; also avoids a division by zero
        }
        // Whole repetitions of $pad first (explicit int cast: str_repeat() must not
        // receive a fractional float), then the leading part of one more $pad.
        return str_repeat($pad, (int) ($missing / $padLen)) . self::substring($pad, 0, $missing % $padLen);
    }


    /**
     * Reverses a UTF-8 string.
     *
     * @param  string
     * @return string
     */
    public static function reverse($s)
    {
        // Reversing the bytes of big-endian UTF-32 gives the characters in reverse
        // order, each one in little-endian form - hence the UTF-32LE source below.
        return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
    }


    /**
     * Generates a random string.
     *
     * @param  int     length of the result
     * @param  string  allowed characters; "x-y" stands for the range from x to y
     * @return string
     * @throws \InvalidArgumentException when the character list is empty
     */
    public static function random($length = 10, $charlist = '0-9a-z')
    {
        // Expand ranges such as "a-z" into the individual characters.
        $charlist = str_shuffle(preg_replace_callback('#.-.#', function($m) {
            return implode('', range($m[0][0], $m[0][2]));
        }, $charlist));
        $chLen = strlen($charlist);
        if ($chLen === 0) {
            // The selection below works modulo $chLen.
            throw new \InvalidArgumentException('Character list cannot be empty.');
        }

        $s = '';
        if (function_exists('random_int')) {
            // Cryptographically secure source, used whenever the runtime offers it.
            for ($i = 0; $i < $length; $i++) {
                $s .= $charlist[random_int(0, $chLen - 1)];
            }
            return $s;
        }

        // Legacy source: mixes microtime(), lcg_value() and a hash of $_SERVER.
        static $rand3;
        if (!$rand3) {
            $rand3 = md5(serialize($_SERVER), TRUE);
        }

        for ($i = 0; $i < $length; $i++) {
            if ($i % 5 === 0) {
                list($rand, $rand2) = explode(' ', microtime());
                $rand += lcg_value();
            }
            $rand *= $chLen;
            // explicit int cast: the modulo operator must not receive a fractional float
            $s .= $charlist[((int) ($rand + $rand2 + ord($rand3[$i % strlen($rand3)]))) % $chLen];
            $rand -= (int) $rand;
        }
        return $s;
    }


    /**
     * Splits a string by a regular expression. Delimiters captured by
     * parentheses are part of the result (PREG_SPLIT_DELIM_CAPTURE is always set).
     *
     * @param  string
     * @param  string  regular expression
     * @param  int     additional PREG_SPLIT_* flags
     * @return array
     * @throws RegexpException
     */
    public static function split($subject, $pattern, $flags = 0)
    {
        self::trapPcreWarnings($pattern);
        $res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
        restore_error_handler();
        if (preg_last_error()) { // run-time error
            throw new RegexpException(NULL, preg_last_error(), $pattern);
        }
        return $res;
    }


    /**
     * Performs a regular expression match.
     *
     * @param  string
     * @param  string  regular expression
     * @param  int     preg_match() flags
     * @param  int     offset in bytes
     * @return array|NULL  matched groups, or NULL when nothing matched or the
     *                     offset lies behind the end of the subject
     * @throws RegexpException
     */
    public static function match($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return NULL;
        }
        self::trapPcreWarnings($pattern);
        $res = preg_match($pattern, $subject, $m, $flags, $offset);
        restore_error_handler();
        if (preg_last_error()) { // run-time error
            throw new RegexpException(NULL, preg_last_error(), $pattern);
        }
        return $res ? $m : NULL;
    }


    /**
     * Performs a global regular expression match. Results are ordered by match
     * (PREG_SET_ORDER) unless PREG_PATTERN_ORDER is requested in $flags.
     *
     * @param  string
     * @param  string  regular expression
     * @param  int     preg_match_all() flags
     * @param  int     offset in bytes
     * @return array
     * @throws RegexpException
     */
    public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return array();
        }
        self::trapPcreWarnings($pattern);
        preg_match_all(
            $pattern, $subject, $m,
            ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
            $offset
        );
        restore_error_handler();
        if (preg_last_error()) { // run-time error
            throw new RegexpException(NULL, preg_last_error(), $pattern);
        }
        return $m;
    }


    /**
     * Performs a regular expression search and replace. Three call styles:
     *  - replace($subject, $pattern, $string): plain preg_replace();
     *  - replace($subject, $pattern, $callback): any object or array given as the
     *    replacement is treated as a callback and passed to preg_replace_callback();
     *  - replace($subject, array($pattern => $string, ...)): pattern/replacement map.
     *
     * @param  string
     * @param  string|array  regular expression(s)
     * @param  string|callable|NULL
     * @param  int           maximal number of replacements, -1 means no limit
     * @return string
     * @throws RegexpException
     * @throws Nette\InvalidStateException when the callback is not callable
     */
    public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
    {
        if (is_object($replacement) || is_array($replacement)) {
            if ($replacement instanceof Nette\Callback) {
                $replacement = $replacement->getNative();
            }
            if (!is_callable($replacement, FALSE, $textual)) {
                throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
            }

            // Compile every pattern once against an empty subject so that a broken
            // pattern is reported before the user callback is involved; $tmp is
            // bound by reference, so the message names the pattern that failed.
            set_error_handler(function($severity, $message) use (& $tmp) {
                restore_error_handler();
                throw new RegexpException("$message in pattern: $tmp");
            });
            foreach ((array) $pattern as $tmp) {
                preg_match($tmp, '');
            }
            restore_error_handler();

            $res = preg_replace_callback($pattern, $replacement, $subject, $limit);
            if ($res === NULL && preg_last_error()) { // run-time error
                // $pattern may be an array here, so it is joined the same way the
                // non-callback branch below does.
                throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
            }
            return $res;

        } elseif ($replacement === NULL && is_array($pattern)) {
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }

        self::trapPcreWarnings($pattern);
        $res = preg_replace($pattern, $replacement, $subject, $limit);
        restore_error_handler();
        if (preg_last_error()) { // run-time error
            throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
        }
        return $res;
    }


    /**
     * Installs an error handler that turns any PHP error raised by the following
     * preg_* call (e.g. a pattern compilation warning) into a RegexpException.
     * The handler removes itself before throwing; when nothing is raised, the
     * caller must call restore_error_handler() right after the preg_* call.
     *
     * @param  string|array  pattern(s) quoted in the exception message
     * @return void
     */
    private static function trapPcreWarnings($pattern)
    {
        set_error_handler(function($severity, $message) use ($pattern) {
            restore_error_handler();
            throw new RegexpException("$message in pattern: " . implode(' or ', (array) $pattern));
        });
    }

}
556:
557:
558:
559: 560: 561:
/**
 * The exception that indicates error of the last Regexp execution.
 */
class RegexpException extends \Exception
{
    /** @var array  messages for the error codes returned by preg_last_error() */
    static public $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
        6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR (literal: constant exists only in newer PHP)
    );

    /**
     * @param  string|NULL        explicit message; when empty, one is derived from $code
     * @param  int|NULL           error code as returned by preg_last_error()
     * @param  string|array|NULL  pattern(s) quoted in the derived message
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            $message = ($code !== NULL && isset(self::$messages[$code])) ? self::$messages[$code] : 'Unknown error';
            if ($pattern) {
                // Several patterns may be passed as an array; join them instead of
                // letting PHP stringify the array as "Array".
                $message .= ' (pattern: ' . implode(' or ', (array) $pattern) . ')';
            }
        }
        // \Exception expects an integer code; NULL is the default here, so cast it.
        parent::__construct($message, (int) $code);
    }

}
581: