1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
18: class Strings
19: {
20:
21: 22: 23:
/**
 * Static class - instantiation is forbidden.
 *
 * @throws Nette\StaticClassException on every call
 */
final public function __construct()
{
    throw new Nette\StaticClassException();
}
28:
29:
30: 31: 32: 33: 34:
/**
 * Checks whether fixEncoding() would leave the string untouched,
 * i.e. whether it contains nothing that fixEncoding() strips.
 *
 * @param  string $s  byte stream to check
 * @return bool  TRUE when the repaired string is strictly identical to the input
 */
public static function checkEncoding($s)
{
    $repaired = self::fixEncoding($s);
    return $repaired === $s;
}
39:
40:
41: 42: 43: 44: 45:
/**
 * Returns the string with byte sequences that are not valid UTF-8 dropped.
 *
 * @param  string $s  byte stream to fix
 * @return string
 */
public static function fixEncoding($s)
{
    if (PHP_VERSION_ID >= 50400) {
        // ENT_IGNORE discards invalid sequences while escaping; decoding with
        // the same quote flag then reverts the escaping itself
        $escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
        return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
    }

    // older PHP: round-trip through UTF-16, ignoring whatever cannot be converted
    $utf16 = @iconv('UTF-8', 'UTF-16//IGNORE', $s);
    return @iconv('UTF-16', 'UTF-8//IGNORE', $utf16);
}
55:
56:
57: 58: 59: 60: 61: 62:
/**
 * Returns the UTF-8 encoding of a single code point.
 *
 * @param  int $code  code point (0x0 to 0xD7FF or 0xE000 to 0x10FFFF)
 * @return string
 * @throws Nette\InvalidArgumentException if the code point is outside the accepted ranges
 */
public static function chr($code)
{
    $outOfRange = $code < 0 || $code > 0x10FFFF;
    $isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
    if ($outOfRange || $isSurrogate) {
        throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
    }

    // pack('N') yields the 32-bit big-endian form, which is exactly UTF-32BE
    $utf32 = pack('N', $code);
    return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
}
70:
71:
72: 73: 74: 75: 76: 77:
/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 *
 * @param  string $haystack
 * @param  string $needle
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
    $needleLength = strlen($needle);
    $difference = strncmp($haystack, $needle, $needleLength);
    return $difference === 0;
}
82:
83:
84: 85: 86: 87: 88: 89:
/**
 * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
 * An empty needle always matches.
 *
 * @param  string $haystack
 * @param  string $needle
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
    $needleLength = strlen($needle);
    if ($needleLength === 0) {
        return TRUE;
    }

    $tail = substr($haystack, -$needleLength);
    return $tail === $needle;
}
94:
95:
96: 97: 98: 99: 100: 101:
/**
 * Tells whether $haystack contains $needle (byte-wise, case-sensitive).
 *
 * An empty needle is reported as contained, in line with startsWith() and
 * endsWith(). Without the explicit check the answer depends on the PHP
 * version: strpos() raises a warning and returns FALSE for an empty needle
 * before PHP 8, but returns 0 from PHP 8 on.
 *
 * @param  string $haystack
 * @param  string $needle
 * @return bool
 */
public static function contains($haystack, $needle)
{
    if ($needle === '') {
        return TRUE;
    }
    return strpos($haystack, $needle) !== FALSE;
}
106:
107:
108: 109: 110: 111: 112: 113: 114:
/**
 * Returns a part of a UTF-8 string, with $start and $length counted in characters.
 *
 * Uses mb_substr() when the mbstring extension is available and
 * iconv_substr() otherwise.
 *
 * @param  string $s
 * @param  int $start
 * @param  int|NULL $length  NULL means "up to the end of the string"
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
    if (function_exists('mb_substr')) {
        // before PHP 5.4.8 an explicit length is supplied instead of NULL
        // (presumably because mb_substr() did not treat NULL as "to the end" - TODO confirm)
        if (PHP_VERSION_ID < 50408 && $length === NULL) {
            $length = self::length($s);
        }
        return mb_substr($s, $start, $length, 'UTF-8');
    }

    // iconv fallback
    if ($length === NULL) {
        $length = self::length($s);
    } elseif ($start < 0 && $length < 0) {
        // negative start combined with negative length: turn the start into an
        // absolute offset (presumably works around iconv_substr() - TODO confirm)
        $start += self::length($s);
    }
    return iconv_substr($s, $start, $length, 'UTF-8');
}
129:
130:
131: 132: 133: 134: 135:
/**
 * Normalizes a text: unifies line endings, removes control characters,
 * strips trailing blanks on every line and trims surrounding blank lines.
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function normalize($s)
{
    // control characters except tab (\x09) and line feed (\x0A)
    $controlChars = '#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u';
    // tabs and spaces right before the end of any line
    $trailingBlanks = '#[\t ]+$#m';

    $s = preg_replace($controlChars, '', self::normalizeNewLines($s));
    $s = preg_replace($trailingBlanks, '', $s);

    // leading and trailing line feeds
    return trim($s, "\n");
}
151:
152:
153: 154: 155: 156: 157:
/**
 * Converts "\r\n" and lone "\r" line endings to "\n".
 *
 * @param  string $s
 * @return string
 */
public static function normalizeNewLines($s)
{
    // "\r\n" comes first so that a CRLF pair becomes a single "\n"
    $lineEndings = array("\r\n", "\r");
    return str_replace($lineEndings, "\n", $s);
}
162:
163:
164: 165: 166: 167: 168:
/**
 * Converts a UTF-8 string to ASCII by transliteration.
 *
 * The characters ` ' " ^ ~ ? that are present in the input are temporarily
 * swapped for control bytes \x01-\x06, so that the same characters produced
 * as a by-product of transliteration can be deleted at the end without
 * losing the original ones.
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function toAscii($s)
{
    // keep only tab, LF, CR, printable ASCII, U+00A0-U+02FF and U+0370-U+10FFFF
    // (this drops, among others, the U+0300-U+036F combining marks)
    $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
    // protect ` ' " ^ ~ ? coming from the input as \x01..\x06
    $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
    // typographic quotes U+201E, U+201C, U+201D become a protected ", U+201A, U+2018,
    // U+2019 become a protected ', and the degree sign U+00B0 becomes a protected ^
    $s = str_replace(
        array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"),
        array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"), $s
    );
    // use the intl Transliterator when the class exists and the rule set can be created
    if (class_exists('Transliterator') && $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII')) {
        $s = $transliterator->transliterate($s);
    }
    if (ICONV_IMPL === 'glibc') {
        // U+00BB, U+00AB, U+2026, U+2122, U+00A9 and U+00AE get explicit ASCII spellings
        $s = str_replace(
            array("\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"),
            array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s
        );
        // go through WINDOWS-1250 and map its non-ASCII bytes to ASCII letters by hand
        // (presumably because glibc's ASCII//TRANSLIT is unsatisfactory for these - TODO confirm)
        $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
        $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
            . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
            . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
            . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
            . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
            "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.");
        // whatever is still outside 7-bit ASCII is dropped
        $s = preg_replace('#[^\x00-\x7F]++#', '', $s);
    } else {
        // other iconv implementations: transliterate straight to ASCII
        $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
    }
    // delete the ` ' " ^ ~ ? that appeared during transliteration ...
    $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
    // ... and restore the protected ones
    return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
}
199:
200:
201: 202: 203: 204: 205: 206: 207:
/**
 * Converts a string to a form made of ASCII letters, digits, the characters
 * listed in $charlist and single dashes in place of everything else.
 *
 * @param  string $s  UTF-8 text
 * @param  string|NULL $charlist  additional characters to keep
 * @param  bool $lower  convert the result to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
    $ascii = self::toAscii($s);
    if ($lower) {
        $ascii = strtolower($ascii);
    }

    // every run of characters that are not allowed collapses into one dash
    $disallowed = '#[^a-z0-9' . preg_quote($charlist, '#') . ']+#i';
    $dashed = preg_replace($disallowed, '-', $ascii);
    return trim($dashed, '-');
}
218:
219:
220: 221: 222: 223: 224: 225: 226:
/**
 * Shortens a UTF-8 string to at most $maxLen characters, $append included.
 *
 * The cut is made in front of a whitespace or ASCII punctuation character
 * when one can be found within the limit; otherwise the string is cut hard.
 *
 * @param  string $s  UTF-8 text
 * @param  int $maxLen  maximal length in characters
 * @param  string $append  appended to a shortened string (default is U+2026, the ellipsis)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
    if (self::length($s) > $maxLen) {
        // room left for the text itself once $append is accounted for
        $budget = $maxLen - self::length($append);
        if ($budget < 1) {
            return $append;
        }

        // longest head that is directly followed by a separator character
        $matches = self::match($s, '#^.{1,'.$budget.'}(?=[\s\x00-/:-@\[-`{-~])#us');
        $head = $matches ? $matches[0] : self::substring($s, 0, $budget);
        return $head . $append;
    }
    return $s;
}
243:
244:
245: 246: 247: 248: 249: 250: 251:
/**
 * Indents every non-empty line of a text.
 *
 * @param  string $s  text
 * @param  int $level  how many times $chars is repeated; nothing happens unless it is positive
 * @param  string $chars  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
    if ($level > 0) {
        $padding = str_repeat($chars, $level);
        // matches the start of the text or a run of line breaks, provided a
        // non-line-break character follows; '$0' re-inserts the matched text
        $s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $padding);
    }
    return $s;
}
259:
260:
261: 262: 263: 264: 265:
/**
 * Converts a UTF-8 string to lower case (requires the mbstring extension).
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function lower($s)
{
    $lowered = mb_strtolower($s, 'UTF-8');
    return $lowered;
}
270:
271:
272: 273: 274: 275: 276:
/**
 * Converts the first character of a UTF-8 string to lower case.
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function firstLower($s)
{
    $head = self::lower(self::substring($s, 0, 1));
    $tail = self::substring($s, 1);
    return $head . $tail;
}
281:
282:
283: 284: 285: 286: 287:
/**
 * Converts a UTF-8 string to upper case (requires the mbstring extension).
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function upper($s)
{
    $uppered = mb_strtoupper($s, 'UTF-8');
    return $uppered;
}
292:
293:
294: 295: 296: 297: 298:
/**
 * Converts the first character of a UTF-8 string to upper case.
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function firstUpper($s)
{
    $head = self::upper(self::substring($s, 0, 1));
    $tail = self::substring($s, 1);
    return $head . $tail;
}
303:
304:
305: 306: 307: 308: 309:
/**
 * Converts a UTF-8 string to title case (requires the mbstring extension).
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function capitalize($s)
{
    $titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    return $titled;
}
314:
315:
316: 317: 318: 319: 320: 321: 322:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 *
 * @param  string $left
 * @param  string $right
 * @param  int|NULL $len  NULL compares whole strings, a positive value the first
 *                        $len characters, a negative value the last -$len characters
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
    if ($len !== NULL) {
        $fromEnd = $len < 0;
        $start = $fromEnd ? $len : 0;
        $count = $fromEnd ? -$len : $len;
        $left = self::substring($left, $start, $count);
        $right = self::substring($right, $start, $count);
    }

    $leftFolded = self::lower($left);
    $rightFolded = self::lower($right);
    return $leftFolded === $rightFolded;
}
334:
335:
336: 337: 338: 339: 340:
/**
 * Finds the longest common prefix of several UTF-8 strings.
 *
 * The strings are given either as one array or as separate arguments.
 * An empty array yields an empty string.
 *
 * @param  string[]|string $strings
 * @return string
 */
public static function findPrefix($strings)
{
    if (!is_array($strings)) {
        $strings = func_get_args();
    }
    if (!$strings) {
        // nothing to compare; avoids returning NULL and calling strlen(NULL)
        return '';
    }

    $first = array_shift($strings);
    $firstLength = strlen($first); // loop invariant, computed once
    for ($i = 0; $i < $firstLength; $i++) {
        foreach ($strings as $s) {
            if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
                // the strings differ at byte $i; when that byte is a UTF-8
                // continuation byte (0x80-0xBF), step back to the start of the
                // character so that a multi-byte sequence is never cut in half
                while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
                    $i--;
                }
                return substr($first, 0, $i);
            }
        }
    }
    return $first;
}
359:
360:
361: 362: 363: 364: 365: 366:
/**
 * Returns the number of characters in a UTF-8 string.
 *
 * @param  string $s  UTF-8 text
 * @return int
 */
public static function length($s)
{
    if (function_exists('mb_strlen')) {
        return mb_strlen($s, 'UTF-8');
    }

    // without mbstring: utf8_decode() yields one byte per character, so the
    // byte length of its result is the character count
    return strlen(utf8_decode($s));
}
371:
372:
373: 374: 375: 376: 377: 378:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 *
 * The default list holds space, tab, LF, CR, NUL, vertical tab and the
 * no-break space U+00A0.
 *
 * @param  string $s  UTF-8 text
 * @param  string $charlist  characters to strip
 * @return string
 */
public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
{
    // one or more of the listed characters, as a regular expression fragment
    $run = '[' . preg_quote($charlist, '#') . ']+';
    return self::replace($s, '#^' . $run . '|' . $run . '\z#u', '');
}
384:
385:
386: 387: 388: 389: 390: 391: 392:
/**
 * Pads a UTF-8 string on the left to $length characters using $pad.
 *
 * $pad is repeated as many whole times as fits and then cut to fill the
 * remainder. A string that is already long enough is returned unchanged,
 * and so is any string when $pad is empty (there is nothing to pad with,
 * and the computation would otherwise divide by zero).
 *
 * @param  string $s  UTF-8 text
 * @param  int $length  desired length in characters
 * @param  string $pad  padding text
 * @return string
 */
public static function padLeft($s, $length, $pad = ' ')
{
    $missing = (int) max(0, $length - self::length($s));
    $padLen = self::length($pad);
    if ($padLen === 0) {
        return $s;
    }

    // str_repeat() expects an integer count; a plain "/" would hand it a float
    $wholeRepeats = (int) ($missing / $padLen);
    $remainder = $missing % $padLen;
    return str_repeat($pad, $wholeRepeats) . self::substring($pad, 0, $remainder) . $s;
}
399:
400:
401: 402: 403: 404: 405: 406: 407:
/**
 * Pads a UTF-8 string on the right to $length characters using $pad.
 *
 * $pad is repeated as many whole times as fits and then cut to fill the
 * remainder. A string that is already long enough is returned unchanged,
 * and so is any string when $pad is empty (there is nothing to pad with,
 * and the computation would otherwise divide by zero).
 *
 * @param  string $s  UTF-8 text
 * @param  int $length  desired length in characters
 * @param  string $pad  padding text
 * @return string
 */
public static function padRight($s, $length, $pad = ' ')
{
    $missing = (int) max(0, $length - self::length($s));
    $padLen = self::length($pad);
    if ($padLen === 0) {
        return $s;
    }

    // str_repeat() expects an integer count; a plain "/" would hand it a float
    $wholeRepeats = (int) ($missing / $padLen);
    $remainder = $missing % $padLen;
    return $s . str_repeat($pad, $wholeRepeats) . self::substring($pad, 0, $remainder);
}
414:
415:
416: 417: 418: 419: 420:
/**
 * Reverses the order of characters in a UTF-8 string.
 *
 * @param  string $s  UTF-8 text
 * @return string
 */
public static function reverse($s)
{
    // in UTF-32 every character takes exactly four bytes, so reversing the
    // bytes of the big-endian form gives the reversed text in little-endian form
    $bigEndian = @iconv('UTF-8', 'UTF-32BE', $s);
    $flipped = strrev($bigEndian);
    return @iconv('UTF-32LE', 'UTF-8', $flipped);
}
425:
426:
427: 428: 429: 430:
/**
 * Generates a random string; a thin wrapper that passes both arguments to
 * Random::generate() unchanged.
 *
 * @param  int $length
 * @param  string $charlist
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
    $generated = Random::generate($length, $charlist);
    return $generated;
}
435:
436:
437: 438: 439: 440: 441: 442: 443:
/**
 * Splits a string by a regular expression via preg_split(), without a limit.
 * PREG_SPLIT_DELIM_CAPTURE is always added to $flags.
 *
 * @param  string $subject
 * @param  string $pattern
 * @param  int $flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
    $arguments = array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
    return self::pcre('preg_split', $arguments);
}
448:
449:
450: 451: 452: 453: 454: 455: 456: 457:
/**
 * Performs a regular expression match via preg_match().
 *
 * @param  string $subject
 * @param  string $pattern
 * @param  int $flags
 * @param  int $offset  offset in bytes
 * @return array|NULL  the captured groups, or NULL when nothing matched or
 *                     $offset lies behind the end of $subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
    if ($offset > strlen($subject)) {
        return NULL;
    }

    // $m travels by reference inside the argument list so preg_match() can fill it
    $matched = self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset));
    if ($matched) {
        return $m;
    }
    return NULL;
}
467:
468:
469: 470: 471: 472: 473: 474: 475: 476:
/**
 * Performs a global regular expression match via preg_match_all().
 * PREG_SET_ORDER is added to $flags unless PREG_PATTERN_ORDER is requested.
 *
 * @param  string $subject
 * @param  string $pattern
 * @param  int $flags
 * @param  int $offset  offset in bytes
 * @return array  empty array when $offset lies behind the end of $subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
    if ($offset > strlen($subject)) {
        return array();
    }

    if (!($flags & PREG_PATTERN_ORDER)) {
        $flags = $flags | PREG_SET_ORDER;
    }
    // $m travels by reference inside the argument list so preg_match_all() can fill it
    self::pcre('preg_match_all', array($pattern, $subject, & $m, $flags, $offset));
    return $m;
}
489:
490:
491: 492: 493: 494: 495: 496: 497: 498:
/**
 * Performs a regular expression search and replace.
 *
 * An object or array $replacement is treated as a callback and handed to
 * preg_replace_callback(); anything else goes to preg_replace(). When
 * $replacement is NULL and $pattern is an array, the array is read as a
 * pattern => replacement map.
 *
 * @param  string $subject
 * @param  string|array $pattern
 * @param  string|array|object|NULL $replacement
 * @param  int $limit
 * @return string
 * @throws Nette\InvalidStateException if the callback is not callable
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
    $isCallback = is_object($replacement) || is_array($replacement);

    if (!$isCallback) {
        if ($replacement === NULL && is_array($pattern)) {
            // pattern => replacement map
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }
        return self::pcre('preg_replace', array($pattern, $replacement, $subject, $limit));
    }

    if ($replacement instanceof Nette\Callback) {
        $replacement = $replacement->getNative();
    }
    if (!is_callable($replacement, FALSE, $textual)) {
        throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
    }
    return self::pcre('preg_replace_callback', array($pattern, $replacement, $subject, $limit));
}
518:
519:
520:
/**
 * Calls a preg_* function and turns its failures into RegexpException.
 *
 * @param  string $func  name of the preg_* function
 * @param  array $args  its arguments; the pattern is expected at index 0
 * @return mixed  result of the called function
 * @throws RegexpException
 * @internal
 */
public static function pcre($func, $args)
{
    static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
    );

    // presumably invokeSafe() runs $func with $args and passes any PHP error
    // message to this handler - TODO confirm against Callback::invokeSafe()
    $onError = function($message) use ($args) {
        throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
    };
    $result = Callback::invokeSafe($func, $args, $onError);

    $code = preg_last_error();
    if (!$code) {
        return $result;
    }

    // for the replacing functions an error code counts only when the call produced NULL
    $isReplacing = in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace'));
    if ($result === NULL || !$isReplacing) {
        $text = isset($messages[$code]) ? $messages[$code] : 'Unknown error';
        throw new RegexpException($text . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
    }
    return $result;
}
543:
544: }
545:
546:
547: 548: 549:
/**
 * Thrown by Strings::pcre() when a regular expression function reports an error.
 */
class RegexpException extends \Exception
{
}
553: