1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
/**
 * Static helpers for working with UTF-8 encoded strings and PCRE regular expressions.
 *
 * All methods are static; the class cannot be instantiated.
 */
class Strings
{

	/**
	 * Static class - cannot be instantiated.
	 * @throws Nette\StaticClassException always
	 */
	final public function __construct()
	{
		throw new Nette\StaticClassException;
	}


	/**
	 * Tells whether fixEncoding() would leave the string untouched.
	 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
	 * @param  string $s  byte stream to check
	 * @return bool
	 */
	public static function checkEncoding($s)
	{
		if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}
		return $s === self::fixEncoding($s);
	}


	/**
	 * Round-trips the string through a UTF-8 conversion that ignores invalid sequences.
	 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
	 * @param  string $s  byte stream to fix
	 * @return string
	 */
	public static function fixEncoding($s)
	{
		if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}

		if (PHP_VERSION_ID < 50400) {
			// before PHP 5.4: convert to UTF-16 and back, ignoring what cannot be converted
			return @iconv('UTF-16', 'UTF-8//IGNORE', iconv('UTF-8', 'UTF-16//IGNORE', $s));
		} else {
			// escape with ENT_IGNORE and immediately undo the escaping
			return htmlspecialchars_decode(htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8'), ENT_NOQUOTES);
		}
	}


	/**
	 * Converts a code point number to its UTF-8 representation.
	 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
	 * @param  int $code  code point, packed as a 32-bit big-endian integer
	 * @return string
	 */
	public static function chr($code)
	{
		if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}
		return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
	}


	/**
	 * Does $haystack start with $needle? The comparison is byte-wise.
	 * @param  string $haystack
	 * @param  string $needle
	 * @return bool
	 */
	public static function startsWith($haystack, $needle)
	{
		return strncmp($haystack, $needle, strlen($needle)) === 0;
	}


	/**
	 * Does $haystack end with $needle? An empty needle always matches.
	 * @param  string $haystack
	 * @param  string $needle
	 * @return bool
	 */
	public static function endsWith($haystack, $needle)
	{
		return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
	}


	/**
	 * Does $haystack contain $needle? Thin wrapper around strpos().
	 * @param  string $haystack
	 * @param  string $needle
	 * @return bool
	 */
	public static function contains($haystack, $needle)
	{
		return strpos($haystack, $needle) !== FALSE;
	}


	/**
	 * Returns a part of a UTF-8 string, using mb_substr() when available and iconv_substr() otherwise.
	 * @param  string $s
	 * @param  int $start  offset in characters
	 * @param  int|NULL $length  length in characters; NULL is replaced by the length of the whole string
	 * @return string
	 */
	public static function substring($s, $start, $length = NULL)
	{
		if ($length === NULL) {
			$length = self::length($s);
		}
		if (function_exists('mb_substr')) {
			return mb_substr($s, $start, $length, 'UTF-8');
		}
		return iconv_substr($s, $start, $length, 'UTF-8');
	}


	/**
	 * Normalizes line endings, strips control characters and trailing whitespace,
	 * and removes leading and trailing blank lines.
	 * @param  string $s
	 * @return string
	 */
	public static function normalize($s)
	{
		$s = self::normalizeNewLines($s);

		// remove control characters; tab (\x09) and line feed (\x0A) are kept
		$s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

		// right trim every line
		$s = preg_replace('#[\t ]+$#m', '', $s);

		// leading and trailing blank lines
		$s = trim($s, "\n");

		return $s;
	}


	/**
	 * Converts "\r\n" and "\r" line endings to "\n".
	 * @param  string $s
	 * @return string
	 */
	public static function normalizeNewLines($s)
	{
		return str_replace(array("\r\n", "\r"), "\n", $s);
	}


	/**
	 * Transliterates a UTF-8 string to ASCII using iconv.
	 * @param  string $s  UTF-8 encoded string
	 * @return string
	 */
	public static function toAscii($s)
	{
		// drop characters outside the ranges listed in the class (this includes U+0300-U+036F)
		$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
		// hide the original ` ' " ^ ~ behind placeholders \x01-\x05 ...
		$s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
		// ... and map typographic quotes onto the quote placeholders, guillemets onto >> and <<
		$s = str_replace(array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A",
			"\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xBB", "\xC2\xAB"),
			array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", ">>", "<<"), $s);
		if (ICONV_IMPL === 'glibc') {
			// glibc: go through WINDOWS-1250 and map its high bytes to ASCII letters by table
			$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
			$s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
				. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
				. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
				. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
				"ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
		} else {
			$s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);
		}
		// any ` ' " ^ ~ present now was introduced by the transliteration: remove it ...
		$s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
		// ... and restore the characters hidden behind the placeholders
		return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
	}


	/**
	 * Converts a string to a form built from ASCII letters, digits and dashes.
	 * @param  string $s  UTF-8 encoded string
	 * @param  string|NULL $charlist  additional characters that are kept as they are
	 * @param  bool $lower  convert the result to lower case?
	 * @return string
	 */
	public static function webalize($s, $charlist = NULL, $lower = TRUE)
	{
		$s = self::toAscii($s);
		if ($lower) {
			$s = strtolower($s);
		}
		// explicit cast: passing NULL to preg_quote() is deprecated since PHP 8.1
		$s = preg_replace('#[^a-z0-9' . preg_quote((string) $charlist, '#') . ']+#i', '-', $s);
		$s = trim($s, '-');
		return $s;
	}


	/**
	 * Shortens a UTF-8 string to at most $maxLen characters (including $append),
	 * preferably cutting in front of whitespace or an ASCII non-alphanumeric character.
	 * @param  string $s  UTF-8 encoded string
	 * @param  int $maxLen  maximal length in characters
	 * @param  string $append  text appended when the string is shortened (defaults to U+2026)
	 * @return string
	 */
	public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
	{
		if (self::length($s) > $maxLen) {
			$maxLen = $maxLen - self::length($append);
			if ($maxLen < 1) {
				return $append;

			} elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
				// longest prefix that is followed by whitespace or an ASCII non-alphanumeric character
				return $matches[0] . $append;

			} else {
				// no suitable boundary found: hard cut
				return self::substring($s, 0, $maxLen) . $append;
			}
		}
		return $s;
	}


	/**
	 * Prepends $chars repeated $level times to every non-empty line.
	 * @param  string $s
	 * @param  int $level  number of repetitions; values below 1 leave the string untouched
	 * @param  string $chars  indentation unit
	 * @return string
	 */
	public static function indent($s, $level = 1, $chars = "\t")
	{
		if ($level > 0) {
			$s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
		}
		return $s;
	}


	/**
	 * Converts a UTF-8 string to lower case (requires mbstring).
	 * @param  string $s
	 * @return string
	 */
	public static function lower($s)
	{
		return mb_strtolower($s, 'UTF-8');
	}


	/**
	 * Converts a UTF-8 string to upper case (requires mbstring).
	 * @param  string $s
	 * @return string
	 */
	public static function upper($s)
	{
		return mb_strtoupper($s, 'UTF-8');
	}


	/**
	 * Converts the first character of a UTF-8 string to upper case.
	 * @param  string $s
	 * @return string
	 */
	public static function firstUpper($s)
	{
		return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
	}


	/**
	 * Converts a UTF-8 string to title case via mb_convert_case() (requires mbstring).
	 * @param  string $s
	 * @return string
	 */
	public static function capitalize($s)
	{
		return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	}


	/**
	 * Case-insensitive comparison of two UTF-8 strings or their parts.
	 * @param  string $left
	 * @param  string $right
	 * @param  int|NULL $len  positive: compare the first $len characters; negative: the last -$len; NULL: whole strings
	 * @return bool
	 */
	public static function compare($left, $right, $len = NULL)
	{
		if ($len < 0) {
			$left = self::substring($left, $len, -$len);
			$right = self::substring($right, $len, -$len);
		} elseif ($len !== NULL) {
			$left = self::substring($left, 0, $len);
			$right = self::substring($right, 0, $len);
		}
		return self::lower($left) === self::lower($right);
	}


	/**
	 * Finds the common prefix of strings. Accepts either one array of strings
	 * or the strings as individual arguments.
	 * @param  string|array $strings
	 * @param  string|NULL $second  present for the signature only; arguments are read via func_get_args()
	 * @return string
	 */
	public static function findPrefix($strings, $second = NULL)
	{
		if (!is_array($strings)) {
			$strings = func_get_args();
		}
		$first = array_shift($strings);
		for ($i = 0; $i < strlen($first); $i++) {
			foreach ($strings as $s) {
				if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
					// mismatch on a UTF-8 continuation byte: step back so no character is cut in half
					while ($i && $first[$i-1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
						$i--;
					}
					return substr($first, 0, $i);
				}
			}
		}
		return $first;
	}


	/**
	 * Returns the length of a UTF-8 string in characters.
	 * @param  string $s
	 * @return int
	 */
	public static function length($s)
	{
		// utf8_decode() is deprecated since PHP 8.2; it is kept only for installations without mbstring
		if (function_exists('mb_strlen')) {
			return mb_strlen($s, 'UTF-8');
		}
		return strlen(utf8_decode($s));
	}


	/**
	 * Strips the given characters from both ends of a UTF-8 string.
	 * @param  string $s
	 * @param  string $charlist  characters to strip; the default contains ASCII whitespace, NUL and "\xC2\xA0"
	 * @return string
	 * @throws RegexpException  raised by replace()
	 */
	public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
	{
		$charlist = preg_quote($charlist, '#');
		return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
	}


	/**
	 * Pads a UTF-8 string on the left to the given length in characters.
	 * @param  string $s
	 * @param  int $length  target length in characters
	 * @param  string $pad  padding unit; an empty string results in no padding
	 * @return string
	 */
	public static function padLeft($s, $length, $pad = ' ')
	{
		return self::createPadding(max(0, $length - self::length($s)), $pad) . $s;
	}


	/**
	 * Pads a UTF-8 string on the right to the given length in characters.
	 * @param  string $s
	 * @param  int $length  target length in characters
	 * @param  string $pad  padding unit; an empty string results in no padding
	 * @return string
	 */
	public static function padRight($s, $length, $pad = ' ')
	{
		return $s . self::createPadding(max(0, $length - self::length($s)), $pad);
	}


	/**
	 * Builds padding of exactly $padCount characters by repeating $pad and cutting the last repetition.
	 * @param  int $padCount  number of characters to produce
	 * @param  string $pad  padding unit
	 * @return string
	 */
	private static function createPadding($padCount, $pad)
	{
		$padLen = self::length($pad);
		if ($padCount < 1 || $padLen === 0) {
			// nothing to add; also avoids division by zero for an empty pad
			return '';
		}
		// explicit cast: passing a fractional float to str_repeat() is deprecated since PHP 8.1
		return str_repeat($pad, (int) ($padCount / $padLen)) . self::substring($pad, 0, $padCount % $padLen);
	}


	/**
	 * Reverses a UTF-8 string.
	 * @param  string $s
	 * @return string
	 */
	public static function reverse($s)
	{
		// byte-reversing big-endian UTF-32 yields the reversed text in little-endian UTF-32
		return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
	}


	/**
	 * Generates a random string; delegates to Random::generate().
	 * @param  int $length
	 * @param  string $charlist
	 * @return string
	 */
	public static function random($length = 10, $charlist = '0-9a-z')
	{
		return Random::generate($length, $charlist);
	}


	/**
	 * Splits a string by a regular expression. PREG_SPLIT_DELIM_CAPTURE is always added to $flags.
	 * @param  string $subject
	 * @param  string $pattern
	 * @param  int $flags
	 * @return array
	 * @throws RegexpException  on a PHP error raised by preg_split() or a non-zero preg_last_error()
	 */
	public static function split($subject, $pattern, $flags = 0)
	{
		// turn PHP errors raised by the PCRE call (e.g. compilation failure) into exceptions
		set_error_handler(function($severity, $message) use ($pattern) {
			restore_error_handler();
			throw new RegexpException("$message in pattern: $pattern");
		});
		$res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		return $res;
	}


	/**
	 * Performs a regular expression match.
	 * @param  string $subject
	 * @param  string $pattern
	 * @param  int $flags
	 * @param  int $offset  offset in bytes
	 * @return array|NULL  matches, or NULL when nothing matched or $offset is behind the end of $subject
	 * @throws RegexpException  on a PHP error raised by preg_match() or a non-zero preg_last_error()
	 */
	public static function match($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return NULL;
		}
		// turn PHP errors raised by the PCRE call (e.g. compilation failure) into exceptions
		set_error_handler(function($severity, $message) use ($pattern) {
			restore_error_handler();
			throw new RegexpException("$message in pattern: $pattern");
		});
		$res = preg_match($pattern, $subject, $m, $flags, $offset);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		if ($res) {
			return $m;
		}
		return NULL;
	}


	/**
	 * Performs a global regular expression match. Results are in PREG_SET_ORDER
	 * unless PREG_PATTERN_ORDER is set in $flags.
	 * @param  string $subject
	 * @param  string $pattern
	 * @param  int $flags
	 * @param  int $offset  offset in bytes
	 * @return array  matches; empty array when $offset is behind the end of $subject
	 * @throws RegexpException  on a PHP error raised by preg_match_all() or a non-zero preg_last_error()
	 */
	public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return array();
		}
		// turn PHP errors raised by the PCRE call (e.g. compilation failure) into exceptions
		set_error_handler(function($severity, $message) use ($pattern) {
			restore_error_handler();
			throw new RegexpException("$message in pattern: $pattern");
		});
		preg_match_all(
			$pattern, $subject, $m,
			($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
			$offset
		);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		return $m;
	}


	/**
	 * Performs a regular expression search and replace.
	 *
	 * $replacement may be a string, a callback (object or array) or a Nette\Callback. When it is NULL
	 * and $pattern is an array, the array is read as pattern => replacement pairs.
	 * @param  string $subject
	 * @param  string|array $pattern
	 * @param  string|callable|NULL $replacement
	 * @param  int $limit
	 * @return string
	 * @throws RegexpException  on a PHP error raised by the PCRE call or a preg_last_error() failure
	 * @throws Nette\InvalidStateException  when the given callback is not callable
	 */
	public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
	{
		if (is_object($replacement) || is_array($replacement)) {
			if ($replacement instanceof Nette\Callback) {
				$replacement = $replacement->getNative();
			}
			if (!is_callable($replacement, FALSE, $textual)) {
				throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
			}

			// compile every pattern up front so that errors raised by the callback itself
			// are not caught by the handler and misreported as pattern errors
			set_error_handler(function($severity, $message) use (& $tmp) {
				restore_error_handler();
				throw new RegexpException("$message in pattern: $tmp");
			});
			foreach ((array) $pattern as $tmp) {
				preg_match($tmp, '');
			}
			restore_error_handler();

			$res = preg_replace_callback($pattern, $replacement, $subject, $limit);
			if ($res === NULL && preg_last_error()) { // run-time error
				// $pattern may be an array here, so it is joined the same way as in the branch below
				throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
			}
			return $res;

		} elseif ($replacement === NULL && is_array($pattern)) {
			$replacement = array_values($pattern);
			$pattern = array_keys($pattern);
		}

		// turn PHP errors raised by the PCRE call (e.g. compilation failure) into exceptions
		set_error_handler(function($severity, $message) use ($pattern) {
			restore_error_handler();
			throw new RegexpException("$message in pattern: " . implode(' or ', (array) $pattern));
		});
		$res = preg_replace($pattern, $replacement, $subject, $limit);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
		}
		return $res;
	}

}
548:
549:
550: 551: 552:
/**
 * Exception raised when a PCRE function used by Strings fails.
 *
 * When no message is given, one is derived from the preg_last_error() code passed as $code.
 */
class RegexpException extends \Exception
{
	/** @var array  messages indexed by preg_last_error() codes */
	public static $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		PREG_BAD_UTF8_OFFSET_ERROR => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
		6 => 'JIT stack limit was exhausted', // PREG_JIT_STACKLIMIT_ERROR; literal because the constant exists only since PHP 7.0
	);

	/**
	 * @param  string|NULL $message  explicit message; when empty, it is looked up in self::$messages by $code
	 * @param  int|NULL $code  preg_last_error() code; NULL is stored as 0
	 * @param  string|NULL $pattern  pattern appended to a looked-up message
	 */
	public function __construct($message, $code = NULL, $pattern = NULL)
	{
		if (!$message) {
			$message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
		}
		// explicit cast: passing NULL as the int $code of \Exception is deprecated since PHP 8.1
		parent::__construct($message, (int) $code);
	}

}
572: