1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
18: class Strings
19: {
20:
21: 22: 23:
/**
 * Static class - instantiation is forbidden.
 * @throws Nette\StaticClassException  on every call
 */
final public function __construct()
{
	throw new Nette\StaticClassException();
}
28:
29:
30: 31: 32: 33: 34:
/**
 * Checks whether the string is valid UTF-8, i.e. whether fixEncoding() leaves it untouched.
 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
 * @param  string  byte stream to check
 * @return bool
 */
public static function checkEncoding($s)
{
	if (func_num_args() > 1) {
		$encoding = func_get_arg(1);
		if (strcasecmp($encoding, 'UTF-8') !== 0) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}
	}

	$fixed = self::fixEncoding($s);
	return $fixed === $s;
}
42:
43:
44: 45: 46: 47: 48:
/**
 * Removes invalid byte sequences from a UTF-8 string.
 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
 * @param  string  byte stream to fix
 * @return string
 */
public static function fixEncoding($s)
{
	if (func_num_args() > 1) {
		$encoding = func_get_arg(1);
		if (strcasecmp($encoding, 'UTF-8') !== 0) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}
	}

	if (PHP_VERSION_ID >= 50400) {
		// ENT_IGNORE makes htmlspecialchars() drop invalid sequences; decoding undoes the escaping
		$escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
		return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
	}

	// older PHP: round-trip through UTF-16 with //IGNORE
	$utf16 = iconv('UTF-8', 'UTF-16//IGNORE', $s);
	return @iconv('UTF-16', 'UTF-8//IGNORE', $utf16);
}
61:
62:
63: 64: 65: 66: 67:
/**
 * Returns the UTF-8 encoded character for the given code point.
 * An optional second argument other than 'UTF-8' (case-insensitive) triggers E_USER_DEPRECATED.
 * @param  int  code point
 * @return string
 */
public static function chr($code)
{
	if (func_num_args() > 1) {
		$encoding = func_get_arg(1);
		if (strcasecmp($encoding, 'UTF-8') !== 0) {
			trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
		}
	}

	// a 32-bit big-endian integer is exactly one UTF-32BE code unit
	$utf32 = pack('N', $code);
	return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
}
75:
76:
77: 78: 79: 80: 81: 82:
/**
 * Tells whether $haystack begins with $needle (byte-wise comparison).
 * @param  string
 * @param  string
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
	$prefixLength = strlen($needle);
	$difference = strncmp($haystack, $needle, $prefixLength);
	return $difference === 0;
}
87:
88:
89: 90: 91: 92: 93: 94:
/**
 * Tells whether $haystack ends with $needle (byte-wise comparison).
 * An empty needle always matches.
 * @param  string
 * @param  string
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
	$suffixLength = strlen($needle);
	if ($suffixLength === 0) {
		return TRUE;
	}

	$tail = substr($haystack, -$suffixLength);
	return $tail === $needle;
}
99:
100:
101: 102: 103: 104: 105: 106:
/**
 * Tells whether $needle occurs anywhere in $haystack (byte-wise search via strpos()).
 * @param  string
 * @param  string
 * @return bool
 */
public static function contains($haystack, $needle)
{
	$position = strpos($haystack, $needle);
	// position 0 is a valid hit, so only a strict FALSE means "not found"
	return $position !== FALSE;
}
111:
112:
113: 114: 115: 116: 117: 118: 119:
/**
 * Returns a part of a UTF-8 string, counted in characters.
 * Uses mb_substr() when the mbstring extension is available, iconv_substr() otherwise.
 * @param  string
 * @param  int     start offset in characters
 * @param  int     length in characters; NULL is replaced by the length of the whole string
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
	if ($length === NULL) {
		$length = self::length($s);
	}

	return function_exists('mb_substr')
		? mb_substr($s, $start, $length, 'UTF-8')
		: iconv_substr($s, $start, $length, 'UTF-8');
}
130:
131:
132: 133: 134: 135: 136:
/**
 * Normalizes a text: unifies line endings to "\n", strips control characters,
 * removes trailing tabs/spaces on every line and trims leading/trailing blank lines.
 * @param  string
 * @return string
 */
public static function normalize($s)
{
	$s = self::normalizeNewLines($s);

	// the patterns are applied one after the other, each replacing its matches with ''
	$cleanup = array(
		'#[\x00-\x08\x0B-\x1F\x7F]+#', // control characters (tab and line feed are kept)
		'#[\t ]+$#m', // tabs and spaces at the end of each line
	);
	$s = preg_replace($cleanup, '', $s);

	// leading and trailing line feeds
	return trim($s, "\n");
}
152:
153:
154: 155: 156: 157: 158:
/**
 * Converts "\r\n" and lone "\r" line endings to "\n".
 * @param  string
 * @return string
 */
public static function normalizeNewLines($s)
{
	// "\r\n" must come first so it is not turned into two line feeds
	$foreignEndings = array("\r\n", "\r");
	return str_replace($foreignEndings, "\n", $s);
}
163:
164:
165: 166: 167: 168: 169:
/**
 * Converts a UTF-8 string to ASCII using iconv transliteration.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function toAscii($s)
{
	// keep only the whitelisted code point ranges
	$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);

	// hide the literal characters ` ' " ^ ~ behind placeholders \x01..\x05 so they can be
	// told apart from the same characters produced by the transliteration below
	$s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");

	// typographic quotes (U+201E, U+201C, U+201D, U+201A, U+2018, U+2019) become quote
	// placeholders; the guillemets U+00BB / U+00AB become >> / <<
	$typographic = array(
		"\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A",
		"\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xBB", "\xC2\xAB",
	);
	$substitutes = array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", ">>", "<<");
	$s = str_replace($typographic, $substitutes, $s);

	if (ICONV_IMPL !== 'glibc') {
		$s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);

	} else {
		// glibc: go through WINDOWS-1250 and map its high bytes to ASCII letters by hand
		$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
		$highBytes = "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
			. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
			. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
			. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96";
		$asciiLetters = "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-";
		$s = strtr($s, $highBytes, $asciiLetters);
	}

	// drop ` ' " ^ ~ introduced by the transliteration, then restore the protected originals
	$s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
	return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
}
190:
191:
192: 193: 194: 195: 196: 197: 198:
/**
 * Converts a string to a web-safe form: ASCII only, optionally lower-cased, with every run
 * of characters other than letters, digits and $charlist replaced by a single dash.
 * @param  string  UTF-8 encoded text
 * @param  string  additional allowed characters
 * @param  bool    convert to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
	$s = self::toAscii($s);
	if ($lower) {
		$s = strtolower($s);
	}

	$allowed = 'a-z0-9' . preg_quote($charlist, '#');
	$s = preg_replace('#[^' . $allowed . ']+#i', '-', $s);

	// no dashes at either end
	return trim($s, '-');
}
209:
210:
211: 212: 213: 214: 215: 216: 217:
/**
 * Shortens a UTF-8 string to at most $maxLen characters, $append included.
 * Where possible the cut is made just before a whitespace or ASCII punctuation character.
 * @param  string  UTF-8 encoded text
 * @param  int     maximal length in characters
 * @param  string  text appended when the string is shortened (an ellipsis by default)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
	if (self::length($s) <= $maxLen) {
		return $s; // short enough already
	}

	// reserve room for the appended text
	$maxLen = $maxLen - self::length($append);
	if ($maxLen < 1) {
		return $append;
	}

	// longest head that is directly followed by whitespace or ASCII punctuation
	$matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
	if ($matches) {
		return $matches[0] . $append;
	}

	// no suitable boundary found: hard cut
	return self::substring($s, 0, $maxLen) . $append;
}
234:
235:
236: 237: 238: 239: 240: 241: 242:
/**
 * Indents every non-empty line of a text.
 * @param  string  text
 * @param  int     how many times $chars is repeated; nothing is done unless it is positive
 * @param  string  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
	if ($level <= 0) {
		return $s;
	}

	$indentation = str_repeat($chars, $level);
	// matches the start of the text or a run of line breaks, but only when a
	// non-line-break character follows; the matched text is kept via $0
	return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $indentation);
}
250:
251:
252: 253: 254: 255: 256:
/**
 * Converts a UTF-8 string to lower case using mbstring.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function lower($s)
{
	$lowered = mb_strtolower($s, 'UTF-8');
	return $lowered;
}
261:
262:
263: 264: 265: 266: 267:
/**
 * Converts a UTF-8 string to upper case using mbstring.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function upper($s)
{
	$uppered = mb_strtoupper($s, 'UTF-8');
	return $uppered;
}
272:
273:
274: 275: 276: 277: 278:
/**
 * Converts the first character of a UTF-8 string to upper case; the rest is left as is.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function firstUpper($s)
{
	$head = self::substring($s, 0, 1);
	$tail = self::substring($s, 1);
	return self::upper($head) . $tail;
}
283:
284:
285: 286: 287: 288: 289:
/**
 * Converts a UTF-8 string to title case using mb_convert_case() with MB_CASE_TITLE.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function capitalize($s)
{
	$titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	return $titled;
}
294:
295:
296: 297: 298: 299: 300: 301: 302:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 * @param  string
 * @param  string
 * @param  int     NULL compares whole strings, a positive value the first $len characters,
 *                 a negative value the last |$len| characters
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
	if ($len !== NULL) {
		if ($len < 0) {
			// take |$len| characters counted from the end
			$start = $len;
			$count = -$len;
		} else {
			$start = 0;
			$count = $len;
		}
		$left = self::substring($left, $start, $count);
		$right = self::substring($right, $start, $count);
	}

	return self::lower($left) === self::lower($right);
}
314:
315:
316: 317: 318: 319: 320:
/**
 * Finds the longest common prefix of the given strings.
 * Accepts either a single array of strings or the strings as separate arguments.
 * @param  string|array
 * @return string
 */
public static function findPrefix($strings)
{
	if (!is_array($strings)) {
		$strings = func_get_args();
	}

	$first = array_shift($strings);
	$firstLength = strlen($first);

	for ($pos = 0; $pos < $firstLength; $pos++) {
		foreach ($strings as $other) {
			if (isset($other[$pos]) && $first[$pos] === $other[$pos]) {
				continue; // still identical at this byte
			}

			// Mismatch. If it falls on a UTF-8 continuation byte (0x80-0xBF),
			// step back so that the prefix does not end in the middle of a character.
			while ($pos && $first[$pos - 1] >= "\x80" && $first[$pos] >= "\x80" && $first[$pos] < "\xC0") {
				$pos--;
			}
			return substr($first, 0, $pos);
		}
	}

	// the first string is a prefix of all the others
	return $first;
}
339:
340:
341: 342: 343: 344: 345:
/**
 * Returns the number of characters (code points) in a UTF-8 string.
 * @param  string  UTF-8 encoded text
 * @return int
 */
public static function length($s)
{
	// utf8_decode() is deprecated as of PHP 8.2, so it is only the fallback
	// for installations without the mbstring extension
	if (function_exists('mb_strlen')) {
		return mb_strlen($s, 'UTF-8');
	}

	// utf8_decode() emits one byte per code point, so the byte length is the character count
	return strlen(utf8_decode($s));
}
350:
351:
352: 353: 354: 355: 356: 357:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 * @param  string  UTF-8 encoded text
 * @param  string  characters to strip; by default space, tab, LF, CR, NUL, vertical tab and U+00A0
 * @return string
 */
public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
{
	// the character list ends up inside a [...] class, so it has to be escaped
	$class = '[' . preg_quote($charlist, '#') . ']';
	$pattern = '#^' . $class . '+|' . $class . '+\z#u';
	return self::replace($s, $pattern, '');
}
363:
364:
365: 366: 367: 368: 369: 370: 371:
/**
 * Pads a UTF-8 string on the left to the given length in characters.
 * @param  string  UTF-8 encoded text
 * @param  int     desired total length in characters
 * @param  string  padding; repeated and, if necessary, cut to fill the gap exactly
 * @return string  the input unchanged when it is already long enough or $pad is empty
 */
public static function padLeft($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if ($padLen === 0) {
		return $s; // nothing to pad with; also avoids a division by zero
	}

	$missing = max(0, $length - self::length($s));
	// str_repeat() expects an integer; passing the float quotient is deprecated in PHP 8.1+
	$repeats = (int) ($missing / $padLen);
	return str_repeat($pad, $repeats) . self::substring($pad, 0, $missing % $padLen) . $s;
}
378:
379:
380: 381: 382: 383: 384: 385: 386:
/**
 * Pads a UTF-8 string on the right to the given length in characters.
 * @param  string  UTF-8 encoded text
 * @param  int     desired total length in characters
 * @param  string  padding; repeated and, if necessary, cut to fill the gap exactly
 * @return string  the input unchanged when it is already long enough or $pad is empty
 */
public static function padRight($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if ($padLen === 0) {
		return $s; // nothing to pad with; also avoids a division by zero
	}

	$missing = max(0, $length - self::length($s));
	// str_repeat() expects an integer; passing the float quotient is deprecated in PHP 8.1+
	$repeats = (int) ($missing / $padLen);
	return $s . str_repeat($pad, $repeats) . self::substring($pad, 0, $missing % $padLen);
}
393:
394:
395: 396: 397: 398: 399:
/**
 * Reverses the order of characters in a UTF-8 string.
 * @param  string  UTF-8 encoded text
 * @return string
 */
public static function reverse($s)
{
	// Each character becomes a fixed 4-byte big-endian unit. Reversing all bytes reverses
	// the character order and turns every unit into little-endian, hence UTF-32LE below.
	$utf32be = @iconv('UTF-8', 'UTF-32BE', $s);
	$utf32le = strrev($utf32be);
	return @iconv('UTF-32LE', 'UTF-8', $utf32le);
}
404:
405:
406: 407: 408: 409:
/**
 * Generates a random string by delegating to Random::generate().
 * @param  int     passed through as the length
 * @param  string  passed through as the character list
 * @return mixed   whatever Random::generate() returns
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
	$generated = Random::generate($length, $charlist);
	return $generated;
}
414:
415:
416: 417: 418: 419: 420: 421: 422:
/**
 * Splits a string by a regular expression using preg_split() with no limit.
 * PREG_SPLIT_DELIM_CAPTURE is always added to the flags.
 * @param  string  text to split
 * @param  string  PCRE pattern
 * @param  int     additional PREG_SPLIT_* flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
	$allFlags = $flags | PREG_SPLIT_DELIM_CAPTURE;
	$arguments = array($pattern, $subject, -1, $allFlags);
	return self::pcre('preg_split', $arguments);
}
427:
428:
429: 430: 431: 432: 433: 434: 435: 436:
/**
 * Performs a regular expression match using preg_match().
 * @param  string  text to search
 * @param  string  PCRE pattern
 * @param  int     flags passed to preg_match()
 * @param  int     offset in bytes
 * @return array|NULL  the matches array, or NULL when nothing matched or the offset
 *                     lies beyond the end of the subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return NULL;
	}

	// $m is handed over by reference so that preg_match() can fill it
	$found = self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset));
	if ($found) {
		return $m;
	}
	return NULL;
}
446:
447:
448: 449: 450: 451: 452: 453: 454: 455:
/**
 * Performs a global regular expression match using preg_match_all().
 * Results are in PREG_SET_ORDER unless PREG_PATTERN_ORDER is requested in $flags.
 * @param  string  text to search
 * @param  string  PCRE pattern
 * @param  int     flags passed to preg_match_all()
 * @param  int     offset in bytes
 * @return array   empty when the offset lies beyond the end of the subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return array();
	}

	if (!($flags & PREG_PATTERN_ORDER)) {
		$flags = $flags | PREG_SET_ORDER;
	}

	// $m is handed over by reference so that preg_match_all() can fill it
	self::pcre('preg_match_all', array($pattern, $subject, & $m, $flags, $offset));
	return $m;
}
468:
469:
470: 471: 472: 473: 474: 475: 476: 477:
/**
 * Performs a regular expression search and replace.
 *
 * - an object or array $replacement is treated as a callback and run through preg_replace_callback()
 * - when $replacement is NULL and $pattern is an array, its keys are used as the patterns
 *   and its values as the replacements
 * - otherwise preg_replace() is called with the arguments as given
 *
 * @param  string        text to process
 * @param  string|array  PCRE pattern, or array of pattern => replacement
 * @param  mixed         replacement string or callback
 * @param  int           maximal number of replacements, -1 for no limit
 * @return string
 * @throws Nette\InvalidStateException  when the callback is not callable
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
	$callbackGiven = is_object($replacement) || is_array($replacement);

	if ($callbackGiven) {
		if ($replacement instanceof Nette\Callback) {
			$replacement = $replacement->getNative();
		}
		if (!is_callable($replacement, FALSE, $textual)) {
			throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
		}

		$arguments = array($pattern, $replacement, $subject, $limit);
		return self::pcre('preg_replace_callback', $arguments);
	}

	if ($replacement === NULL && is_array($pattern)) {
		// pattern => replacement map
		$replacement = array_values($pattern);
		$pattern = array_keys($pattern);
	}

	$arguments = array($pattern, $replacement, $subject, $limit);
	return self::pcre('preg_replace', $arguments);
}
497:
498:
499:
/**
 * Calls a PCRE function through Callback::invokeSafe() and converts failures to RegexpException.
 * @param  string  name of the preg_* function
 * @param  array   its arguments; the first one is the pattern (or an array of patterns)
 * @return mixed   result of the called function
 * @throws RegexpException  from the error handler passed to invokeSafe(), or when
 *                          preg_last_error() reports an error after the call
 */
public static function pcre($func, $args)
{
	$onError = function($message) use ($args) {

		throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
	};
	$res = Callback::invokeSafe($func, $args, $onError);

	$code = preg_last_error();
	if (!$code) {
		return $res;
	}

	// for the replacing functions an error code matters only when the result itself is NULL
	$isReplacing = in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace'));
	if ($res === NULL || !$isReplacing) {
		throw new RegexpException(NULL, $code, implode(' or ', (array) $args[0]));
	}
	return $res;
}
514:
515: }
516:
517:
518: 519: 520:
/**
 * Exception describing a failure of a regular expression function.
 */
class RegexpException extends \Exception
{
	/** @var array  default messages indexed by PCRE error code */
	public static $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
		6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR (PHP 7+)
	);

	/**
	 * @param  string  explicit message; when empty, one is derived from $code
	 * @param  int     PCRE error code
	 * @param  string  pattern to mention in the derived message
	 */
	public function __construct($message, $code = NULL, $pattern = NULL)
	{
		if (!$message) {
			$known = isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error';
			$message = $known . ($pattern ? " (pattern: $pattern)" : '');
		}
		// Exception expects an integer code; passing NULL is deprecated in PHP 8.1+
		parent::__construct($message, (int) $code);
	}

}
540: