1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Static helpers for working with (mostly UTF-8 encoded) strings.
 *
 * The class cannot be instantiated; every helper is a public static method.
 * The regular-expression helpers (split, match, matchAll, replace) report
 * PCRE failures by throwing RegexpException.
 */
class Strings
{

    /**
     * Static class - cannot be instantiated.
     *
     * @throws StaticClassException always
     */
    final public function __construct()
    {
        throw new StaticClassException;
    }



    /**
     * Checks whether the string is valid for the specified encoding.
     *
     * The string is considered valid when fixEncoding() returns it unchanged,
     * so a string containing the byte sequence EF BB BF is reported as invalid.
     *
     * @param  string  byte stream to check
     * @param  string  expected encoding
     * @return bool
     */
    public static function checkEncoding($s, $encoding = 'UTF-8')
    {
        return $s === self::fixEncoding($s, $encoding);
    }



    /**
     * Returns the string with sequences invalid in the given encoding removed.
     *
     * @param  string  byte stream to fix
     * @param  string  encoding
     * @return string
     */
    public static function fixEncoding($s, $encoding = 'UTF-8')
    {
        // Round-trip through UTF-16 with //IGNORE so that unconvertible input is dropped;
        // the @ operator covers the nested iconv() call as well.
        $s = @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s));
        // strip the byte sequence EF BB BF (UTF-8 byte order mark)
        return str_replace("\xEF\xBB\xBF", '', $s);
    }



    /**
     * Returns the character with the given code point in the given encoding.
     *
     * @param  int     code point, packed as an unsigned 32-bit big-endian value
     * @param  string  target encoding
     * @return string
     */
    public static function chr($code, $encoding = 'UTF-8')
    {
        return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
    }



    /**
     * Does $haystack start with $needle? (byte-wise comparison)
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }



    /**
     * Does $haystack end with $needle? An empty needle always matches.
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
    }



    /**
     * Does $haystack contain $needle? (byte-wise search)
     *
     * @param  string
     * @param  string
     * @return bool
     */
    public static function contains($haystack, $needle)
    {
        return strpos($haystack, $needle) !== FALSE;
    }



    /**
     * Returns a part of a UTF-8 string, counted in characters.
     *
     * @param  string
     * @param  int     start offset in characters
     * @param  int     length in characters; NULL means "up to the end of the string"
     * @return string
     */
    public static function substring($s, $start, $length = NULL)
    {
        if ($length === NULL) {
            $length = self::length($s);
        }
        return function_exists('mb_substr') ? mb_substr($s, $start, $length, 'UTF-8') : iconv_substr($s, $start, $length, 'UTF-8');
    }



    /**
     * Normalizes line endings to \n and removes control characters,
     * trailing tabs/spaces on each line and leading/trailing blank lines.
     *
     * @param  string
     * @return string
     */
    public static function normalize($s)
    {
        // standardize line endings to unix-like
        $s = str_replace("\r\n", "\n", $s);
        $s = strtr($s, "\r", "\n");

        // remove control characters; leave \t + \n
        $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

        // right trim of every line
        $s = preg_replace("#[\t ]+$#m", '', $s);

        // leading and trailing blank lines
        $s = trim($s, "\n");

        return $s;
    }



    /**
     * Transliterates a UTF-8 string to ASCII using iconv //TRANSLIT.
     *
     * @param  string  UTF-8 encoded string
     * @return string
     */
    public static function toAscii($s)
    {
        // drop everything outside tab, LF, CR, printable ASCII and U+00A0..U+10FFFF
        $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u', '', $s);
        // protect the original ` ' " ^ ~ characters with placeholders ...
        $s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
        if (ICONV_IMPL === 'glibc') {
            // with glibc, go through WINDOWS-1250 and map its accented bytes to base letters
            $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
            $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
                . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
                . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
                . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe",
                "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt");
        } else {
            $s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);
        }
        // ... so that any ` ' " ^ ~ produced by the transliteration itself can be removed ...
        $s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
        // ... and the protected originals restored
        return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
    }



    /**
     * Converts a string to a web-safe form: ASCII letters, digits and the
     * characters of $charlist are kept, every other run becomes a single '-'.
     *
     * @param  string  UTF-8 encoded string
     * @param  string  additional allowed characters
     * @param  bool    convert to lower case?
     * @return string
     */
    public static function webalize($s, $charlist = NULL, $lower = TRUE)
    {
        $s = self::toAscii($s);
        if ($lower) {
            $s = strtolower($s);
        }
        // cast so that the NULL default is not handed to preg_quote(), which expects a string
        $s = preg_replace('#[^a-z0-9' . preg_quote((string) $charlist, '#') . ']+#i', '-', $s);
        $s = trim($s, '-');
        return $s;
    }



    /**
     * Truncates a UTF-8 string to at most $maxLen characters (including $append),
     * preferring to cut right before whitespace or ASCII punctuation.
     *
     * @param  string  UTF-8 encoded string
     * @param  int     maximal length in characters
     * @param  string  text appended when the string is shortened (default: the bytes E2 80 A6)
     * @return string
     * @throws RegexpException  raised by match() on a PCRE error
     */
    public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
    {
        if (self::length($s) > $maxLen) {
            $maxLen = $maxLen - self::length($append);
            if ($maxLen < 1) {
                // no room for any original text
                return $append;

            } elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
                // longest prefix followed by whitespace or an ASCII non-alphanumeric character
                return $matches[0] . $append;

            } else {
                // no such boundary within range - hard cut
                return self::substring($s, 0, $maxLen) . $append;
            }
        }
        return $s;
    }



    /**
     * Indents every non-empty line of the text.
     *
     * @param  string
     * @param  int     number of repetitions of $chars; values below 1 leave the text unchanged
     * @param  string  indentation unit
     * @return string
     * @throws RegexpException  raised by replace() on a PCRE error
     */
    public static function indent($s, $level = 1, $chars = "\t")
    {
        return $level < 1 ? $s : self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
    }



    /**
     * Converts a UTF-8 string to lower case (requires the mbstring extension).
     *
     * @param  string
     * @return string
     */
    public static function lower($s)
    {
        return mb_strtolower($s, 'UTF-8');
    }



    /**
     * Converts a UTF-8 string to upper case (requires the mbstring extension).
     *
     * @param  string
     * @return string
     */
    public static function upper($s)
    {
        return mb_strtoupper($s, 'UTF-8');
    }



    /**
     * Converts the first character of a UTF-8 string to upper case.
     *
     * @param  string
     * @return string
     */
    public static function firstUpper($s)
    {
        return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
    }



    /**
     * Converts a UTF-8 string to title case (requires the mbstring extension).
     *
     * @param  string
     * @return string
     */
    public static function capitalize($s)
    {
        return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    }



    /**
     * Case-insensitive comparison of two UTF-8 strings or of their parts.
     *
     * @param  string
     * @param  string
     * @param  int     NULL = whole strings, positive = first $len characters,
     *                 negative = last |$len| characters
     * @return bool
     */
    public static function compare($left, $right, $len = NULL)
    {
        if ($len < 0) {
            $left = self::substring($left, $len, -$len);
            $right = self::substring($right, $len, -$len);
        } elseif ($len !== NULL) {
            $left = self::substring($left, 0, $len);
            $right = self::substring($right, 0, $len);
        }
        return self::lower($left) === self::lower($right);
    }



    /**
     * Returns the length of a UTF-8 string in characters.
     *
     * mb_strlen() is used when available; utf8_decode() is only the fallback
     * for installations without the mbstring extension.
     *
     * @param  string
     * @return int
     */
    public static function length($s)
    {
        return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
    }



    /**
     * Strips the given characters from the beginning and end of a UTF-8 string.
     *
     * @param  string
     * @param  string  characters to strip; the default is ASCII whitespace, NUL,
     *                 vertical tab and the bytes C2 A0 (UTF-8 no-break space)
     * @return string
     * @throws RegexpException  raised by replace() on a PCRE error
     */
    public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
    {
        $charlist = preg_quote($charlist, '#');
        return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+$#u', '');
    }



    /**
     * Pads a UTF-8 string on the left to the given length (in characters).
     *
     * @param  string
     * @param  int     target length
     * @param  string  padding; must not be empty
     * @return string
     */
    public static function padLeft($s, $length, $pad = ' ')
    {
        $length = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        // whole repetitions of $pad, then a partial one; the explicit (int) cast
        // avoids handing a fractional float to str_repeat()
        return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s;
    }



    /**
     * Pads a UTF-8 string on the right to the given length (in characters).
     *
     * @param  string
     * @param  int     target length
     * @param  string  padding; must not be empty
     * @return string
     */
    public static function padRight($s, $length, $pad = ' ')
    {
        $length = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        // whole repetitions of $pad, then a partial one; the explicit (int) cast
        // avoids handing a fractional float to str_repeat()
        return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen);
    }



    /**
     * Reverses a UTF-8 string character by character.
     *
     * @param  string
     * @return string
     */
    public static function reverse($s)
    {
        // byte-reversing big-endian UTF-32 yields little-endian UTF-32 of the reversed text
        return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
    }



    /**
     * Generates a random string.
     *
     * @param  int     number of characters to generate
     * @param  string  allowed single-byte characters; "x-y" denotes a range
     * @return string
     */
    public static function random($length = 10, $charlist = '0-9a-z')
    {
        // expand "x-y" ranges through a static callback instead of create_function(),
        // which defined a new global function on every call
        $charlist = str_shuffle(preg_replace_callback('#.-.#', array(__CLASS__, 'expandCharRange'), $charlist));
        $chLen = strlen($charlist);

        $s = '';
        if (function_exists('random_int')) {
            // prefer the cryptographically secure generator where the runtime offers it
            for ($i = 0; $i < $length; $i++) {
                $s .= $charlist[random_int(0, $chLen - 1)];
            }
            return $s;
        }

        // fallback for runtimes without random_int(): re-seed every five characters
        $rand = $rand2 = 0;
        for ($i = 0; $i < $length; $i++) {
            if ($i % 5 === 0) {
                $rand = lcg_value();
                $rand2 = microtime(TRUE);
            }
            $rand *= $chLen;
            // explicit cast: the % operator works on integers
            $s .= $charlist[((int) ($rand + $rand2)) % $chLen];
            $rand -= (int) $rand;
        }
        return $s;
    }



    /**
     * preg_replace_callback() handler for random(): expands a three-byte
     * "x-y" match into all characters from x to y.
     *
     * @param  array   match data; $m[0] is the matched "x-y" text
     * @return string
     */
    private static function expandCharRange($m)
    {
        return implode('', range($m[0][0], $m[0][2]));
    }



    /**
     * Splits a string by a regular expression; captured delimiters are
     * included in the result (PREG_SPLIT_DELIM_CAPTURE is always set).
     *
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     additional PREG_SPLIT_* flags
     * @return array
     * @throws RegexpException  when PHP raises an error or preg_last_error() is non-zero
     */
    public static function split($subject, $pattern, $flags = 0)
    {
        Debugger::tryError();
        $res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
        if (Debugger::catchError($e) || preg_last_error()) { // compile error XOR run-time error
            throw new RegexpException($e ? $e->getMessage() : NULL, $e ? NULL : preg_last_error(), $pattern);
        }
        return $res;
    }



    /**
     * Performs a regular expression match.
     *
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     flags passed to preg_match()
     * @param  int     offset in bytes
     * @return array|NULL  match data, or NULL when there is no match or the
     *                     offset lies beyond the end of the subject
     * @throws RegexpException  when PHP raises an error or preg_last_error() is non-zero
     */
    public static function match($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return NULL;
        }
        Debugger::tryError();
        $res = preg_match($pattern, $subject, $m, $flags, $offset);
        if (Debugger::catchError($e) || preg_last_error()) { // compile error XOR run-time error
            throw new RegexpException($e ? $e->getMessage() : NULL, $e ? NULL : preg_last_error(), $pattern);
        }
        if ($res) {
            return $m;
        }
        return NULL;
    }



    /**
     * Performs a global regular expression match.
     *
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     flags; PREG_SET_ORDER is added unless PREG_PATTERN_ORDER is given
     * @param  int     offset in bytes
     * @return array   match data; empty array when the offset lies beyond the end of the subject
     * @throws RegexpException  when PHP raises an error or preg_last_error() is non-zero
     */
    public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return array();
        }
        Debugger::tryError();
        $res = preg_match_all(
            $pattern, $subject, $m,
            ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
            $offset
        );
        if (Debugger::catchError($e) || preg_last_error()) { // compile error XOR run-time error
            throw new RegexpException($e ? $e->getMessage() : NULL, $e ? NULL : preg_last_error(), $pattern);
        }
        return $m;
    }



    /**
     * Performs a regular expression search and replace.
     *
     * Supported forms:
     * - replace($subject, $pattern, $callback): $replacement is an object or array
     *   and is used as a callback (a Callback instance is unwrapped first);
     * - replace($subject, array(pattern => replacement, ...)): only when
     *   $replacement is NULL;
     * - replace($subject, $pattern, $string): plain preg_replace(); $pattern may
     *   also be a list of patterns sharing the one replacement.
     *
     * @param  string
     * @param  string|array  PCRE pattern(s)
     * @param  string|callback|NULL  replacement
     * @param  int     maximal number of replacements, -1 = unlimited
     * @return string
     * @throws InvalidStateException  when the callback is not callable
     * @throws RegexpException  on a PCRE compile or run-time error
     */
    public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
    {
        if (is_object($replacement) || is_array($replacement)) {
            if ($replacement instanceof Callback) {
                $replacement = $replacement->getNative();
            }
            if (!is_callable($replacement, FALSE, $textual)) {
                throw new InvalidStateException("Callback '$textual' is not callable.");
            }

            // probe the pattern on an empty subject so that a compile error is reported separately
            Debugger::tryError();
            preg_match($pattern, '');
            if (Debugger::catchError($e)) { // compile error
                throw new RegexpException($e->getMessage(), NULL, $pattern);
            }

            $res = preg_replace_callback($pattern, $replacement, $subject, $limit);
            if ($res === NULL && preg_last_error()) { // run-time error
                throw new RegexpException(NULL, preg_last_error(), $pattern);
            }
            return $res;

        } elseif ($replacement === NULL && is_array($pattern)) {
            // pattern => replacement map; an explicitly given replacement is never overridden
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }

        Debugger::tryError();
        $res = preg_replace($pattern, $replacement, $subject, $limit);
        if (Debugger::catchError($e) || preg_last_error()) { // compile error XOR run-time error
            throw new RegexpException($e ? $e->getMessage() : NULL, $e ? NULL : preg_last_error(), $pattern);
        }
        return $res;
    }

}
523:
524:
525:
526: 527: 528: 529:
/**
 * Exception reporting a failure of a regular expression function.
 *
 * When no message is supplied, it is derived from the PCRE error code
 * through the $messages table.
 */
class RegexpException extends Exception
{
    /** @var array  PCRE error code => human readable message */
    public static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
    );

    /**
     * @param  string|NULL  error message; when empty, one is looked up by $code
     * @param  int|NULL     PCRE error code (as returned by preg_last_error())
     * @param  string|NULL  offending pattern, appended to the message when given
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            // guard the lookup so NULL is never used as an array offset
            $known = $code !== NULL && isset(self::$messages[$code]);
            $message = ($known ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
        } elseif ($pattern) {
            $message .= " in pattern: $pattern";
        }
        // Exception::__construct() expects an integer code; a missing code becomes 0
        parent::__construct($message, (int) $code);
    }

}
551: