1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * String tools library.
 *
 * Static class: all functionality is exposed through static methods and the
 * constructor always throws. Unless stated otherwise the methods treat their
 * input as UTF-8 encoded text.
 */
class NStrings
{

	/** @var string|array  pattern(s) being executed while the regexp error handler is installed */
	private static $failingPattern;



	/**
	 * Static class - cannot be instantiated.
	 * @throws NStaticClassException  always
	 */
	final public function __construct()
	{
		throw new NStaticClassException;
	}



	/**
	 * Checks if the string is valid for the specified encoding.
	 * The check compares the string with the result of fixEncoding(), so a UTF-8
	 * string containing a byte order mark is reported as invalid as well.
	 * @param  string  byte stream to check
	 * @param  string  expected encoding
	 * @return bool
	 */
	public static function checkEncoding($s, $encoding = 'UTF-8')
	{
		return $s === self::fixEncoding($s, $encoding);
	}



	/**
	 * Returns the string with invalid byte sequences removed.
	 * For UTF-8 the byte order mark is removed too.
	 * @param  string  byte stream to fix
	 * @param  string  encoding
	 * @return string
	 */
	public static function fixEncoding($s, $encoding = 'UTF-8')
	{
		if (strcasecmp($encoding, 'UTF-8') === 0) {
			$s = str_replace("\xEF\xBB\xBF", '', $s); // strip UTF-8 BOM
		}
		if (PHP_VERSION_ID >= 50400) {
			// drop (instead of substitute) invalid sequences, but only for this
			// conversion: the previous setting is put back afterwards
			$previous = mb_substitute_character();
			mb_substitute_character('none');
			$fixed = mb_convert_encoding($s, $encoding, $encoding);
			mb_substitute_character($previous);
			return $fixed;
		}
		// older PHP: round-trip through UTF-16 and let iconv ignore invalid input
		return @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s));
	}



	/**
	 * Returns the character with the given code point in the given encoding.
	 * @param  int     code point, packed as a big-endian 32-bit value (UTF-32BE)
	 * @param  string  target encoding
	 * @return string
	 */
	public static function chr($code, $encoding = 'UTF-8')
	{
		return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
	}



	/**
	 * Does $haystack start with $needle? (byte-wise comparison)
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function startsWith($haystack, $needle)
	{
		return strncmp($haystack, $needle, strlen($needle)) === 0;
	}



	/**
	 * Does $haystack end with $needle? (byte-wise comparison)
	 * An empty $needle always matches.
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function endsWith($haystack, $needle)
	{
		return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
	}



	/**
	 * Does $haystack contain $needle? (byte-wise search)
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function contains($haystack, $needle)
	{
		return strpos($haystack, $needle) !== FALSE;
	}



	/**
	 * Returns a part of UTF-8 string.
	 * Uses mb_substr() when available, iconv_substr() otherwise.
	 * @param  string
	 * @param  int     start position in characters
	 * @param  int     length in characters; NULL means "length of the whole string"
	 * @return string
	 */
	public static function substring($s, $start, $length = NULL)
	{
		if ($length === NULL) {
			$length = self::length($s);
		}
		return function_exists('mb_substr')
			? mb_substr($s, $start, $length, 'UTF-8')
			: iconv_substr($s, $start, $length, 'UTF-8');
	}



	/**
	 * Removes special control characters and normalizes line endings and spaces.
	 * @param  string  UTF-8 encoding or 8-bit
	 * @return string
	 */
	public static function normalize($s)
	{
		// standardize line endings to unix-like
		$s = str_replace("\r\n", "\n", $s);
		$s = strtr($s, "\r", "\n");

		// remove control characters; tab (0x09) and line feed (0x0A) are kept
		$s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

		// right trim every line
		$s = preg_replace('#[\t ]+$#m', '', $s);

		// leading and trailing blank lines
		$s = trim($s, "\n");

		return $s;
	}



	/**
	 * Converts UTF-8 string to ASCII using iconv transliteration.
	 * @param  string  UTF-8 encoding
	 * @return string  ASCII
	 */
	public static function toAscii($s)
	{
		// drop everything outside the character ranges allowed below
		$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
		// hide the original ` ' " ^ ~ behind control characters, so that the same
		// characters emitted by the transliteration can be stripped afterwards
		$s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
		if (ICONV_IMPL === 'glibc') {
			// glibc: go through WINDOWS-1250 and map its accented letters by hand
			$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
			$s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
				. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
				. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
				. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
				"ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
		} else {
			$s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);
		}
		// remove marks produced by the transliteration, then restore the hidden originals
		$s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
		return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
	}



	/**
	 * Converts the string to a web safe form: ASCII letters and digits (plus
	 * $charlist), every run of other characters replaced by a single dash.
	 * @param  string  UTF-8 encoding
	 * @param  string  additional allowed characters
	 * @param  bool    convert to lower case?
	 * @return string
	 */
	public static function webalize($s, $charlist = NULL, $lower = TRUE)
	{
		$s = self::toAscii($s);
		if ($lower) {
			$s = strtolower($s);
		}
		// explicit cast: the default NULL must not reach preg_quote() as a non-string
		$allowed = preg_quote((string) $charlist, '#');
		$s = preg_replace('#[^a-z0-9' . $allowed . ']+#i', '-', $s);
		return trim($s, '-');
	}



	/**
	 * Truncates the string to a maximal length (including the appended suffix),
	 * trying not to split whole words.
	 * @param  string  UTF-8 encoding
	 * @param  int     maximal length in characters
	 * @param  string  suffix appended when the string is shortened (default: ellipsis)
	 * @return string
	 */
	public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
	{
		if (self::length($s) <= $maxLen) {
			return $s;
		}

		$maxLen = $maxLen - self::length($append);
		if ($maxLen < 1) {
			return $append;
		}

		// longest prefix that is directly followed by whitespace or ASCII punctuation
		$matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
		if ($matches) {
			return $matches[0] . $append;
		}

		// no suitable boundary found - cut in the middle of the word
		return self::substring($s, 0, $maxLen) . $append;
	}



	/**
	 * Indents every non-empty line of the text.
	 * @param  string  UTF-8 encoding or 8-bit
	 * @param  int     number of repetitions of $chars; values below 1 leave the text untouched
	 * @param  string  indentation unit
	 * @return string
	 */
	public static function indent($s, $level = 1, $chars = "\t")
	{
		if ($level < 1) {
			return $s;
		}
		return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
	}



	/**
	 * Converts UTF-8 string to lower case.
	 * @param  string  UTF-8 encoding
	 * @return string
	 */
	public static function lower($s)
	{
		return mb_strtolower($s, 'UTF-8');
	}



	/**
	 * Converts UTF-8 string to upper case.
	 * @param  string  UTF-8 encoding
	 * @return string
	 */
	public static function upper($s)
	{
		return mb_strtoupper($s, 'UTF-8');
	}



	/**
	 * Converts the first character of UTF-8 string to upper case.
	 * @param  string  UTF-8 encoding
	 * @return string
	 */
	public static function firstUpper($s)
	{
		return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
	}



	/**
	 * Capitalizes UTF-8 string (title case via mb_convert_case).
	 * @param  string  UTF-8 encoding
	 * @return string
	 */
	public static function capitalize($s)
	{
		return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	}



	/**
	 * Case-insensitive comparison of two UTF-8 strings or of their parts.
	 * @param  string
	 * @param  string
	 * @param  int     positive: compare only the first $len characters;
	 *                 negative: compare only the last |$len| characters;
	 *                 NULL: compare whole strings
	 * @return bool
	 */
	public static function compare($left, $right, $len = NULL)
	{
		if ($len < 0) {
			$left = self::substring($left, $len, -$len);
			$right = self::substring($right, $len, -$len);
		} elseif ($len !== NULL) {
			$left = self::substring($left, 0, $len);
			$right = self::substring($right, 0, $len);
		}
		return self::lower($left) === self::lower($right);
	}



	/**
	 * Returns the length of UTF-8 string in characters.
	 * Prefers mb_strlen(); utf8_decode() is kept only as a fallback for
	 * installations without mbstring.
	 * @param  string
	 * @return int
	 */
	public static function length($s)
	{
		return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
	}



	/**
	 * Strips the given characters (by default whitespace including the
	 * no-break space) from the beginning and end of UTF-8 string.
	 * @param  string  UTF-8 encoding
	 * @param  string  characters to strip
	 * @return string
	 * @throws NRegexpException  on regular expression failure (e.g. malformed UTF-8)
	 */
	public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
	{
		$charlist = preg_quote($charlist, '#');
		return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
	}



	/**
	 * Pads UTF-8 string from the left to the given length.
	 * @param  string  UTF-8 encoding
	 * @param  int     target length in characters
	 * @param  string  padding; when empty the string is returned unpadded
	 * @return string
	 */
	public static function padLeft($s, $length, $pad = ' ')
	{
		$length = max(0, $length - self::length($s));
		$padLen = self::length($pad);
		if ($padLen === 0) {
			return (string) $s; // nothing to pad with; also avoids division by zero
		}
		return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s;
	}



	/**
	 * Pads UTF-8 string from the right to the given length.
	 * @param  string  UTF-8 encoding
	 * @param  int     target length in characters
	 * @param  string  padding; when empty the string is returned unpadded
	 * @return string
	 */
	public static function padRight($s, $length, $pad = ' ')
	{
		$length = max(0, $length - self::length($s));
		$padLen = self::length($pad);
		if ($padLen === 0) {
			return (string) $s; // nothing to pad with; also avoids division by zero
		}
		return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen);
	}



	/**
	 * Reverses UTF-8 string.
	 * The string is converted to UTF-32BE, its bytes are reversed (which yields
	 * the characters in reverse order as UTF-32LE) and converted back.
	 * @param  string  UTF-8 encoding
	 * @return string
	 */
	public static function reverse($s)
	{
		return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
	}



	/**
	 * Generates a random string.
	 * NOTE(review): the generator mixes microtime(), lcg_value() and a hash of
	 * $_SERVER; it is not a cryptographically secure source - do not use the
	 * result for secrets.
	 * @param  int     length of the result
	 * @param  string  allowed characters; "x-y" is expanded to a range
	 * @return string  empty string when $charlist is empty
	 */
	public static function random($length = 10, $charlist = '0-9a-z')
	{
		// expand "x-y" ranges; with DELIM_CAPTURE the odd items are the ranges themselves
		$expanded = '';
		foreach (preg_split('#(.-.)#', $charlist, -1, PREG_SPLIT_DELIM_CAPTURE) as $i => $part) {
			$expanded .= $i % 2 ? implode('', range($part[0], $part[2])) : $part;
		}
		$charlist = str_shuffle($expanded);
		$chLen = strlen($charlist);
		if ($chLen === 0) {
			return ''; // no characters to choose from (would be a modulo by zero below)
		}

		static $rand3;
		if (!$rand3) {
			$rand3 = md5(serialize($_SERVER), TRUE);
		}

		$s = '';
		for ($i = 0; $i < $length; $i++) {
			if ($i % 5 === 0) { // refresh the entropy every five characters
				list($rand, $rand2) = explode(' ', microtime());
				$rand += lcg_value();
			}
			$rand *= $chLen;
			$s .= $charlist[(int) ($rand + $rand2 + ord($rand3[$i % strlen($rand3)])) % $chLen];
			$rand -= (int) $rand; // keep only the fractional part
		}
		return $s;
	}



	/**
	 * Splits the string by a regular expression (delimiters captured by
	 * parentheses are always included in the result).
	 * @param  string
	 * @param  string  regular expression
	 * @param  int     PREG_SPLIT_* flags
	 * @return array
	 * @throws NRegexpException  on compilation or runtime error
	 */
	public static function split($subject, $pattern, $flags = 0)
	{
		self::catchRegexpErrors($pattern);
		$res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new NRegexpException(NULL, preg_last_error(), $pattern);
		}
		return $res;
	}



	/**
	 * Performs a regular expression match.
	 * @param  string
	 * @param  string  regular expression
	 * @param  int     preg_match() flags
	 * @param  int     offset in bytes
	 * @return array|NULL  matched groups, NULL when nothing matched or $offset is past the end
	 * @throws NRegexpException  on compilation or runtime error
	 */
	public static function match($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return NULL;
		}
		self::catchRegexpErrors($pattern);
		$res = preg_match($pattern, $subject, $m, $flags, $offset);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new NRegexpException(NULL, preg_last_error(), $pattern);
		}
		return $res ? $m : NULL;
	}



	/**
	 * Performs a global regular expression match.
	 * Results are ordered by PREG_SET_ORDER unless PREG_PATTERN_ORDER is requested.
	 * @param  string
	 * @param  string  regular expression
	 * @param  int     preg_match_all() flags
	 * @param  int     offset in bytes
	 * @return array
	 * @throws NRegexpException  on compilation or runtime error
	 */
	public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return array();
		}
		self::catchRegexpErrors($pattern);
		$res = preg_match_all(
			$pattern, $subject, $m,
			($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
			$offset
		);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new NRegexpException(NULL, preg_last_error(), $pattern);
		}
		return $m;
	}



	/**
	 * Performs a regular expression search and replace.
	 * @param  string
	 * @param  string|array  regular expression(s); with $replacement omitted, an
	 *                       array in the form pattern => replacement
	 * @param  string|array|object  replacement string(s), or a callback given as
	 *                       array, object or NCallback
	 * @param  int     maximal number of replacements, -1 for no limit
	 * @return string
	 * @throws InvalidStateException  when the callback is not callable
	 * @throws NRegexpException  on compilation or runtime error
	 */
	public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
	{
		if (is_object($replacement) || is_array($replacement)) {
			if ($replacement instanceof NCallback) {
				$replacement = $replacement->getNative();
			}
			if (!is_callable($replacement, FALSE, $textual)) {
				throw new InvalidStateException("Callback '$textual' is not callable.");
			}

			// compile every pattern once against an empty subject, so that compile
			// errors are reported before (and separately from) running the callback
			foreach ((array) $pattern as $tmp) {
				self::catchRegexpErrors($tmp);
				preg_match($tmp, '');
				restore_error_handler();
			}

			$res = preg_replace_callback($pattern, $replacement, $subject, $limit);
			if ($res === NULL && preg_last_error()) { // run-time error
				throw new NRegexpException(NULL, preg_last_error(), $pattern);
			}
			return $res;
		}

		if ($replacement === NULL) {
			if (is_array($pattern)) { // pattern => replacement map
				$replacement = array_values($pattern);
				$pattern = array_keys($pattern);
			} else {
				$replacement = ''; // same result as passing NULL, without relying on coercion
			}
		}

		self::catchRegexpErrors($pattern);
		$res = preg_replace($pattern, $replacement, $subject, $limit);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new NRegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
		}
		return $res;
	}



	/**
	 * Error handler active while a preg_* function runs; turns the PHP error
	 * into NRegexpException (preg_last_error() does not report compile errors).
	 * Public only because PHP has to be able to call it.
	 * @param  int     error severity
	 * @param  string  error message
	 * @return void
	 * @throws NRegexpException  always
	 * @internal
	 */
	public static function _onRegexpError($severity, $message)
	{
		restore_error_handler(); // the guarded call will not get the chance to do it
		throw new NRegexpException("$message in pattern: " . implode(' or ', (array) self::$failingPattern));
	}



	/**
	 * Installs the regexp error handler and remembers the pattern(s) for its
	 * message. The caller restores the previous handler after the guarded call.
	 * @param  string|array  pattern(s) about to be executed
	 * @return void
	 */
	private static function catchRegexpErrors($pattern)
	{
		self::$failingPattern = $pattern;
		set_error_handler(array(__CLASS__, '_onRegexpError'));
	}

}
553:
554:
555:
556: 557: 558: 559:
/**
 * The exception that indicates an error of the last regular expression execution.
 */
class NRegexpException extends Exception
{
	/** @var array  messages indexed by preg_last_error() code */
	static public $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		// the following codes are written as numbers, presumably because the
		// PREG_BAD_UTF8_OFFSET_ERROR / PREG_JIT_STACKLIMIT_ERROR constants are
		// missing in the oldest PHP versions this file targets
		5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
		6 => 'JIT stack limit was exhausted',
	);



	/**
	 * @param  string  message; when empty, it is derived from $code and $pattern
	 * @param  int     preg_last_error() code; NULL is treated as 0
	 * @param  string  pattern mentioned in the derived message
	 */
	public function __construct($message, $code = NULL, $pattern = NULL)
	{
		// Exception expects an integer code; the default NULL must not be
		// forwarded nor used as an array offset
		$code = (int) $code;
		if (!$message) {
			$message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error')
				. ($pattern ? " (pattern: $pattern)" : '');
		}
		parent::__construct($message, $code);
	}

}
579: