1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
21: class NStrings
22: {
23:
24: 25: 26:
/**
 * Static class - it is not meant to be instantiated.
 *
 * @throws NStaticClassException on every call
 */
final public function __construct()
{
	throw new NStaticClassException();
}
31:
32:
33: 34: 35: 36: 37: 38:
/**
 * Tells whether the string is clean in the given encoding, i.e. whether
 * fixEncoding() would hand it back unchanged.
 *
 * @param  string $s         byte stream to check
 * @param  string $encoding  encoding the stream is expected to be in
 * @return bool
 */
public static function checkEncoding($s, $encoding = 'UTF-8')
{
	$fixed = self::fixEncoding($s, $encoding);
	return $fixed === $s;
}
43:
44:
45: 46: 47: 48: 49: 50:
/**
 * Returns the string converted from the given encoding to that same encoding,
 * asking the conversion routine to drop whatever it cannot convert.
 *
 * For UTF-8 every byte order mark sequence (EF BB BF) is removed first.
 * Side effect on PHP >= 5.4: the ini option mbstring.substitute_character is set
 * to 'none' and is not restored afterwards.
 *
 * @param  string $s         byte stream to fix
 * @param  string $encoding  encoding of the stream
 * @return string
 */
public static function fixEncoding($s, $encoding = 'UTF-8')
{
	$isUtf8 = strcasecmp($encoding, 'UTF-8') === 0;
	if ($isUtf8) {
		$s = str_replace("\xEF\xBB\xBF", '', $s);
	}

	if (PHP_VERSION_ID < 50400) {
		// older PHP: round trip through UTF-16 with //IGNORE on both conversions
		$utf16 = iconv($encoding, 'UTF-16//IGNORE', $s);
		return @iconv('UTF-16', $encoding . '//IGNORE', $utf16);
	}

	// substitute invalid characters with nothing instead of a replacement character
	ini_set('mbstring.substitute_character', 'none');
	return mb_convert_encoding($s, $encoding, $encoding);
}
63:
64:
65: 66: 67: 68: 69: 70:
/**
 * Returns the character with the given code point, encoded in $encoding.
 *
 * @param  int    $code      code point
 * @param  string $encoding  target encoding understood by iconv
 * @return string
 */
public static function chr($code, $encoding = 'UTF-8')
{
	// the code point packed as a 32-bit big-endian integer is its UTF-32BE form
	$utf32 = pack('N', $code);
	return iconv('UTF-32BE', $encoding . '//IGNORE', $utf32);
}
75:
76:
77: 78: 79: 80: 81: 82:
/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 *
 * @param  string $haystack  string to look in
 * @param  string $needle    expected prefix
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
	$prefixBytes = strlen($needle);
	return 0 === strncmp($haystack, $needle, $prefixBytes);
}
87:
88:
89: 90: 91: 92: 93: 94:
/**
 * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
 * An empty needle always matches.
 *
 * @param  string $haystack  string to look in
 * @param  string $needle    expected suffix
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
	$suffixBytes = strlen($needle);
	if ($suffixBytes === 0) {
		return TRUE;
	}
	return substr($haystack, -$suffixBytes) === $needle;
}
99:
100:
101: 102: 103: 104: 105: 106:
/**
 * Tells whether $needle occurs anywhere in $haystack (byte-wise, case-sensitive).
 *
 * @param  string $haystack  string to look in
 * @param  string $needle    substring to look for
 * @return bool
 */
public static function contains($haystack, $needle)
{
	// position 0 is a valid hit, hence the strict comparison with FALSE
	$position = strpos($haystack, $needle);
	return $position !== FALSE;
}
111:
112:
113: 114: 115: 116: 117: 118: 119:
/**
 * Returns a part of a UTF-8 string, counted in characters.
 *
 * Uses mb_substr() when the mbstring extension is available and falls back to
 * iconv_substr() otherwise. When $length is NULL, the character length of the
 * whole string is used as the length.
 *
 * @param  string   $s       UTF-8 string
 * @param  int      $start   start offset in characters
 * @param  int|NULL $length  number of characters
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
	if ($length === NULL) {
		$length = self::length($s);
	}

	if (function_exists('mb_substr')) {
		return mb_substr($s, $start, $length, 'UTF-8');
	}
	return iconv_substr($s, $start, $length, 'UTF-8');
}
127:
128:
129: 130: 131: 132: 133:
/**
 * Normalizes line endings and whitespace noise in a string.
 *
 * Steps, in order: CRLF and lone CR become LF; control characters other than
 * tab and LF are removed; spaces and tabs at the end of each line are stripped;
 * leading and trailing line feeds are trimmed.
 *
 * @param  string $s  string to normalize
 * @return string
 */
public static function normalize($s)
{
	// CRLF is replaced first so that it collapses into a single LF
	$s = str_replace(array("\r\n", "\r"), "\n", $s);

	// control characters; \x09 (tab) and \x0A (LF) are outside the class and survive
	$s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

	// right-trim every line
	$s = preg_replace('#[\t ]+$#m', '', $s);

	// leading and trailing blank lines
	return trim($s, "\n");
}
151:
152:
153: 154: 155: 156: 157:
/**
 * Converts a UTF-8 string to ASCII by transliteration through iconv.
 *
 * Characters outside the whitelisted ranges are removed first. The five characters
 * ` ' " ^ ~ are temporarily swapped for the bytes \x01-\x05, so that any of those
 * characters present after the iconv call (i.e. emitted by the transliteration)
 * can be deleted without losing the ones that were in the input.
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function toAscii($s)
{
	$quotes = '`\'"^~';
	$placeholders = "\x01\x02\x03\x04\x05";

	$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
	$s = strtr($s, $quotes, $placeholders);

	if (ICONV_IMPL !== 'glibc') {
		$s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);

	} else {
		// glibc: go through WINDOWS-1250 and map its non-ASCII bytes to ASCII letters by hand
		$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
		$cp1250 = "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
			. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
			. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
			. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96";
		$ascii = "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-";
		$s = strtr($s, $cp1250, $ascii);
	}

	// whatever quote-like characters exist now did not come from the input
	$s = str_replace(array('`', "'", '"', '^', '~'), '', $s);

	// bring the original quote-like characters back
	return strtr($s, $placeholders, $quotes);
}
175:
176:
177: 178: 179: 180: 181: 182: 183:
/**
 * Converts a string to a form usable in URLs: ASCII letters, digits and the
 * explicitly allowed characters, with every other run of characters replaced
 * by a single dash and no dash at either end.
 *
 * @param  string      $s         UTF-8 string
 * @param  string|NULL $charlist  additional characters to keep
 * @param  bool        $lower     convert the result to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
	$ascii = self::toAscii($s);
	if ($lower) {
		$ascii = strtolower($ascii);
	}

	$allowed = 'a-z0-9' . preg_quote($charlist, '#');
	$slug = preg_replace('#[^' . $allowed . ']+#i', '-', $ascii);
	return trim($slug, '-');
}
194:
195:
196: 197: 198: 199: 200: 201: 202:
/**
 * Shortens a UTF-8 string to at most $maxLen characters, $append included.
 *
 * A string that already fits is returned as is. Otherwise the cut is made in
 * front of an ASCII whitespace, control or punctuation character when one can be
 * found within the allowed length, and at the hard limit when not. If there is no
 * room for anything besides $append, only $append is returned.
 *
 * @param  string $s       UTF-8 string
 * @param  int    $maxLen  maximal length in characters
 * @param  string $append  UTF-8 text added to a shortened string (default: ellipsis)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
	$tooLong = self::length($s) > $maxLen;
	if (!$tooLong) {
		return $s;
	}

	// room left for the text itself
	$maxLen = $maxLen - self::length($append);
	if ($maxLen < 1) {
		return $append;
	}

	// greedy: the longest prefix that is followed by a separator character
	$matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
	if ($matches) {
		return $matches[0] . $append;
	}

	return self::substring($s, 0, $maxLen) . $append;
}
219:
220:
221: 222: 223: 224: 225: 226: 227:
/**
 * Indents every non-empty line of a string.
 *
 * The indentation is inserted at the very start (when the string does not begin
 * with a line break) and after each run of CR/LF characters that is followed by
 * another character. A level below 1 leaves the string untouched.
 *
 * @param  string $s      text to indent
 * @param  int    $level  how many times $chars is repeated
 * @param  string $chars  one level of indentation
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
	if ($level < 1) {
		return $s;
	}

	$indentation = str_repeat($chars, $level);
	return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $indentation);
}
232:
233:
234: 235: 236: 237: 238:
/**
 * Converts a UTF-8 string to lower case.
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function lower($s)
{
	$lowered = mb_strtolower($s, 'UTF-8');
	return $lowered;
}
243:
244:
245: 246: 247: 248: 249:
/**
 * Converts a UTF-8 string to upper case.
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function upper($s)
{
	$uppered = mb_strtoupper($s, 'UTF-8');
	return $uppered;
}
254:
255:
256: 257: 258: 259: 260:
/**
 * Converts the first character of a UTF-8 string to upper case and leaves the
 * rest of the string as it is.
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function firstUpper($s)
{
	$head = self::substring($s, 0, 1);
	$tail = self::substring($s, 1);
	return self::upper($head) . $tail;
}
265:
266:
267: 268: 269: 270: 271:
/**
 * Converts a UTF-8 string to title case using mb_convert_case().
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function capitalize($s)
{
	$titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	return $titled;
}
276:
277:
278: 279: 280: 281: 282: 283: 284:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 *
 * With a positive (or zero) $len only the first $len characters are compared,
 * with a negative $len only the last |$len| characters, with NULL the whole strings.
 *
 * @param  string   $left   first UTF-8 string
 * @param  string   $right  second UTF-8 string
 * @param  int|NULL $len    how many characters to compare
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
	if ($len !== NULL) {
		$fromEnd = $len < 0;
		$start = $fromEnd ? $len : 0;
		$count = $fromEnd ? -$len : $len;
		$left = self::substring($left, $start, $count);
		$right = self::substring($right, $start, $count);
	}

	return self::lower($left) === self::lower($right);
}
296:
297:
298: 299: 300: 301: 302:
/**
 * Returns the length of a UTF-8 string in characters.
 *
 * @param  string $s  UTF-8 string
 * @return int
 */
public static function length($s)
{
	// utf8_decode() yields one byte per character, so the byte count is the character count
	$singleByte = utf8_decode($s);
	return strlen($singleByte);
}
307:
308:
309: 310: 311: 312: 313: 314:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 *
 * The default list holds space, tab, LF, CR, NUL, vertical tab and the bytes
 * C2 A0; because the pattern runs in UTF-8 mode the list is treated as
 * characters, not as single bytes.
 *
 * @param  string $s         UTF-8 string
 * @param  string $charlist  characters to strip
 * @return string
 * @throws NRegexpException  raised by replace() on a regular expression error
 */
public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
{
	$run = '[' . preg_quote($charlist, '#') . ']+';
	return self::replace($s, '#^' . $run . '|' . $run . '\z#u', '');
}
320:
321:
322: 323: 324: 325: 326: 327: 328:
/**
 * Pads a UTF-8 string on the left up to the given length in characters.
 *
 * The pad string is repeated as many whole times as fits and then cut to fill
 * the remaining characters. A string that is already long enough is returned
 * unchanged, and so is any string when $pad is empty (there is nothing to pad
 * with).
 *
 * @param  string $s       UTF-8 string to pad
 * @param  int    $length  desired total length in characters
 * @param  string $pad     UTF-8 string used as padding
 * @return string
 */
public static function padLeft($s, $length, $pad = ' ')
{
	$missing = (int) max(0, $length - self::length($s));
	$padLen = self::length($pad);
	if ($missing === 0 || $padLen === 0) {
		// nothing to add, or nothing to add it with; also keeps clear of a division by zero
		return $s;
	}

	// str_repeat() takes an integer count - truncate explicitly instead of passing a float
	$repeats = (int) ($missing / $padLen);
	return str_repeat($pad, $repeats) . self::substring($pad, 0, $missing % $padLen) . $s;
}
335:
336:
337: 338: 339: 340: 341: 342: 343:
/**
 * Pads a UTF-8 string on the right up to the given length in characters.
 *
 * The pad string is repeated as many whole times as fits and then cut to fill
 * the remaining characters. A string that is already long enough is returned
 * unchanged, and so is any string when $pad is empty (there is nothing to pad
 * with).
 *
 * @param  string $s       UTF-8 string to pad
 * @param  int    $length  desired total length in characters
 * @param  string $pad     UTF-8 string used as padding
 * @return string
 */
public static function padRight($s, $length, $pad = ' ')
{
	$missing = (int) max(0, $length - self::length($s));
	$padLen = self::length($pad);
	if ($missing === 0 || $padLen === 0) {
		// nothing to add, or nothing to add it with; also keeps clear of a division by zero
		return $s;
	}

	// str_repeat() takes an integer count - truncate explicitly instead of passing a float
	$repeats = (int) ($missing / $padLen);
	return $s . str_repeat($pad, $repeats) . self::substring($pad, 0, $missing % $padLen);
}
350:
351:
352: 353: 354: 355: 356:
/**
 * Reverses the order of characters in a UTF-8 string.
 *
 * @param  string $s  UTF-8 string
 * @return string
 */
public static function reverse($s)
{
	$utf32be = @iconv('UTF-8', 'UTF-32BE', $s);

	// every character is 4 bytes; reversing all bytes reverses the character order
	// and turns each big-endian unit into a little-endian one
	$flipped = strrev($utf32be);

	return @iconv('UTF-32LE', 'UTF-8', $flipped);
}
361:
362:
363: 364: 365: 366: 367: 368:
/**
 * Generates a random string of the given length from the characters in $charlist.
 *
 * $charlist may contain ranges written as "x-y" (any character, a dash, any
 * character), which are expanded with range(); everything else is taken literally.
 *
 * The randomness is derived from microtime(), lcg_value() and an MD5 of $_SERVER.
 * NOTE(review): that looks too predictable for security-sensitive tokens - confirm
 * how callers use the result.
 *
 * @param  int    $length    number of characters to generate
 * @param  string $charlist  allowed characters, ranges permitted
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
	// Expand the ranges without create_function(), which defines a new never-freed
	// function on each call. With PREG_SPLIT_DELIM_CAPTURE the odd-indexed parts are
	// the captured "x-y" ranges and the even-indexed parts are the literal text between them.
	$parts = preg_split('#(.-.)#', $charlist, -1, PREG_SPLIT_DELIM_CAPTURE);
	foreach ($parts as $index => $part) {
		if ($index % 2 === 1) {
			$parts[$index] = implode('', range($part[0], $part[2]));
		}
	}
	$charlist = str_shuffle(implode('', $parts));
	$chLen = strlen($charlist);

	// per-process salt, computed once
	static $rand3;
	if (!$rand3) {
		$rand3 = md5(serialize($_SERVER), TRUE);
	}

	$s = '';
	for ($i = 0; $i < $length; $i++) {
		if ($i % 5 === 0) {
			// reseed every fifth character: fractional and whole seconds of the current time
			list($rand, $rand2) = explode(' ', microtime());
			$rand += lcg_value();
		}
		$rand *= $chLen;
		// explicit int cast: the sum is a float and % works on integers
		$pick = (int) ($rand + $rand2 + ord($rand3[$i % strlen($rand3)]));
		$s .= $charlist[$pick % $chLen];
		$rand -= (int) $rand; // keep only the fractional part for the next round
	}
	return $s;
}
393:
394:
395: 396: 397: 398: 399: 400: 401:
/**
 * Splits a string by a regular expression; captured delimiters are always
 * included in the result (PREG_SPLIT_DELIM_CAPTURE is added to $flags).
 *
 * While preg_split() runs, a temporary error handler turns any PHP error it
 * raises (e.g. a pattern that does not compile) into NRegexpException. The
 * generated handler cannot capture variables, so the pattern is pushed onto the
 * $GLOBALS[0] array and looked up by index from inside the handler; the entry is
 * not removed here. $GLOBALS[0] is presumably initialized elsewhere - verify.
 *
 * @param  string $subject  string to split
 * @param  string $pattern  regular expression
 * @param  int    $flags    preg_split() flags
 * @return array
 * @throws NRegexpException on a compile-time or run-time regular expression error
 */
public static function split($subject, $pattern, $flags = 0)
{
	$count = array_push($GLOBALS[0], array('pattern'=>$pattern));
	$onError = create_function('$severity, $message', 'extract($GLOBALS[0]['.$count.'-1], EXTR_REFS); // preg_last_error does not return compile errors
		restore_error_handler();
		throw new NRegexpException("$message in pattern: $pattern");
	');

	set_error_handler($onError);
	$parts = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
	restore_error_handler();

	// run-time failures (backtrack limit, bad UTF-8, ...) are reported only via preg_last_error()
	$error = preg_last_error();
	if ($error) {
		throw new NRegexpException(NULL, $error, $pattern);
	}
	return $parts;
}
415:
416:
417: 418: 419: 420: 421: 422: 423: 424:
/**
 * Performs a regular expression match.
 *
 * While preg_match() runs, a temporary error handler turns any PHP error it
 * raises (e.g. a pattern that does not compile) into NRegexpException. The
 * generated handler cannot capture variables, so the pattern is pushed onto the
 * $GLOBALS[0] array and looked up by index from inside the handler; the entry is
 * not removed here. $GLOBALS[0] is presumably initialized elsewhere - verify.
 *
 * @param  string $subject  string to search in
 * @param  string $pattern  regular expression
 * @param  int    $flags    preg_match() flags
 * @param  int    $offset   offset in bytes to start at
 * @return array|NULL       matches, or NULL when nothing matched or the offset lies
 *                          beyond the end of the subject
 * @throws NRegexpException on a compile-time or run-time regular expression error
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return NULL;
	}

	$count = array_push($GLOBALS[0], array('pattern'=>$pattern));
	$onError = create_function('$severity, $message', 'extract($GLOBALS[0]['.$count.'-1], EXTR_REFS); // preg_last_error does not return compile errors
		restore_error_handler();
		throw new NRegexpException("$message in pattern: $pattern");
	');

	set_error_handler($onError);
	$found = preg_match($pattern, $subject, $matches, $flags, $offset);
	restore_error_handler();

	// run-time failures (backtrack limit, bad UTF-8, ...) are reported only via preg_last_error()
	$error = preg_last_error();
	if ($error) {
		throw new NRegexpException(NULL, $error, $pattern);
	}

	return $found ? $matches : NULL;
}
443:
444:
445: 446: 447: 448: 449: 450: 451: 452:
/**
 * Performs a global regular expression match.
 *
 * Results are grouped per match (PREG_SET_ORDER) unless PREG_PATTERN_ORDER is
 * set in $flags. While preg_match_all() runs, a temporary error handler turns any
 * PHP error it raises (e.g. a pattern that does not compile) into NRegexpException.
 * The generated handler cannot capture variables, so the pattern is pushed onto the
 * $GLOBALS[0] array and looked up by index from inside the handler; the entry is
 * not removed here. $GLOBALS[0] is presumably initialized elsewhere - verify.
 *
 * @param  string $subject  string to search in
 * @param  string $pattern  regular expression
 * @param  int    $flags    preg_match_all() flags
 * @param  int    $offset   offset in bytes to start at
 * @return array            matches; an empty array when the offset lies beyond the
 *                          end of the subject
 * @throws NRegexpException on a compile-time or run-time regular expression error
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return array();
	}

	$count = array_push($GLOBALS[0], array('pattern'=>$pattern));
	$onError = create_function('$severity, $message', 'extract($GLOBALS[0]['.$count.'-1], EXTR_REFS); // preg_last_error does not return compile errors
		restore_error_handler();
		throw new NRegexpException("$message in pattern: $pattern");
	');

	set_error_handler($onError);
	// default to one entry per match unless the caller asked for pattern order
	$order = ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER);
	$found = preg_match_all($pattern, $subject, $matches, $order, $offset);
	restore_error_handler();

	// run-time failures (backtrack limit, bad UTF-8, ...) are reported only via preg_last_error()
	$error = preg_last_error();
	if ($error) {
		throw new NRegexpException(NULL, $error, $pattern);
	}
	return $matches;
}
473:
474:
475: 476: 477: 478: 479: 480: 481: 482:
/**
 * Performs a regular expression search and replace.
 *
 * Three call forms are handled:
 *  - $replacement is an object, an array or the name of a create_function() lambda:
 *    it is used as a callback for preg_replace_callback();
 *  - $replacement is NULL and $pattern is an array: the array is taken as a
 *    pattern => replacement map;
 *  - otherwise the arguments go to preg_replace() as they are.
 *
 * Pattern compile errors surface as PHP errors, not through preg_last_error(), so
 * a temporary error handler converts them to NRegexpException. In the callback form
 * the handler is active only during a dry-run preg_match() of every pattern against
 * an empty string; preg_replace_callback() itself runs without it. The generated
 * handlers cannot capture variables, so their data is pushed onto the $GLOBALS[0]
 * array and looked up by index; the entries are not removed here. $GLOBALS[0] is
 * presumably initialized elsewhere - verify.
 *
 * @param  string            $subject      string to search in
 * @param  string|array      $pattern      regular expression(s), or a pattern => replacement map
 * @param  string|array|callable|NCallback|NULL $replacement  replacement string(s) or a callback
 * @param  int               $limit        maximum number of replacements, -1 for no limit
 * @return string
 * @throws InvalidStateException when the callback is not callable
 * @throws NRegexpException      on a compile-time or run-time regular expression error
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
	if (is_object($replacement) || is_array($replacement) || preg_match('#^\x00lambda_\d+\z#', $replacement)) {
		if ($replacement instanceof NCallback) {
			$replacement = $replacement->getNative();
		}
		if (!is_callable($replacement, FALSE, $textual)) {
			throw new InvalidStateException("Callback '$textual' is not callable.");
		}

		// compile every pattern once under the error handler; $tmp is bound by reference
		// so that the handler reports the pattern being tried at the moment of the error
		set_error_handler(create_function('$severity, $message', 'extract($GLOBALS[0]['.array_push($GLOBALS[0], array('tmp'=>& $tmp)).'-1], EXTR_REFS); // preg_last_error does not return compile errors
			restore_error_handler();
			throw new NRegexpException("$message in pattern: $tmp");
		'));
		foreach ((array) $pattern as $tmp) {
			preg_match($tmp, '');
		}
		restore_error_handler();

		$res = preg_replace_callback($pattern, $replacement, $subject, $limit);
		if ($res === NULL && preg_last_error()) {
			// $pattern may be an array here - join it the same way the non-callback branch does
			throw new NRegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
		}
		return $res;

	} elseif ($replacement === NULL && is_array($pattern)) {
		// pattern => replacement map
		$replacement = array_values($pattern);
		$pattern = array_keys($pattern);
	}

	set_error_handler(create_function('$severity, $message', 'extract($GLOBALS[0]['.array_push($GLOBALS[0], array('pattern'=>$pattern)).'-1], EXTR_REFS); // preg_last_error does not return compile errors
		restore_error_handler();
		throw new NRegexpException("$message in pattern: " . implode(\' or \', (array) $pattern));
	'));
	$res = preg_replace($pattern, $replacement, $subject, $limit);
	restore_error_handler();
	if (preg_last_error()) {
		throw new NRegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
	}
	return $res;
}
524:
525: }
526:
527:
528: 529: 530: 531:
/**
 * Exception thrown when a regular expression fails to compile or to execute.
 */
class NRegexpException extends Exception
{
	/** @var array  PCRE error code => message used when no explicit message is given */
	public static $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		// literal codes: the constants below do not exist in every supported PHP version
		5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
		6 => 'JIT stack limit was exhausted', // PREG_JIT_STACKLIMIT_ERROR
	);

	/**
	 * @param  string|NULL       $message  explicit message; when empty, one is built from $code and $pattern
	 * @param  int|NULL          $code     PCRE error code as returned by preg_last_error()
	 * @param  string|array|NULL $pattern  offending pattern(s), mentioned in the generated message
	 */
	public function __construct($message, $code = NULL, $pattern = NULL)
	{
		if (!$message) {
			if (is_array($pattern)) {
				// several patterns: list them instead of printing "Array"
				$pattern = implode(' or ', $pattern);
			}
			$known = isset(self::$messages[$code]);
			$message = ($known ? self::$messages[$code] : 'Unknown error')
				. ($pattern ? " (pattern: $pattern)" : '');
		}
		// Exception expects an integer code; NULL becomes 0
		parent::__construct($message, (int) $code);
	}

}
551: