1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
/**
 * Simple lexical analyser driven by regular expressions.
 *
 * All patterns are joined into a single alternation of capturing groups and
 * compiled with the PCRE 'A' (anchored) modifier, so every token has to start
 * exactly where the previous one ended.
 */
class Tokenizer
{
    /** Positions of the individual fields inside one token array. */
    const VALUE = 0,
        OFFSET = 1,
        TYPE = 2;

    /** @var string  regular expression combined from all patterns */
    private $re;

    /** @var array|bool  pattern keys used as token types, or FALSE when the keys are exactly 0..n-1 */
    private $types;


    /**
     * @param  array   $patterns  regular expression fragments; when the keys are not a plain
     *                            0..n-1 sequence they are used as token types
     * @param  string  $flags     extra PCRE modifiers appended after the implicit 'A'
     */
    public function __construct(array $patterns, $flags = '')
    {
        // Each pattern becomes one capturing group, so the index of the group
        // that matched identifies the pattern (and therefore the token type).
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens.
     *
     * With typed patterns every token is array(VALUE => string, OFFSET => int, TYPE => key|NULL);
     * otherwise every token is array(VALUE => string, OFFSET => int).
     *
     * @param  string  $input
     * @return array   list of tokens
     * @throws TokenizerException  when part of the input matches no pattern or PCRE itself fails
     */
    public function tokenize($input)
    {
        if ($this->types) {
            if (preg_match_all($this->re, $input, $tokens, PREG_SET_ORDER) === FALSE) {
                // Report the PCRE failure itself instead of a misleading "Unexpected ..." error.
                throw new TokenizerException('Tokenization failed with PCRE error code ' . preg_last_error() . '.');
            }

            $len = 0;
            $count = count($this->types);
            foreach ($tokens as & $match) {
                $type = NULL;
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        // trailing groups that did not participate are not present at all
                        break;
                    } elseif ($match[$i] !== '') {
                        // first non-empty group tells which pattern matched
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = array(self::VALUE => $match[0], self::OFFSET => $len, self::TYPE => $type);
                $len += strlen($match[self::VALUE]);
            }
            unset($match); // drop the reference to the last token

            // Anchored matching stops at the first unmatched byte, so anything
            // shorter than the whole input means an unexpected character.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $tokens = preg_split($this->re, $input, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
            if ($tokens === FALSE) {
                // preg_split() failed; without this check FALSE would be passed to end() below
                throw new TokenizerException('Tokenization failed with PCRE error code ' . preg_last_error() . '.');
            }

            // If the last piece does not match any pattern, it is the unmatched rest of the input.
            $last = end($tokens);
            if ($tokens && !preg_match($this->re, $last[0])) {
                $errorOffset = $last[1];
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = static::getCoordinates($input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $tokens;
    }


    /**
     * Converts a byte offset in the text to 1-based line and column numbers.
     *
     * @param  string  $text
     * @param  int     $offset  byte offset into $text
     * @return array   array(line, column)
     */
    public static function getCoordinates($text, $offset)
    {
        $text = substr($text, 0, $offset);
        // Prepending "\n" guarantees strrpos() finds a newline even on the first line.
        return array(substr_count($text, "\n") + 1, $offset - strrpos("\n" . $text, "\n") + 1);
    }

}
101: