1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Nette\Utils;
13:
14: use Nette;
15:
16:
17: 18: 19: 20: 21:
/**
 * Simple regular-expression based lexical analyser.
 *
 * The tokenizer is configured with a list of patterns. When the patterns are
 * keyed by anything other than 0..n-1, the keys are used as token types and
 * every token is an array with 'value', 'type' and 'line' entries (see
 * createToken()). With a plain list of patterns the tokens are bare strings.
 *
 * After tokenize() the instance also acts as a cursor over the tokens:
 * $position is the index of the next token to be read and $current holds the
 * token most recently consumed by one of the fetch*() methods.
 */
class Tokenizer extends Nette\Object
{
    /** @var array|NULL  tokens produced by the last tokenize() call */
    public $tokens;

    /** @var int  index of the next token to be read */
    public $position = 0;

    /** @var array  token types (or values, in untyped mode) skipped while scanning */
    public $ignored = array();

    /** @var string|NULL  input passed to the last tokenize() call */
    private $input;

    /** @var string  combined regular expression, one capturing group per pattern */
    private $re;

    /** @var array|FALSE  token types indexed by pattern order, FALSE in untyped mode */
    private $types;

    /** @var array|string|NULL  token most recently consumed by the cursor */
    public $current;


    /**
     * Builds the combined, start-anchored regular expression.
     *
     * @param  array   patterns as (type => regular expression); a plain list
     *                 (keys 0..n-1) switches the tokenizer to untyped mode
     * @param  string  extra PCRE modifiers appended after the mandatory 'A'
     */
    public function __construct(array $patterns, $flags = '')
    {
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens and stores them in $tokens.
     *
     * Note that the cursor ($position, $current) is not rewound here; call
     * reset() when reusing the instance.
     *
     * @param  string
     * @return array  list of tokens (arrays in typed mode, strings otherwise)
     * @throws TokenizerException  when part of the input matches no pattern
     */
    public function tokenize($input)
    {
        $this->input = $input;
        $errorOffset = NULL;

        if ($this->types) {
            $this->tokens = Strings::matchAll($input, $this->re);
            $consumed = 0;
            $count = count($this->types);
            $line = 1;
            foreach ($this->tokens as & $match) {
                $type = NULL;
                // the first non-empty capturing group tells which pattern matched
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] != NULL) { // loose on purpose: '' means "group did not match"
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = self::createToken($match[0], $type, $line);
                $consumed += strlen($match['value']);
                $line += substr_count($match['value'], "\n");
            }
            unset($match); // drop the reference so later writes cannot clobber the last token

            // the pattern is anchored, so matching stops at the first unrecognized byte
            if ($consumed !== strlen($input)) {
                $errorOffset = $consumed;
            }

        } else {
            $this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
            // a trailing piece that does not match the pattern is unparsed input
            if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
                $pieces = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                list(, $errorOffset) = end($pieces);
            }
        }

        if ($errorOffset !== NULL) {
            list($line, $col) = self::getCoordinates($this->input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $this->tokens;
    }


    /**
     * Creates the array representation of a token.
     *
     * @param  string
     * @param  mixed  token type
     * @param  int    line on which the token starts
     * @return array  with keys 'value', 'type' and 'line'
     */
    public static function createToken($value, $type = NULL, $line = NULL)
    {
        return array('value' => $value, 'type' => $type, 'line' => $line);
    }


    /**
     * Returns the position of the i-th token in the input.
     *
     * The input is split again on every call. When there is no token with
     * the given index, the position just past the end of the input is used.
     *
     * @param  int  token index
     * @return array  (byte offset, line, column); line and column are 1-based
     */
    public function getOffset($i)
    {
        $tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
        list($line, $column) = self::getCoordinates($this->input, $offset);
        return array($offset, $line, $column);
    }


    /**
     * Consumes the next token if it is one of the given values or types
     * (any token when called without arguments), skipping ignored tokens.
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return string|FALSE  token value, FALSE when nothing was consumed
     */
    public function fetch()
    {
        return $this->scan(func_get_args(), TRUE);
    }


    /**
     * Same as fetch(), but returns the whole token instead of its value.
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return array|string|FALSE  consumed token, FALSE when nothing was consumed
     */
    public function fetchToken()
    {
        return $this->scan(func_get_args(), TRUE) === FALSE ? FALSE : $this->current;
    }


    /**
     * Consumes all consecutive tokens that are one of the given values or
     * types (all remaining tokens when called without arguments).
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return string|FALSE  concatenated values, FALSE when nothing was consumed
     */
    public function fetchAll()
    {
        return $this->scan(func_get_args(), FALSE);
    }


    /**
     * Consumes tokens up to, but not including, the first token that is one
     * of the given values or types.
     *
     * @param  mixed  terminating values or types, variable number of arguments
     * @return string|FALSE  concatenated values, FALSE when nothing was consumed
     */
    public function fetchUntil($arg)
    {
        return $this->scan(func_get_args(), FALSE, TRUE, TRUE);
    }


    /**
     * Checks whether the next token is one of the given values or types.
     * The cursor is not moved.
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return bool
     */
    public function isNext($arg)
    {
        // strict comparison: a (bool) cast would report the token '0' as missing
        return $this->scan(func_get_args(), TRUE, FALSE) !== FALSE;
    }


    /**
     * Checks whether the token before the current one is one of the given
     * values or types. The cursor is not moved.
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return bool
     */
    public function isPrev($arg)
    {
        // strict comparison: a (bool) cast would report the token '0' as missing
        return $this->scan(func_get_args(), TRUE, FALSE, FALSE, TRUE) !== FALSE;
    }


    /**
     * Checks whether there is a token at the cursor position.
     *
     * @return bool
     */
    public function hasNext()
    {
        return isset($this->tokens[$this->position]);
    }


    /**
     * Checks whether there is a token before the current one. The current
     * token sits at $position - 1, so its predecessor needs $position > 1.
     *
     * @return bool
     */
    public function hasPrev()
    {
        return $this->position > 1;
    }


    /**
     * Checks whether the current token is one of the given values or types.
     *
     * @param  mixed  wanted values or types, variable number of arguments
     * @return bool
     */
    public function isCurrent($arg)
    {
        $args = func_get_args();
        if (is_array($this->current)) {
            return in_array($this->current['value'], $args, TRUE)
                || in_array($this->current['type'], $args, TRUE);
        } else {
            return in_array($this->current, $args, TRUE);
        }
    }


    /**
     * Rewinds the cursor to the first token and clears the current token.
     *
     * @return void
     */
    public function reset()
    {
        $this->position = 0;
        $this->current = NULL;
    }


    /**
     * Walks the token list and collects the values of accepted tokens.
     *
     * A token is accepted when $wanted is empty, or when its value or type is
     * listed in $wanted (the test is inverted when $neg is set). A token that
     * is not accepted ends the scan, except that tokens whose type is listed
     * in $ignored are skipped when $neg is not set.
     *
     * @param  array  wanted values or types; empty accepts every token
     * @param  bool   stop after the first accepted token?
     * @param  bool   move $position and $current to each accepted token?
     * @param  bool   accept tokens that are NOT listed in $wanted?
     * @param  bool   scan backwards, starting before the current token?
     * @return string|FALSE  concatenated values, FALSE when no token was accepted
     */
    private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
    {
        $res = FALSE;
        $step = $prev ? -1 : 1;
        // backwards scans begin at the token preceding the current one
        $pos = $this->position + ($prev ? -2 : 0);

        while (isset($this->tokens[$pos])) {
            $token = $this->tokens[$pos];
            $pos += $step;

            // untyped tokens are plain strings that serve as both value and type
            if (is_array($token)) {
                $value = $token['value'];
                $type = $token['type'];
            } else {
                $value = $type = $token;
            }

            $listed = in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE);
            if (!$wanted || ($listed ^ $neg)) {
                if ($advance) {
                    $this->position = $pos;
                    $this->current = $token;
                }
                $res .= $value;
                if ($first) {
                    break;
                }

            } elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
                break;
            }
        }
        return $res;
    }


    /**
     * Converts a byte offset into 1-based line and column numbers.
     *
     * @param  string
     * @param  int  byte offset into the text
     * @return array  (line, column)
     */
    private static function getCoordinates($text, $offset)
    {
        // substr() may return FALSE for an empty slice on older PHP versions
        $before = (string) substr($text, 0, $offset);
        $line = substr_count($before, "\n") + 1;
        $lastNewline = strrpos($before, "\n");
        $column = $lastNewline === FALSE ? $offset + 1 : $offset - $lastNewline;
        return array($line, $column);
    }

}
271:
272:
273: 274: 275:
/**
 * The exception thrown by Tokenizer::tokenize() when part of the input
 * does not match any of the configured patterns.
 */
class TokenizerException extends \Exception {}
279: