1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Nette\Utils;
13:
14: use Nette;
15:
16:
17:
18: 19: 20: 21: 22:
/**
 * Simple lexical analyser driven by regular expressions.
 *
 * The constructor combines the given patterns into a single anchored regular
 * expression, tokenize() cuts an input string into tokens with it, and the
 * fetch*(), is*() and has*() methods walk over the resulting token list.
 */
class Tokenizer extends Nette\Object
{
	/** @var array  tokens produced by the last tokenize() call: plain strings, or value/type/line arrays when the patterns are named */
	public $tokens;

	/** @var int  index in $tokens where the next forward scan starts */
	public $position = 0;

	/** @var array  token types (for untyped tokens: values) that scans step over */
	public $ignored = array();

	/** @var string  input passed to the last tokenize() call */
	private $input;

	/** @var string  regular expression built from the patterns */
	private $re;

	/** @var array|bool  pattern keys used as token types, or FALSE when the patterns form a plain list */
	private $types;

	/** @var array|string|NULL  token most recently consumed by an advancing scan */
	public $current;


	/**
	 * @param  array   patterns; unless the keys are exactly 0..n-1 they are used as token types
	 * @param  string  additional regular expression flags
	 */
	public function __construct(array $patterns, $flags = '')
	{
		// every pattern gets its own capturing group; flag A anchors each match
		$this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
		$keys = array_keys($patterns);
		$this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
	}


	/**
	 * Tokenizes the string and stores the result in $tokens.
	 * @param  string
	 * @return array
	 * @throws TokenizerException  when a part of the input is not matched by any pattern
	 */
	public function tokenize($input)
	{
		$this->input = $input;
		$errorOffset = NULL;

		if ($this->types) {
			$this->tokens = Strings::matchAll($input, $this->re);
			$len = 0;
			$count = count($this->types);
			$line = 1;
			foreach ($this->tokens as $key => $match) {
				// the first non-empty capturing group tells which pattern matched
				$type = NULL;
				for ($i = 1; $i <= $count; $i++) {
					if (!isset($match[$i])) {
						break;
					} elseif ($match[$i] !== '') {
						$type = $this->types[$i - 1];
						break;
					}
				}
				$token = self::createToken($match[0], $type, $line);
				$this->tokens[$key] = $token;
				$len += strlen($token['value']);
				$line += substr_count($token['value'], "\n");
			}
			if ($len !== strlen($input)) {
				// matching is anchored, so it stopped at the first unmatched character
				$errorOffset = $len;
			}

		} else {
			$this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
			if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
				// the last piece is an unmatched remainder; split again to learn its offset
				$tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
				list(, $errorOffset) = end($tmp);
			}
		}

		if ($errorOffset !== NULL) {
			list($line, $col) = self::getCoordinates($this->input, $errorOffset);
			$token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
			throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
		}
		return $this->tokens;
	}


	/**
	 * Builds the array representation of a token.
	 * @param  string
	 * @param  mixed
	 * @param  int|NULL
	 * @return array  with keys 'value', 'type' and 'line'
	 */
	public static function createToken($value, $type = NULL, $line = NULL)
	{
		return array('value' => $value, 'type' => $type, 'line' => $line);
	}


	/**
	 * Returns position of token in input string.
	 * @param  int  token number
	 * @return array  array(offset, line, column); line and column are 1-based
	 */
	public function getOffset($i)
	{
		$tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
		// an unknown token number maps to the end of the input
		$offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
		list($line, $col) = self::getCoordinates($this->input, $offset);
		return array($offset, $line, $col);
	}


	/**
	 * Returns the value of the next token and moves the cursor behind it.
	 * When arguments are given, only a token whose value or type equals one of
	 * them is accepted; ignored tokens standing before it are stepped over.
	 * @param  mixed  accepted values or types
	 * @return string|bool  FALSE when no token was accepted
	 */
	public function fetch()
	{
		$args = func_get_args();
		return $this->scan($args, TRUE);
	}


	/**
	 * Same as fetch(), but returns the whole token instead of its value.
	 * @param  mixed  accepted values or types
	 * @return array|string|bool  FALSE when no token was accepted
	 */
	public function fetchToken()
	{
		$args = func_get_args();
		return $this->scan($args, TRUE) === FALSE ? FALSE : $this->current;
	}


	/**
	 * Consumes consecutive accepted tokens and returns their concatenated values.
	 * Without arguments every remaining token is accepted.
	 * @param  mixed  accepted values or types
	 * @return string|bool  FALSE when no token was accepted
	 */
	public function fetchAll()
	{
		$args = func_get_args();
		return $this->scan($args, FALSE);
	}


	/**
	 * Consumes tokens up to (not including) the first one whose value or type
	 * equals one of the arguments and returns their concatenated values.
	 * @param  mixed  terminating values or types
	 * @return string|bool  FALSE when nothing was consumed
	 */
	public function fetchUntil($arg)
	{
		$args = func_get_args();
		return $this->scan($args, FALSE, TRUE, TRUE);
	}


	/**
	 * Checks whether the next token matches one of the arguments; the cursor does not move.
	 * @param  mixed  accepted values or types
	 * @return bool
	 */
	public function isNext($arg)
	{
		$args = func_get_args();
		// compare with FALSE instead of casting: the token value "0" is falsy
		return $this->scan($args, TRUE, FALSE) !== FALSE;
	}


	/**
	 * Checks whether the token before the current one matches one of the arguments; the cursor does not move.
	 * @param  mixed  accepted values or types
	 * @return bool
	 */
	public function isPrev($arg)
	{
		$args = func_get_args();
		// compare with FALSE instead of casting: the token value "0" is falsy
		return $this->scan($args, TRUE, FALSE, FALSE, TRUE) !== FALSE;
	}


	/**
	 * Is there a token at the cursor position?
	 * @return bool
	 */
	public function hasNext()
	{
		return isset($this->tokens[$this->position]);
	}


	/**
	 * Is there a token before the current one?
	 * @return bool
	 */
	public function hasPrev()
	{
		return $this->position > 1;
	}


	/**
	 * Checks whether the value or type of the current token equals one of the arguments.
	 * @param  mixed  accepted values or types
	 * @return bool
	 */
	public function isCurrent($arg)
	{
		$args = func_get_args();
		if (is_array($this->current)) {
			return in_array($this->current['value'], $args, TRUE)
				|| in_array($this->current['type'], $args, TRUE);
		} else {
			return in_array($this->current, $args, TRUE);
		}
	}


	/**
	 * Moves the cursor back to the first token and forgets the current token.
	 * @return void
	 */
	public function reset()
	{
		$this->position = 0;
		$this->current = NULL;
	}


	/**
	 * Walks over the tokens and collects the values of the accepted ones.
	 * @param  array  wanted values or types; an empty array accepts every token
	 * @param  bool   stop after the first accepted token?
	 * @param  bool   move the cursor and update $current?
	 * @param  bool   invert the meaning of $wanted (accept everything except the wanted tokens)?
	 * @param  bool   walk backwards, starting with the token before the current one?
	 * @return string|bool  concatenated values, or FALSE when no token was accepted
	 */
	private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
	{
		$res = FALSE;
		// $position points behind the current token, so the previous one is two steps back
		$pos = $this->position + ($prev ? -2 : 0);
		while (isset($this->tokens[$pos])) {
			$token = $this->tokens[$pos];
			$pos += $prev ? -1 : 1;
			$value = is_array($token) ? $token['value'] : $token;
			$type = is_array($token) ? $token['type'] : $token;
			$listed = in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE);

			if (!$wanted || ($listed xor $neg)) {
				if ($advance) {
					$this->position = $pos;
					$this->current = $token;
				}
				$res .= $value;
				if ($first) {
					break;
				}

			} elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
				// a rejected token ends the scan unless it is an ignored one
				break;
			}
		}
		return $res;
	}


	/**
	 * Converts a byte offset in the text to 1-based line and column numbers.
	 * @param  string
	 * @param  int
	 * @return array  array(line, column)
	 */
	private static function getCoordinates($text, $offset)
	{
		// substr() may return FALSE for an empty result, hence the cast
		$before = (string) substr($text, 0, $offset);
		$newline = strrpos($before, "\n");
		$lineStart = $newline === FALSE ? 0 : $newline + 1;
		return array(substr_count($before, "\n") + 1, $offset - $lineStart + 1);
	}

}
293:
294:
295:
296: 297: 298: 299:
/**
 * Exception thrown by Tokenizer::tokenize() when the input contains
 * text that none of the patterns match.
 */
class TokenizerException extends \Exception
{

}
303: