1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Simple regular-expression based lexical analyser.
 *
 * The tokenizer is configured with a list of patterns, splits an input string
 * into tokens with tokenize(), and then offers a small cursor API (fetch*(),
 * isNext(), isPrev(), ...) for walking over the resulting token list.
 *
 * Two modes exist, chosen in the constructor:
 *  - untyped: patterns have plain 0..n-1 keys and every token is a string;
 *  - typed: pattern keys are used as token types and every token is an array
 *    with the keys 'value', 'type' and 'line' (see createToken()).
 */
class Tokenizer extends Object
{
    /** @var array  tokens produced by the last tokenize() call */
    public $tokens;

    /** @var int  index of the token the next forward scan starts at */
    public $position = 0;

    /** @var array  token types (values in untyped mode) that scans may skip over */
    public $ignored = array();

    /** @var string  input passed to the last tokenize() call */
    private $input;

    /** @var string  combined regular expression built from all patterns */
    private $re;

    /** @var array|bool  token types indexed by capture group, FALSE in untyped mode */
    private $types;

    /** @var array|string  token most recently consumed by an advancing scan */
    public $current;


    /**
     * Builds the combined pattern; every source pattern becomes one capture group.
     *
     * @param  array   patterns; non-sequential keys are used as token types
     * @param  string  additional regular expression modifiers
     */
    public function __construct(array $patterns, $flags = '')
    {
        // The 'A' modifier anchors each match at the current offset.
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        // Plain list keys (0..n-1) mean the caller did not name any token types.
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens and stores them in $tokens.
     *
     * The cursor ($position, $current) is not touched here; call reset() to
     * start walking the new token list from the beginning.
     *
     * @param  string  input to tokenize
     * @return array   list of tokens (strings, or arrays in typed mode)
     * @throws TokenizerException  when part of the input matches no pattern
     */
    public function tokenize($input)
    {
        $this->input = $input;
        if ($this->types) {
            // NOTE(review): Strings::matchAll() is defined outside this file; it is
            // assumed to return one array of capture groups per match - confirm.
            $this->tokens = Strings::matchAll($input, $this->re);
            $len = 0;
            $count = count($this->types);
            $line = 1;
            foreach ($this->tokens as & $match) {
                // The first non-empty capture group determines the token type.
                $type = NULL;
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] != NULL) {
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = self::createToken($match[0], $type, $line);
                $len += strlen($match['value']);
                $line += substr_count($match['value'], "\n");
            }
            // Drop the reference so that a later write to $match cannot clobber the last token.
            unset($match);

            // Matched tokens must cover the whole input; otherwise matching stopped early.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            // NOTE(review): Strings::split() is defined outside this file; it is assumed
            // to wrap preg_split() with PREG_SPLIT_DELIM_CAPTURE - confirm.
            $this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
            // A trailing piece that does not match the pattern is unrecognised input.
            if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
                $tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                list(, $errorOffset) = end($tmp);
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = self::getCoordinates($this->input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $this->tokens;
    }


    /**
     * Creates a token in the array form used by typed mode.
     *
     * @param  string  token text
     * @param  mixed   token type
     * @param  int     line the token starts on
     * @return array   array with the keys 'value', 'type' and 'line'
     */
    public static function createToken($value, $type = NULL, $line = NULL)
    {
        return array('value' => $value, 'type' => $type, 'line' => $line);
    }


    /**
     * Returns the position of a token in the input.
     *
     * When the index does not exist, the end of the input is reported.
     * Line and column are both 1-based.
     *
     * @param  int    token index
     * @return array  array(byte offset, line, column)
     */
    public function getOffset($i)
    {
        $tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
        list($line, $col) = self::getCoordinates($this->input, $offset);
        return array($offset, $line, $col);
    }


    /**
     * Converts a byte offset into 1-based line and column numbers.
     *
     * @param  string  text the offset points into
     * @param  int     byte offset
     * @return array   array(line, column)
     */
    private static function getCoordinates($text, $offset)
    {
        $before = (string) substr($text, 0, $offset);
        // Prepending "\n" guarantees strrpos() finds a line start even on the first line,
        // so the column is counted the same way on every line.
        return array(
            substr_count($before, "\n") + 1,
            $offset - strrpos("\n" . $before, "\n") + 1,
        );
    }


    /**
     * Consumes the next token if it matches any of the given values or types.
     * Without arguments any token matches.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return string|FALSE  token value, or FALSE when nothing matched
     */
    public function fetch()
    {
        $args = func_get_args();
        return $this->scan($args, TRUE);
    }


    /**
     * Like fetch(), but returns the whole token instead of its value.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return array|string|FALSE  consumed token, or FALSE when nothing matched
     */
    public function fetchToken()
    {
        $args = func_get_args();
        return $this->scan($args, TRUE) === FALSE ? FALSE : $this->current;
    }


    /**
     * Consumes all consecutive tokens matching the given values or types and
     * returns their values concatenated.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return string|FALSE  concatenated values, or FALSE when nothing matched
     */
    public function fetchAll()
    {
        $args = func_get_args();
        return $this->scan($args, FALSE);
    }


    /**
     * Consumes tokens up to (not including) the first one that matches any of
     * the given values or types and returns their values concatenated.
     *
     * @param  mixed   terminating values or types (variadic)
     * @return string|FALSE  concatenated values, or FALSE when nothing was consumed
     */
    public function fetchUntil($arg)
    {
        $args = func_get_args();
        return $this->scan($args, FALSE, TRUE, TRUE);
    }


    /**
     * Checks, without moving the cursor, whether the next token matches.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return bool
     */
    public function isNext($arg)
    {
        $args = func_get_args();
        // Strict comparison: a plain bool cast would report the token "0" as missing.
        return $this->scan($args, TRUE, FALSE) !== FALSE;
    }


    /**
     * Checks, without moving the cursor, whether the token before the current
     * one matches.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return bool
     */
    public function isPrev($arg)
    {
        $args = func_get_args();
        // Strict comparison: a plain bool cast would report the token "0" as missing.
        return $this->scan($args, TRUE, FALSE, FALSE, TRUE) !== FALSE;
    }


    /**
     * Is there a token at the cursor position?
     *
     * @return bool
     */
    public function hasNext()
    {
        return isset($this->tokens[$this->position]);
    }


    /**
     * Is there a token before the current one?
     * The current token sits at $position - 1, so its predecessor needs $position > 1.
     *
     * @return bool
     */
    public function hasPrev()
    {
        return $this->position > 1;
    }


    /**
     * Checks whether the current token's value or type equals any argument.
     *
     * @param  mixed   accepted values or types (variadic)
     * @return bool
     */
    public function isCurrent($arg)
    {
        $args = func_get_args();
        if (is_array($this->current)) {
            return in_array($this->current['value'], $args, TRUE)
                || in_array($this->current['type'], $args, TRUE);
        } else {
            return in_array($this->current, $args, TRUE);
        }
    }


    /**
     * Moves the cursor back to the beginning of the token list.
     *
     * @return void
     */
    public function reset()
    {
        $this->position = 0;
        $this->current = NULL;
    }


    /**
     * Walks over tokens from the cursor and collects the values of accepted ones.
     *
     * @param  array  wanted values or types; an empty array accepts everything
     * @param  bool   stop after the first accepted token?
     * @param  bool   move the cursor ($position, $current) over accepted tokens?
     * @param  bool   invert the test, i.e. accept tokens that are NOT wanted?
     * @param  bool   walk backwards, starting before the current token?
     * @return string|FALSE  concatenated values, or FALSE when nothing was accepted
     */
    private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
    {
        $res = FALSE;
        $step = $prev ? -1 : 1;
        // $position points behind the current token, hence -2 for its predecessor.
        $pos = $this->position + ($prev ? -2 : 0);
        while (isset($this->tokens[$pos])) {
            $token = $this->tokens[$pos];
            $pos += $step;
            // Untyped tokens are plain strings; the value doubles as the type.
            $value = is_array($token) ? $token['value'] : $token;
            $type = is_array($token) ? $token['type'] : $token;

            $listed = in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE);
            if (!$wanted || ($listed xor $neg)) {
                if ($advance) {
                    $this->position = $pos;
                    $this->current = $token;
                }
                $res .= $value;
                if ($first) {
                    break;
                }

            } elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
                // Rejected tokens end the scan unless they are of an ignored type.
                break;
            }
        }
        return $res;
    }

}
291:
292:
293:
294: 295: 296: 297: 298:
/**
 * Thrown by Tokenizer::tokenize() when the input contains text that
 * none of the configured patterns matches.
 */
class TokenizerException extends Exception
{
}
302: