1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Regular-expression tokenizer with a simple cursor over the produced tokens.
 *
 * The patterns given to the constructor are combined into one anchored
 * expression. When the patterns are a plain 0..n-1 list, tokens are plain
 * strings; when they have other keys, each token is an array with 'value',
 * 'type' (the key of the pattern that matched) and 'line'.
 *
 * The fetch*()/is*() methods walk $tokens relative to $position.
 */
class Tokenizer extends Object
{
    /** @var array tokens produced by the last tokenize() call */
    public $tokens;

    /** @var int index in $tokens at which the next forward scan starts */
    public $position = 0;

    /** @var array token types (token values for untyped tokens) that a scan steps over while searching for a wanted token */
    public $ignored = array();

    /** @var string text passed to the last tokenize() call */
    private $input;

    /** @var string combined regular expression, one capture group per pattern */
    private $re;

    /** @var array|bool pattern keys used as token types, or FALSE when the patterns were a plain 0..n-1 list */
    private $types;

    /** @var array|string|NULL token most recently consumed by an advancing scan */
    public $current;


    /**
     * Builds the combined expression from the given patterns.
     *
     * @param array  $patterns regular-expression fragments; keys other than 0..n-1 become token types
     * @param string $flags    additional PCRE modifiers appended after the always-present "A" (anchored)
     */
    public function __construct(array $patterns, $flags = '')
    {
        // Every pattern gets its own capture group so the matching alternative can be identified later.
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens, stores them in $tokens and returns them.
     *
     * @param  string $input text to tokenize
     * @return array  list of tokens (arrays built by createToken() when types are in use, plain strings otherwise)
     * @throws TokenizerException when part of the input is not covered by any pattern
     */
    public function tokenize($input)
    {
        $this->input = $input;
        if ($this->types) {
            $this->tokens = Strings::matchAll($input, $this->re);
            $len = 0;
            $count = count($this->types);
            $line = 1;
            foreach ($this->tokens as & $match) {
                // The first non-empty capture group tells which pattern produced this match.
                $type = NULL;
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] != NULL) { // loose comparison on purpose: '' means the group did not take part
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = self::createToken($match[0], $type, $line);
                $len += strlen($match['value']);
                $line += substr_count($match['value'], "\n");
            }
            // Break the reference so later use of $match cannot overwrite the last token.
            unset($match);

            // Matches are anchored, so anything shorter than the input means matching stopped early.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
            // A trailing piece that does not itself match the expression is unrecognised text.
            if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
                $tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                list(, $errorOffset) = end($tmp);
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = self::getCoordinates($input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $this->tokens;
    }


    /**
     * Builds the array representation of a token.
     *
     * @param  string      $value token text
     * @param  mixed       $type  token type
     * @param  int|NULL    $line  line on which the token starts
     * @return array       array with keys 'value', 'type' and 'line'
     */
    public static function createToken($value, $type = NULL, $line = NULL)
    {
        return array('value' => $value, 'type' => $type, 'line' => $line);
    }


    /**
     * Returns the position of the i-th token in the input.
     *
     * When there is no token with that index, the length of the input is used
     * as the offset.
     *
     * @param  int   $i token index
     * @return array array(byte offset, line, column); line and column are 1-based
     */
    public function getOffset($i)
    {
        $tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
        list($line, $col) = self::getCoordinates($this->input, $offset);
        return array($offset, $line, $col);
    }


    /**
     * Returns the value of the next token and moves past it.
     *
     * With arguments, the token must have one of the given values or types;
     * without arguments any token is accepted.
     *
     * @return string|bool token value, or FALSE when nothing matched
     */
    public function fetch()
    {
        return $this->scan(func_get_args(), TRUE);
    }


    /**
     * Same as fetch(), but returns the whole token instead of its value.
     *
     * @return array|string|bool the consumed token, or FALSE when nothing matched
     */
    public function fetchToken()
    {
        return $this->scan(func_get_args(), TRUE) === FALSE ? FALSE : $this->current;
    }


    /**
     * Consumes all following tokens that have one of the given values or types
     * (all remaining tokens when called without arguments).
     *
     * @return string|bool concatenated values of the consumed tokens, or FALSE when nothing matched
     */
    public function fetchAll()
    {
        return $this->scan(func_get_args(), FALSE);
    }


    /**
     * Consumes tokens up to, but not including, the first one that has one of
     * the given values or types.
     *
     * @param  mixed $arg value or type to stop at; more may be passed as extra arguments
     * @return string|bool concatenated values of the consumed tokens, or FALSE when nothing was consumed
     */
    public function fetchUntil($arg)
    {
        return $this->scan(func_get_args(), FALSE, TRUE, TRUE);
    }


    /**
     * Tells whether the next token has one of the given values or types,
     * without moving the cursor.
     *
     * @param  mixed $arg value or type to look for; more may be passed as extra arguments
     * @return bool
     */
    public function isNext($arg)
    {
        // Compare against FALSE explicitly: a matching token "0" must not be reported as "no match".
        return $this->scan(func_get_args(), TRUE, FALSE) !== FALSE;
    }


    /**
     * Tells whether the token before the current one has one of the given
     * values or types, without moving the cursor.
     *
     * @param  mixed $arg value or type to look for; more may be passed as extra arguments
     * @return bool
     */
    public function isPrev($arg)
    {
        // Compare against FALSE explicitly: a matching token "0" must not be reported as "no match".
        return $this->scan(func_get_args(), TRUE, FALSE, FALSE, TRUE) !== FALSE;
    }


    /**
     * Tells whether there is a token at the cursor position.
     *
     * @return bool
     */
    public function hasNext()
    {
        return isset($this->tokens[$this->position]);
    }


    /**
     * Tells whether the cursor has moved past more than one token.
     *
     * @return bool
     */
    public function hasPrev()
    {
        return $this->position > 1;
    }


    /**
     * Tells whether the current token has one of the given values or types.
     *
     * @param  mixed $arg value or type to compare with; more may be passed as extra arguments
     * @return bool
     */
    public function isCurrent($arg)
    {
        $args = func_get_args();
        if (is_array($this->current)) {
            return in_array($this->current['value'], $args, TRUE)
                || in_array($this->current['type'], $args, TRUE);
        } else {
            return in_array($this->current, $args, TRUE);
        }
    }


    /**
     * Moves the cursor back to the first token and forgets the current token.
     *
     * @return void
     */
    public function reset()
    {
        $this->position = 0;
        $this->current = NULL;
    }


    /**
     * Walks the token list from the cursor and collects matching token values.
     *
     * @param  array $wanted  accepted values or types; an empty list accepts every token
     * @param  bool  $first   stop after the first accepted token
     * @param  bool  $advance store the new position and current token for every accepted token
     * @param  bool  $neg     invert the test: accept tokens that are NOT in $wanted
     * @param  bool  $prev    walk backwards, starting two positions before the cursor
     * @return string|bool concatenated values of accepted tokens, or FALSE when none was accepted
     */
    private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
    {
        $res = FALSE;
        $pos = $this->position + ($prev ? -2 : 0);
        while (isset($this->tokens[$pos])) {
            $token = $this->tokens[$pos];
            $pos += $prev ? -1 : 1;
            // Untyped tokens are plain strings; their value doubles as their type.
            $value = is_array($token) ? $token['value'] : $token;
            $type = is_array($token) ? $token['type'] : $token;
            if (!$wanted || (in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE)) ^ $neg) {
                if ($advance) {
                    $this->position = $pos;
                    $this->current = $token;
                }
                $res .= $value;
                if ($first) {
                    break;
                }

            } elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
                // A rejected token ends the scan unless it is an ignored type in a non-negated search.
                break;
            }
        }
        return $res;
    }


    /**
     * Converts a byte offset in a text to 1-based line and column numbers.
     *
     * @param  string $text   full text
     * @param  int    $offset byte offset into $text
     * @return array  array(line, column)
     */
    private static function getCoordinates($text, $offset)
    {
        $before = substr($text, 0, $offset);
        // The prepended newline acts as a sentinel so the first line is handled like any other.
        return array(
            substr_count($before, "\n") + 1,
            $offset - strrpos("\n" . $before, "\n") + 1,
        );
    }

}
270:
271:
272: 273: 274: 275:
/**
 * Exception thrown by Tokenizer::tokenize() when the input cannot be fully tokenized.
 */
class TokenizerException extends Exception {}
279: