1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19:
/**
 * Simple regular-expression based tokenizer.
 *
 * The patterns passed to the constructor are joined into a single anchored
 * alternation in which every pattern is one capture group. When the patterns
 * array has non-sequential (e.g. string) keys, those keys are used as token
 * names and each token is stored as array(text, name); with plain 0..n-1 keys
 * the tokens are stored as returned by String::split().
 */
class Tokenizer extends Object implements IteratorAggregate
{
	/** regular expression fragment matching a single- or double-quoted string with backslash escapes */
	const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

	/** token name skipped by nextToken(); aliases the global T_WHITESPACE constant */
	const T_WHITESPACE = T_WHITESPACE;

	/** token name skipped by nextToken(); aliases the global T_COMMENT constant */
	const T_COMMENT = T_COMMENT;

	/** @var string  input passed to the last tokenize() call */
	private $input;

	/** @var array  tokens produced by the last tokenize() call */
	public $tokens;

	/** @var string  combined regular expression built in the constructor */
	private $re;

	/** @var array|FALSE  token names (keys of the patterns array), or FALSE when the keys are just 0..n-1 */
	private $names;



	/**
	 * Builds the combined regular expression.
	 * @param  array   patterns; each one becomes a capture group, keys may serve as token names
	 * @param  string  extra PCRE modifiers appended after the 'A' (anchored) modifier
	 */
	public function __construct(array $patterns, $flags = '')
	{
		$this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
		$keys = array_keys($patterns);
		// purely sequential keys carry no naming information
		$this->names = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
	}



	/**
	 * Tokenizes the input and stores the result in $tokens.
	 * @param  string
	 * @return Tokenizer  provides a fluent interface
	 * @throws TokenizerException  when part of the input is not matched by any pattern
	 */
	public function tokenize($input)
	{
		$this->input = $input;
		$errorOffset = NULL;

		if ($this->names) {
			// assumes String::matchAll() yields one array per match (set order) — TODO confirm
			$this->tokens = String::matchAll($input, $this->re);
			$count = count($this->names);
			$len = 0;
			foreach ($this->tokens as & $match) {
				$name = NULL;
				// capture group $i belongs to name $i - 1, so the last group has index $count
				for ($i = 1; $i <= $count; $i++) {
					if (!isset($match[$i])) {
						break;
					} elseif ($match[$i] != NULL) {
						$name = $this->names[$i - 1];
						break;
					}
				}
				$match = array($match[0], $name);
				$len += strlen($match[0]);
			}
			unset($match); // drop the reference so later writes to $match cannot alter the last token

			// the matched tokens must cover the whole input; otherwise matching stopped at $len
			if ($len !== strlen($input)) {
				$errorOffset = $len;
			}

		} else {
			$this->tokens = String::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
			// a trailing piece that the expression does not match is unexpected input
			if ($this->tokens && !String::match(end($this->tokens), $this->re)) {
				$tmp = String::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
				list(, $errorOffset) = end($tmp);
			}
		}

		if ($errorOffset !== NULL) {
			$before = (string) substr($this->input, 0, $errorOffset);
			$line = substr_count($before, "\n") + 1;
			// the sentinel "\n" makes the column 1-based on the first line as well as on all following lines
			$col = $errorOffset - strrpos("\n" . $before, "\n") + 1;
			$token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
			throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
		}
		return $this;
	}



	/**
	 * Returns an iterator over the tokens (IteratorAggregate).
	 * @return ArrayIterator
	 */
	public function getIterator()
	{
		return new ArrayIterator($this->tokens);
	}



	/**
	 * Returns the text of the first token after position $i whose name is
	 * neither T_WHITESPACE nor T_COMMENT. Reads tokens as array(text, name).
	 * @param  int  token position to start after
	 * @return string|NULL  NULL when no such token follows
	 */
	public function nextToken($i)
	{
		while (isset($this->tokens[++$i])) {
			$name = $this->tokens[$i][1];
			if ($name !== self::T_WHITESPACE && $name !== self::T_COMMENT) {
				return $this->tokens[$i][0];
			}
		}
		return NULL;
	}



	/**
	 * Returns the position of the token in the input.
	 * @param  int  token index; an index past the last token yields the end of the input
	 * @return array  (byte offset, line, column)
	 */
	public function getOffset($i)
	{
		$tokens = String::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
		$offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
		return array(
			$offset,
			($offset ? substr_count($this->input, "\n", 0, $offset) + 1 : 1),
			// NOTE(review): column is 0-based on the first line but 1-based on the following ones;
			// kept as is because callers may rely on the current values
			$offset - strrpos(substr($this->input, 0, $offset), "\n"),
		);
	}

}
129:
130:
131:
132: 133: 134:
/**
 * Exception thrown by Tokenizer::tokenize() when it reaches input
 * that none of the configured patterns matches.
 */
class TokenizerException extends Exception {}
138: