1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Nette;
13:
14: use Nette;
15:
16:
17:
18: 19: 20: 21: 22:
/**
 * Regular-expression driven tokenizer.
 *
 * The patterns given to the constructor are joined into one anchored regular
 * expression in which every pattern is its own capturing group. When the
 * patterns array has non-sequential keys (e.g. string names), each token is
 * stored as array(text, name); otherwise the tokens are stored exactly as
 * returned by String::split().
 */
class Tokenizer extends Object implements \IteratorAggregate
{
    /** Regular expression fragment matching a single- or double-quoted string with backslash escapes. */
    const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

    /** Token name skipped by nextToken(); mirrors the global T_WHITESPACE constant. */
    const T_WHITESPACE = T_WHITESPACE;

    /** Token name skipped by nextToken(); mirrors the global T_COMMENT constant. */
    const T_COMMENT = T_COMMENT;

    /** @var string  input passed to the last tokenize() call */
    private $input;

    /** @var array  tokens produced by the last tokenize() call */
    public $tokens;

    /** @var string  combined regular expression built from all patterns */
    private $re;

    /** @var array|FALSE  pattern keys used as token names, FALSE when the keys are 0..n-1 */
    private $names;


    /**
     * @param  array   patterns; keys other than 0..n-1 are used as token names
     * @param  string  extra regular expression modifiers appended after the mandatory 'A' (anchored)
     */
    public function __construct(array $patterns, $flags = '')
    {
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->names = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens and stores them in $this->tokens.
     *
     * @param  string
     * @return Tokenizer  provides a fluent interface
     * @throws TokenizerException  when some part of the input is not matched by any pattern
     */
    public function tokenize($input)
    {
        $this->input = $input;
        $errorOffset = NULL;

        if ($this->names) {
            $this->tokens = String::matchAll($input, $this->re);
            $count = count($this->names);
            $len = 0;
            foreach ($this->tokens as & $match) {
                $name = NULL;
                // Capture group $i belongs to pattern $i - 1, so the groups run
                // from 1 to $count inclusive; the last one must be checked too.
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] !== '') {
                        $name = $this->names[$i - 1];
                        break;
                    }
                }
                $match = array($match[0], $name);
                $len += strlen($match[0]);
            }
            unset($match); // drop the reference so the last token cannot be overwritten by accident

            // The matched tokens must cover the whole input; the first
            // uncovered byte is where tokenizing stopped.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $this->tokens = String::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
            // A trailing piece that does not itself match the expression is unparsed input.
            if ($this->tokens && !String::match(end($this->tokens), $this->re)) {
                $tmp = String::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                list(, $errorOffset) = end($tmp);
            }
        }

        if ($errorOffset !== NULL) {
            list($line, $col) = $this->getCoordinates($errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $this;
    }


    /**
     * Returns an iterator over the tokens of the last tokenize() call.
     *
     * @return \ArrayIterator
     */
    public function getIterator()
    {
        return new \ArrayIterator($this->tokens);
    }


    /**
     * Returns the text of the first token after position $i whose name is
     * neither T_WHITESPACE nor T_COMMENT.
     *
     * @param  int  index of the current token
     * @return string|NULL  NULL when no such token follows
     */
    public function nextToken($i)
    {
        while (isset($this->tokens[++$i])) {
            $token = $this->tokens[$i];
            if (!is_array($token)) {
                // Tokens produced without names carry no type information,
                // so nothing can be skipped; return the token as is.
                return $token;
            }
            if ($token[1] !== self::T_WHITESPACE && $token[1] !== self::T_COMMENT) {
                return $token[0];
            }
        }
        return NULL;
    }


    /**
     * Returns the position of the token with the given index.
     *
     * @param  int  token index; an index past the last token yields the end of the input
     * @return array  array(byte offset, line, column); line and column are 1-based
     */
    public function getOffset($i)
    {
        $tokens = String::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
        list($line, $col) = $this->getCoordinates($offset);
        return array($offset, $line, $col);
    }


    /**
     * Converts a byte offset in the input into 1-based line and column numbers.
     *
     * @param  int  byte offset
     * @return array  array(line, column)
     */
    private function getCoordinates($offset)
    {
        $before = (string) substr($this->input, 0, $offset);
        // Prefixing a newline makes strrpos() always succeed, so the first
        // line is handled exactly like every following one.
        $lineStart = strrpos("\n" . $before, "\n");
        return array(
            substr_count($before, "\n") + 1,
            $offset - $lineStart + 1,
        );
    }

}
132:
133:
134:
135: 136: 137:
/**
 * Exception thrown by Tokenizer::tokenize() when the input contains text
 * that none of the configured patterns matches.
 */
class TokenizerException extends \Exception {}
141: