1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
21: class NParser extends NObject
22: {
23:
/** regular expression matching a single- or double-quoted string literal with backslash escapes */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

/** prefix of HTML attributes that are handled specially (e.g. n:syntax) */
const N_PREFIX = 'n:';

/** parser contexts; the name is appended to "context" to select the handler method */
const CONTEXT_TEXT = 'text';
const CONTEXT_CDATA = 'cdata';
const CONTEXT_TAG = 'tag';
const CONTEXT_ATTRIBUTE = 'attribute';
const CONTEXT_NONE = 'none';
const CONTEXT_COMMENT = 'comment';

/** @var string  syntax activated at the start of parse() and restored by {/syntax} */
public $defaultSyntax = 'latte';

/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
public $syntaxes = array(
    'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
    'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
    'asp' => array('<%\s*', '\s*%>'),
    'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
    'off' => array('[^\x00-\xFF]', ''),
);

/** @var string  regexp fragment for comments and macros, built by setDelimiters() */
private $macroRe;

/** @var string  template source being parsed (line endings normalized to "\n") */
private $input;

/** @var array  tokens emitted so far */
private $output;

/** @var int  byte position in $input up to which the source has been consumed */
private $offset;

/** @var array  array(context name, quote character or NULL), see setContext() */
private $context;

/** @var string  lower-cased name of the last HTML tag seen, prefixed by "/" for closing tags */
private $lastTag;

/** @var string  value of $lastTag at which the syntax set by n:syntax is reset */
private $endTag;

/** @var bool  when TRUE, <script> and <style> contents are not parsed as CDATA */
private $xmlMode;
72:
73:
74:
75: 76: 77: 78: 79:
/**
 * Splits a template source into a flat list of tokens.
 *
 * The source is consumed by the handler of the current context until no
 * handler finds anything more; whatever remains is emitted as one text token.
 *
 * @param  string  template source
 * @return array   NLatteToken objects in source order
 * @throws NLatteException  when NStrings::checkEncoding() rejects the input
 */
public function parse($input)
{
    if (!NStrings::checkEncoding($input)) {
        throw new NLatteException('Template is not valid UTF-8 stream.');
    }
    $input = str_replace("\r\n", "\n", $input);
    $this->input = $input;
    $this->output = array();
    $this->offset = 0;

    $this->setSyntax($this->defaultSyntax);
    $this->setContext(self::CONTEXT_TEXT);
    $this->lastTag = $this->endTag = NULL;
    $this->xmlMode = (bool) preg_match('#^<\?xml\s#m', $input);

    // $input is not modified below, so its length can be computed once
    $length = strlen($input);

    while ($this->offset < $length) {
        // dispatch to contextText(), contextTag(), ... (method names are case-insensitive)
        $matches = $this->{"context".$this->context[0]}();

        if (!$matches) {
            break;

        } elseif (!empty($matches['comment'])) {
            $this->addToken(NLatteToken::COMMENT, $matches[0]);

        } elseif (isset($matches['macro']) && $matches['macro'] !== '') {
            // compared against '' instead of using empty(): empty('0') is TRUE,
            // which would silently drop the macro {0} from the output
            $token = $this->addToken(NLatteToken::MACRO, $matches[0]);
            list($token->name, $token->value, $token->modifiers) = $this->parseMacro($matches['macro']);
        }

        $this->filter();
    }

    if ($this->offset < $length) {
        $this->addToken(NLatteToken::TEXT, substr($this->input, $this->offset));
    }
    return $this->output;
}
117:
118:
119:
120: 121: 122:
/**
 * Handles CONTEXT_TEXT: looks for the beginning of an HTML tag, the beginning
 * of an HTML comment, or a macro/comment in the current syntax.
 *
 * @return array|NULL  captured groups keyed by index and name, or the falsy result of match()
 */
private function contextText()
{
    $matches = $this->match('~
        (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
        <(?P<htmlcomment>!--)| ## begin of HTML comment <!--
        '.$this->macroRe.' ## macro
    ~xsi');

    if (!empty($matches['htmlcomment'])) {
        $this->addToken(NLatteToken::TAG_BEGIN, $matches[0]);
        $this->setContext(self::CONTEXT_COMMENT);

    } elseif (isset($matches['tag']) && $matches['tag'] !== '') {
        // compared against '' instead of using empty(): the tag pattern accepts "0",
        // and empty('0') is TRUE, so "<0" was consumed without emitting any token
        $token = $this->addToken(NLatteToken::TAG_BEGIN, $matches[0]);
        $token->name = $matches['tag'];
        $token->closing = (bool) $matches['closing'];
        $this->lastTag = $matches['closing'] . strtolower($matches['tag']);
        $this->setContext(self::CONTEXT_TAG);
    }
    return $matches;
}
144:
145:
146:
147: 148: 149:
/**
 * Handles CONTEXT_CDATA: looks for the closing tag of the element named in
 * $lastTag, or for a macro/comment in the current syntax.
 *
 * @return array|NULL  captured groups, or the falsy result of match()
 */
private function contextCData()
{
    $pattern = '~
        </(?P<tag>'.$this->lastTag.')(?![a-z0-9:])| ## end HTML tag </tag
        '.$this->macroRe.' ## macro
    ~xsi';
    $found = $this->match($pattern);

    if (empty($found['tag'])) {
        return $found;
    }

    // closing tag of the CDATA element: report it and continue inside the tag
    $closingTag = $this->addToken(NLatteToken::TAG_BEGIN, $found[0]);
    $closingTag->name = $this->lastTag;
    $closingTag->closing = TRUE;
    $this->lastTag = '/' . $this->lastTag;
    $this->setContext(self::CONTEXT_TAG);

    return $found;
}
166:
167:
168:
169: 170: 171:
/**
 * Handles CONTEXT_TAG: looks for the end of the HTML tag, a macro/comment in
 * the current syntax, or the beginning of an HTML attribute.
 *
 * A quoted value of an attribute whose name starts with N_PREFIX is read in
 * one piece and stored in the token; for other quoted attributes the parser
 * switches to CONTEXT_ATTRIBUTE and remembers the quote character.
 *
 * @return array|NULL  captured groups, or the falsy result of match()
 */
private function contextTag()
{
    $matches = $this->match('~
        (?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
        '.$this->macroRe.'| ## macro
        \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
    ~xsi');

    if (!empty($matches['end'])) {
        $this->addToken(NLatteToken::TAG_END, $matches[0]);
        // outside XML mode the content of <script> and <style> is parsed as CDATA
        $isCData = !$this->xmlMode && in_array($this->lastTag, array('script', 'style'), TRUE);
        $this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

    } elseif (isset($matches['attr']) && $matches['attr'] !== '') {
        // compared against '' instead of using empty(): an attribute named "0" is
        // matched by the pattern, and empty('0') is TRUE, so it was dropped silently
        $token = $this->addToken(NLatteToken::ATTRIBUTE, $matches[0]);
        $token->name = $matches['attr'];
        $token->value = isset($matches['value']) ? $matches['value'] : '';

        if ($token->value === '"' || $token->value === "'") {
            if (NStrings::startsWith($token->name, self::N_PREFIX)) {
                // read everything up to the matching quote as the attribute value
                $token->value = '';
                if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
                    $token->value = $m[1];
                    $token->text .= $m[0];
                }
            } else {
                $this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
            }
        }
    }
    return $matches;
}
203:
204:
205:
206: 207: 208:
/**
 * Handles CONTEXT_ATTRIBUTE: looks for the quote character stored by
 * setContext() (end of the attribute value) or for a macro/comment.
 *
 * @return array|NULL  captured groups, or the falsy result of match()
 */
private function contextAttribute()
{
    $quote = $this->context[1];
    $found = $this->match('~
        (?P<quote>'.$quote.')| ## end of HTML attribute
        '.$this->macroRe.' ## macro
    ~xsi');

    if (empty($found['quote'])) {
        return $found;
    }

    // the closing quote is emitted as text and parsing continues inside the tag
    $this->addToken(NLatteToken::TEXT, $found[0]);
    $this->setContext(self::CONTEXT_TAG);
    return $found;
}
222:
223:
224:
225: 226: 227:
/**
 * Handles CONTEXT_COMMENT: looks for the end of the HTML comment or for a
 * macro/comment in the current syntax.
 *
 * @return array|NULL  captured groups, or the falsy result of match()
 */
private function contextComment()
{
    $found = $this->match('~
        (?<htmlcomment>--\s*>)| ## end of HTML comment
        '.$this->macroRe.' ## macro
    ~xsi');

    if (empty($found['htmlcomment'])) {
        return $found;
    }

    // end of the comment: emit it and go back to plain text
    $this->addToken(NLatteToken::TAG_END, $found[0]);
    $this->setContext(self::CONTEXT_TEXT);
    return $found;
}
241:
242:
243:
244: 245: 246:
/**
 * Handles CONTEXT_NONE: only macros/comments in the current syntax are
 * searched for; HTML structure is not examined.
 *
 * @return array|NULL  captured groups, or the falsy result of match()
 */
private function contextNone()
{
    return $this->match('~
        '.$this->macroRe.' ## macro
    ~xsi');
}
254:
255:
256:
257: 258: 259: 260: 261:
/**
 * Searches the input for $re starting at the current offset.
 *
 * On success the text skipped before the match is emitted as a TEXT token
 * (unless it is empty), the offset is moved behind the match, and the
 * captured groups are returned without their offsets.
 *
 * @param  string  regular expression
 * @return array|NULL  captured strings keyed like the match, or the falsy result of NStrings::match()
 */
private function match($re)
{
    $found = NStrings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset);
    if (!$found) {
        return $found;
    }

    $whole = $found[0][0];
    $start = $found[0][1];

    $skipped = substr($this->input, $this->offset, $start - $this->offset);
    if ($skipped !== '') {
        $this->addToken(NLatteToken::TEXT, $skipped);
    }
    $this->offset = $start + strlen($whole);

    // strip the offsets added by PREG_OFFSET_CAPTURE, keeping only the strings
    foreach ($found as $key => $capture) {
        $found[$key] = $capture[0];
    }
    return $found;
}
274:
275:
276:
277: 278: 279:
/**
 * Switches the parser to another context.
 *
 * @param  string  one of the CONTEXT_* constants
 * @param  string  quote character closing the attribute value (stored as the second element of the context)
 * @return NParser  provides a fluent interface
 */
public function setContext($context, $quote = NULL)
{
    $state = array($context, $quote);
    $this->context = $state;
    return $this;
}
285:
286:
287:
288: 289: 290: 291: 292:
/**
 * Activates the delimiters of a named syntax.
 *
 * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
 * @return NParser  provides a fluent interface
 * @throws NLatteException  when the name is not a key of $syntaxes
 */
public function setSyntax($type)
{
    if (!$type) {
        $type = $this->defaultSyntax;
    }
    if (!isset($this->syntaxes[$type])) {
        throw new NLatteException("Unknown syntax '$type'");
    }

    $delimiters = $this->syntaxes[$type];
    $this->setDelimiters($delimiters[0], $delimiters[1]);
    return $this;
}
303:
304:
305:
306: 307: 308: 309: 310: 311:
/**
 * Builds the regexp fragment that recognizes comments and macros enclosed
 * in the given delimiters and stores it in $macroRe.
 *
 * The fragment defines the named groups "comment", "macro", "inner" and
 * "rmargin". It contains layout whitespace and is embedded by the context
 * handlers into patterns that use the x modifier.
 *
 * @param  string  regexp of the left delimiter
 * @param  string  regexp of the right delimiter
 * @return NParser  provides a fluent interface
 */
public function setDelimiters($left, $right)
{
    $string = self::RE_STRING;

    // delimiter, asterisk, anything, asterisk, delimiter, up to two newlines
    $comment = '(?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})';

    // macro body: strings, balanced {...} groups, or any character except quotes and braces
    $macro = '(?P<macro>(?:' . $string . '|\{
            (?P<inner>' . $string . '|\{(?P>inner)\}|[^\'"{}])*+
        \}|[^\'"{}])+?)';

    $this->macroRe = '
        ' . $comment . '|
        ' . $left . '
        ' . $macro . '
        ' . $right . '
        (?P<rmargin>[ \t]*(?=\n))?
    ';
    return $this;
}
325:
326:
327:
328: 329: 330: 331: 332:
/**
 * Splits the inside of a macro tag into name, arguments and modifiers.
 *
 * When the tag has no name, the short name (or "=" if there is none) is used
 * as the name, and "|escape" is appended to the modifiers unless the tag
 * starts with "!" or the short name starts with "/".
 *
 * @param  string  macro tag content without delimiters
 * @return array|FALSE  array(name, trimmed arguments, modifiers), or FALSE when the tag does not match
 */
public function parseMacro($macro)
{
    $parts = NStrings::match($macro, '~^
        (
            (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\())| ## ?, name, /name, but not function( or class::
            (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## [!] [=] expression to print
        )(?P<args>.*?)
        (?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"]+)*)?
        ()$~isx');

    if (!$parts) {
        return FALSE;
    }

    $name = $parts['name'];
    $modifiers = $parts['modifiers'];

    if ($name === '') {
        $short = $parts['shortname'];
        $name = $short ? $short : '=';

        $isClosing = substr($short, 0, 1) === '/';
        if (!$parts['noescape'] && !$isClosing) {
            $modifiers .= '|escape';
        }
    }

    return array($name, trim($parts['args']), $modifiers);
}
354:
355:
356:
/**
 * Creates a token, appends it to the output and returns it.
 *
 * The line is one plus the number of newlines found in the first
 * max(1, offset - 1) bytes of the input.
 *
 * @param  string  token type (NLatteToken constant)
 * @param  string  source text of the token
 * @return NLatteToken
 */
private function addToken($type, $text)
{
    $created = new NLatteToken;
    $this->output[] = $created;

    $scanned = max(1, $this->offset - 1);
    $created->type = $type;
    $created->text = $text;
    $created->line = substr_count($this->input, "\n", 0, $scanned) + 1;

    return $created;
}
365:
366:
367:
368: 369: 370:
/**
 * Reacts to the most recently emitted token by switching syntax or context.
 *
 * - {syntax name}, {/syntax} and the n:syntax attribute change the active
 *   syntax and are turned into COMMENT tokens;
 * - the end of the tag recorded for n:syntax restores the default syntax;
 * - {contentType ...} selects HTML/XML text parsing, or CONTEXT_NONE when the
 *   value mentions neither "html" nor "xml".
 *
 * @return void
 */
protected function filter()
{
    $token = end($this->output);
    if (!$token) {
        // end() returns FALSE for an empty array: no token has been emitted yet,
        // so there is nothing to inspect
        return;
    }

    $isMacro = $token->type === NLatteToken::MACRO;

    if ($isMacro && $token->name === '/syntax') {
        $this->setSyntax($this->defaultSyntax);
        $token->type = NLatteToken::COMMENT;

    } elseif ($isMacro && $token->name === 'syntax') {
        $this->setSyntax($token->value);
        $token->type = NLatteToken::COMMENT;

    } elseif ($token->type === NLatteToken::ATTRIBUTE && $token->name === 'n:syntax') {
        $this->setSyntax($token->value);
        // the syntax stays active until the closing tag of the current element
        $this->endTag = '/' . $this->lastTag;
        $token->type = NLatteToken::COMMENT;

    } elseif ($token->type === NLatteToken::TAG_END && $this->lastTag === $this->endTag) {
        $this->setSyntax($this->defaultSyntax);

    } elseif ($isMacro && $token->name === 'contentType') {
        if (preg_match('#html|xml#', $token->value, $m)) {
            $this->xmlMode = $m[0] === 'xml';
            $this->setContext(self::CONTEXT_TEXT);
        } else {
            $this->setContext(self::CONTEXT_NONE);
        }
    }
}
399:
400: }
401: