1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13:
14: class Parser extends Object
15: {
16:
/** Regular expression matching a single- or double-quoted string, including backslash-escaped characters. */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

/** Prefix of the special attributes (such as n:syntax) whose quoted value contextHtmlTag() reads in one piece. */
const N_PREFIX = 'n:';

/** @var string  key of $syntaxes applied when parse() starts and whenever a syntax block ends */
public $defaultSyntax = 'latte';

/** @var bool  when FALSE, parseMacroTag() raises E_USER_DEPRECATED for the {!...} shortcut */
public $shortNoEscape = FALSE;

/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
public $syntaxes = array(
	'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
	'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
	'asp' => array('<%\s*', '\s*%>'),
	'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
	'off' => array('[^\x00-\xFF]', ''), // left delimiter matches no single byte, so no macro opener is found
);

/** @var array  array(left, right) delimiter regexps currently in effect, see setDelimiters() */
private $delimiters;

/** @var string  template source being parsed */
private $input;

/** @var Token[]  tokens produced so far */
private $output;

/** @var int  byte position in $input where the next match starts */
private $offset;

/** @var array  array(context name, context-specific data), see setContext() */
private $context;

/** @var string  lower-cased name of the last HTML tag seen, prefixed with '/' for a closing tag */
private $lastHtmlTag;

/** @var string  value of $lastHtmlTag recorded when an n:syntax attribute was seen, see filter() */
private $syntaxEndTag;

/** @var int  nesting counter for $syntaxEndTag; the default syntax is restored when it drops to 0 */
private $syntaxEndLevel = 0;

/** @var bool  when TRUE, the contents of script and style are not treated as CDATA */
private $xmlMode;

/**
 * Parser contexts. parse() prefixes the value with 'context' to obtain the name
 * of the handler method, e.g. 'htmlComment' => contextHtmlComment().
 */
const CONTEXT_HTML_TEXT = 'htmlText',
	CONTEXT_CDATA = 'cdata',
	CONTEXT_HTML_TAG = 'htmlTag',
	CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
	CONTEXT_RAW = 'raw',
	CONTEXT_HTML_COMMENT = 'htmlComment', // identifier was missing; it is referenced via self::CONTEXT_HTML_COMMENT
	CONTEXT_MACRO = 'macro';
72:
73:
74: 75: 76: 77: 78:
/**
 * Splits a template source into a list of tokens.
 * @param  string  template source
 * @return Token[]
 * @throws \InvalidArgumentException  when the source is not valid UTF-8
 * @throws CompileException  when the source ends inside an unfinished macro
 */
public function parse($input)
{
	// drop a leading UTF-8 byte order mark
	if (substr($input, 0, 3) === "\xEF\xBB\xBF") {
		$input = substr($input, 3);
	}
	// an empty pattern with the u modifier only succeeds on a valid UTF-8 subject
	if (!preg_match('##u', $input)) {
		throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
	}

	$input = str_replace("\r\n", "\n", $input);
	$length = strlen($input);

	$this->input = $input;
	$this->output = array();
	$this->offset = 0;
	$filtered = 0; // number of tokens already passed to filter()

	$this->setSyntax($this->defaultSyntax);
	$this->setContext(self::CONTEXT_HTML_TEXT);
	$this->lastHtmlTag = NULL;
	$this->syntaxEndTag = NULL;

	while ($this->offset < $length) {
		// the handler method is named after the current context
		$handler = 'context' . $this->context[0];
		if ($this->$handler() === FALSE) {
			break;
		}
		// hand every token the handler has just produced to filter()
		for (; $filtered < count($this->output); $filtered++) {
			$this->filter($this->output[$filtered]);
		}
	}

	if ($this->context[0] === self::CONTEXT_MACRO) {
		throw new CompileException('Malformed macro');
	}

	// whatever is left after the last match is plain text
	if ($this->offset < $length) {
		$this->addToken(Token::TEXT, substr($this->input, $this->offset));
	}
	return $this->output;
}
113:
114:
115: 116: 117:
/**
 * Handles the HTML text context: looks for the start of an HTML tag,
 * the start of an HTML comment or a macro opening delimiter.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextHtmlText()
{
	$found = $this->match('~
		(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
		<(?P<htmlcomment>!--(?!>))| ## begin of HTML comment <!--, but not <!-->
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (!empty($found['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
		$this->setContext(self::CONTEXT_HTML_COMMENT);
		return;
	}

	if (empty($found['tag'])) {
		return $this->processMacro($found);
	}

	// opening or closing HTML tag
	$slash = $found['closing'];
	$token = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
	$token->name = $found['tag'];
	$token->closing = (bool) $slash;
	$this->lastHtmlTag = $slash . strtolower($found['tag']);
	$this->setContext(self::CONTEXT_HTML_TAG);
}
141:
142:
143: 144: 145:
/**
 * Handles the CDATA context: looks for the closing tag of the element
 * stored in $lastHtmlTag or for a macro opening delimiter.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextCData()
{
	$element = $this->lastHtmlTag;
	$found = $this->match('~
		</(?P<tag>' . $element . ')(?![a-z0-9:])| ## end HTML tag </tag
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (empty($found['tag'])) {
		return $this->processMacro($found);
	}

	$token = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
	$token->name = $element;
	$token->closing = TRUE;
	$this->lastHtmlTag = '/' . $element;
	$this->setContext(self::CONTEXT_HTML_TAG);
}
163:
164:
165: 166: 167:
/**
 * Handles the inside of an HTML tag: looks for the end of the tag,
 * a macro opening delimiter or the beginning of an attribute.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextHtmlTag()
{
	$found = $this->match('~
		(?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
		(?P<macro>' . $this->delimiters[0] . ')|
		\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
	~xsi');

	if (!empty($found['end'])) {
		$this->addToken(Token::HTML_TAG_END, $found[0]);
		// outside XML mode the contents of script and style are handled as CDATA
		$isCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
		$this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);
		return;
	}

	if (!isset($found['attr']) || $found['attr'] === '') {
		return $this->processMacro($found);
	}

	$token = $this->addToken(Token::HTML_ATTRIBUTE, $found[0]);
	$token->name = $found['attr'];
	$token->value = isset($found['value']) ? $found['value'] : '';

	// only a quoted value needs further work; the regexp captured just its opening quote
	if ($token->value !== '"' && $token->value !== "'") {
		return;
	}

	if (strncmp($token->name, self::N_PREFIX, strlen(self::N_PREFIX)) !== 0) {
		// ordinary attribute: its content is tokenized in the attribute context
		$this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $found['value']);
		return;
	}

	// n: attribute: read everything up to the closing quote at once
	$token->value = '';
	$rest = $this->match('~(.*?)' . $found['value'] . '~xsi');
	if ($rest) {
		$token->value = $rest[1];
		$token->text .= $rest[0];
	}
}
200:
201:
202: 203: 204:
/**
 * Handles the content of a quoted HTML attribute: looks for the closing
 * quote (stored as the context data) or a macro opening delimiter.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextHtmlAttribute()
{
	$quote = $this->context[1];
	$found = $this->match('~
		(?P<quote>' . $quote . ')| ## end of HTML attribute
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (empty($found['quote'])) {
		return $this->processMacro($found);
	}

	$this->addToken(Token::TEXT, $found[0]);
	$this->setContext(self::CONTEXT_HTML_TAG);
}
219:
220:
221: 222: 223:
/**
 * Handles the inside of an HTML comment: looks for the end of the comment
 * or a macro opening delimiter.
 *
 * The method name is what parse() builds from 'context' and the value of
 * CONTEXT_HTML_COMMENT ('htmlComment'); without a name the declaration does not parse.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextHtmlComment()
{
	$matches = $this->match('~
		(?P<htmlcomment>-->)| ## end of HTML comment
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (!empty($matches['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_END, $matches[0]);
		$this->setContext(self::CONTEXT_HTML_TEXT);
	} else {
		return $this->processMacro($matches);
	}
}
238:
239:
240: 241: 242:
/**
 * Handles the raw context: only a macro opening delimiter is looked for.
 *
 * The method name is what parse() builds from 'context' and the value of
 * CONTEXT_RAW ('raw'); without a name the declaration does not parse.
 * @return bool|NULL  FALSE when nothing was found
 */
private function contextRaw()
{
	$matches = $this->match('~
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');
	return $this->processMacro($matches);
}
250:
251:
252: 253: 254:
/**
 * Handles the inside of a macro: reads either a comment or the macro content
 * up to the closing delimiter, then restores the context that was active
 * before the macro was opened.
 * @return void
 * @throws CompileException  when neither a comment nor a macro can be read
 */
private function contextMacro()
{
	$right = $this->delimiters[1];
	$found = $this->match('~
		(?P<comment>\\*.*?\\*' . $right . '\n{0,2})|
		(?P<macro>(?:
			' . self::RE_STRING . '|
			\{(?:' . self::RE_STRING . '|[^\'"{}])*+\}|
			[^\'"{}]
		)+?)
		' . $right . '
		(?P<rmargin>[ \t]*(?=\n))?
	~xsiA');

	$isMacro = !empty($found['macro']);
	if (!$isMacro && empty($found['comment'])) {
		throw new CompileException('Malformed macro');
	}

	// context data set by processMacro(): array(previous context, matched opening delimiter)
	$opened = $this->context[1];
	$text = $opened[1] . $found[0];

	if ($isMacro) {
		$token = $this->addToken(Token::MACRO_TAG, $text);
		list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($found['macro']);
	} else {
		$this->addToken(Token::COMMENT, $text);
	}

	$this->context = $opened[0];
}
281:
282:
/**
 * Switches to the macro context when a macro opening delimiter was matched.
 * The current context and the matched delimiter are stored as context data
 * so that contextMacro() can restore them.
 * @param  array  result of match()
 * @return bool|NULL  FALSE when no macro opening delimiter was matched
 */
private function processMacro($matches)
{
	if (empty($matches['macro'])) {
		return FALSE;
	}
	$this->setContext(self::CONTEXT_MACRO, array($this->context, $matches['macro']));
}
291:
292:
293: 294: 295: 296: 297:
/**
 * Matches the regular expression against the input starting at the current offset.
 * Text skipped before the match becomes a TEXT token and the offset is moved
 * behind the match.
 * @param  string  regular expression
 * @return array  captured substrings keyed as by preg_match(), or an empty array when nothing matched
 * @throws RegexpException  when preg_match() failed with an error
 */
private function match($re)
{
	$found = preg_match($re, $this->input, $captures, PREG_OFFSET_CAPTURE, $this->offset);
	if (!$found) {
		$error = preg_last_error();
		if ($error) {
			throw new RegexpException(NULL, $error);
		}
		return array();
	}

	// with PREG_OFFSET_CAPTURE every capture is array(text, byte offset)
	list($whole, $start) = $captures[0];

	$skipped = substr($this->input, $this->offset, $start - $this->offset);
	if ($skipped !== '') {
		$this->addToken(Token::TEXT, $skipped);
	}
	$this->offset = $start + strlen($whole);

	// strip the offsets, keep only the captured text
	$result = array();
	foreach ($captures as $key => $capture) {
		$result[$key] = $capture[0];
	}
	return $result;
}
317:
318:
319: 320: 321:
/**
 * Selects the starting context according to the content type.
 * A type containing 'html' or 'xml' switches to the HTML text context
 * (XML mode is on only when 'html' is absent); anything else is parsed as raw.
 * @param  string  content type
 * @return self
 */
public function setContentType($type)
{
	$isHtml = strpos($type, 'html') !== FALSE;

	if ($isHtml || strpos($type, 'xml') !== FALSE) {
		$this->xmlMode = !$isHtml;
		$this->setContext(self::CONTEXT_HTML_TEXT);
	} else {
		$this->setContext(self::CONTEXT_RAW);
	}
	return $this;
}
335:
336:
337: 338: 339:
/**
 * Sets the parser context.
 * @param  string  one of the CONTEXT_* constants
 * @param  mixed   context-specific data (the quote character for the attribute context)
 * @return self
 */
public function setContext($context, $quote = NULL)
{
	$state = array($context, $quote);
	$this->context = $state;

	return $this;
}
345:
346:
347: 348: 349: 350: 351:
/**
 * Activates the delimiters of a named syntax.
 * @param  string  key of $syntaxes; an empty value selects $defaultSyntax
 * @return self
 * @throws \InvalidArgumentException  when the syntax is not defined
 */
public function setSyntax($type)
{
	$type = $type ?: $this->defaultSyntax;

	if (!isset($this->syntaxes[$type])) {
		throw new \InvalidArgumentException("Unknown syntax '$type'");
	}

	$pair = $this->syntaxes[$type];
	$this->setDelimiters($pair[0], $pair[1]);
	return $this;
}
362:
363:
364: 365: 366: 367: 368: 369:
/**
 * Sets the macro delimiters. Both values are inserted into regular
 * expressions as they are.
 * @param  string  regexp matching the left delimiter
 * @param  string  regexp matching the right delimiter
 * @return self
 */
public function setDelimiters($left, $right)
{
	$pair = array($left, $right);
	$this->delimiters = $pair;

	return $this;
}
375:
376:
377: 378: 379: 380: 381: 382:
/**
 * Parses the content of a macro tag (the text between the delimiters).
 * @param  string  macro tag content
 * @return array|bool  array(name, arguments, modifiers, empty flag), or FALSE when the content cannot be parsed
 * @throws RegexpException  when preg_match() failed with an error
 */
public function parseMacroTag($tag)
{
	$pattern = '~^
		(
			(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
			(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
		)(?P<args>.*?)
		(?P<modifiers>\|[a-z](?:' . self::RE_STRING . '|[^\'"])*(?<!/))?
		(?P<empty>/?\z)
	()\z~isx';

	if (!preg_match($pattern, $tag, $match)) {
		$error = preg_last_error();
		if ($error) {
			throw new RegexpException(NULL, $error);
		}
		return FALSE;
	}

	$name = $match['name'];
	$modifiers = $match['modifiers'];

	if ($name === '') {
		// short form without a name: the optional symbol is the name, a bare expression means '='
		$name = $match['shortname'] ?: '=';
		if ($match['noescape']) {
			if (!$this->shortNoEscape) {
				trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
			}
			$modifiers .= '|noescape';
		}
	}

	return array($name, trim($match['args']), $modifiers, (bool) $match['empty']);
}
409:
410:
/**
 * Creates a token, stamps it with the current line and appends it to the output.
 * @param  string  token type (Token::* constant)
 * @param  string  source text of the token
 * @return Token
 */
private function addToken($type, $text)
{
	$token = new Token;
	$token->type = $type;
	$token->text = $text;
	$token->line = $this->getLine();

	$this->output[] = $token;
	return $token;
}
419:
420:
/**
 * Returns the line number derived from the current offset: newlines found
 * in the first max(1, offset - 1) bytes of the input, plus one.
 *
 * With an empty or not yet set input (getLine() and parseMacroTag() are public
 * and may be called before parse()) the requested length would exceed the
 * input, which substr_count() reports as an error. Line 1 is returned directly
 * in that case.
 * @return int
 */
public function getLine()
{
	if ($this->input === NULL || $this->input === '') {
		return 1;
	}
	return substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
}
425:
426:
427: 428: 429:
/**
 * Reacts to tokens that change the parser state: the {syntax} and {/syntax}
 * macros, the n:syntax attribute (including the end of the element that
 * carries it) and the {contentType} macro.
 * @return void
 */
protected function filter(Token $token)
{
	$type = $token->type;
	$isMacro = $type === Token::MACRO_TAG;

	if ($isMacro && $token->name === '/syntax') {
		$this->setSyntax($this->defaultSyntax);
		$token->type = Token::COMMENT;
		return;
	}

	if ($isMacro && $token->name === 'syntax') {
		$this->setSyntax($token->value);
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
		// remember the element so that its closing tag can restore the default syntax
		$this->setSyntax($token->value);
		$this->syntaxEndTag = $this->lastHtmlTag;
		$this->syntaxEndLevel = 1;
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
		// nested element of the same name
		$this->syntaxEndLevel++;
		return;
	}

	if ($type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag) && --$this->syntaxEndLevel === 0) {
		$this->setSyntax($this->defaultSyntax);
		return;
	}

	if ($isMacro && $token->name === 'contentType') {
		$this->setContentType($token->value);
	}
}
456:
457: }
458: