1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13:
/**
 * Template tokenizer: parse() walks a template string and produces a flat
 * list of Token objects, switching between context handlers as it goes.
 */
class Parser
{
use Strict;


/** Regex fragment matching a single- or double-quoted string with backslash escapes; embedded in the macro patterns below. */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';


/** Attribute-name prefix that contextHtmlTag() treats specially (the quoted value is consumed as a whole). */
const N_PREFIX = 'n:';


/** Content types accepted by setContentType(); aliases of the Engine constants. */
const
CONTENT_HTML = Engine::CONTENT_HTML,
CONTENT_XHTML = Engine::CONTENT_XHTML,
CONTENT_XML = Engine::CONTENT_XML,
CONTENT_TEXT = Engine::CONTENT_TEXT;


/** Key into $syntaxes activated at the start of parse() and used by setSyntax() when no type is given. */
public $defaultSyntax = 'latte';


/** NOTE(review): not read anywhere in the visible part of this class - confirm whether external code still relies on it. */
public $shortNoEscape;


/** Map of syntax name => [left delimiter regex, right delimiter regex]; looked up by setSyntax(). */
public $syntaxes = [
'latte' => ['\{(?![\s\'"{}])', '\}'],
'double' => ['\{\{(?![\s\'"{}])', '\}\}'],
'off' => ['\{(?=/syntax\})', '\}'],
];


/** Currently active [left, right] delimiter regexes, assigned by setDelimiters(). */
private $delimiters;


/** Template source being parsed (BOM removed, CRLF normalized to LF by parse()). */
private $input;


/** Tokens produced so far; returned by parse(). */
private $output;


/** Byte position in $input where the next match starts. */
private $offset;


/** Pair [context name, context-specific data]; the name selects the context*() handler called by parse(). */
private $context = [self::CONTEXT_HTML_TEXT, NULL];


/** Lower-cased name of the most recently opened HTML tag, prefixed with '/' for a closing tag. */
private $lastHtmlTag;


/** Value of $lastHtmlTag at the moment an n:syntax attribute was seen; used by filter() to find the matching end tag. */
private $syntaxEndTag;


/** Nesting counter maintained by filter() for tags named like $syntaxEndTag. */
private $syntaxEndLevel = 0;


/** TRUE when setContentType() was last called with CONTENT_XML for a markup type. */
private $xmlMode;
70:
71:
/**
 * Parser context names. The value is appended to 'context' by parse() to
 * pick the handler method (e.g. 'htmlComment' => contextHtmlComment()).
 */
const
	CONTEXT_NONE = 'none',
	CONTEXT_MACRO = 'macro',
	CONTEXT_HTML_TEXT = 'htmlText',
	CONTEXT_HTML_TAG = 'htmlTag',
	CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
	// Name restored: contextHtmlText() switches to self::CONTEXT_HTML_COMMENT.
	CONTEXT_HTML_COMMENT = 'htmlComment',
	CONTEXT_HTML_CDATA = 'htmlCData';
80:
81:
82: 83: 84: 85: 86:
/**
 * Splits a template into tokens.
 *
 * A leading UTF-8 BOM is dropped and CRLF is normalized to LF before
 * tokenizing. Context handlers are called until the input is consumed or a
 * handler reports (by returning FALSE) that nothing more can be matched;
 * whatever remains is emitted as one trailing text token.
 *
 * @param  string  template source
 * @return Token[]
 * @throws \InvalidArgumentException  when the input is not valid UTF-8
 * @throws CompileException  when the input ends inside a macro
 */
public function parse($input)
{
	$bom = "\xEF\xBB\xBF";
	if (Helpers::startsWith($input, $bom)) {
		$input = substr($input, 3);
	}

	$input = str_replace("\r\n", "\n", $input);
	$this->input = $input;
	$this->offset = 0;
	$this->output = [];

	if (!preg_match('##u', $input)) {
		// Measure the longest well-formed prefix so that getLine() points at the offending byte.
		preg_match('#(?:[\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})*+#A', $input, $validPrefix);
		$this->offset = strlen($validPrefix[0]) + 1;
		throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
	}

	$this->setSyntax($this->defaultSyntax);
	$this->lastHtmlTag = NULL;
	$this->syntaxEndTag = NULL;

	$length = strlen($input);
	$filtered = 0; // number of tokens already passed through filter()
	while ($this->offset < $length) {
		$handler = 'context' . $this->context[0];
		if ($this->$handler() === FALSE) {
			break;
		}
		for (; $filtered < count($this->output); $filtered++) {
			$this->filter($this->output[$filtered]);
		}
	}

	if ($this->context[0] === self::CONTEXT_MACRO) {
		throw new CompileException('Malformed macro');
	}

	if ($this->offset < $length) {
		$this->addToken(Token::TEXT, substr($this->input, $this->offset));
	}
	return $this->output;
}
124:
125:
126: 127: 128:
/**
 * Handler for the HTML text context: looks for the start of a tag, the
 * start of a comment-like construct (<!, <!--, <?) or a macro opening.
 *
 * @return bool|NULL  FALSE when nothing matched, NULL otherwise
 */
private function contextHtmlText()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$found = $this->match('~
(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z][a-z0-9:]*)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
<(?P<htmlcomment>!(?:--(?!>))?|\?(?!=|php))| ## begin of <!, <!--, <!DOCTYPE, <?, but not <?php and <?=
(?P<macro>' . $this->delimiters[0] . ')
~xsi');

	if (!empty($found['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_BEGIN, $found[0]);

		// Regex fragment that must precede '>' to close the construct.
		if ($found['htmlcomment'] === '!--') {
			$terminator = '--';
		} elseif ($found['htmlcomment'] === '?' && $this->xmlMode) {
			$terminator = '\?';
		} else {
			$terminator = '';
		}
		$this->setContext(self::CONTEXT_HTML_COMMENT, $terminator);
		return;
	}

	if (!empty($found['tag'])) {
		$tagToken = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
		$tagToken->name = $found['tag'];
		$tagToken->closing = (bool) $found['closing'];
		$this->lastHtmlTag = $found['closing'] . strtolower($found['tag']);
		$this->setContext(self::CONTEXT_HTML_TAG);
		return;
	}

	return $this->processMacro($found);
}
153:
154:
155: 156: 157:
/**
 * Handler for the CDATA context: only the closing tag of the element
 * remembered in $lastHtmlTag, or a macro opening, ends a run of text.
 *
 * @return bool|NULL  FALSE when nothing matched, NULL otherwise
 */
private function contextHtmlCData()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$found = $this->match('~
</(?P<tag>' . $this->lastHtmlTag . ')(?![a-z0-9:])| ## end HTML tag </tag
(?P<macro>' . $this->delimiters[0] . ')
~xsi');

	if (empty($found['tag'])) {
		return $this->processMacro($found);
	}

	$endToken = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
	$endToken->name = $this->lastHtmlTag;
	$endToken->closing = TRUE;
	$this->lastHtmlTag = '/' . $this->lastHtmlTag;
	$this->setContext(self::CONTEXT_HTML_TAG);
}
175:
176:
177: 178: 179:
/**
 * Handler for the inside of an HTML tag: recognizes the end of the tag,
 * a macro opening, or the beginning of an attribute.
 *
 * @return bool|NULL  FALSE when nothing matched, NULL otherwise
 */
private function contextHtmlTag()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$found = $this->match('~
(?P<end>\s?/?>)([ \t]*\n)?| ## end of HTML tag
(?P<macro>' . $this->delimiters[0] . ')|
\s*(?P<attr>[^\s"\'>/={]+)(?:\s*=\s*(?P<value>["\']|[^\s"\'=<>`{]+))? ## beginning of HTML attribute
~xsi');

	if (!empty($found['end'])) {
		$this->addToken(Token::HTML_TAG_END, $found[0]);
		// Outside XML mode the content of <script> and <style> is handled as CDATA.
		$isRawText = !$this->xmlMode && in_array($this->lastHtmlTag, ['script', 'style'], TRUE);
		$this->setContext($isRawText ? self::CONTEXT_HTML_CDATA : self::CONTEXT_HTML_TEXT);
		return;
	}

	if (!isset($found['attr']) || $found['attr'] === '') {
		return $this->processMacro($found);
	}

	$attrToken = $this->addToken(Token::HTML_ATTRIBUTE_BEGIN, $found[0]);
	$attrToken->name = $found['attr'];
	$attrToken->value = isset($found['value']) ? $found['value'] : '';

	if ($attrToken->value !== '"' && $attrToken->value !== "'") {
		return; // unquoted or missing value: the attribute is complete
	}

	if (!Helpers::startsWith($attrToken->name, self::N_PREFIX)) {
		// Ordinary quoted attribute: its content is tokenized in the attribute context.
		$this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $found['value']);
		return;
	}

	// n: attribute: take everything up to the matching quote as the value.
	$attrToken->value = '';
	if ($quoted = $this->match('~(.*?)' . $found['value'] . '~xsi')) {
		$attrToken->value = $quoted[1];
		$attrToken->text .= $quoted[0];
	}
}
212:
213:
214: 215: 216:
/**
 * Handler for the inside of a quoted attribute value: stops at the closing
 * quote (stored in $context[1]) or at a macro opening.
 *
 * @return bool|NULL  FALSE when nothing matched, NULL otherwise
 */
private function contextHtmlAttribute()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$found = $this->match('~
(?P<quote>' . $this->context[1] . ')| ## end of HTML attribute
(?P<macro>' . $this->delimiters[0] . ')
~xsi');

	if (empty($found['quote'])) {
		return $this->processMacro($found);
	}

	$this->addToken(Token::HTML_ATTRIBUTE_END, $found[0]);
	$this->setContext(self::CONTEXT_HTML_TAG);
}
231:
232:
233: 234: 235:
/**
 * Handler for the HTML comment context (entered from contextHtmlText()
 * after <!, <!-- or <?): stops at the terminator stored in $context[1]
 * followed by '>', or at a macro opening.
 *
 * The method name is required by parse(), which calls
 * 'context' . $this->context[0] and the context value is 'htmlComment'.
 *
 * @return bool|NULL  FALSE when nothing matched, NULL otherwise
 */
private function contextHtmlComment()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$matches = $this->match('~
(?P<htmlcomment>' . $this->context[1] . '>)| ## end of HTML comment
(?P<macro>' . $this->delimiters[0] . ')
~xsi');

	if (!empty($matches['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_END, $matches[0]);
		$this->setContext(self::CONTEXT_HTML_TEXT);
	} else {
		return $this->processMacro($matches);
	}
}
250:
251:
252: 253: 254:
/**
 * Handler for the context without markup awareness: only macro openings
 * are recognized.
 *
 * @return bool|NULL  FALSE when no macro opening was found, NULL otherwise
 */
private function contextNone()
{
	// Pattern literal kept verbatim (free-spacing mode).
	$pattern = '~
(?P<macro>' . $this->delimiters[0] . ')
~xsi';

	$found = $this->match($pattern);
	return $this->processMacro($found);
}
262:
263:
264: 265: 266:
/**
 * Handler for the inside of a macro: consumes either a comment body or
 * the macro body up to the right delimiter, emits the token and restores
 * the context that was active before the macro opened.
 *
 * $context[1] holds [previous context, text of the opening delimiter],
 * as stored by processMacro().
 *
 * @return void
 * @throws CompileException  when neither a macro nor a comment can be matched
 */
private function contextMacro()
{
	// Pattern literal kept verbatim (free-spacing mode, anchored at the offset).
	$found = $this->match('~
(?P<comment>\\*.*?\\*' . $this->delimiters[1] . '\n{0,2})|
(?P<macro>(?>
' . self::RE_STRING . '|
\{(?>' . self::RE_STRING . '|[^\'"{}])*+\}|
[^\'"{}]+
)++)
' . $this->delimiters[1] . '
(?P<rmargin>[ \t]*(?=\n))?
~xsiA');

	list($outerContext, $opening) = $this->context[1];

	if (!empty($found['macro'])) {
		$token = $this->addToken(Token::MACRO_TAG, $opening . $found[0]);
		list($token->name, $token->value, $token->modifiers, $token->empty, $token->closing) = $this->parseMacroTag($found['macro']);

	} elseif (!empty($found['comment'])) {
		$this->addToken(Token::COMMENT, $opening . $found[0]);

	} else {
		throw new CompileException('Malformed macro');
	}

	$this->context = $outerContext;
}
293:
294:
/**
 * Enters the macro context when the given match contains a macro opening.
 * The current context and the opening text are stored so that
 * contextMacro() can restore them.
 *
 * @param  array  result of match()
 * @return bool|NULL  FALSE when there is no macro opening, NULL otherwise
 */
private function processMacro($matches)
{
	if (empty($matches['macro'])) {
		return FALSE;
	}

	$resume = [$this->context, $matches['macro']];
	$this->setContext(self::CONTEXT_MACRO, $resume);
}
303:
304:
305: 306: 307: 308: 309:
/**
 * Searches the input from the current offset. Text skipped before the
 * match is emitted as a text token and the offset moves past the match.
 *
 * @param  string  regular expression
 * @return array  captured groups as plain strings, or [] when nothing matched
 * @throws RegexpException  when PCRE reports an error
 */
private function match($re)
{
	$hit = preg_match($re, $this->input, $captures, PREG_OFFSET_CAPTURE, $this->offset);
	if (!$hit) {
		if (preg_last_error()) {
			throw new RegexpException(NULL, preg_last_error());
		}
		return [];
	}

	list($whole, $start) = $captures[0];

	// The text token must be added before the offset moves: addToken() derives the line from it.
	$skipped = substr($this->input, $this->offset, $start - $this->offset);
	if ($skipped !== '') {
		$this->addToken(Token::TEXT, $skipped);
	}
	$this->offset = $start + strlen($whole);

	// Drop the positions recorded by PREG_OFFSET_CAPTURE and keep only the strings.
	$strings = [];
	foreach ($captures as $key => $capture) {
		$strings[$key] = $capture[0];
	}
	return $strings;
}
329:
330:
331: 332: 333: 334:
/**
 * Selects the starting context by content type: HTML, XHTML and XML start
 * in the HTML text context (XML additionally turns on XML mode), anything
 * else starts in the context that only recognizes macros.
 *
 * @param  string  one of the CONTENT_* constants
 * @return static
 */
public function setContentType($type)
{
	$markupTypes = [self::CONTENT_HTML, self::CONTENT_XHTML, self::CONTENT_XML];

	if (!in_array($type, $markupTypes, TRUE)) {
		$this->setContext(self::CONTEXT_NONE);
		return $this;
	}

	$this->setContext(self::CONTEXT_HTML_TEXT);
	$this->xmlMode = ($type === self::CONTENT_XML);
	return $this;
}
345:
346:
347: 348: 349:
/**
 * Switches the parser context.
 *
 * @param  string  one of the CONTEXT_* constants
 * @param  mixed   context-specific data (e.g. the quote closing an attribute)
 * @return static
 */
public function setContext($context, $quote = NULL)
{
	$state = [$context, $quote];
	$this->context = $state;

	return $this;
}
355:
356:
357: 358: 359: 360: 361:
/**
 * Activates the delimiters of a named syntax; an empty name selects the
 * default syntax.
 *
 * @param  string  key of $syntaxes
 * @return static
 * @throws \InvalidArgumentException  when the syntax is not defined
 */
public function setSyntax($type)
{
	if (!$type) {
		$type = $this->defaultSyntax;
	}

	if (!isset($this->syntaxes[$type])) {
		throw new \InvalidArgumentException("Unknown syntax '$type'");
	}

	list($left, $right) = $this->syntaxes[$type];
	$this->setDelimiters($left, $right);
	return $this;
}
372:
373:
374: 375: 376: 377: 378: 379:
/**
 * Sets the macro delimiters. Both values are inserted into regular
 * expressions as they are.
 *
 * @param  string  left delimiter regex
 * @param  string  right delimiter regex
 * @return static
 */
public function setDelimiters($left, $right)
{
	$pair = [$left, $right];
	$this->delimiters = $pair;

	return $this;
}
385:
386:
387: 388: 389: 390: 391: 392:
/**
 * Breaks the content of a macro tag into its parts.
 *
 * When no name is present, the short name character is used as the name;
 * without one the name is '=' (or '' for a closing tag). The deprecated
 * leading '!' raises E_USER_DEPRECATED and appends the noescape modifier.
 *
 * @param  string  macro content without delimiters
 * @return array|FALSE  [name, args, modifiers, empty, closing], FALSE when the tag cannot be parsed
 * @throws RegexpException  when PCRE reports an error
 */
public function parseMacroTag($tag)
{
	// Pattern literal kept verbatim (free-spacing mode).
	$parsed = preg_match('~^
(?P<closing>/?)
(
(?P<name>\?|[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
(?P<noescape>!?)(?P<shortname>[=\~#%^&_]?) ## !expression, !=expression, ...
)(?P<args>(?:' . self::RE_STRING . '|[^\'"])*?)
(?P<modifiers>(?<!\|)\|[a-z](?P<modArgs>(?:' . self::RE_STRING . '|(?:\((?P>modArgs)\))|[^\'"/()]|/(?=.))*+))?
(?P<empty>/?\z)
()\z~isx', $tag, $match);

	if (!$parsed) {
		if (preg_last_error()) {
			throw new RegexpException(NULL, preg_last_error());
		}
		return FALSE;
	}

	$name = $match['name'];
	$modifiers = $match['modifiers'];

	if ($name === '') {
		if ($match['shortname']) {
			$name = $match['shortname'];
		} elseif ($match['closing']) {
			$name = '';
		} else {
			$name = '=';
		}

		if ($match['noescape']) {
			trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
			$modifiers .= '|noescape';
		}
	}

	return [
		$name,
		trim($match['args']),
		$modifiers,
		(bool) $match['empty'],
		(bool) $match['closing'],
	];
}
418:
419:
/**
 * Creates a token, appends it to the output and returns it.
 *
 * The line is the current line minus the newlines contained in the text
 * (leading whitespace excluded).
 *
 * @param  string  token type
 * @param  string  token text
 * @return Token
 */
private function addToken($type, $text)
{
	$token = new Token;
	$token->type = $type;
	$token->text = $text;

	$innerNewlines = substr_count(ltrim($text), "\n");
	$token->line = $this->getLine() - $innerNewlines;

	$this->output[] = $token;
	return $token;
}
428:
429:
/**
 * Returns the 1-based line number for the current offset, counting the
 * newlines in the input before position offset - 1.
 *
 * @return int
 */
public function getLine()
{
	if (!$this->offset) {
		return 1;
	}

	$consumed = substr($this->input, 0, $this->offset - 1);
	return substr_count($consumed, "\n") + 1;
}
436:
437:
438: 439: 440:
/**
 * Reacts to tokens that change the parser state: {syntax} macros,
 * the n:syntax attribute and its matching end tag, and {contentType}.
 * Syntax-switching tokens are turned into comments.
 *
 * @return void
 */
protected function filter(Token $token)
{
	$type = $token->type;
	$isMacro = $type === Token::MACRO_TAG;

	if ($isMacro && $token->name === '/syntax') {
		$this->setSyntax($this->defaultSyntax);
		$token->type = Token::COMMENT;
		return;
	}

	if ($isMacro && $token->name === 'syntax') {
		$this->setSyntax($token->value);
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_ATTRIBUTE_BEGIN && $token->name === 'n:syntax') {
		$this->setSyntax($token->value);
		$this->syntaxEndTag = $this->lastHtmlTag;
		$this->syntaxEndLevel = 1;
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
		// Same tag nested inside the n:syntax element.
		$this->syntaxEndLevel++;
		return;
	}

	// The counter is decremented only when the end tag matches; at zero the n:syntax element is closed.
	if ($type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag) && --$this->syntaxEndLevel === 0) {
		$this->setSyntax($this->defaultSyntax);
		return;
	}

	if ($isMacro && $token->name === 'contentType') {
		if (strpos($token->value, 'html') !== FALSE) {
			$contentType = self::CONTENT_HTML;
		} elseif (strpos($token->value, 'xml') !== FALSE) {
			$contentType = self::CONTENT_XML;
		} else {
			$contentType = self::CONTENT_TEXT;
		}
		$this->setContentType($contentType);
	}
}
473:
474: }
475: