1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Latte;
9:
10: use Nette,
11: Nette\Utils\Strings;
12:
13:
14: 15: 16: 17: 18:
/**
 * Latte template parser.
 *
 * Splits a template source string into a flat list of Token objects (plain
 * text, macro tags, template comments, HTML tag begin/end and HTML
 * attributes). The parser is a small state machine: the current "context"
 * selects which context*() method scans for the next token.
 */
class Parser extends Nette\Object
{
	/** regular expression fragment matching a single- or double-quoted string with backslash escapes */
	const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

	/** prefix of attributes whose quoted value is captured as a whole into the attribute token */
	const N_PREFIX = 'n:';

	/** @var string  key of $syntaxes used at the start of parse() and when a syntax block ends */
	public $defaultSyntax = 'latte';

	/** @var bool  when FALSE, the {!...} shortcut raises an E_USER_DEPRECATED notice */
	public $shortNoEscape = TRUE;

	/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
	public $syntaxes = array(
		'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
		'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
		'asp' => array('<%\s*', '\s*%>'),
		'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
		'off' => array('[^\x00-\xFF]', ''),
	);

	/** @var string  regexp fragment matching a template comment or macro tag, built by setDelimiters() */
	private $macroRe;

	/** @var string  source being parsed (BOM stripped, CRLF normalized to LF) */
	private $input;

	/** @var Token[]  tokens produced so far */
	private $output;

	/** @var int  byte position in $input where the next match starts */
	private $offset;

	/** @var array  array(context name, quote character or NULL) */
	private $context;

	/** @var string  lower-cased name of the last HTML tag seen, prefixed with '/' for closing tags */
	private $lastHtmlTag;

	/** @var string  value of $lastHtmlTag that ends the syntax set by an n:syntax attribute */
	private $syntaxEndTag;

	/** @var bool  TRUE after {contentType ...xml...}; disables CDATA handling of script/style */
	private $xmlMode;

	/**
	 * Parser contexts. The value is appended to 'context' to obtain the name of
	 * the scanning method (PHP method names are case-insensitive).
	 */
	const CONTEXT_HTML_TEXT = 'htmlText',
		CONTEXT_CDATA = 'cdata',
		CONTEXT_HTML_TAG = 'htmlTag',
		CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
		CONTEXT_RAW = 'raw',
		CONTEXT_HTML_COMMENT = 'htmlComment';


	/**
	 * Parses a template source into tokens.
	 * @param  string  template source in UTF-8
	 * @return Token[]
	 * @throws Nette\InvalidArgumentException  if the input is not valid UTF-8
	 */
	public function parse($input)
	{
		if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // strip UTF-8 BOM
			$input = substr($input, 3);
		}
		if (!Strings::checkEncoding($input)) {
			throw new Nette\InvalidArgumentException('Template is not valid UTF-8 stream.');
		}
		$input = str_replace("\r\n", "\n", $input);
		$this->input = $input;
		$this->output = array();
		$this->offset = 0;
		// reset per-template state so that a previous parse() cannot leak into this one
		$this->xmlMode = FALSE;

		$this->setSyntax($this->defaultSyntax);
		$this->setContext(self::CONTEXT_HTML_TEXT);
		$this->lastHtmlTag = $this->syntaxEndTag = NULL;

		$length = strlen($input);
		while ($this->offset < $length) {
			// dispatch to contextHtmlText(), contextCData(), contextHtmlTag(), ...
			$matches = $this->{"context".$this->context[0]}();

			if (!$matches) { // nothing more to recognize, the rest is plain text
				break;

			} elseif ($this->isFilled($matches, 'comment')) {
				$this->addToken(Token::COMMENT, $matches[0]);

			} elseif ($this->isFilled($matches, 'macro')) {
				$token = $this->addToken(Token::MACRO_TAG, $matches[0]);
				list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
			}

			$this->filter();
		}

		if ($this->offset < $length) {
			$this->addToken(Token::TEXT, substr($this->input, $this->offset));
		}
		return $this->output;
	}


	/**
	 * Handles CONTEXT_HTML_TEXT: looks for the beginning of an HTML tag,
	 * the beginning of an HTML comment or a macro tag.
	 * @return array|NULL  result of match()
	 */
	private function contextHtmlText()
	{
		$matches = $this->match('~
			(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ##  begin of HTML tag <tag </tag - ignores <!DOCTYPE
			<(?P<htmlcomment>!--(?!>))|  ##  begin of HTML comment <!--, but not <!-->
			'.$this->macroRe.'     ##  macro tag
		~xsi');

		if ($this->isFilled($matches, 'htmlcomment')) {
			$this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_COMMENT);

		} elseif ($this->isFilled($matches, 'tag')) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $matches['tag'];
			$token->closing = (bool) $matches['closing'];
			$this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles CONTEXT_CDATA: looks for the closing tag of the last HTML tag
	 * or a macro tag.
	 * @return array|NULL  result of match()
	 */
	private function contextCData()
	{
		$matches = $this->match('~
			</(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ##  end HTML tag </tag
			'.$this->macroRe.'              ##  macro tag
		~xsi');

		if ($this->isFilled($matches, 'tag')) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $this->lastHtmlTag;
			$token->closing = TRUE;
			$this->lastHtmlTag = '/' . $this->lastHtmlTag;
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles CONTEXT_HTML_TAG: looks for the end of the tag, a macro tag
	 * or the beginning of an HTML attribute.
	 * @return array|NULL  result of match()
	 */
	private function contextHtmlTag()
	{
		$matches = $this->match('~
			(?P<end>\ ?/?>)([ \t]*\n)?|  ##  end of HTML tag
			'.$this->macroRe.'|          ##  macro tag
			\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
		~xsi');

		if ($this->isFilled($matches, 'end')) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			// outside XML mode the content of <script> and <style> is scanned as CDATA
			$this->setContext(!$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE) ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);

		} elseif ($this->isFilled($matches, 'attr')) {
			$token = $this->addToken(Token::HTML_ATTRIBUTE, $matches[0]);
			$token->name = $matches['attr'];
			$token->value = isset($matches['value']) ? $matches['value'] : '';

			if ($token->value === '"' || $token->value === "'") { // quoted value follows
				if (Strings::startsWith($token->name, self::N_PREFIX)) {
					// n: attribute: swallow everything up to the closing quote into this token
					$token->value = '';
					if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
						$token->value = $m[1];
						$token->text .= $m[0];
					}
				} else {
					$this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $matches['value']);
				}
			}
		}
		return $matches;
	}


	/**
	 * Handles CONTEXT_HTML_ATTRIBUTE: looks for the closing quote of the
	 * attribute value or a macro tag.
	 * @return array|NULL  result of match()
	 */
	private function contextHtmlAttribute()
	{
		$matches = $this->match('~
			(?P<quote>'.$this->context[1].')|  ##  end of HTML attribute
			'.$this->macroRe.'                 ##  macro tag
		~xsi');

		if ($this->isFilled($matches, 'quote')) {
			$this->addToken(Token::TEXT, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles CONTEXT_HTML_COMMENT: looks for the end of the HTML comment
	 * or a macro tag.
	 * @return array|NULL  result of match()
	 */
	private function contextHtmlComment()
	{
		$matches = $this->match('~
			(?P<htmlcomment>-->)|   ##  end of HTML comment
			'.$this->macroRe.'      ##  macro tag
		~xsi');

		if ($this->isFilled($matches, 'htmlcomment')) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_TEXT);
		}
		return $matches;
	}


	/**
	 * Handles CONTEXT_RAW: looks for macro tags only.
	 * @return array|NULL  result of match()
	 */
	private function contextRaw()
	{
		$matches = $this->match('~
			'.$this->macroRe.'     ##  macro tag
		~xsi');
		return $matches;
	}


	/**
	 * Matches the regular expression against the input from the current offset.
	 * Text skipped before the match is emitted as a TEXT token and the offset
	 * is moved just behind the match.
	 * @param  string  regular expression
	 * @return array|NULL  captured groups as plain strings, or the falsy result of Strings::match()
	 */
	private function match($re)
	{
		if ($matches = Strings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset)) {
			$value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
			if ($value !== '') {
				$this->addToken(Token::TEXT, $value);
			}
			$this->offset = $matches[0][1] + strlen($matches[0][0]);
			foreach ($matches as $k => $v) { // drop the offsets, keep only the captured text
				$matches[$k] = $v[0];
			}
		}
		return $matches;
	}


	/**
	 * Tells whether a named group was captured with a non-empty value.
	 * Unlike empty(), the captured string '0' counts as a value.
	 * @param  array|NULL  result of match()
	 * @param  string  group name
	 * @return bool
	 */
	private function isFilled($matches, $key)
	{
		return isset($matches[$key]) && $matches[$key] !== '';
	}


	/**
	 * Switches the parser context.
	 * @param  string  one of the CONTEXT_* constants
	 * @param  string  quote character delimiting the attribute value, if any
	 * @return Parser  provides a fluent interface
	 */
	public function setContext($context, $quote = NULL)
	{
		$this->context = array($context, $quote);
		return $this;
	}


	/**
	 * Changes macro tag delimiters to those of a named syntax.
	 * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
	 * @return Parser  provides a fluent interface
	 * @throws Nette\InvalidArgumentException  if the syntax is not defined
	 */
	public function setSyntax($type)
	{
		$type = $type ?: $this->defaultSyntax;
		if (isset($this->syntaxes[$type])) {
			$this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
		} else {
			throw new Nette\InvalidArgumentException("Unknown syntax '$type'");
		}
		return $this;
	}


	/**
	 * Changes macro tag delimiters.
	 * @param  string  left delimiter as a regular expression fragment
	 * @param  string  right delimiter as a regular expression fragment
	 * @return Parser  provides a fluent interface
	 */
	public function setDelimiters($left, $right)
	{
		// the fragment is only ever embedded into patterns using the x modifier
		$this->macroRe = '
			(?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
			' . $left . '
				(?P<macro>(?:
					' . self::RE_STRING . '|
					\{(?:' . self::RE_STRING . '|[^\'"{}])*+\}|
					[^\'"{}]
				)+?)
			' . $right . '
			(?P<rmargin>[ \t]*(?=\n))?
		';
		return $this;
	}


	/**
	 * Parses the content of a macro tag (the text between the delimiters).
	 * @param  string  macro tag content
	 * @return array|FALSE  array(name, arguments, modifiers, is-empty flag) or FALSE when the content cannot be parsed
	 */
	public function parseMacroTag($tag)
	{
		$match = Strings::match($tag, '~^
			(
				(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))|   ## ?, name, /name, but not function( or class:: or namespace\
				(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
			)(?P<args>.*?)
			(?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"])*(?<!/))?
			(?P<empty>/?\z)
		()\z~isx');

		if (!$match) {
			return FALSE;
		}
		if ($match['name'] === '') { // short form: {$var}, {=expr}, {!expr}, ...
			$match['name'] = $match['shortname'] ?: '=';
			if ($match['noescape']) {
				if (!$this->shortNoEscape) {
					trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
				}
				$match['modifiers'] .= '|noescape';
			}
		}
		return array($match['name'], trim($match['args']), $match['modifiers'], (bool) $match['empty']);
	}


	/**
	 * Appends a new token to the output.
	 * @param  string  token type (Token::* constant)
	 * @param  string  source text of the token
	 * @return Token
	 */
	private function addToken($type, $text)
	{
		$this->output[] = $token = new Token;
		$token->type = $type;
		$token->text = $text;
		$token->line = $this->getLine();
		return $token;
	}


	/**
	 * Returns the 1-based line number derived from the current offset.
	 * @return int
	 */
	private function getLine()
	{
		if ((string) $this->input === '') { // nothing to count in; avoids an out-of-range length below
			return 1;
		}
		return substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
	}


	/**
	 * Processes parser-level directives on the most recently added token:
	 * {syntax}, {/syntax}, n:syntax and {contentType}.
	 * @return void
	 */
	protected function filter()
	{
		$token = end($this->output);
		if (!$token) { // no token has been produced yet
			return;
		}

		if ($token->type === Token::MACRO_TAG && $token->name === '/syntax') {
			$this->setSyntax($this->defaultSyntax);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'syntax') {
			$this->setSyntax($token->value);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_ATTRIBUTE && $token->name === self::N_PREFIX . 'syntax') {
			$this->setSyntax($token->value);
			$this->syntaxEndTag = '/' . $this->lastHtmlTag;
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_TAG_END && $this->syntaxEndTag !== NULL && $this->lastHtmlTag === $this->syntaxEndTag) {
			// end of the element that carried n:syntax; the NULL check prevents
			// a tag/comment end from resetting the syntax when no n:syntax is active
			$this->setSyntax($this->defaultSyntax);
			$this->syntaxEndTag = NULL;

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'contentType') {
			if (preg_match('#html|xml#', $token->value, $m)) {
				$this->xmlMode = $m[0] === 'xml';
				$this->setContext(self::CONTEXT_HTML_TEXT);
			} else {
				$this->setContext(self::CONTEXT_RAW);
			}
		}
	}

}
404: