1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Nette\Latte;
13:
14: use Nette,
15: Nette\Utils\Strings;
16:
17:
18:
19: 20: 21: 22: 23:
/**
 * Latte template tokenizer.
 *
 * Splits a template source into a flat list of Token objects (plain text, HTML tag
 * boundaries, HTML attributes, comments and macro tags). While scanning it keeps track
 * of the current HTML context so that macro tags are recognised inside text, tags,
 * attribute values, HTML comments and script/style bodies.
 */
class Parser extends Nette\Object
{
	/** regular expression for a single- or double-quoted string with backslash escapes */
	const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

	/** prefix of HTML attributes whose quoted value is read as a whole (e.g. n:syntax) */
	const N_PREFIX = 'n:';

	/** @var string  syntax selected when parsing starts and when a syntax scope ends */
	public $defaultSyntax = 'latte';

	/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
	public $syntaxes = array(
		'latte' => array('\\{(?![\\s\'"{}])', '\\}'), // {...}
		'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'), // {{...}}
		'asp' => array('<%\s*', '\s*%>'), // <%...%>
		'python' => array('\\{[{%]\s*', '\s*[%}]\\}'), // {% ... %} or {{ ... }}
		'off' => array('[^\x00-\xFF]', ''), // left delimiter is a class that excludes every byte value
	);

	/** @var string  regexp fragment matching a comment or a macro tag in the current syntax */
	private $macroRe;

	/** @var string  normalized template source being parsed */
	private $input;

	/** @var Token[]  tokens produced so far */
	private $output;

	/** @var int  byte position in $input where the next match starts */
	private $offset;

	/** @var array  array(context name, quote character or NULL) */
	private $context;

	/** @var string  lower-cased name of the last HTML tag seen, prefixed by '/' for closing tags */
	private $lastHtmlTag;

	/** @var string  value of $lastHtmlTag that terminates the syntax set by n:syntax, or NULL */
	private $syntaxEndTag;

	/** @var bool  when TRUE, contents of script and style elements are not treated as CDATA */
	private $xmlMode;

	/**
	 * Context names. The value is appended to "context" to obtain the name of the
	 * handler method (PHP method names are case-insensitive).
	 */
	const CONTEXT_TEXT = 'text',
		CONTEXT_CDATA = 'cdata',
		CONTEXT_TAG = 'tag',
		CONTEXT_ATTRIBUTE = 'attribute',
		CONTEXT_NONE = 'none',
		CONTEXT_COMMENT = 'comment';



	/**
	 * Tokenizes a template.
	 * @param  string  template source in UTF-8
	 * @return Token[]
	 * @throws Nette\InvalidArgumentException  if the input is not valid UTF-8 or the default syntax is unknown
	 */
	public function parse($input)
	{
		if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // strip UTF-8 BOM
			$input = substr($input, 3);
		}
		if (!Strings::checkEncoding($input)) {
			throw new Nette\InvalidArgumentException('Template is not valid UTF-8 stream.');
		}
		$input = str_replace("\r\n", "\n", $input);
		$this->input = $input;
		$this->output = array();
		$this->offset = 0;

		$this->setSyntax($this->defaultSyntax);
		$this->setContext(self::CONTEXT_TEXT);
		$this->lastHtmlTag = $this->syntaxEndTag = NULL;
		$this->xmlMode = (bool) preg_match('#^<\?xml\s#m', $input);

		$length = strlen($input); // the input is not modified inside the loop
		while ($this->offset < $length) {
			// dispatch to contextText(), contextTag(), ... according to the current context
			$matches = $this->{"context".$this->context[0]}();

			if (!$matches) { // nothing more to recognise in this context
				break;

			} elseif (!empty($matches['comment'])) {
				$this->addToken(Token::COMMENT, $matches[0]);

			} elseif (!empty($matches['macro'])) {
				$token = $this->addToken(Token::MACRO_TAG, $matches[0]);
				list($token->name, $token->value, $token->modifiers) = $this->parseMacroTag($matches['macro']);
			}

			$this->filter();
		}

		if ($this->offset < $length) { // the unmatched rest is plain text
			$this->addToken(Token::TEXT, substr($this->input, $this->offset));
		}
		return $this->output;
	}



	/**
	 * Handles CONTEXT_TEXT: looks for the beginning of an HTML tag, an HTML comment or a macro.
	 * @return array|NULL  matched groups
	 */
	private function contextText()
	{
		$matches = $this->match('~
			(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
			<(?P<htmlcomment>!--)| ## begin of HTML comment <!--
			'.$this->macroRe.' ## macro tag
		~xsi');

		if (!empty($matches['htmlcomment'])) {
			$this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$this->setContext(self::CONTEXT_COMMENT);

		} elseif (!empty($matches['tag'])) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $matches['tag'];
			$token->closing = (bool) $matches['closing'];
			// remembered in lower case, closing tags keep their leading slash
			$this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_CDATA (body of script/style): looks for the matching end tag or a macro.
	 * @return array|NULL  matched groups
	 */
	private function contextCData()
	{
		$matches = $this->match('~
			</(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ## end HTML tag </tag
			'.$this->macroRe.' ## macro tag
		~xsi');

		if (!empty($matches['tag'])) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $this->lastHtmlTag;
			$token->closing = TRUE;
			$this->lastHtmlTag = '/' . $this->lastHtmlTag;
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_TAG: looks for the end of the tag, a macro or an attribute.
	 * @return array|NULL  matched groups
	 */
	private function contextTag()
	{
		$matches = $this->match('~
			(?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
			'.$this->macroRe.'| ## macro tag
			\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
		~xsi');

		if (!empty($matches['end'])) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			// outside XML mode the content of <script> and <style> is scanned as CDATA
			$isCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
			$this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

		} elseif (isset($matches['attr']) && $matches['attr'] !== '') {
			$token = $this->addToken(Token::HTML_ATTRIBUTE, $matches[0]);
			$token->name = $matches['attr'];
			$token->value = isset($matches['value']) ? $matches['value'] : '';

			if ($token->value === '"' || $token->value === "'") { // quoted value follows
				if (Strings::startsWith($token->name, self::N_PREFIX)) {
					// n: attributes: consume everything up to the closing quote into this token
					$token->value = '';
					if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
						$token->value = $m[1];
						$token->text .= $m[0];
					}
				} else {
					// ordinary attributes: the value is tokenized in attribute context
					$this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
				}
			}
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_ATTRIBUTE: looks for the closing quote of the attribute value or a macro.
	 * @return array|NULL  matched groups
	 */
	private function contextAttribute()
	{
		$matches = $this->match('~
			(?P<quote>'.$this->context[1].')| ## end of HTML attribute
			'.$this->macroRe.' ## macro tag
		~xsi');

		if (!empty($matches['quote'])) {
			$this->addToken(Token::TEXT, $matches[0]);
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_COMMENT: looks for the end of the HTML comment or a macro.
	 * @return array|NULL  matched groups
	 */
	private function contextComment()
	{
		$matches = $this->match('~
			(?P<htmlcomment>--\s*>)| ## end of HTML comment
			'.$this->macroRe.' ## macro tag
		~xsi');

		if (!empty($matches['htmlcomment'])) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			$this->setContext(self::CONTEXT_TEXT);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_NONE (non-HTML content): looks for macros only.
	 * @return array|NULL  matched groups
	 */
	private function contextNone()
	{
		return $this->match('~
			'.$this->macroRe.' ## macro tag
		~xsi');
	}



	/**
	 * Matches the pattern against the input starting at the current offset. On success the
	 * text skipped before the match is emitted as a TEXT token, the offset is moved behind
	 * the match and the captured groups are returned as plain strings.
	 * @param  string  regular expression
	 * @return array|NULL  captured groups (without offsets)
	 */
	private function match($re)
	{
		$matches = Strings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset);
		if ($matches) {
			$start = $matches[0][1];
			$skipped = substr($this->input, $this->offset, $start - $this->offset);
			if ($skipped !== '') {
				$this->addToken(Token::TEXT, $skipped);
			}
			$this->offset = $start + strlen($matches[0][0]);
			foreach ($matches as $key => $capture) { // drop the offsets, keep the strings
				$matches[$key] = $capture[0];
			}
		}
		return $matches;
	}



	/**
	 * Switches the parsing context.
	 * @param  string  one of the CONTEXT_* constants
	 * @param  string  quote character delimiting the attribute value (CONTEXT_ATTRIBUTE only)
	 * @return Parser  provides a fluent interface
	 */
	public function setContext($context, $quote = NULL)
	{
		$this->context = array($context, $quote);
		return $this;
	}



	/**
	 * Selects the macro delimiters by syntax name.
	 * @param  string  key of $syntaxes; an empty value selects $defaultSyntax
	 * @return Parser  provides a fluent interface
	 * @throws Nette\InvalidArgumentException  if the syntax is not defined
	 */
	public function setSyntax($type)
	{
		$type = $type ?: $this->defaultSyntax;
		if (!isset($this->syntaxes[$type])) {
			throw new Nette\InvalidArgumentException("Unknown syntax '$type'");
		}
		list($left, $right) = $this->syntaxes[$type];
		$this->setDelimiters($left, $right);
		return $this;
	}



	/**
	 * Builds the regexp fragment recognising comments and macro tags for the given delimiters.
	 * The fragment is meant to be embedded in patterns compiled with the x modifier.
	 * @param  string  left delimiter (regular expression)
	 * @param  string  right delimiter (regular expression)
	 * @return Parser  provides a fluent interface
	 */
	public function setDelimiters($left, $right)
	{
		$this->macroRe = '
			(?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
			' . $left . '
				(?P<macro>(?:' . self::RE_STRING . '|\{
						(?P<inner>' . self::RE_STRING . '|\{(?P>inner)\}|[^\'"{}])*+
				\}|[^\'"{}])+?)
			' . $right . '
			(?P<rmargin>[ \t]*(?=\n))?
		';
		return $this;
	}



	/**
	 * Splits the content of a macro tag into name, arguments and modifiers.
	 * For tags without a name ({$var}, {=expr}, {!expr}, ...) the short name, or '=' when
	 * there is none, becomes the name and the '|escape' modifier is appended unless the
	 * tag starts with '!' or the short name starts with '/'.
	 * @param  string  content between the delimiters
	 * @return array|FALSE  array(name, arguments, modifiers) or FALSE when the tag is not recognised
	 */
	public function parseMacroTag($tag)
	{
		// the trailing empty group () keeps all preceding groups present in the result
		$match = Strings::match($tag, '~^
			(
				(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\())| ## ?, name, /name, but not function( or class::
				(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
			)(?P<args>.*?)
			(?P<modifiers>\|[a-z](?:'.Parser::RE_STRING.'|[^\'"])*)?
			()$~isx');

		if (!$match) {
			return FALSE;
		}
		if ($match['name'] === '') {
			$match['name'] = $match['shortname'] ?: '=';
			if (!$match['noescape'] && substr($match['shortname'], 0, 1) !== '/') {
				$match['modifiers'] .= '|escape';
			}
		}
		return array($match['name'], trim($match['args']), $match['modifiers']);
	}



	/**
	 * Creates a token, appends it to the output and returns it.
	 * @param  string  token type (Token::* constant)
	 * @param  string  source text of the token
	 * @return Token
	 */
	private function addToken($type, $text)
	{
		$this->output[] = $token = new Token;
		$token->type = $type;
		$token->text = $text;
		// line number = newlines found before the current offset, counted from 1
		$token->line = substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
		return $token;
	}



	/**
	 * Processes parser-level instructions carried by the last token: {syntax}, {/syntax},
	 * the n:syntax attribute together with its closing tag, and {contentType}.
	 * @return void
	 */
	protected function filter()
	{
		$token = end($this->output);
		if (!$token) { // nothing has been tokenized yet
			return;
		}

		if ($token->type === Token::MACRO_TAG && $token->name === '/syntax') {
			$this->setSyntax($this->defaultSyntax);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'syntax') {
			$this->setSyntax($token->value);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
			$this->setSyntax($token->value);
			$this->syntaxEndTag = '/' . $this->lastHtmlTag; // the syntax ends with the closing tag
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_TAG_END
			&& $this->syntaxEndTag !== NULL // without an open n:syntax scope both values may be NULL (e.g. at "-->")
			&& $this->lastHtmlTag === $this->syntaxEndTag
		) {
			$this->setSyntax($this->defaultSyntax);
			$this->syntaxEndTag = NULL; // scope closed; later tags of the same name must not reset the syntax again

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'contentType') {
			if (preg_match('#html|xml#', $token->value, $m)) {
				$this->xmlMode = $m[0] === 'xml';
				$this->setContext(self::CONTEXT_TEXT);
			} else {
				$this->setContext(self::CONTEXT_NONE);
			}
		}
	}

}
407: