<?php

/**
 * Latte template parser: declares the Latte\Parser class.
 */

namespace Latte;


/**
 * Splits a template source string into a flat list of Token objects.
 *
 * The parser is a small state machine: the current context (HTML text, inside
 * an HTML tag, inside a quoted attribute, inside an HTML comment, inside
 * script/style content, or raw text) selects which context*() method is used
 * to find the next token. Macro delimiters are configurable via setSyntax()
 * and setDelimiters().
 */
class Parser extends Object
{
	/** Regular expression fragment matching a single- or double-quoted string with backslash escapes. */
	const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

	/** Prefix of HTML attributes that are handled as macros (e.g. n:syntax). */
	const N_PREFIX = 'n:';

	/** @var string  name of the syntax (key of $syntaxes) activated at the start of parse() and by {/syntax} */
	public $defaultSyntax = 'latte';

	/** @var bool  when FALSE, using the {!...} shortcut triggers an E_USER_DEPRECATED notice */
	public $shortNoEscape = FALSE;

	/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
	public $syntaxes = array(
		'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
		'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
		'asp' => array('<%\s*', '\s*%>'),
		'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
		'off' => array('[^\x00-\xFF]', ''),
	);

	/** @var string  regexp fragment matching a macro comment or macro tag; built by setDelimiters() */
	private $macroRe;

	/** @var string  template source being parsed (BOM removed, CRLF normalized to LF) */
	private $input;

	/** @var Token[]  tokens produced so far */
	private $output;

	/** @var int  byte position in $input where the next match starts */
	private $offset;

	/** @var array  array(context name, quote character or NULL); see setContext() */
	private $context;

	/** @var string|NULL  lower-cased name of the last HTML tag seen, prefixed with "/" for closing tags */
	private $lastHtmlTag;

	/** @var string|NULL  value of $lastHtmlTag at which a syntax set via n:syntax is reset to the default */
	private $syntaxEndTag;

	/** @var bool  TRUE after setContentType() received a type containing "xml" (disables script/style CDATA handling) */
	private $xmlMode;

	/** Context names; each one maps to a private method named "context" . <name>. */
	const CONTEXT_HTML_TEXT = 'htmlText',
		CONTEXT_CDATA = 'cdata',
		CONTEXT_HTML_TAG = 'htmlTag',
		CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
		CONTEXT_RAW = 'raw',
		CONTEXT_HTML_COMMENT = 'htmlComment';


	/**
	 * Parses the template source into tokens.
	 *
	 * A leading UTF-8 BOM is dropped and "\r\n" is normalized to "\n" before
	 * tokenizing. Syntax, context and tag-tracking state are reset on every call.
	 *
	 * @param  string  template source
	 * @return Token[]
	 * @throws \InvalidArgumentException  if the input is not valid UTF-8
	 */
	public function parse($input)
	{
		if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // UTF-8 BOM
			$input = substr($input, 3);
		}
		// An empty pattern with the "u" modifier only succeeds on valid UTF-8 subjects.
		if (!preg_match('##u', $input)) {
			throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
		}
		$input = str_replace("\r\n", "\n", $input);
		$this->input = $input;
		$this->output = array();
		$this->offset = 0;

		$this->setSyntax($this->defaultSyntax);
		$this->setContext(self::CONTEXT_HTML_TEXT);
		$this->lastHtmlTag = $this->syntaxEndTag = NULL;

		while ($this->offset < strlen($input)) {
			// Dispatch to contextHtmlText(), contextHtmlTag(), ... (method names are case-insensitive).
			$matches = $this->{"context".$this->context[0]}();

			if (!$matches) { // nothing more to match in this context
				break;

			} elseif (!empty($matches['comment'])) { // macro comment
				$this->addToken(Token::COMMENT, $matches[0]);

			} elseif (!empty($matches['macro'])) { // macro tag
				$token = $this->addToken(Token::MACRO_TAG, $matches[0]);
				list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
			}

			$this->filter();
		}

		// Whatever is left after the last match is plain text.
		if ($this->offset < strlen($input)) {
			$this->addToken(Token::TEXT, substr($this->input, $this->offset));
		}
		return $this->output;
	}


	/**
	 * Handles context of plain HTML text: looks for the beginning of an HTML
	 * tag, the beginning of an HTML comment, or a macro.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextHtmlText()
	{
		$matches = $this->match('~
			(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
			<(?P<htmlcomment>!--(?!>))|  ## begin of HTML comment <!--, but not <!-->
			'.$this->macroRe.'           ## macro tag
		~xsi');

		if (!empty($matches['htmlcomment'])) {
			$this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_COMMENT);

		} elseif (!empty($matches['tag'])) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $matches['tag'];
			$token->closing = (bool) $matches['closing'];
			$this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles context of script/style content: looks for the closing tag of
	 * the element that opened it, or a macro.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextCData()
	{
		$matches = $this->match('~
			</(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ## end HTML tag </tag
			'.$this->macroRe.'              ## macro tag
		~xsi');

		if (!empty($matches['tag'])) {
			$token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
			$token->name = $this->lastHtmlTag;
			$token->closing = TRUE;
			$this->lastHtmlTag = '/' . $this->lastHtmlTag;
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles context inside an HTML tag: looks for the end of the tag, a
	 * macro, or the beginning of an attribute.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextHtmlTag()
	{
		$matches = $this->match('~
			(?P<end>\ ?/?>)([ \t]*\n)?|  ## end of HTML tag
			'.$this->macroRe.'|          ## macro tag
			\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
		~xsi');

		if (!empty($matches['end'])) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			// Content of <script> and <style> is not parsed as HTML unless in XML mode.
			$this->setContext(!$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE) ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);

		} elseif (isset($matches['attr']) && $matches['attr'] !== '') {
			$token = $this->addToken(Token::HTML_ATTRIBUTE, $matches[0]);
			$token->name = $matches['attr'];
			$token->value = isset($matches['value']) ? $matches['value'] : '';

			if ($token->value === '"' || $token->value === "'") { // quoted value follows
				if (strncmp($token->name, self::N_PREFIX, strlen(self::N_PREFIX)) === 0) {
					// n: attributes are consumed whole, up to the matching quote.
					$token->value = '';
					if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
						$token->value = $m[1];
						$token->text .= $m[0];
					}
				} else {
					// Ordinary attributes are tokenized piecewise so macros inside them are found.
					$this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $matches['value']);
				}
			}
		}
		return $matches;
	}


	/**
	 * Handles context inside a quoted HTML attribute value: looks for the
	 * closing quote (stored in $this->context[1]) or a macro.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextHtmlAttribute()
	{
		$matches = $this->match('~
			(?P<quote>'.$this->context[1].')|  ## end of HTML attribute
			'.$this->macroRe.'                 ## macro tag
		~xsi');

		if (!empty($matches['quote'])) {
			$this->addToken(Token::TEXT, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_TAG);
		}
		return $matches;
	}


	/**
	 * Handles context inside an HTML comment: looks for the closing "-->" or
	 * a macro. Invoked by parse() while the context is CONTEXT_HTML_COMMENT.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextHtmlComment()
	{
		$matches = $this->match('~
			(?P<htmlcomment>-->)|   ## end of HTML comment
			'.$this->macroRe.'      ## macro tag
		~xsi');

		if (!empty($matches['htmlcomment'])) {
			$this->addToken(Token::HTML_TAG_END, $matches[0]);
			$this->setContext(self::CONTEXT_HTML_TEXT);
		}
		return $matches;
	}


	/**
	 * Handles raw (non-HTML) context: only macros are recognized. Invoked by
	 * parse() while the context is CONTEXT_RAW.
	 * @return array  named matches, empty array when nothing matched
	 */
	private function contextRaw()
	{
		$matches = $this->match('~
			'.$this->macroRe.'     ## macro tag
		~xsi');
		return $matches;
	}


	/**
	 * Matches the next occurrence of the pattern at or after the current
	 * offset. Text skipped before the match is emitted as a TEXT token and the
	 * offset is moved behind the match.
	 *
	 * @param  string  regular expression
	 * @return array   matched groups as plain strings, empty array on no match
	 * @throws RegexpException  if PCRE reports an error
	 */
	private function match($re)
	{
		if (!preg_match($re, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->offset)) {
			if (preg_last_error()) {
				throw new RegexpException(NULL, preg_last_error());
			}
			return array();
		}

		$value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
		if ($value !== '') {
			$this->addToken(Token::TEXT, $value);
		}
		$this->offset = $matches[0][1] + strlen($matches[0][0]);
		// Strip the offsets added by PREG_OFFSET_CAPTURE, keep only the matched strings.
		foreach ($matches as $k => $v) {
			$matches[$k] = $v[0];
		}
		return $matches;
	}


	/**
	 * Switches the parser context according to a content type: anything
	 * containing "html" or "xml" is parsed as markup (the latter in XML mode),
	 * everything else as raw text.
	 *
	 * @param  string  content type
	 * @return self
	 */
	public function setContentType($type)
	{
		if (strpos($type, 'html') !== FALSE) {
			$this->xmlMode = FALSE;
			$this->setContext(self::CONTEXT_HTML_TEXT);
		} elseif (strpos($type, 'xml') !== FALSE) {
			$this->xmlMode = TRUE;
			$this->setContext(self::CONTEXT_HTML_TEXT);
		} else {
			$this->setContext(self::CONTEXT_RAW);
		}
		return $this;
	}


	/**
	 * Sets the current parser context.
	 *
	 * @param  string       one of the CONTEXT_* constants
	 * @param  string|NULL  quote character that ends an attribute value (used by CONTEXT_HTML_ATTRIBUTE)
	 * @return self
	 */
	public function setContext($context, $quote = NULL)
	{
		$this->context = array($context, $quote);
		return $this;
	}


	/**
	 * Activates one of the registered macro syntaxes.
	 *
	 * @param  string  key of $syntaxes; an empty value selects $defaultSyntax
	 * @return self
	 * @throws \InvalidArgumentException  if the syntax is not registered
	 */
	public function setSyntax($type)
	{
		$type = $type ?: $this->defaultSyntax;
		if (isset($this->syntaxes[$type])) {
			$this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
		} else {
			throw new \InvalidArgumentException("Unknown syntax '$type'");
		}
		return $this;
	}


	/**
	 * Builds the macro-matching regexp fragment from the given delimiters.
	 * The fragment is embedded in patterns that use the "x" (extended) modifier.
	 *
	 * @param  string  left delimiter as a regular expression
	 * @param  string  right delimiter as a regular expression
	 * @return self
	 */
	public function setDelimiters($left, $right)
	{
		$this->macroRe = '
			(?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
			' . $left . '
				(?P<macro>(?:
					' . self::RE_STRING . '|
					\{(?:' . self::RE_STRING . '|[^\'"{}])*+\}|
					[^\'"{}]
				)+?)
			' . $right . '
			(?P<rmargin>[ \t]*(?=\n))?
		';
		return $this;
	}


	/**
	 * Splits the inside of a macro tag into its parts.
	 *
	 * @param  string  macro tag content without delimiters
	 * @return array|FALSE  array(name, args, modifiers, is-empty flag), or FALSE when the tag does not match
	 * @throws RegexpException  if PCRE reports an error
	 */
	public function parseMacroTag($tag)
	{
		if (!preg_match('~^
			(
				(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))|   ## ?, name, /name, but not function( or class:: or namespace\
				(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
			)(?P<args>.*?)
			(?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"])*(?<!/))?
			(?P<empty>/?\z)
		()\z~isx', $tag, $match)) {
			if (preg_last_error()) {
				throw new RegexpException(NULL, preg_last_error());
			}
			return FALSE;
		}
		if ($match['name'] === '') {
			// No macro name: use the short name, or "=" when there is none.
			$match['name'] = $match['shortname'] ?: '=';
			if ($match['noescape']) {
				if (!$this->shortNoEscape) {
					trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
				}
				$match['modifiers'] .= '|noescape';
			}
		}
		return array($match['name'], trim($match['args']), $match['modifiers'], (bool) $match['empty']);
	}


	/**
	 * Appends a new token to the output.
	 *
	 * @param  string  token type (Token::* constant)
	 * @param  string  source text of the token
	 * @return Token
	 */
	private function addToken($type, $text)
	{
		$this->output[] = $token = new Token;
		$token->type = $type;
		$token->text = $text;
		$token->line = $this->getLine();
		return $token;
	}


	/**
	 * Returns the 1-based line number derived from the current offset.
	 *
	 * Counting is done on a substr() slice rather than via the length argument
	 * of substr_count(), so that an empty or not yet assigned input yields 1
	 * instead of an out-of-range error.
	 *
	 * @return int
	 */
	private function getLine()
	{
		$consumed = substr((string) $this->input, 0, max(1, $this->offset - 1));
		return substr_count((string) $consumed, "\n") + 1;
	}


	/**
	 * Post-processes the most recently added token: handles {syntax},
	 * {/syntax}, n:syntax and {contentType}, which change parser state.
	 * @return void
	 */
	protected function filter()
	{
		$token = end($this->output);
		if (!$token) { // nothing has been emitted yet
			return;
		}

		if ($token->type === Token::MACRO_TAG && $token->name === '/syntax') {
			$this->setSyntax($this->defaultSyntax);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'syntax') {
			$this->setSyntax($token->value);
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_ATTRIBUTE && $token->name === self::N_PREFIX . 'syntax') {
			$this->setSyntax($token->value);
			$this->syntaxEndTag = '/' . $this->lastHtmlTag;
			$token->type = Token::COMMENT;

		} elseif ($token->type === Token::HTML_TAG_END
			// Both properties start as NULL; without this guard the end of an HTML
			// comment seen before any tag would reset a syntax chosen via {syntax}.
			&& $this->syntaxEndTag !== NULL
			&& $this->lastHtmlTag === $this->syntaxEndTag
		) {
			$this->setSyntax($this->defaultSyntax);

		} elseif ($token->type === Token::MACRO_TAG && $token->name === 'contentType') {
			$this->setContentType($token->value);
		}
	}

}