1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Latte template tokenizer.
 *
 * Splits a template source into a flat list of NLatteToken objects (plain
 * text, HTML tag begin/end, HTML attributes, macro tags and comments) while
 * tracking the HTML context the scanner is currently in.
 *
 * All scanning patterns are compiled with the PCRE "x" flag, so whitespace
 * used to lay the patterns out is insignificant and "##" starts a pattern
 * comment that runs to the end of the line.
 */
class NParser extends NObject
{
    /** Regular expression fragment matching a single- or double-quoted string with backslash escapes. */
    const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

    /** Prefix of HTML attributes whose quoted value is read in one piece by contextTag(). */
    const N_PREFIX = 'n:';

    /** @var string key into $syntaxes that is activated at the start of every parse() */
    public $defaultSyntax = 'latte';

    /** @var array syntax name => array(left delimiter regexp, right delimiter regexp) */
    public $syntaxes = array(
        'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
        'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
        'asp' => array('<%\s*', '\s*%>'),
        'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
        'off' => array('[^\x00-\xFF]', ''),
    );

    /** @var string regexp fragment matching a macro tag or macro comment; built by setDelimiters() */
    private $macroRe;

    /** @var string template source being scanned */
    private $input;

    /** @var array list of NLatteToken produced so far */
    private $output;

    /** @var int byte offset of the scanner in $input */
    private $offset;

    /** @var array array(context name, quote character or NULL); see setContext() */
    private $context;

    /** @var string lower-cased name of the last HTML tag seen, prefixed with "/" for a closing tag */
    private $lastHtmlTag;

    /** @var string value of $lastHtmlTag at which an n:syntax attribute stops applying; used by filter() */
    private $syntaxEndTag;

    /** @var bool TRUE after {contentType ...xml...}; disables CDATA handling of script/style */
    private $xmlMode;

    /** Scanner states; each one is handled by the method named "context" . <value>. */
    const CONTEXT_TEXT = 'text',
        CONTEXT_CDATA = 'cdata',
        CONTEXT_TAG = 'tag',
        CONTEXT_ATTRIBUTE = 'attribute',
        CONTEXT_NONE = 'none',
        CONTEXT_COMMENT = 'comment';


    /**
     * Tokenizes the whole template.
     *
     * @param  string  template source
     * @return array   list of NLatteToken
     * @throws InvalidArgumentException when NStrings::checkEncoding() rejects the input
     */
    public function parse($input)
    {
        // Drop a leading UTF-8 byte-order mark. substr() yields FALSE on PHP < 8
        // when nothing follows the BOM, hence the cast back to string.
        if (substr($input, 0, 3) === "\xEF\xBB\xBF") {
            $input = (string) substr($input, 3);
        }
        if (!NStrings::checkEncoding($input)) {
            throw new InvalidArgumentException('Template is not valid UTF-8 stream.');
        }
        $input = str_replace("\r\n", "\n", $input);
        $this->input = $input;
        $this->output = array();
        $this->offset = 0;

        // Reset all per-template state so a reused parser starts clean.
        $this->setSyntax($this->defaultSyntax);
        $this->setContext(self::CONTEXT_TEXT);
        $this->lastHtmlTag = $this->syntaxEndTag = NULL;
        $this->xmlMode = FALSE;

        $length = strlen($input);
        while ($this->offset < $length) {
            // Dispatch to contextText(), contextTag(), ... (method names are case-insensitive).
            $matches = $this->{"context" . $this->context[0]}();

            if (!$matches) {
                break;

            } elseif (!empty($matches['comment'])) {
                $this->addToken(NLatteToken::COMMENT, $matches[0]);

            } elseif (isset($matches['macro']) && $matches['macro'] !== '') {
                // Explicit comparison instead of empty(): the macro body "0" is a valid expression.
                $token = $this->addToken(NLatteToken::MACRO_TAG, $matches[0]);
                // parseMacroTag() may return FALSE; list() then leaves all three fields NULL.
                list($token->name, $token->value, $token->modifiers) = $this->parseMacroTag($matches['macro']);
            }

            $this->filter();
        }

        // Whatever was not consumed by any pattern is trailing plain text.
        if ($this->offset < $length) {
            $this->addToken(NLatteToken::TEXT, substr($this->input, $this->offset));
        }
        return $this->output;
    }


    /**
     * Handles CONTEXT_TEXT: looks for the start of an HTML tag, the start of
     * an HTML comment, or a macro.
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextText()
    {
        $matches = $this->match('~
            (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
            <(?P<htmlcomment>!--)| ## begin of HTML comment <!--
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $this->setContext(self::CONTEXT_COMMENT);

        } elseif (isset($matches['tag']) && $matches['tag'] !== '') {
            // Explicit comparison instead of empty(): a tag name of "0" must still produce a token.
            $token = $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $matches['tag'];
            $token->closing = (bool) $matches['closing'];
            $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_CDATA: looks for the closing tag of the element named
     * by $lastHtmlTag, or a macro.
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextCData()
    {
        $matches = $this->match('~
            </(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ## end HTML tag </tag
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['tag'])) {
            $token = $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $this->lastHtmlTag;
            $token->closing = TRUE;
            $this->lastHtmlTag = '/' . $this->lastHtmlTag;
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_TAG: looks for the end of the tag, a macro, or an
     * attribute (optionally with a value or an opening quote).
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextTag()
    {
        $matches = $this->match('~
            (?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
            '.$this->macroRe.'| ## macro tag
            \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
        ~xsi');

        if (!empty($matches['end'])) {
            $this->addToken(NLatteToken::HTML_TAG_END, $matches[0]);
            // Outside XML mode the content of <script> and <style> is scanned as CDATA.
            $isCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
            $this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

        } elseif (isset($matches['attr']) && $matches['attr'] !== '') {
            $token = $this->addToken(NLatteToken::HTML_ATTRIBUTE, $matches[0]);
            $token->name = $matches['attr'];
            $token->value = isset($matches['value']) ? $matches['value'] : '';

            // A bare quote as value means only the opening quote has been consumed so far.
            if ($token->value === '"' || $token->value === "'") {
                if (NStrings::startsWith($token->name, self::N_PREFIX)) {
                    // n: attributes: read everything up to the matching quote into this token.
                    $token->value = '';
                    if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
                        $token->value = $m[1];
                        $token->text .= $m[0];
                    }
                } else {
                    // Other attributes: scan the value separately so macros inside it are found.
                    $this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
                }
            }
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_ATTRIBUTE: looks for the closing quote (stored in
     * $context[1]) or a macro.
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextAttribute()
    {
        $matches = $this->match('~
            (?P<quote>'.$this->context[1].')| ## end of HTML attribute
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['quote'])) {
            $this->addToken(NLatteToken::TEXT, $matches[0]);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_COMMENT: looks for the end of an HTML comment or a macro.
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextComment()
    {
        $matches = $this->match('~
            (?P<htmlcomment>--\s*>)| ## end of HTML comment
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(NLatteToken::HTML_TAG_END, $matches[0]);
            $this->setContext(self::CONTEXT_TEXT);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_NONE: only macros are recognized.
     *
     * @return array|NULL  named matches, or the falsy result of match() when nothing was found
     */
    private function contextNone()
    {
        $matches = $this->match('~
            '.$this->macroRe.' ## macro tag
        ~xsi');
        return $matches;
    }


    /**
     * Searches $input from the current offset. On success the text skipped
     * before the match is emitted as a TEXT token, the offset is moved just
     * past the match, and every captured group is reduced to its string.
     *
     * @param  string  complete regular expression including delimiters and flags
     * @return array|NULL  captured groups, or the falsy result of NStrings::match()
     */
    private function match($re)
    {
        $matches = NStrings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset);
        if ($matches) {
            $start = $matches[0][1];
            $skipped = substr($this->input, $this->offset, $start - $this->offset);
            if ($skipped !== '') {
                $this->addToken(NLatteToken::TEXT, $skipped);
            }
            $this->offset = $start + strlen($matches[0][0]);
            // PREG_OFFSET_CAPTURE yields array(text, offset) pairs; callers only need the text.
            foreach ($matches as $key => $pair) {
                $matches[$key] = $pair[0];
            }
        }
        return $matches;
    }


    /**
     * Switches the scanner state.
     *
     * @param  string  one of the CONTEXT_* constants
     * @param  string  quote character that terminates CONTEXT_ATTRIBUTE
     * @return NParser  provides a fluent interface
     */
    public function setContext($context, $quote = NULL)
    {
        $this->context = array($context, $quote);
        return $this;
    }


    /**
     * Activates one of the delimiter pairs registered in $syntaxes.
     *
     * @param  string  syntax name; a falsy value selects $defaultSyntax
     * @return NParser  provides a fluent interface
     * @throws InvalidArgumentException when the name is not a key of $syntaxes
     */
    public function setSyntax($type)
    {
        if (!$type) {
            $type = $this->defaultSyntax;
        }
        if (!isset($this->syntaxes[$type])) {
            throw new InvalidArgumentException("Unknown syntax '$type'");
        }
        $this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
        return $this;
    }


    /**
     * Builds the macro-matching regexp fragment for the given delimiters.
     *
     * The fragment defines the named groups "comment", "macro", "inner" and
     * "rmargin" and is meant to be embedded in a pattern using the "x" flag.
     *
     * @param  string  left delimiter as a regular expression
     * @param  string  right delimiter as a regular expression
     * @return NParser  provides a fluent interface
     */
    public function setDelimiters($left, $right)
    {
        $this->macroRe = '
            (?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
            ' . $left . '
                (?P<macro>(?:' . self::RE_STRING . '|\{
                        (?P<inner>' . self::RE_STRING . '|\{(?P>inner)\}|[^\'"{}])*+
                \}|[^\'"{}])+?)
            ' . $right . '
            (?P<rmargin>[ \t]*(?=\n))?
        ';
        return $this;
    }


    /**
     * Splits the inside of a macro tag into name, arguments and modifiers.
     *
     * When the tag has no explicit name, the short name (or "=" if there is
     * none) is used, and "|escape" is appended to the modifiers unless the
     * tag starts with "!", carries a "|noescape" modifier, or is a closing
     * short tag.
     *
     * @param  string  macro tag content without the delimiters
     * @return array|FALSE  array(name, trimmed arguments, modifiers), or FALSE when the tag does not match
     */
    public function parseMacroTag($tag)
    {
        $match = NStrings::match($tag, '~^
            (
                (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
                (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
            )(?P<args>.*?)
            (?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"])*)?
        ()\z~isx');

        if (!$match) {
            return FALSE;
        }

        // Strip "|noescape" from the modifier chain; $noescape receives the number of removals.
        $modifiers = preg_replace('#\|noescape\s?(?=\||\z)#i', '', $match['modifiers'], -1, $noescape);

        $name = $match['name'];
        if ($name === '') {
            $shortName = $match['shortname'];
            $name = $shortName !== '' ? $shortName : '=';
            $isClosing = substr($shortName, 0, 1) === '/';
            if (!$noescape && !$match['noescape'] && !$isClosing) {
                $modifiers .= '|escape';
            }
        }
        return array($name, trim($match['args']), $modifiers);
    }


    /**
     * Appends a new token to the output.
     *
     * @param  string  token type (NLatteToken constant)
     * @param  string  source text covered by the token
     * @return NLatteToken  the token just added, so the caller can fill in more fields
     */
    private function addToken($type, $text)
    {
        $token = new NLatteToken;
        $this->output[] = $token;
        $token->type = $type;
        $token->text = $text;
        // Line number = newlines within the first max(1, offset - 1) bytes of the input, plus one.
        $token->line = substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
        return $token;
    }


    /**
     * Post-processes the most recently added token: handles the {syntax},
     * {/syntax} and {contentType} macros and the n:syntax attribute, which
     * change how the rest of the template is scanned.
     *
     * @return void
     */
    protected function filter()
    {
        $token = end($this->output);
        if (!$token) { // nothing has been emitted yet
            return;
        }

        if ($token->type === NLatteToken::MACRO_TAG && $token->name === '/syntax') {
            $this->setSyntax($this->defaultSyntax);
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::MACRO_TAG && $token->name === 'syntax') {
            $this->setSyntax($token->value);
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
            $this->setSyntax($token->value);
            // Remember which closing tag switches the syntax back.
            $this->syntaxEndTag = '/' . $this->lastHtmlTag;
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::HTML_TAG_END && $this->lastHtmlTag === $this->syntaxEndTag) {
            $this->setSyntax($this->defaultSyntax);

        } elseif ($token->type === NLatteToken::MACRO_TAG && $token->name === 'contentType') {
            if (preg_match('#html|xml#', $token->value, $m)) {
                $this->xmlMode = $m[0] === 'xml';
                $this->setContext(self::CONTEXT_TEXT);
            } else {
                // Neither HTML nor XML: stop recognizing tags, only macros.
                $this->setContext(self::CONTEXT_NONE);
            }
        }
    }

}
390: