1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Tokenizer for templates: splits the input into a flat list of NLatteToken
 * objects (plain text, HTML tag begin/end, HTML attributes, macro tags and
 * comments).
 *
 * The parser is a small state machine. The current state ("context") selects
 * one of the context*() methods; each of them searches the input from the
 * current offset for the constructs that are meaningful in that state.
 *
 * The code deliberately stays PHP 5.2 compatible (array(), list(), no type
 * declarations), matching the rest of the file.
 */
class NParser extends NObject
{
    /** Regular expression fragment matching a single- or double-quoted string with backslash escapes. */
    const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

    /** Prefix of attributes whose quoted value is captured in one piece by contextTag(). */
    const N_PREFIX = 'n:';

    /** @var string  name of the syntax (key of $syntaxes) activated at the start of parse() */
    public $defaultSyntax = 'latte';

    /** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
    public $syntaxes = array(
        'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
        'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
        'asp' => array('<%\s*', '\s*%>'),
        'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
        'off' => array('[^\x00-\xFF]', ''),
    );

    /** @var string  regexp fragment for comments and macro tags, built by setDelimiters() */
    private $macroRe;

    /** @var string  normalized template source being parsed */
    private $input;

    /** @var array  tokens collected so far (NLatteToken instances) */
    private $output;

    /** @var int  byte offset in $input where the next search starts */
    private $offset;

    /** @var array  array(context name, quote character or NULL), see setContext() */
    private $context;

    /** @var string|NULL  lower-cased name of the last HTML tag seen, prefixed with '/' for closing tags */
    private $lastHtmlTag;

    /** @var string|NULL  value of $lastHtmlTag at which the syntax set by n:syntax ends */
    private $syntaxEndTag;

    /** @var bool  TRUE once a {contentType ...} macro mentioning xml was seen; disables CDATA handling */
    private $xmlMode;

    /** Parser states; the value is appended to "context" to obtain the handler method name. */
    const CONTEXT_TEXT = 'text',
        CONTEXT_CDATA = 'cdata',
        CONTEXT_TAG = 'tag',
        CONTEXT_ATTRIBUTE = 'attribute',
        CONTEXT_NONE = 'none',
        CONTEXT_COMMENT = 'comment';



    /**
     * Tokenizes a template.
     *
     * A leading UTF-8 BOM is removed and CRLF line endings are converted to LF
     * before parsing. All per-run state is reset, so one instance can be used
     * for several templates.
     *
     * @param  string  template source
     * @return array   list of NLatteToken
     * @throws InvalidArgumentException  when NStrings::checkEncoding() rejects the input
     */
    public function parse($input)
    {
        if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // UTF-8 byte order mark
            $input = substr($input, 3);
        }
        if (!NStrings::checkEncoding($input)) {
            throw new InvalidArgumentException('Template is not valid UTF-8 stream.');
        }
        $input = str_replace("\r\n", "\n", $input);
        $this->input = $input;
        $this->output = array();
        $this->offset = 0;

        $this->setSyntax($this->defaultSyntax);
        $this->setContext(self::CONTEXT_TEXT);
        $this->lastHtmlTag = $this->syntaxEndTag = NULL;
        // do not let {contentType xml} of a previously parsed template leak into this one
        $this->xmlMode = FALSE;

        $length = strlen($input);
        while ($this->offset < $length) {
            // dispatch to contextText(), contextTag(), ... (method names are case-insensitive)
            $matches = $this->{"context".$this->context[0]}();

            if (!$matches) { // nothing more to find in the current context
                break;

            } elseif (self::isCaptured($matches, 'comment')) {
                $this->addToken(NLatteToken::COMMENT, $matches[0]);

            } elseif (self::isCaptured($matches, 'macro')) {
                // explicit '' comparison instead of empty(): the macro body "0" is a valid expression
                $token = $this->addToken(NLatteToken::MACRO_TAG, $matches[0]);
                list($token->name, $token->value, $token->modifiers) = $this->parseMacroTag($matches['macro']);
            }

            $this->filter();
        }

        if ($this->offset < $length) { // the unmatched rest of the input is plain text
            $this->addToken(NLatteToken::TEXT, substr($this->input, $this->offset));
        }
        return $this->output;
    }



    /**
     * Handles CONTEXT_TEXT: looks for the beginning of an HTML tag, the
     * beginning of an HTML comment, or a macro tag / macro comment.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextText()
    {
        $matches = $this->match('~
            (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ##  begin of HTML tag <tag </tag - ignores <!DOCTYPE
            <(?P<htmlcomment>!--)|           ##  begin of HTML comment <!--
            '.$this->macroRe.'           ##  macro tag
        ~xsi');

        if (self::isCaptured($matches, 'htmlcomment')) {
            $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $this->setContext(self::CONTEXT_COMMENT);

        } elseif (self::isCaptured($matches, 'tag')) {
            // the pattern allows the one-character name "0", which empty() would have discarded
            $token = $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $matches['tag'];
            $token->closing = (bool) $matches['closing'];
            $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }



    /**
     * Handles CONTEXT_CDATA (content of <script> / <style>): looks only for
     * the matching closing tag or a macro tag / macro comment.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextCData()
    {
        $matches = $this->match('~
            </(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ##  end HTML tag </tag
            '.$this->macroRe.'           ##  macro tag
        ~xsi');

        if (self::isCaptured($matches, 'tag')) {
            $token = $this->addToken(NLatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $this->lastHtmlTag;
            $token->closing = TRUE;
            $this->lastHtmlTag = '/' . $this->lastHtmlTag;
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }



    /**
     * Handles CONTEXT_TAG (inside <tag ...>): looks for the end of the tag,
     * a macro tag / macro comment, or an attribute.
     *
     * Quoted values of attributes starting with N_PREFIX are consumed at once
     * and stored in the attribute token; for other quoted attributes the
     * parser switches to CONTEXT_ATTRIBUTE.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextTag()
    {
        $matches = $this->match('~
            (?P<end>\ ?/?>)([ \t]*\n)?|  ##  end of HTML tag
            '.$this->macroRe.'|          ##  macro tag
            \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
        ~xsi');

        if (self::isCaptured($matches, 'end')) {
            $this->addToken(NLatteToken::HTML_TAG_END, $matches[0]);
            // outside XML mode the content of <script> and <style> is not parsed as HTML
            $isCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
            $this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

        } elseif (self::isCaptured($matches, 'attr')) {
            $token = $this->addToken(NLatteToken::HTML_ATTRIBUTE, $matches[0]);
            $token->name = $matches['attr'];
            $token->value = isset($matches['value']) ? $matches['value'] : '';

            if ($token->value === '"' || $token->value === "'") { // value is a bare opening quote
                if (NStrings::startsWith($token->name, self::N_PREFIX)) {
                    $token->value = '';
                    // read everything up to the next occurrence of the same quote
                    if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
                        $token->value = $m[1];
                        $token->text .= $m[0];
                    }
                } else {
                    $this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
                }
            }
        }
        return $matches;
    }



    /**
     * Handles CONTEXT_ATTRIBUTE (inside a quoted attribute value): looks for
     * the closing quote or a macro tag / macro comment.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextAttribute()
    {
        $matches = $this->match('~
            (?P<quote>'.$this->context[1].')|  ##  end of HTML attribute
            '.$this->macroRe.'                 ##  macro tag
        ~xsi');

        if (self::isCaptured($matches, 'quote')) {
            $this->addToken(NLatteToken::TEXT, $matches[0]);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }



    /**
     * Handles CONTEXT_COMMENT (inside <!-- ... -->): looks for the end of the
     * HTML comment or a macro tag / macro comment.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextComment()
    {
        $matches = $this->match('~
            (?P<htmlcomment>--\s*>)|           ##  end of HTML comment
            '.$this->macroRe.'                 ##  macro tag
        ~xsi');

        if (self::isCaptured($matches, 'htmlcomment')) {
            $this->addToken(NLatteToken::HTML_TAG_END, $matches[0]);
            $this->setContext(self::CONTEXT_TEXT);
        }
        return $matches;
    }



    /**
     * Handles CONTEXT_NONE (non-HTML content): looks only for a macro tag /
     * macro comment.
     *
     * @return array|NULL  captured groups of the match, falsy when nothing was found
     */
    private function contextNone()
    {
        $matches = $this->match('~
            '.$this->macroRe.'           ##  macro tag
        ~xsi');
        return $matches;
    }



    /**
     * Searches the input from the current offset.
     *
     * On success the text skipped before the match is emitted as a TEXT token,
     * the offset is moved behind the match and the captured groups are
     * returned as plain strings (the offsets reported by PREG_OFFSET_CAPTURE
     * are stripped).
     *
     * @param  string  regular expression
     * @return array|NULL  captured groups, or the falsy result of NStrings::match()
     */
    private function match($re)
    {
        if ($matches = NStrings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset)) {
            $skipped = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
            if ($skipped !== '') {
                $this->addToken(NLatteToken::TEXT, $skipped);
            }
            $this->offset = $matches[0][1] + strlen($matches[0][0]);
            foreach ($matches as $k => $v) {
                $matches[$k] = $v[0]; // keep the text, drop the offset
            }
        }
        return $matches;
    }



    /**
     * Tells whether a capture group took part in the match.
     *
     * empty() must not be used for this purpose because it also rejects the
     * legitimate capture "0".
     *
     * @param  array|NULL  result of match()
     * @param  string      group name
     * @return bool
     */
    private static function isCaptured($matches, $group)
    {
        return isset($matches[$group]) && $matches[$group] !== '';
    }



    /**
     * Switches the parser state.
     *
     * @param  string  one of the CONTEXT_* constants
     * @param  string  quote character delimiting the attribute value (used by CONTEXT_ATTRIBUTE)
     * @return NParser  provides a fluent interface
     */
    public function setContext($context, $quote = NULL)
    {
        $this->context = array($context, $quote);
        return $this;
    }



    /**
     * Activates the macro delimiters of a named syntax.
     *
     * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
     * @return NParser  provides a fluent interface
     * @throws InvalidArgumentException  when the syntax is not defined
     */
    public function setSyntax($type)
    {
        if (!$type) {
            $type = $this->defaultSyntax;
        }
        if (!isset($this->syntaxes[$type])) {
            throw new InvalidArgumentException("Unknown syntax '$type'");
        }
        $this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
        return $this;
    }



    /**
     * Builds the regular expression fragment for macro comments and macro
     * tags from the given delimiters. The fragment is meant to be embedded in
     * a pattern compiled with the "x" modifier.
     *
     * @param  string  left delimiter (regular expression)
     * @param  string  right delimiter (regular expression)
     * @return NParser  provides a fluent interface
     */
    public function setDelimiters($left, $right)
    {
        $this->macroRe = '
            (?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
            ' . $left . '
                (?P<macro>(?:' . self::RE_STRING . '|\{
                        (?P<inner>' . self::RE_STRING . '|\{(?P>inner)\}|[^\'"{}])*+
                \}|[^\'"{}])+?)
            ' . $right . '
            (?P<rmargin>[ \t]*(?=\n))?
        ';
        return $this;
    }



    /**
     * Splits the content of a macro tag into name, arguments and modifiers.
     *
     * When the tag does not start with a macro name, the short name (or '='
     * if there is none) becomes the name and, unless the tag starts with '!'
     * or the short name starts with '/', the modifier '|escape' is appended.
     *
     * @param  string  content of the tag without delimiters
     * @return array|FALSE  array(name, trimmed arguments, modifiers), FALSE when the tag cannot be parsed
     */
    public function parseMacroTag($tag)
    {
        // the trailing empty group () makes every named group present in the result
        $match = NStrings::match($tag, '~^
            (
                (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\())|   ## ?, name, /name, but not function( or class::
                (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
            )(?P<args>.*?)
            (?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"])*)?
            ()$~isx');

        if (!$match) {
            return FALSE;
        }
        if ($match['name'] === '') {
            $match['name'] = $match['shortname'] ? $match['shortname'] : '=';
            if (!$match['noescape'] && substr($match['shortname'], 0, 1) !== '/') {
                $match['modifiers'] .= '|escape';
            }
        }
        return array($match['name'], trim($match['args']), $match['modifiers']);
    }



    /**
     * Appends a new token to the output.
     *
     * The line number is derived from the number of newlines in the input
     * before the current offset.
     *
     * @param  string  token type (NLatteToken constant)
     * @param  string  source text of the token
     * @return NLatteToken
     */
    private function addToken($type, $text)
    {
        $this->output[] = $token = new NLatteToken;
        $token->type = $type;
        $token->text = $text;
        $token->line = substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
        return $token;
    }



    /**
     * Post-processes the most recently added token: handles {syntax},
     * {/syntax}, the n:syntax attribute and {contentType}.
     *
     * @return void
     */
    protected function filter()
    {
        $token = end($this->output);
        if (!$token) { // nothing has been tokenized yet
            return;
        }

        if ($token->type === NLatteToken::MACRO_TAG && $token->name === '/syntax') {
            $this->setSyntax($this->defaultSyntax);
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::MACRO_TAG && $token->name === 'syntax') {
            $this->setSyntax($token->value);
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
            $this->setSyntax($token->value);
            $this->syntaxEndTag = '/' . $this->lastHtmlTag;
            $token->type = NLatteToken::COMMENT;

        } elseif ($token->type === NLatteToken::HTML_TAG_END
            && $this->syntaxEndTag !== NULL
            && $this->lastHtmlTag === $this->syntaxEndTag
        ) {
            // Only an active n:syntax scope may be closed here. Without the NULL check the end
            // of an HTML comment seen before any tag (both values NULL) would cancel {syntax}.
            $this->setSyntax($this->defaultSyntax);
            $this->syntaxEndTag = NULL;

        } elseif ($token->type === NLatteToken::MACRO_TAG && $token->name === 'contentType') {
            if (preg_match('#html|xml#', $token->value, $m)) {
                $this->xmlMode = $m[0] === 'xml';
                $this->setContext(self::CONTEXT_TEXT);
            } else {
                $this->setContext(self::CONTEXT_NONE);
            }
        }
    }

}
403: