1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11:
12:
13:
14:
15: 16: 17: 18: 19: 20:
/**
 * Tokenizer for templates: splits the input into a flat list of NLatteToken
 * objects (text, HTML tag begin/end, attributes, macros and macro comments).
 *
 * The parser is a small state machine. The current state ("context") selects
 * which context*() method is used to find the next token.
 */
class NParser extends NObject
{
	/** Regular expression for a single- or double-quoted string with backslash escapes. */
	const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

	/** Prefix of attributes whose quoted value is captured whole instead of being tokenized. */
	const N_PREFIX = 'n:';

	/** @var string  syntax name activated at the start of parse() and restored by filter() */
	public $defaultSyntax = 'latte';

	/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
	public $syntaxes = array(
		'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
		'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
		'asp' => array('<%\s*', '\s*%>'),
		'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
		'off' => array('[^\x00-\xFF]', ''),
	);

	/** @var string  regexp fragment matching a macro or a macro comment, built by setDelimiters() */
	private $macroRe;

	/** @var string  source being parsed (BOM removed, CRLF converted to LF) */
	private $input;

	/** @var array  tokens produced so far */
	private $output;

	/** @var int  byte position in $input where the next match starts */
	private $offset;

	/** @var array  array(context name, quote character or NULL), see setContext() */
	private $context;

	/** @var string  name of the last HTML tag seen, lower-cased, prefixed with "/" for closing tags */
	private $lastTag;

	/** @var string  closing tag ("/name") that ends the syntax set by an n:syntax attribute, or NULL */
	private $endTag;

	/** @var bool  when TRUE, script and style elements do not switch to the CDATA context */
	private $xmlMode;

	/** Parser contexts; each one maps to a context<Name>() method. */
	const CONTEXT_TEXT = 'text',
		CONTEXT_CDATA = 'cdata',
		CONTEXT_TAG = 'tag',
		CONTEXT_ATTRIBUTE = 'attribute',
		CONTEXT_NONE = 'none',
		CONTEXT_COMMENT = 'comment';



	/**
	 * Splits the template source into tokens.
	 * @param  string  template source; must be valid UTF-8, a leading BOM is removed
	 * @return array   list of NLatteToken objects in source order
	 * @throws InvalidArgumentException  when the input is not valid UTF-8
	 */
	public function parse($input)
	{
		if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // UTF-8 BOM
			// cast: substr() returns FALSE on PHP < 8 when the input is only the BOM
			$input = (string) substr($input, 3);
		}
		if (!NStrings::checkEncoding($input)) {
			throw new InvalidArgumentException('Template is not valid UTF-8 stream.');
		}
		$input = str_replace("\r\n", "\n", $input);
		$this->input = $input;
		$this->output = array();
		$this->offset = 0;

		$this->setSyntax($this->defaultSyntax);
		$this->setContext(self::CONTEXT_TEXT);
		$this->lastTag = $this->endTag = NULL;
		$this->xmlMode = (bool) preg_match('#^<\?xml\s#m', $input);

		$length = strlen($input);
		while ($this->offset < $length) {
			// dispatch to contextText(), contextTag(), ... (method names are case-insensitive)
			$matches = $this->{"context".$this->context[0]}();

			if (!$matches) { // nothing more to recognize; the rest is plain text
				break;

			} elseif (self::hasGroup($matches, 'comment')) {
				$this->addToken(NLatteToken::COMMENT, $matches[0]);

			} elseif (self::hasGroup($matches, 'macro')) { // not empty(): the macro {0} captures '0'
				$token = $this->addToken(NLatteToken::MACRO, $matches[0]);
				list($token->name, $token->value, $token->modifiers) = $this->parseMacro($matches['macro']);
			}

			$this->filter();
		}

		if ($this->offset < $length) {
			$this->addToken(NLatteToken::TEXT, substr($this->input, $this->offset));
		}
		return $this->output;
	}



	/**
	 * Handles CONTEXT_TEXT: looks for the start of an HTML tag, an HTML comment or a macro.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextText()
	{
		$matches = $this->match('~
			(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
			<(?P<htmlcomment>!--)| ## begin of HTML comment <!--
			'.$this->macroRe.' ## macro
		~xsi');

		if (self::hasGroup($matches, 'htmlcomment')) {
			$this->addToken(NLatteToken::TAG_BEGIN, $matches[0]);
			$this->setContext(self::CONTEXT_COMMENT);

		} elseif (self::hasGroup($matches, 'tag')) { // not empty(): the pattern allows the tag name "0"
			$token = $this->addToken(NLatteToken::TAG_BEGIN, $matches[0]);
			$token->name = $matches['tag'];
			$token->closing = (bool) $matches['closing'];
			$this->lastTag = $matches['closing'] . strtolower($matches['tag']);
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_CDATA: looks for the closing tag of the current element or a macro.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextCData()
	{
		$matches = $this->match('~
			</(?P<tag>'.$this->lastTag.')(?![a-z0-9:])| ## end HTML tag </tag
			'.$this->macroRe.' ## macro
		~xsi');

		if (self::hasGroup($matches, 'tag')) {
			$token = $this->addToken(NLatteToken::TAG_BEGIN, $matches[0]);
			$token->name = $this->lastTag;
			$token->closing = TRUE;
			$this->lastTag = '/' . $this->lastTag;
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_TAG: looks for the end of the tag, a macro or an attribute.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextTag()
	{
		$matches = $this->match('~
			(?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
			'.$this->macroRe.'| ## macro
			\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
		~xsi');

		if (self::hasGroup($matches, 'end')) {
			$this->addToken(NLatteToken::TAG_END, $matches[0]);
			// content of <script> and <style> is scanned as CDATA unless the template is XML
			$isCData = !$this->xmlMode && in_array($this->lastTag, array('script', 'style'), TRUE);
			$this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

		} elseif (self::hasGroup($matches, 'attr')) {
			$token = $this->addToken(NLatteToken::ATTRIBUTE, $matches[0]);
			$token->name = $matches['attr'];
			$token->value = isset($matches['value']) ? $matches['value'] : '';

			if ($token->value === '"' || $token->value === "'") { // quoted value follows
				if (NStrings::startsWith($token->name, self::N_PREFIX)) {
					// n: attribute: consume everything up to the closing quote into this token
					$token->value = '';
					if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
						$token->value = $m[1];
						$token->text .= $m[0];
					}
				} else {
					// ordinary attribute: its value is tokenized in the attribute context
					$this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
				}
			}
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_ATTRIBUTE: looks for the closing quote of the attribute value or a macro.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextAttribute()
	{
		$matches = $this->match('~
			(?P<quote>'.$this->context[1].')| ## end of HTML attribute
			'.$this->macroRe.' ## macro
		~xsi');

		if (self::hasGroup($matches, 'quote')) {
			$this->addToken(NLatteToken::TEXT, $matches[0]);
			$this->setContext(self::CONTEXT_TAG);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_COMMENT: looks for the end of the HTML comment or a macro.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextComment()
	{
		$matches = $this->match('~
			(?P<htmlcomment>--\s*>)| ## end of HTML comment
			'.$this->macroRe.' ## macro
		~xsi');

		if (self::hasGroup($matches, 'htmlcomment')) {
			$this->addToken(NLatteToken::TAG_END, $matches[0]);
			$this->setContext(self::CONTEXT_TEXT);
		}
		return $matches;
	}



	/**
	 * Handles CONTEXT_NONE: only macros are recognized.
	 * @return array|NULL  captured groups of the match, see match()
	 */
	private function contextNone()
	{
		$matches = $this->match('~
			'.$this->macroRe.' ## macro
		~xsi');
		return $matches;
	}



	/**
	 * Searches the input from the current offset. On success the text skipped before
	 * the match becomes a TEXT token and the offset moves behind the match.
	 * @param  string  regular expression
	 * @return array|NULL  captured groups as plain strings, or the falsy result of NStrings::match()
	 */
	private function match($re)
	{
		if ($matches = NStrings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset)) {
			$value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
			if ($value !== '') {
				$this->addToken(NLatteToken::TEXT, $value);
			}
			$this->offset = $matches[0][1] + strlen($matches[0][0]);
			foreach ($matches as $k => $v) { // drop offsets, keep only the captured text
				$matches[$k] = $v[0];
			}
		}
		return $matches;
	}



	/**
	 * Tells whether a named group took part in a match with a non-empty capture.
	 * Unlike empty(), this treats the capture '0' as present.
	 * @param  array|NULL  result of match()
	 * @param  string      group name
	 * @return bool
	 */
	private static function hasGroup($matches, $name)
	{
		return isset($matches[$name]) && $matches[$name] !== '';
	}



	/**
	 * Switches the parser context.
	 * @param  string  one of the CONTEXT_* constants
	 * @param  string  quote character delimiting the attribute value (CONTEXT_ATTRIBUTE only)
	 * @return NParser  provides a fluent interface
	 */
	public function setContext($context, $quote = NULL)
	{
		$this->context = array($context, $quote);
		return $this;
	}



	/**
	 * Activates the macro delimiters of a named syntax.
	 * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
	 * @return NParser  provides a fluent interface
	 * @throws InvalidArgumentException  when the syntax is not defined in $syntaxes
	 */
	public function setSyntax($type)
	{
		if (!$type) {
			$type = $this->defaultSyntax;
		}
		if (!isset($this->syntaxes[$type])) {
			throw new InvalidArgumentException("Unknown syntax '$type'");
		}
		$this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
		return $this;
	}



	/**
	 * Builds the macro regexp fragment for the given delimiters.
	 * @param  string  left delimiter, a regular expression
	 * @param  string  right delimiter, a regular expression
	 * @return NParser  provides a fluent interface
	 */
	public function setDelimiters($left, $right)
	{
		$this->macroRe = '
			(?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
			' . $left . '
				(?P<macro>(?:' . self::RE_STRING . '|\{
						(?P<inner>' . self::RE_STRING . '|\{(?P>inner)\}|[^\'"{}])*+
				\}|[^\'"{}])+?)
			' . $right . '
			(?P<rmargin>[ \t]*(?=\n))?
		';
		return $this;
	}



	/**
	 * Splits the content of a macro (the text between delimiters) into its parts.
	 * When the macro has no name, the short name is used ("=" if that is empty too) and
	 * "|escape" is appended to the modifiers unless the macro starts with "!" or
	 * the short name starts with "/".
	 * @param  string  macro content without delimiters
	 * @return array|FALSE  array(name, trimmed arguments, modifiers), FALSE when the content does not match
	 */
	public function parseMacro($macro)
	{
		$match = NStrings::match($macro, '~^
			(
				(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\())| ## ?, name, /name, but not function( or class::
				(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
			)(?P<args>.*?)
			(?P<modifiers>\|[a-z](?:'.self::RE_STRING.'|[^\'"])*)?
			()$~isx'); // the trailing empty group makes every named group present in the result

		if (!$match) {
			return FALSE;
		}
		if ($match['name'] === '') {
			$match['name'] = $match['shortname'] !== '' ? $match['shortname'] : '=';
			if (!$match['noescape'] && substr($match['shortname'], 0, 1) !== '/') {
				$match['modifiers'] .= '|escape';
			}
		}
		return array($match['name'], trim($match['args']), $match['modifiers']);
	}



	/**
	 * Appends a new token to the output.
	 * The line is 1 plus the number of newlines found in the first
	 * max(1, offset - 1) bytes of the input.
	 * @param  string  token type, an NLatteToken constant
	 * @param  string  source text of the token
	 * @return NLatteToken
	 */
	private function addToken($type, $text)
	{
		$this->output[] = $token = new NLatteToken;
		$token->type = $type;
		$token->text = $text;
		$token->line = substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
		return $token;
	}



	/**
	 * Inspects the most recent token and applies parser directives:
	 * {syntax}, {/syntax}, the n:syntax attribute and {contentType}.
	 * @return void
	 */
	protected function filter()
	{
		$token = end($this->output);
		if (!$token) { // end() returns FALSE when no token has been produced yet
			return;
		}

		if ($token->type === NLatteToken::MACRO && $token->name === '/syntax') {
			$this->setSyntax($this->defaultSyntax);
			$token->type = NLatteToken::COMMENT;

		} elseif ($token->type === NLatteToken::MACRO && $token->name === 'syntax') {
			$this->setSyntax($token->value);
			$token->type = NLatteToken::COMMENT;

		} elseif ($token->type === NLatteToken::ATTRIBUTE && $token->name === 'n:syntax') {
			$this->setSyntax($token->value);
			$this->endTag = '/' . $this->lastTag; // syntax lasts until this closing tag
			$token->type = NLatteToken::COMMENT;

		} elseif ($token->type === NLatteToken::TAG_END && $this->endTag !== NULL && $this->lastTag === $this->endTag) {
			// Without the NULL check an HTML comment end seen before any tag would match
			// NULL === NULL and reset a syntax chosen by {syntax}.
			$this->setSyntax($this->defaultSyntax);
			$this->endTag = NULL; // later closing tags of the same name must not reset the syntax again

		} elseif ($token->type === NLatteToken::MACRO && $token->name === 'contentType') {
			if (preg_match('#html|xml#', $token->value, $m)) {
				$this->xmlMode = $m[0] === 'xml';
				$this->setContext(self::CONTEXT_TEXT);
			} else {
				$this->setContext(self::CONTEXT_NONE);
			}
		}
	}

}
404: