1 | 1 | simandl | <?php |
2 | | | /** |
3 | | | * GeSHi - Generic Syntax Highlighter |
4 | | | * |
5 | | | * The GeSHi class for Generic Syntax Highlighting. Please refer to the |
6 | | | * documentation at http://qbnz.com/highlighter/documentation.php for more |
7 | | | * information about how to use this class. |
8 | | | * |
9 | | | * For changes, release notes, TODOs etc, see the relevant files in the docs/ |
10 | | | * directory. |
11 | | | * |
12 | | | * This file is part of GeSHi. |
13 | | | * |
14 | | | * GeSHi is free software; you can redistribute it and/or modify |
15 | | | * it under the terms of the GNU General Public License as published by |
16 | | | * the Free Software Foundation; either version 2 of the License, or |
17 | | | * (at your option) any later version. |
18 | | | * |
19 | | | * GeSHi is distributed in the hope that it will be useful, |
20 | | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | | | * GNU General Public License for more details. |
23 | | | * |
24 | | | * You should have received a copy of the GNU General Public License |
25 | | | * along with GeSHi; if not, write to the Free Software |
26 | | | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
27 | | | * |
28 | | | * @package geshi |
29 | | | * @subpackage core |
30 | | | * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de> |
31 | | | * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann |
32 | | | * @license http://gnu.org/copyleft/gpl.html GNU GPL |
33 | | | * |
34 | | | */ |
35 | | | |
36 | | | // |
37 | | | // GeSHi Constants |
38 | | | // You should use these constant names in your programs instead of |
39 | | | // their values - you never know when a value may change in a future |
40 | | | // version |
41 | | | // |
42 | | | |
/** Version string of this GeSHi release */
define('GESHI_VERSION', '1.0.8.1');

/**
 * Root directory of the GeSHi code tree, including the trailing separator.
 * A value defined by the including script before this file is loaded wins.
 */
defined('GESHI_ROOT') || define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);

/** Directory (below GESHI_ROOT) that holds the language files
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

/**
 * Whether GeSHi applies its stricter ("paranoid") security checks.
 * Defaults to off unless the including script defined it beforehand.
 */
defined('GESHI_SECURITY_PARANOID') || define('GESHI_SECURITY_PARANOID', false);
60 | | | |
// Line numbering modes - pass one of these to enable_line_numbers()
/** Build the result without line numbers */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Build the result with normal line numbers */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Build the result with fancy line numbers */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// HTML container that surrounds the highlighted source
/** No surrounding element */
define('GESHI_HEADER_NONE', 0);
/** Surround the source with a "div" */
define('GESHI_HEADER_DIV', 1);
/** Surround the source with a "pre" */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap each line when line numbers are enabled, or else the whole code */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Surround the source with a "table":
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * This is essentially only a workaround for Firefox, see sf#1651996 or
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note with line numbers disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);
92 | | | |
// Capitalisation constants
// (the descriptions used to be attached to the wrong constants; each one now
// matches the name of the constant it documents)
/** Leave keywords found in the case they were written in */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);
100 | | | |
// Link state selectors
/** Style slot for links in the :link state */
define('GESHI_LINK', 0);
/** Style slot for links in the :hover state */
define('GESHI_HOVER', 1);
/** Style slot for links in the :active state */
define('GESHI_ACTIVE', 2);
/** Style slot for links in the :visited state */
define('GESHI_VISITED', 3);

// Markers that delimit "important" sections of the source.
// If you change them, write them literally - i.e. NOT as they would look
// after being run through htmlentities().
/** Marker that opens an important section of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** Marker that closes an important section of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');
118 | | | |
119 | | | /**#@+ |
120 | | | * @access private |
121 | | | */ |
// Applicability of strict mode for a language
/** Strict mode never applies (the most common case) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply; it can be switched on or off
    with {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Keys of the advanced regexp arrays used in language files
/** Key: the pattern to search for */
define('GESHI_SEARCH', 0);
/** Key: the bracket group of a match that is used as the replacement */
define('GESHI_REPLACE', 1);
/** Key: modifiers for the regular expression */
define('GESHI_MODIFIERS', 2);
/** Key: the bracket group of a match that is put before the replacement */
define('GESHI_BEFORE', 3);
/** Key: the bracket group of a match that is put after the replacement */
define('GESHI_AFTER', 4);
/** Key: a custom keyword used as the html tag class for this regexp */
define('GESHI_CLASS', 5);

/** Marks comments in language files */
define('GESHI_COMMENTS', 0);

/**
 * Work-around switch for missing PHP features.
 * True unless the running PHP version compares greater than 4.3.3
 * (so it is also true for 4.3.3 itself).
 **/
define('GESHI_PHP_PRE_433', version_compare(PHP_VERSION, '4.3.3') !== 1);
154 | | | |
if (!function_exists('geshi_stripos_fallback')) {
    /**
     * Case-insensitive strpos(), used to back the stripos() polyfill below.
     *
     * The previous polyfill was built on preg_match() without the "i"
     * modifier (so it was case-sensitive), and its pre-4.3.3 branch returned
     * a position relative to the shortened haystack instead of the original
     * one. Lower-casing both operands and delegating to strpos() avoids both
     * problems and needs no PHP-version switch.
     *
     * @param string   $haystack The string to search in
     * @param string   $needle   The string to search for
     * @param int|null $offset   Position to start searching from (null = 0)
     * @return int|false Position of the first match in $haystack, else false
     * @ignore
     */
    function geshi_stripos_fallback($haystack, $needle, $offset = null) {
        // Old strpos() versions complain about an empty needle; report "not found"
        if ('' === $needle) {
            return false;
        }
        return strpos(strtolower($haystack), strtolower($needle), (int) $offset);
    }
}

/** make sure we can call stripos **/
if (!function_exists('stripos')) {
    /**
     * @ignore
     */
    function stripos($haystack, $needle, $offset = null) {
        return geshi_stripos_fallback($haystack, $needle, $offset);
    }
}
184 | | | |
/**
 * Upper bound for the number of subpatterns put into one generated regular
 * expression; some old PHP / PCRE versions only support a limited number.
 * Set this to false if your PCRE lib is up to date.
 * @see GeSHi->optimize_regexp_list()
 **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);

/**
 * Upper bound for the length of one generated regular expression.
 * Be generous here, but remember that open patterns still have to be closed
 * once the limit is hit - 12k should do just fine on a 16k limit.
 * @see GeSHi->optimize_regexp_list()
 **/
define('GESHI_MAX_PCRE_LENGTH', 12288);
196 | | | |
// Number format flags (each value is a distinct bit).
// The pattern noted after each flag is the shape of number it stands for.
/** Plain integers */
define('GESHI_NUMBER_INT_BASIC', 1);             // \d+
/** C-style integers */
define('GESHI_NUMBER_INT_CSTYLE', 2);            // \d+[lL]?
/** Binary numbers with a "b" suffix */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           // [01]+[bB]
/** Binary numbers with a "%" prefix */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   // %[01]+
/** Binary numbers with a "0b" prefix (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        // 0b[01]+
/** Octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);          // 0[0-7]+
/** Octal numbers with an "o" suffix */
define('GESHI_NUMBER_OCT_SUFFIX', 512);          // [0-7]+[oO]
/** Hexadecimal numbers with a "0x" prefix */
define('GESHI_NUMBER_HEX_PREFIX', 4096);         // 0x[0-9a-fA-F]+
/** Hexadecimal numbers with an "h" suffix */
define('GESHI_NUMBER_HEX_SUFFIX', 8192);         // [0-9][0-9a-fA-F]*h
/** Floating-point numbers, no scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);        // \d+\.\d+
/** Floating-point numbers with an "f" suffix, no scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);     // \d+(\.\d+)?f
/** Floating-point numbers in scientific notation (E) that start with the decimal point */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);    // \.\d+e\d+
/** Floating-point numbers in scientific notation (E) with a required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);     // \d+(\.\d+)?e\d+
// Custom formats are passed by RX array
225 | | | |
// Error codes - compare against these when analysing faults
/**
 * No source code to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The requested language does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** A file (generally a language file) could not be opened for reading */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** An invalid header type was passed to {@link GeSHi->set_header_type()} */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** An invalid line number type was passed to {@link GeSHi->enable_line_numbers()} */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
239 | | | /**#@-*/ |
240 | | | |
241 | | | |
242 | | | /** |
243 | | | * The GeSHi Class. |
244 | | | * |
245 | | | * Please refer to the documentation for GeSHi 1.0.X that is available |
246 | | | * at http://qbnz.com/highlighter/documentation.php for more information |
247 | | | * about how to use this class. |
248 | | | * |
249 | | | * @package geshi |
250 | | | * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de> |
251 | | | * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann |
252 | | | */ |
253 | | | class GeSHi { |
254 | | | /**#@+ |
255 | | | * @access private |
256 | | | */ |
/**
 * Source code that is going to be highlighted
 * @var string
 */
var $source = '';

/**
 * Name of the language used for highlighting
 * @var string
 */
var $language = '';

/**
 * Data describing the language in use
 * @var array
 */
var $language_data = array();

/**
 * Directory the language files are read from
 * @var string
 */
var $language_path = GESHI_LANG_ROOT;

/**
 * Code of the current error: false while there is none, otherwise one of
 * the GESHI_ERROR_* constants (used as key into $error_messages)
 * @var int|false
 * @todo check err reporting works
 */
var $error = false;

/**
 * Message templates for the error codes; {LANGUAGE} and {PATH} are
 * placeholders that get filled in when the message is reported
 * @var array
 */
var $error_messages = array(
    GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
    GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
    GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
    GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
);

/**
 * Whether highlighting is strict or not
 * @var boolean
 */
var $strict_mode = false;

/**
 * Whether CSS classes are used in the output
 * @var boolean
 */
var $use_classes = false;

/**
 * Header type in use. Among the possible values are:
 *
 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
 * - GESHI_HEADER_NONE: No header is outputted.
 *
 * @var int
 */
var $header_type = GESHI_HEADER_PRE;

/**
 * Permissions deciding which lexics get highlighted
 * @var array
 */
var $lexic_permissions = array(
    'KEYWORDS'    => array(),
    'COMMENTS'    => array('MULTI' => true),
    'REGEXPS'     => array(),
    'ESCAPE_CHAR' => true,
    'BRACKETS'    => true,
    'SYMBOLS'     => false,
    'STRINGS'     => true,
    'NUMBERS'     => true,
    'METHODS'     => true,
    'SCRIPT'      => true
);
339 | | | |
/**
 * Time it took to parse the code
 * @var double
 */
var $time = 0;

/**
 * Content of the header block
 * @var string
 */
var $header_content = '';

/**
 * Content of the footer block
 * @var string
 */
var $footer_content = '';

/**
 * Style of the header block
 * @var string
 */
var $header_content_style = '';

/**
 * Style of the footer block
 * @var string
 */
var $footer_content_style = '';

/**
 * Whether a block around the highlighted source is forced
 * when line numbering is not in use
 * @var boolean
 */
var $force_code_block = false;

/**
 * Styles for hyperlinks in the code
 * @var array
 */
var $link_styles = array();

/**
 * Whether important blocks are recognised
 * @var boolean
 * @deprecated
 * @todo REMOVE THIS FUNCTIONALITY!
 */
var $enable_important_blocks = false;

/**
 * Styles for important parts of the code
 * @var string
 * @deprecated
 * @todo As above - rethink the whole idea of important blocks as it is buggy and
 * will be hard to implement in 1.2
 */
var $important_styles = 'font-weight: bold; color: red;';

/**
 * Whether CSS IDs are added to the code
 * @var boolean
 */
var $add_ids = false;

/**
 * Lines that get extra highlighting
 * @var array
 */
var $highlight_extra_lines = array();

/**
 * Styles of the lines that get extra highlighting
 * @var array
 */
var $highlight_extra_lines_styles = array();

/**
 * Style of extra-highlighted lines
 * @var string
 */
var $highlight_extra_lines_style = 'background-color: #ffc;';

/**
 * Line ending.
 * With null, nl2br() is used on the result string; otherwise every \n
 * is replaced with $line_ending
 * @var string
 */
var $line_ending = null;

/**
 * Number the line numbering starts at
 * @var int
 */
var $line_numbers_start = 1;

/**
 * Overall style of this code block
 * @var string
 */
var $overall_style = 'font-family:monospace;';

/**
 * Style of the actual code
 * @var string
 */
var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';

/**
 * Overall class of this code block
 * @var string
 */
var $overall_class = '';

/**
 * Overall ID of this code block
 * @var string
 */
var $overall_id = '';

/**
 * Style of line numbers
 * @var string
 */
var $line_style1 = 'font-weight: normal; vertical-align:top;';

/**
 * Style of line numbers on fancy lines
 * @var string
 */
var $line_style2 = 'font-weight: bold; vertical-align:top;';

/**
 * Style of line numbers when GESHI_HEADER_PRE_TABLE is chosen
 * @var string
 */
var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
479 | | | |
/**
 * How line numbers are displayed (initialised to GESHI_NO_LINE_NUMBERS)
 * @var int
 */
var $line_numbers = GESHI_NO_LINE_NUMBERS;

/**
 * Whether spans may run across several lines. Set to false to make sure
 * each tag is closed before and reopened after each linefeed.
 * @var boolean
 */
var $allow_multiline_span = true;

/**
 * The "nth" value for fancy line highlighting
 * @var int
 */
var $line_nth_row = 0;

/**
 * Size of tab stops
 * @var int
 */
var $tab_width = 8;

/**
 * Whether language-defined tab stop widths are used
 * @var boolean
 */
var $use_language_tab_width = false;

/**
 * Default target for keyword links
 * @var string
 */
var $link_target = '';

/**
 * Encoding used for entity encoding
 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
 * @var string
 */
var $encoding = 'utf-8';

/**
 * Whether keywords are linked
 * @var boolean
 */
var $keyword_links = true;

/**
 * Currently loaded language file
 * @var string
 * @since 1.0.7.22
 */
var $loaded_language = '';

/**
 * Whether the caches needed for parsing have been built
 *
 * @var bool
 * @since 1.0.8
 */
var $parse_cache_built = false;

/**
 * Work around for the Suhosin Patch with disabled /e modifier
 *
 * Note from suhosins author in config file:
 * <blockquote>
 *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 *   Often it is the cause for remote code execution exploits. It is wise to
 *   deactivate this feature and test where in the application it is used.
 *   The developer using the /e modifier should be made aware that he should
 *   use <code>preg_replace_callback()</code> instead
 * </blockquote>
 *
 * @var int
 * @since 1.0.8
 */
var $_kw_replace_group = 0;
var $_rx_key = 0;

/**
 * Some "callback parameters" for handle_multiline_regexps
 *
 * @since 1.0.8
 * @access private
 * @var string
 */
var $_hmr_before = '';
var $_hmr_replace = '';
var $_hmr_after = '';
var $_hmr_key = 0;
574 | | | |
575 | | | /**#@-*/ |
576 | | | |
577 | | | /** |
578 | | | * Creates a new GeSHi object, with source and language |
579 | | | * |
580 | | | * @param string The source code to highlight |
581 | | | * @param string The language to highlight the source with |
582 | | | * @param string The path to the language file directory. <b>This |
583 | | | * is deprecated!</b> I've backported the auto path |
584 | | | * detection from the 1.1.X dev branch, so now it |
585 | | | * should be automatically set correctly. If you have |
586 | | | * renamed the language directory however, you will |
587 | | | * still need to set the path using this parameter or |
588 | | | * {@link GeSHi->set_language_path()} |
589 | | | * @since 1.0.0 |
590 | | | */ |
function __construct($source = '', $language = '', $path = '') {
    // empty() also treats the string "0" as empty, but "0" is a perfectly
    // valid (if tiny) source, so it is let through explicitly.
    if (!empty($source) || '0' === $source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    // Applied last: set_language_path() ignores an empty path, and for a
    // non-empty one it reloads the current language from the new location.
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept so that existing code calling it keeps
 * working. A method named like its class is no longer treated as a
 * constructor by PHP 8, which is why the real work lives in __construct().
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
600 | | | |
601 | | | /** |
602 | | | * Returns an error message associated with the last GeSHi operation, |
603 | | | * or false if no error has occurred |
604 | | | * |
605 | | | * @return string|false An error message if there has been an error, else false |
606 | | | * @since 1.0.0 |
607 | | | */ |
function error() {
    // Nothing recorded - nothing to report
    if (!$this->error) {
        return false;
    }

    // Template variables that may appear in a message, and their values
    $placeholders = array('{LANGUAGE}', '{PATH}');
    $values       = array($this->language, $this->language_path);

    // The error code doubles as the key of its message template
    $msg = str_replace($placeholders, $values, $this->error_messages[$this->error]);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
624 | | | |
625 | | | /** |
626 | | | * Gets a human-readable language name (thanks to Simon Patterson |
627 | | | * for the idea :)) |
628 | | | * |
629 | | | * @return string The name for the current language |
630 | | | * @since 1.0.2 |
631 | | | */ |
function get_language_name() {
    // $language_data starts out as an empty array and is only filled once a
    // language file has been loaded, so LANG_NAME may be missing - which is
    // exactly the situation in the "no such language" case. Fall back to an
    // empty name instead of reading an undefined index.
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : '';

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
638 | | | |
639 | | | /** |
640 | | | * Sets the source code for this object |
641 | | | * |
642 | | | * @param string The source code to highlight |
643 | | | * @since 1.0.0 |
644 | | | */ |
function set_source($source) {
    // Setting a source also empties the list of extra-highlighted lines
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
649 | | | |
650 | | | /** |
651 | | | * Sets the language for this object |
652 | | | * |
653 | | | * @note since 1.0.8 this function won't reset language-settings by default anymore! |
654 | | | * if you need this set $force_reset = true |
655 | | | * |
656 | | | * @param string The name of the language to use |
657 | | | * @since 1.0.0 |
658 | | | */ |
function set_language($language, $force_reset = false) {
    // Start every call with a clean error state. Previously the "already
    // loaded" shortcut below returned before the error was cleared, so a
    // failed switch to an unknown language followed by a switch back to the
    // loaded one left the "no such language" error (and the wrong language
    // name) in place.
    $this->error = false;

    if ($force_reset) {
        // Forget what is loaded so the shortcut below cannot trigger
        $this->loaded_language = false;
    }

    //Clean up the language name to prevent malicious code injection
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    //Retrieve the full filename
    $file_name = $this->language_path . $language . '.php';
    if ($file_name === $this->loaded_language) {
        // This language is already loaded - just make sure the recorded
        // name matches it again and keep all language settings as they are.
        $this->language = $language;
        return;
    }

    $this->language = $language;
    $this->strict_mode = GESHI_NEVER;

    //Check if we can read the desired file
    if (!is_readable($file_name)) {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
        return;
    }

    // Load the language for parsing
    $this->load_language($file_name);
}
690 | | | |
691 | | | /** |
692 | | | * Sets the path to the directory containing the language files. Note |
693 | | | * that this path is relative to the directory of the script that included |
694 | | | * geshi.php, NOT geshi.php itself. |
695 | | | * |
696 | | | * @param string The path to the language directory |
697 | | | * @since 1.0.0 |
698 | | | * @deprecated The path to the language files should now be automatically |
699 | | | * detected, so this method should no longer be needed. The |
700 | | | * 1.1.X branch handles manual setting of the path differently |
701 | | | * so this method will disappear in 1.2.0. |
702 | | | */ |
function set_language_path($path) {
    // Security fix to prevent external directories using fopen wrappers.
    // The test must be "!== false": strpos() returns 0 for a colon at the
    // very start of the path, which a plain truthiness test mistakes for
    // "no colon" and lets through.
    if (false !== strpos($path, ':')) {
        if (DIRECTORY_SEPARATOR == "\\") {
            // On Windows a single drive-letter colon ("C:...") is fine,
            // any other or additional colon is not.
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        } else {
            return;
        }
    }

    // Reject paths containing characters outside the allowed set.
    // NOTE(review): in this single-quoted string "\\\s" reaches PCRE as an
    // escaped backslash followed by a literal "s", so whitespace is NOT part
    // of the allowed set - confirm whether that is intended.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }

    // In paranoid mode neither hidden path elements nor parent-directory
    // references are accepted.
    if (GESHI_SECURITY_PARANOID
        && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
        return;
    }

    if ($path) {
        // Store the path with a trailing slash
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
731 | | | |
732 | | | /** |
733 | | | * Sets the type of header to be used. |
734 | | | * |
735 | | | * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This |
736 | | | * means more source code but more control over tab width and line-wrapping. |
737 | | | * GESHI_HEADER_PRE means that a "pre" is used - less source, but less |
738 | | | * control. Default is GESHI_HEADER_PRE. |
739 | | | * |
740 | | | * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code |
741 | | | * should be outputted. |
742 | | | * |
743 | | | * @param int The type of header to be used |
744 | | | * @since 1.0.0 |
745 | | | */ |
function set_header_type($type) {
    // Every header type this method accepts
    $known_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $known_types)) {
        // Valid: remember the new header type
        $this->header_type = $type;
        return;
    }

    // Anything else is flagged as an error and the current type is kept
    $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
}
757 | | | |
758 | | | /** |
759 | | | * Sets the styles for the code that will be outputted |
760 | | | * when this object is parsed. The style should be a |
761 | | | * string of valid stylesheet declarations |
762 | | | * |
763 | | | * @param string The overall style for the outputted code block |
764 | | | * @param boolean Whether to merge the styles with the current styles or not |
765 | | | * @since 1.0.0 |
766 | | | */ |
function set_overall_style($style, $preserve_defaults = false) {
    // Either append to the current style or replace it altogether
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
774 | | | |
775 | | | /** |
776 | | | * Sets the overall classname for this block of code. This |
777 | | | * class can then be used in a stylesheet to style this object's |
778 | | | * output |
779 | | | * |
780 | | | * @param string The class name to use for this block of code |
781 | | | * @since 1.0.0 |
782 | | | */ |
function set_overall_class($class) {
    // Stored as given; no validation is done here
    $this->overall_class = $class;
}
786 | | | |
787 | | | /** |
788 | | | * Sets the overall id for this block of code. This id can then |
789 | | | * be used in a stylesheet to style this object's output |
790 | | | * |
791 | | | * @param string The ID to use for this block of code |
792 | | | * @since 1.0.0 |
793 | | | */ |
function set_overall_id($id) {
    // Stored as given; no validation is done here
    $this->overall_id = $id;
}
797 | | | |
798 | | | /** |
799 | | | * Sets whether CSS classes should be used to highlight the source. Default |
800 | | | * is off, calling this method with no arguments will turn it on |
801 | | | * |
802 | | | * @param boolean Whether to turn classes on or not |
803 | | | * @since 1.0.0 |
804 | | | */ |
function enable_classes($flag = true) {
    // Normalise whatever was passed to a real boolean
    $this->use_classes = (bool) $flag;
}
808 | | | |
809 | | | /** |
810 | | | * Sets the style for the actual code. This should be a string |
811 | | | * containing valid stylesheet declarations. If $preserve_defaults is |
812 | | | * true, then styles are merged with the default styles, with the |
813 | | | * user defined styles having priority |
814 | | | * |
815 | | | * Note: Use this method to override any style changes you made to |
816 | | | * the line numbers if you are using line numbers, else the line of |
817 | | | * code will have the same style as the line number! Consult the |
818 | | | * GeSHi documentation for more information about this. |
819 | | | * |
820 | | | * @param string The style to use for actual code |
821 | | | * @param boolean Whether to merge the current styles with the new styles |
822 | | | * @since 1.0.2 |
823 | | | */ |
function set_code_style($style, $preserve_defaults = false) {
    // Merge mode: append the new declarations to the current code style
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }

    // Default mode: the previous code style is replaced completely
    $this->code_style = $style;
}
831 | | | |
832 | | | /** |
833 | | | * Sets the styles for the line numbers. |
834 | | | * |
835 | | | * @param string The style for the line numbers that are "normal" |
836 | | | * @param string|boolean If a string, this is the style of the line |
837 | | | * numbers that are "fancy", otherwise if boolean then this |
838 | | | * defines whether the normal styles should be merged with the |
839 | | | * new normal styles or not |
840 | | | * @param boolean If set, is the flag for whether to merge the "fancy" |
841 | | | * styles with the current styles or not |
842 | | | * @since 1.0.2 |
843 | | | */ |
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: a boolean in second position is really the merge
    // flag, and there is no separate "fancy" style
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        // Append to whatever is currently set for both kinds of line numbers
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    // Overwrite both the normal and the fancy line number style
    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
860 | | | |
861 | | | /** |
862 | | | * Sets whether line numbers should be displayed. |
863 | | | * |
864 | | | * Valid values for the first parameter are: |
865 | | | * |
866 | | | * - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed |
867 | | | * - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed |
868 | | | * - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed |
869 | | | * |
870 | | | * For fancy line numbers, the second parameter is used to signal which lines |
871 | | | * are to be fancy. For example, if the value of this parameter is 5 then every |
872 | | | * 5th line will be fancy. |
873 | | | * |
874 | | | * @param int How line numbers should be displayed |
875 | | | * @param int Defines which lines are fancy |
876 | | | * @since 1.0.0 |
877 | | | */ |
function enable_line_numbers($flag, $nth_row = 5) {
    $known_types = array(
        GESHI_NO_LINE_NUMBERS,
        GESHI_NORMAL_LINE_NUMBERS,
        GESHI_FANCY_LINE_NUMBERS
    );

    // Loose comparison is intentional here (no strict flag)
    if (!in_array($flag, $known_types)) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
    }

    // Note: both settings are stored even when the type was flagged as invalid
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
886 | | | |
887 | | | /** |
888 | | | * Sets whether spans and other HTML markup generated by GeSHi can |
889 | | | * span over multiple lines or not. Defaults to true to reduce overhead. |
890 | | | * Set it to false if you want to manipulate the output or manually display |
891 | | | * the code in an ordered list. |
892 | | | * |
893 | | | * @param boolean Whether multiline spans are allowed or not |
894 | | | * @since 1.0.7.22 |
895 | | | */ |
function enable_multiline_span($flag) {
    // Normalise the argument to a real boolean before storing it
    $this->allow_multiline_span = ($flag) ? true : false;
}
899 | | | |
900 | | | /** |
901 | | | * Get current setting for multiline spans, see GeSHi->enable_multiline_span(). |
902 | | | * |
903 | | | * @see enable_multiline_span |
904 | | | * @return bool |
905 | | | */ |
function get_multiline_span() {
    // Current value of the setting written by enable_multiline_span()
    $allowed = $this->allow_multiline_span;
    return $allowed;
}
909 | | | |
910 | | | /** |
911 | | | * Sets the style for a keyword group. If $preserve_defaults is |
912 | | | * true, then styles are merged with the default styles, with the |
913 | | | * user defined styles having priority |
914 | | | * |
915 | | | * @param int The key of the keyword group to change the styles of |
916 | | | * @param string The style to make the keywords |
917 | | | * @param boolean Whether to merge the new styles with the old or just |
918 | | | * to overwrite them |
919 | | | * @since 1.0.0 |
920 | | | */ |
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Merging is only possible when the group already has a style. Appending
    // to a missing entry would raise an undefined-index notice, so for a
    // group without a style the new style is simply stored.
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    // A group that has no lexic permission yet gets highlighting switched on;
    // an existing permission (on or off) is left untouched
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
934 | | | |
935 | | | /** |
936 | | | * Turns highlighting on/off for a keyword group |
937 | | | * |
938 | | | * @param int The key of the keyword group to turn on or off |
939 | | | * @param boolean Whether to turn highlighting for that group on or off |
940 | | | * @since 1.0.0 |
941 | | | */ |
function set_keyword_group_highlighting($key, $flag = true) {
    // Store the permission for this keyword group as a real boolean
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
945 | | | |
946 | | | /** |
947 | | | * Sets the styles for comment groups. If $preserve_defaults is |
948 | | | * true, then styles are merged with the default styles, with the |
949 | | | * user defined styles having priority |
950 | | | * |
951 | | | * @param int The key of the comment group to change the styles of |
952 | | | * @param string The style to make the comments |
953 | | | * @param boolean Whether to merge the new styles with the old or just |
954 | | | * to overwrite them |
955 | | | * @since 1.0.0 |
956 | | | */ |
function set_comments_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the style already stored for this comment group
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        return;
    }

    // Default mode: overwrite the style of this comment group
    $this->language_data['STYLES']['COMMENTS'][$key] = $style;
}
964 | | | |
965 | | | /** |
966 | | | * Turns highlighting on/off for comment groups |
967 | | | * |
968 | | | * @param int The key of the comment group to turn on or off |
969 | | | * @param boolean Whether to turn highlighting for that group on or off |
970 | | | * @since 1.0.0 |
971 | | | */ |
function set_comments_highlighting($key, $flag = true) {
    // Store the permission for this comment group as a real boolean
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
975 | | | |
976 | | | /** |
977 | | | * Sets the styles for escaped characters. If $preserve_defaults is |
978 | | | * true, then styles are merged with the default styles, with the |
979 | | | * user defined styles having priority |
980 | | | * |
981 | | | * @param string The style to make the escape characters |
982 | | | * @param boolean Whether to merge the new styles with the old or just |
983 | | | * to overwrite them |
984 | | | * @since 1.0.0 |
985 | | | */ |
function set_escape_characters_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the current escape character style
        $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
        return;
    }

    // Default mode: overwrite the escape character style
    $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
}
993 | | | |
994 | | | /** |
995 | | | * Turns highlighting on/off for escaped characters |
996 | | | * |
997 | | | * @param boolean Whether to turn highlighting for escape characters on or off |
998 | | | * @since 1.0.0 |
999 | | | */ |
function set_escape_characters_highlighting($flag = true) {
    // Store the permission for escape characters as a real boolean
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1003 | | | |
1004 | | | /** |
1005 | | | * Sets the styles for brackets. If $preserve_defaults is |
1006 | | | * true, then styles are merged with the default styles, with the |
1007 | | | * user defined styles having priority |
1008 | | | * |
1009 | | | * This method is DEPRECATED: use set_symbols_style instead. |
1010 | | | * This method will be removed in 1.2.X |
1011 | | | * |
1012 | | | * @param string The style to make the brackets |
1013 | | | * @param boolean Whether to merge the new styles with the old or just |
1014 | | | * to overwrite them |
1015 | | | * @since 1.0.0 |
1016 | | | * @deprecated In favour of set_symbols_style |
1017 | | | */ |
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the current bracket style
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }

    // Default mode: overwrite the bracket style
    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
1025 | | | |
1026 | | | /** |
1027 | | | * Turns highlighting on/off for brackets |
1028 | | | * |
1029 | | | * This method is DEPRECATED: use set_symbols_highlighting instead. |
1030 | | | * This method will be removed in 1.2.X |
1031 | | | * |
1032 | | | * @param boolean Whether to turn highlighting for brackets on or off |
1033 | | | * @since 1.0.0 |
1034 | | | * @deprecated In favour of set_symbols_highlighting |
1035 | | | */ |
function set_brackets_highlighting($flag) {
    // Store the permission for brackets as a real boolean
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1039 | | | |
1040 | | | /** |
1041 | | | * Sets the styles for symbols. If $preserve_defaults is |
1042 | | | * true, then styles are merged with the default styles, with the |
1043 | | | * user defined styles having priority |
1044 | | | * |
1045 | | | * @param string The style to make the symbols |
1046 | | | * @param boolean Whether to merge the new styles with the old or just |
1047 | | | * to overwrite them |
1048 | | | * @param int Tells the group of symbols for which style should be set. |
1049 | | | * @since 1.0.1 |
1050 | | | */ |
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        // Merge mode: append to the style stored for this symbol group
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        // Default mode: overwrite the style of this symbol group
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // Backward compatibility: only symbol group 0 is mirrored to the brackets
    if (0 != $group) {
        return;
    }
    $this->set_brackets_style($style, $preserve_defaults);
}
1064 | | | |
1065 | | | /** |
1066 | | | * Turns highlighting on/off for symbols |
1067 | | | * |
1068 | | | * @param boolean Whether to turn highlighting for symbols on or off |
1069 | | | * @since 1.0.0 |
1070 | | | */ |
function set_symbols_highlighting($flag) {
    // Store the permission for symbols as a real boolean
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // Backward compatibility: brackets follow the same setting
    $this->set_brackets_highlighting($flag);
}
1078 | | | |
1079 | | | /** |
1080 | | | * Sets the styles for strings. If $preserve_defaults is |
1081 | | | * true, then styles are merged with the default styles, with the |
1082 | | | * user defined styles having priority |
1083 | | | * |
1084 | | | * @param string The style to make the strings |
1085 | | | * @param boolean Whether to merge the new styles with the old or just |
1086 | | | * to overwrite them |
1087 | | | * @since 1.0.0 |
1088 | | | */ |
function set_strings_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the current string style
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
        return;
    }

    // Default mode: overwrite the string style
    $this->language_data['STYLES']['STRINGS'][0] = $style;
}
1096 | | | |
1097 | | | /** |
1098 | | | * Turns highlighting on/off for strings |
1099 | | | * |
1100 | | | * @param boolean Whether to turn highlighting for strings on or off |
1101 | | | * @since 1.0.0 |
1102 | | | */ |
function set_strings_highlighting($flag) {
    // Store the permission for strings as a real boolean
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1106 | | | |
1107 | | | /** |
1108 | | | * Sets the styles for numbers. If $preserve_defaults is |
1109 | | | * true, then styles are merged with the default styles, with the |
1110 | | | * user defined styles having priority |
1111 | | | * |
1112 | | | * @param string The style to make the numbers |
1113 | | | * @param boolean Whether to merge the new styles with the old or just |
1114 | | | * to overwrite them |
1115 | | | * @since 1.0.0 |
1116 | | | */ |
function set_numbers_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the current number style
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
        return;
    }

    // Default mode: overwrite the number style
    $this->language_data['STYLES']['NUMBERS'][0] = $style;
}
1124 | | | |
1125 | | | /** |
1126 | | | * Turns highlighting on/off for numbers |
1127 | | | * |
1128 | | | * @param boolean Whether to turn highlighting for numbers on or off |
1129 | | | * @since 1.0.0 |
1130 | | | */ |
function set_numbers_highlighting($flag) {
    // Store the permission for numbers as a real boolean
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
1134 | | | |
1135 | | | /** |
1136 | | | * Sets the styles for methods. $key is a number that references the |
1137 | | | * appropriate "object splitter" - see the language file for the language |
1138 | | | * you are highlighting to get this number. If $preserve_defaults is |
1139 | | | * true, then styles are merged with the default styles, with the |
1140 | | | * user defined styles having priority |
1141 | | | * |
1142 | | | * @param int The key of the object splitter to change the styles of |
1143 | | | * @param string The style to make the methods |
1144 | | | * @param boolean Whether to merge the new styles with the old or just |
1145 | | | * to overwrite them |
1146 | | | * @since 1.0.0 |
1147 | | | */ |
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        // Merge mode: append to the style stored for this object splitter
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }

    // Default mode: overwrite the style for this object splitter
    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
1155 | | | |
1156 | | | /** |
1157 | | | * Turns highlighting on/off for methods |
1158 | | | * |
1159 | | | * @param boolean Whether to turn highlighting for methods on or off |
1160 | | | * @since 1.0.0 |
1161 | | | */ |
function set_methods_highlighting($flag) {
    // Store the permission for methods as a real boolean
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
1165 | | | |
1166 | | | /** |
1167 | | | * Sets the styles for regexps. If $preserve_defaults is |
1168 | | | * true, then styles are merged with the default styles, with the |
1169 | | | * user defined styles having priority |
1170 | | | * |
1171 | | | * @param string The style to make the regular expression matches |
1172 | | | * @param boolean Whether to merge the new styles with the old or just |
1173 | | | * to overwrite them |
1174 | | | * @since 1.0.0 |
1175 | | | */ |
function set_regexps_style($key, $style, $preserve_defaults = false) {
    // $key selects the regular expression entry whose style is changed
    if ($preserve_defaults) {
        // Merge mode: append to the style stored under this key
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }

    // Default mode: overwrite the style stored under this key
    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
1183 | | | |
1184 | | | /** |
1185 | | | * Turns highlighting on/off for regexps |
1186 | | | * |
1187 | | | * @param int The key of the regular expression group to turn on or off |
1188 | | | * @param boolean Whether to turn highlighting for the regular expression group on or off |
1189 | | | * @since 1.0.0 |
1190 | | | */ |
function set_regexps_highlighting($key, $flag) {
    // Store the permission for this regular expression group as a real boolean
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
1194 | | | |
1195 | | | /** |
1196 | | | * Sets whether a set of keywords are checked for in a case sensitive manner |
1197 | | | * |
1198 | | | * @param int The key of the keyword group to change the case sensitivity of |
1199 | | | * @param boolean Whether to check in a case sensitive manner or not |
1200 | | | * @since 1.0.0 |
1201 | | | */ |
function set_case_sensitivity($key, $case) {
    // Store the case sensitivity of this keyword group as a real boolean
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
1205 | | | |
1206 | | | /** |
1207 | | | * Sets the case that keywords should use when found. Use the constants: |
1208 | | | * |
1209 | | | * - GESHI_CAPS_NO_CHANGE: leave keywords as-is |
1210 | | | * - GESHI_CAPS_UPPER: convert all keywords to uppercase where found |
1211 | | | * - GESHI_CAPS_LOWER: convert all keywords to lowercase where found |
1212 | | | * |
1213 | | | * @param int A constant specifying what to do with matched keywords |
1214 | | | * @since 1.0.1 |
1215 | | | */ |
function set_case_keywords($case) {
    $allowed_modes = array(
        GESHI_CAPS_NO_CHANGE,
        GESHI_CAPS_UPPER,
        GESHI_CAPS_LOWER
    );

    // Values that match none of the constants are silently ignored
    if (!in_array($case, $allowed_modes)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
1222 | | | |
1223 | | | /** |
1224 | | | * Sets how many spaces a tab is substituted for |
1225 | | | * |
1226 | | | * Widths below one are replaced by the default width of 8 |
1227 | | | * |
1228 | | | * @param int The tab width |
1229 | | | * @since 1.0.0 |
1230 | | | */ |
function set_tab_width($width) {
    $requested = intval($width);

    // Anything smaller than one does not fit the constraints and falls back
    // to the default of 8
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
1240 | | | |
1241 | | | /** |
1242 | | | * Sets whether or not to use tab-stop width specified by language |
1243 | | | * |
1244 | | | * @param boolean Whether to use language-specific tab-stop widths |
1245 | | | * @since 1.0.7.20 |
1246 | | | */ |
function set_use_language_tab_width($use) {
    // Normalise the argument to a real boolean before storing it
    $this->use_language_tab_width = ($use) ? true : false;
}
1250 | | | |
1251 | | | /** |
1252 | | | * Returns the tab width to use, based on the current language and user |
1253 | | | * preference |
1254 | | | * |
1255 | | | * @return int Tab width |
1256 | | | * @since 1.0.7.20 |
1257 | | | */ |
function get_real_tab_width() {
    // The language's own width wins only if the user asked for it AND the
    // language data actually defines one
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    // Otherwise use the width set on this object
    return $this->tab_width;
}
1266 | | | |
1267 | | | /** |
1268 | | | * Enables/disables strict highlighting. Default is off, calling this |
1269 | | | * method without parameters will turn it on. See documentation |
1270 | | | * for more details on strict mode and where to use it. |
1271 | | | * |
1272 | | | * @param boolean Whether to enable strict mode or not |
1273 | | | * @since 1.0.0 |
1274 | | | */ |
function enable_strict_mode($mode = true) {
    // The setting is only changed when the language data says GESHI_MAYBE;
    // for any other value the call is a no-op
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
1280 | | | |
1281 | | | /** |
1282 | | | * Disables all highlighting |
1283 | | | * |
1284 | | | * @since 1.0.0 |
1285 | | | * @todo Rewrite with array traversal |
1286 | | | * @deprecated In favour of enable_highlighting |
1287 | | | */ |
function disable_highlighting() {
    // Thin wrapper kept for compatibility: delegates to enable_highlighting()
    $flag = false;
    $this->enable_highlighting($flag);
}
1291 | | | |
1292 | | | /** |
1293 | | | * Enables all highlighting |
1294 | | | * |
1295 | | | * The optional flag parameter was added in version 1.0.7.21 and can be used |
1296 | | | * to enable (true) or disable (false) all highlighting. |
1297 | | | * |
1298 | | | * @since 1.0.0 |
1299 | | | * @param boolean A flag specifying whether to enable or disable all highlighting |
1300 | | | * @todo Rewrite with array traversal |
1301 | | | */ |
function enable_highlighting($flag = true) {
    $state = (bool) $flag;

    foreach ($this->lexic_permissions as $group => $permission) {
        // Scalar permission: a single on/off switch for the whole group
        if (!is_array($permission)) {
            $this->lexic_permissions[$group] = $state;
            continue;
        }

        // Array permission: one switch per entry of the group
        foreach (array_keys($permission) as $entry) {
            $this->lexic_permissions[$group][$entry] = $state;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $state;
}
1317 | | | |
1318 | | | /** |
1319 | | | * Given a file extension, this method returns either a valid geshi language |
1320 | | | * name, or the empty string if it couldn't be found |
1321 | | | * |
1322 | | | * @param string The extension to get a language name for |
1323 | | | * @param array A lookup array to use instead of the default one |
1324 | | | * @since 1.0.5 |
1325 | | | * @todo Re-think about how this method works (maybe make it private and/or make it |
1326 | | | * a extension->lang lookup?) |
1327 | | | * @todo static? |
1328 | | | */ |
function get_language_name_from_extension( $extension, $lookup = array() ) {
    // Returns the first language in $lookup (or in the default table when
    // $lookup is empty or not an array) that lists $extension, or '' if none
    // does. An exact match is preferred; if there is none, the lower-cased
    // extension is tried so that e.g. "PHP" still resolves to "php".
    if ( !is_array($lookup) || empty($lookup)) {
        $lookup = array(
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'h', 'hpp'),
            'csharp' => array(),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'dos' => array('bat', 'cmd'),
            'gettext' => array('po', 'pot'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'pascal' => array(),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'sas' => array('sas'),
            'scilab' => array('sci'),
            'smarty' => array(),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'xml' => array('xml')
        );
    }

    // null, arrays and objects can never name an extension
    if (!is_scalar($extension)) {
        return '';
    }
    $extension = (string) $extension;

    // Exact spelling first, so a lookup table that distinguishes case keeps
    // working; the lower-cased spelling is only a fallback
    $candidates = array($extension);
    $lowered = strtolower($extension);
    if ($lowered !== $extension) {
        $candidates[] = $lowered;
    }

    foreach ($candidates as $candidate) {
        foreach ($lookup as $lang => $extensions) {
            // Strict comparison: a loose in_array() lets values such as
            // boolean true match the first non-empty extension list
            if (in_array($candidate, (array) $extensions, true)) {
                return $lang;
            }
        }
    }
    return '';
}
1386 | | | |
1387 | | | /** |
1388 | | | * Given a file name, this method loads its contents in, and attempts |
1389 | | | * to set the language automatically. An optional lookup table can be |
1390 | | | * passed for looking up the language name. If not specified a default |
1391 | | | * table is used |
1392 | | | * |
1393 | | | * The language table is in the form |
1394 | | | * <pre>array( |
1395 | | | * 'lang_name' => array('extension', 'extension', ...), |
1396 | | | * 'lang_name' ... |
1397 | | | * );</pre> |
1398 | | | * |
1399 | | | * @param string The filename to load the source from |
1400 | | | * @param array A lookup array to use instead of the default one |
1401 | | | * @todo Complete rethink of this and above method |
1402 | | | * @since 1.0.5 |
1403 | | | */ |
function load_from_file($file_name, $lookup = array()) {
    // Loads the file as the source and derives the language from its
    // extension. Sets GESHI_ERROR_FILE_NOT_READABLE when the file cannot be
    // read.

    // The read itself is checked as well: it can still fail after the
    // readability test, and a false result must not be used as source
    $contents = is_readable($file_name) ? file_get_contents($file_name) : false;
    if (false === $contents) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source($contents);

    // pathinfo() only inspects the last path component, so a dot in a
    // directory name (e.g. "conf.d/file") is not mistaken for an extension
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
1412 | | | |
1413 | | | /** |
1414 | | | * Adds a keyword to a keyword group for highlighting |
1415 | | | * |
1416 | | | * @param int The key of the keyword group to add the keyword to |
1417 | | | * @param string The word to add to the keyword group |
1418 | | | * @since 1.0.0 |
1419 | | | */ |
function add_keyword($key, $word) {
    // Adds $word to keyword group $key unless it is already listed.

    // A group that does not exist yet starts out as an empty list, so the
    // membership test below never runs on a missing entry
    if (!isset($this->language_data['KEYWORDS'][$key])
        || !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    // Strict comparison: loosely compared, numeric-looking keywords such as
    // "10" and "1e1" would count as the same word
    if (in_array($word, $this->language_data['KEYWORDS'][$key], true)) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    // Nothing is cached before the parse cache is built
    if (!$this->parse_cache_built) {
        return;
    }

    // No cached list to append to (it would be addressed with index -1):
    // compile the group from scratch instead
    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
1431 | | | |
1432 | | | /** |
1433 | | | * Removes a keyword from a keyword group |
1434 | | | * |
1435 | | | * @param int The key of the keyword group to remove the keyword from |
1436 | | | * @param string The word to remove from the keyword group |
1437 | | | * @param bool Whether to automatically recompile the optimized regexp list or not. |
1438 | | | * Note: if you set this to false and @see GeSHi->parse_code() was already called once, |
1439 | | | * for the current language, you have to manually call @see GeSHi->optimize_keyword_group() |
1440 | | | * or the removed keyword will stay in cache and still be highlighted! On the other hand |
1441 | | | * it might be too expensive to recompile the regexp list for every removal if you want to |
1442 | | | * remove a lot of keywords. |
1443 | | | * @since 1.0.0 |
1444 | | | */ |
function remove_keyword($key, $word, $recompile = true) {
    // Removes $word from keyword group $key; does nothing when the group or
    // the word does not exist.
    if (!isset($this->language_data['KEYWORDS'][$key])
        || !is_array($this->language_data['KEYWORDS'][$key])) {
        return;
    }

    // Strict search: a loose search could find (and remove) a different
    // numeric-looking keyword, e.g. "1e1" when asked for "10"
    $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key], true);
    if ($key_to_remove === false) {
        return;
    }
    unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($recompile && $this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
1456 | | | |
1457 | | | /** |
1458 | | | * Creates a new keyword group |
1459 | | | * |
1460 | | | * @param int The key of the keyword group to create |
1461 | | | * @param string The styles for the keyword group |
1462 | | | * @param boolean Whether the keyword group is case sensitive or not |
1463 | | | * @param array The words to use for the keyword group |
1464 | | | * @since 1.0.0 |
1465 | | | */ |
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;

    // empty word lists mess up highlighting, so such a group is rejected;
    // note that this is the only path with an explicit return value
    if (count($word_list) == 0) {
        return false;
    }

    // Register the group: words, case sensitivity, style and permission
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    // NEW in 1.0.8: once the parse cache exists, cache the regexp for the
    // new group as well
    if (!$this->parse_cache_built) {
        return;
    }
    $this->optimize_keyword_group($key);
}
1484 | | | |
1485 | | | /** |
1486 | | | * Removes a keyword group |
1487 | | | * |
1488 | | | * @param int The key of the keyword group to remove |
1489 | | | * @since 1.0.0 |
1490 | | | */ |
function remove_keyword_group ($key) {
    // Drop everything stored for this keyword group: its words, permission,
    // case sensitivity and style, plus (NEW in 1.0.8) the cached keyword list
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1501 | | | |
1502 | | | /** |
1503 | | | * compile optimized regexp list for keyword group |
1504 | | | * |
1505 | | | * @param int The key of the keyword group to compile & optimize |
1506 | | | * @since 1.0.8 |
1507 | | | */ |
function optimize_keyword_group($key) {
    // Compile the group's keywords and store the result as its cached list
    $compiled = $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] = $compiled;
}
1512 | | | |
1513 | | | /** |
1514 | | | * Sets the content of the header block |
1515 | | | * |
1516 | | | * @param string The content of the header block |
1517 | | | * @since 1.0.2 |
1518 | | | */ |
function set_header_content($content) {
    // Record the content of the header block
    $this->header_content = $content;
}
1522 | | | |
1523 | | | /** |
1524 | | | * Sets the content of the footer block |
1525 | | | * |
1526 | | | * @param string The content of the footer block |
1527 | | | * @since 1.0.2 |
1528 | | | */ |
function set_footer_content($content) {
    // Record the content of the footer block
    $this->footer_content = $content;
}
1532 | | | |
1533 | | | /** |
1534 | | | * Sets the style for the header content |
1535 | | | * |
1536 | | | * @param string The style for the header content |
1537 | | | * @since 1.0.2 |
1538 | | | */ |
function set_header_content_style($style) {
    // Record the style for the header content
    $this->header_content_style = $style;
}
1542 | | | |
1543 | | | /** |
1544 | | | * Sets the style for the footer content |
1545 | | | * |
1546 | | | * @param string The style for the footer content |
1547 | | | * @since 1.0.2 |
1548 | | | */ |
function set_footer_content_style($style) {
    // Record the style for the footer content
    $this->footer_content_style = $style;
}
1552 | | | |
1553 | | | /** |
1554 | | | * Sets whether to force a surrounding block around |
1555 | | | * the highlighted code or not |
1556 | | | * |
1557 | | | * @param boolean Tells whether to enable or disable this feature |
1558 | | | * @since 1.0.7.20 |
1559 | | | */ |
function enable_inner_code_block($flag) {
    // Normalise the argument to a real boolean before storing it
    $this->force_code_block = ($flag) ? true : false;
}
1563 | | | |
1564 | | | /** |
1565 | | | * Sets the base URL to be used for keywords |
1566 | | | * |
1567 | | | * @param int The key of the keyword group to set the URL for |
1568 | | | * @param string The URL to set for the group. If {FNAME} is in |
1569 | | | * the url somewhere, it is replaced by the keyword |
1570 | | | * that the URL is being made for |
1571 | | | * @since 1.0.2 |
1572 | | | */ |
function set_url_for_keyword_group($group, $url) {
    // Record the URL for this keyword group
    $this->language_data['URLS'][$group] = $url;
}
1576 | | | |
1577 | | | /** |
1578 | | | * Sets styles for links in code |
1579 | | | * |
1580 | | | * @param int A constant that specifies what state the style is being |
1581 | | | * set for - e.g. :hover or :visited |
1582 | | | * @param string The styles to use for that state |
1583 | | | * @since 1.0.2 |
1584 | | | */ |
function set_link_styles($type, $styles) {
    // Record the styles under the given link state
    $this->link_styles[$type] = $styles;
}
1588 | | | |
/**
 * Defines the target for links in the code.
 *
 * A non-empty target is stored as a ready-made attribute string
 * (leading and trailing space included); any falsy value clears the
 * stored attribute so that no target is emitted.
 *
 * @param string $target The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target
        ? ' target="' . $target . '" '
        : '';
}
1602 | | | |
/**
 * Defines the styles for important parts of the code.
 *
 * The value is stored unchanged in $this->important_styles.
 *
 * @param string $styles The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    $this->important_styles = $styles;
}
1612 | | | |
/**
 * Switches highlighting of context-important blocks on or off.
 *
 * The argument is normalised to a real boolean before it is stored in
 * $this->enable_important_blocks.
 *
 * @param boolean $flag Whether to enable or disable highlighting of important blocks
 * @todo REMOVE THIS SHIZ FROM GESHI!
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
1624 | | | |
/**
 * Controls whether CSS IDs should be added to each line.
 *
 * The argument is normalised to a real boolean before it is stored in
 * $this->add_ids.
 *
 * @param boolean $flag If true (the default), IDs will be added to each line
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
1634 | | | |
/**
 * Marks one or more lines for extra highlighting.
 *
 * An array argument is processed by calling this method once per
 * element with the same style. A single value is converted with
 * intval() and used as the key in both bookkeeping arrays.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a
 *                      line number on its own
 * @param string $style The style to use for this line.
 *                      null:  use the default style (any per-line style
 *                             stored for the line is dropped)
 *                      false: remove the line from special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    // A list of lines: handle every entry individually
    if (is_array($lines)) {
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    // Register the line as specially highlighted
    $line_no = intval($lines);
    $this->highlight_extra_lines[$line_no] = $line_no;

    // false means: take the line out of special highlighting again
    if ($style === false) {
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    // null means: fall back to the default style, so forget any
    // individual style that was stored for this line
    if ($style === null) {
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    $this->highlight_extra_lines_styles[$line_no] = $style;
}
1671 | | | |
/**
 * Defines the default style for extra-highlighted lines.
 *
 * The value is stored unchanged in $this->highlight_extra_lines_style.
 *
 * @param string $styles The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    $this->highlight_extra_lines_style = $styles;
}
1681 | | | |
/**
 * Defines the line-ending.
 *
 * The argument is cast to a string before it is stored in
 * $this->line_ending.
 *
 * @param string $line_ending The new line-ending
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    $ending = (string) $line_ending;
    $this->line_ending = $ending;
}
1691 | | | |
/**
 * Defines the number at which line numbering starts. The argument is
 * converted to an integer and its absolute value is stored, so
 * negative input ends up as the corresponding non-negative number.
 *
 * <b>Warning:</b> Using this method will add the "start" attribute to
 * the <ol> that is used for line numbering. This is <b>not</b> valid
 * XHTML strict, so do not use this method if that matters to you.
 * Firefox is getting support for the CSS way of doing this in 1.1 and
 * Opera already supports it, but IE does not, so the CSS way is not
 * worth using yet.
 *
 * @param int $number The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $first_number = intval($number);
    $this->line_numbers_start = abs($first_number);
}
1710 | | | |
/**
 * Defines the encoding used for htmlspecialchars(), for international
 * support. The encoding is stored lower-cased; a falsy argument leaves
 * the current setting untouched.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been
 * patched). Maybe in a future version it may make a return for speed
 * reasons, but I doubt it.
 *
 * @param string $encoding The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }
    $this->encoding = strtolower($encoding);
}
1728 | | | |
/**
 * Switches linking of keywords on or off.
 *
 * The argument is normalised to a real boolean before it is stored in
 * $this->keyword_links.
 *
 * @param boolean $enable If true (the default), links will be added to keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    $links_enabled = (bool) $enable;
    $this->keyword_links = $links_enabled;
}
1738 | | | |
/**
 * Prepares the caches needed for styling. This is automatically called
 * in parse_code() and get_stylesheet() when appropriate. Stylesheet
 * generators benefit from it because they rely on some style
 * information being preprocessed.
 *
 * Only number highlighting is handled here. The method fills
 * $this->language_data['NUMBERS_CACHE'] and, while doing so, may
 * re-index $this->language_data['STYLES']['NUMBERS'] from flag values
 * (1 << n) to bit positions (n).
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    // Without number highlighting there is nothing to prepare
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    // A language that says nothing about numbers is treated as "0"
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    // An array is taken over as the cache without further processing
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();

    // No flags given: fall back to basic integers and non-scientific floats
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC |
            GESHI_NUMBER_FLT_NONSCI;
    }

    // Walk the bitfield from the lowest bit upwards; $remaining holds the
    // bits that have not been inspected yet, $bit is the current position.
    $remaining = $this->language_data['NUMBERS'];
    for ($bit = 0; $remaining > 0; ++$bit, $remaining >>= 1) {
        $flag = 1 << $bit;

        // A style stored under the flag value is moved to the bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag];
            unset($this->language_data['STYLES']['NUMBERS'][$flag]);
        }

        // Bits that are not set do not take part in highlighting
        if (!($remaining & 1)) {
            continue;
        }

        if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
            // The flag has a style of its own, so it gets its own group
            $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
            continue;
        }

        // No individual style: collect the flag in group 0
        if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
            $this->language_data['NUMBERS_CACHE'][0] = 0;
        }
        $this->language_data['NUMBERS_CACHE'][0] |= $flag;
    }
}
1791 | | | |
/**
 * Prepares the caches needed for parsing. This is automatically called
 * in parse_code() when appropriate. Keeping these caches apart from the
 * style cache makes stylesheet generators much faster, as they do not
 * need them.
 *
 * Depending on the lexic permissions, the following entries of
 * $this->language_data are (re)built:
 *  - MULTIPLE_SYMBOL_GROUPS, SYMBOL_DATA, SYMBOL_SEARCH (symbols)
 *  - CACHED_KEYWORD_LISTS (reset here, then optimize_keyword_group()
 *    is called for every enabled keyword group)
 *  - CACHE_BRACKET_MATCH, CACHE_BRACKET_REPLACE (brackets)
 *  - NUMBERS_RXCACHE (numbers)
 * Finally $this->parse_cache_built is set to true.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // ---- Symbols ------------------------------------------------------
    // Building the symbol regexp is costly, so it is done once for all
    // symbols together instead of once per symbol group. For that the
    // data arrays have to be reorganised first.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char_rx = array();  // quoted symbols of two or more chars
        $single_char_rx = array(); // quoted symbols of exactly one char
        foreach ($this->language_data['SYMBOLS'] as $group => $entry) {
            if (is_array($entry)) {
                // A group of symbols: they are styled with the group key
                $style_group = $group;
                $candidates = $entry;
            } else {
                // A lone symbol always belongs to style group 0
                $style_group = 0;
                $candidates = array($entry);
            }

            foreach ($candidates as $symbol) {
                $symbol = $this->hsc($symbol);
                if (isset($this->language_data['SYMBOL_DATA'][$symbol])) {
                    // The first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$symbol] = $style_group;

                if (isset($symbol[1])) {
                    // multiple chars
                    $multi_char_rx[] = preg_quote($symbol, '/');
                } else if ($symbol == '-') {
                    // don't trigger range out of order error
                    $single_char_rx[] = '\-';
                } else {
                    // single char
                    $single_char_rx[] = preg_quote($symbol, '/');
                }
            }
        }

        // SYMBOL_DATA now maps every symbol to its style group, so the
        // correct style can be looked up at the moment of highlighting.
        //
        // Next the quoted symbols are turned into one search pattern:
        // multi-char symbols as an alternation, single chars as one
        // character class. Both lists are reverse-sorted first
        // (presumably so that longer symbols are tried before symbols
        // that are a prefix of them).
        $alternatives = array();
        if ($multi_char_rx) {
            rsort($multi_char_rx);
            $alternatives[] = implode('|', $multi_char_rx);
        }
        if ($single_char_rx) {
            rsort($single_char_rx);
            $alternatives[] = '[' . implode('', $single_char_rx) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // ---- Keywords -----------------------------------------------------
    // Throw away the old cache, then optimise every keyword group that
    // has not been explicitly disabled.
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
        if (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            !$this->lexic_permissions['KEYWORDS'][$group]) {
            continue;
        }
        $this->optimize_keyword_group($group);
    }

    // ---- Brackets -----------------------------------------------------
    if ($this->lexic_permissions['BRACKETS']) {
        $brackets = array('[', ']', '(', ')', '{', '}');
        $this->language_data['CACHE_BRACKET_MATCH'] = $brackets;

        // Inline style when classes are off and a style exists,
        // otherwise the CSS class
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $attribute = 'style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $attribute = 'class="br0"';
        }

        // One replacement per bracket, in the same order as the matches
        $replacements = array();
        foreach ($brackets as $bracket) {
            $replacements[] = '<| ' . $attribute . '>' . $bracket . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
    }

    // ---- Numbers ------------------------------------------------------
    if ($this->lexic_permissions['NUMBERS']) {
        // The style rearrangements must have been processed already;
        // that step also determines which style groups are usable.
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // Number format specification, one regexp per number flag.
        // All of these formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
            );

        // NUMBERS_CACHE holds, per style group, either a bitfield of
        // number flags or a string that already is a regexp.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $style_group => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                // Bitfield: collect the regexp of every set flag and
                // join them into one alternation
                $selected = array();
                for ($flag = 1; $flag <= $rxdata; $flag <<= 1) {
                    if ($rxdata & $flag) {
                        $selected[] = $numbers_format[$flag];
                    }
                }
                $regexp = implode("|", $selected);
            }

            $this->language_data['NUMBERS_RXCACHE'][$style_group] =
                "/(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)/i";
        }
    }

    $this->parse_cache_built = true;
}
1960 | | | |
1961 | | | /** |
1962 | | | * Returns the code in $this->source, highlighted and surrounded by the |
1963 | | | * necessary HTML.
1964 | | | * |
1965 | | | * This should only be called ONCE, cos it's SLOW! If you want to highlight |
1966 | | | * the same source multiple times, you're better off doing a whole lot of |
1967 | | | * str_replaces to replace the <span>s |
1968 | | | * |
1969 | | | * @since 1.0.0 |
1970 | | | */ |
1971 | | | function parse_code () { |
1972 | | | // Start the timer |
1973 | | | $start_time = microtime(); |
1974 | | | |
1975 | | | // Firstly, if there is an error, we won't highlight |
1976 | | | if ($this->error) { |
1977 | | | //Escape the source for output |
1978 | | | $result = $this->hsc($this->source); |
1979 | | | |
1980 | | | //This fix is related to SF#1923020, but has to be applied regardless of |
1981 | | | //actually highlighting symbols. |
1982 | | | $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result); |
1983 | | | |
1984 | | | // Timing is irrelevant |
1985 | | | $this->set_time($start_time, $start_time); |
1986 | | | $this->finalise($result); |
1987 | | | return $result; |
1988 | | | } |
1989 | | | |
1990 | | | // make sure the parse cache is up2date |
1991 | | | if (!$this->parse_cache_built) { |
1992 | | | $this->build_parse_cache(); |
1993 | | | } |
1994 | | | |
1995 | | | // Replace all newlines to a common form. |
1996 | | | $code = str_replace("\r\n", "\n", $this->source); |
1997 | | | $code = str_replace("\r", "\n", $code); |
1998 | | | |
1999 | | | // Add spaces for regular expression matching and line numbers |
2000 | | | // $code = "\n" . $code . "\n"; |
2001 | | | |
2002 | | | // Initialise various stuff |
2003 | | | $length = strlen($code); |
2004 | | | $COMMENT_MATCHED = false; |
2005 | | | $stuff_to_parse = ''; |
2006 | | | $endresult = ''; |
2007 | | | |
2008 | | | // "Important" selections are handled like multiline comments |
2009 | | | // @todo GET RID OF THIS SHIZ |
2010 | | | if ($this->enable_important_blocks) { |
2011 | | | $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT; |
2012 | | | } |
2013 | | | |
2014 | | | if ($this->strict_mode) { |
2015 | | | // Break the source into bits. Each bit will be a portion of the code |
2016 | | | // within script delimiters - for example, HTML between < and > |
2017 | | | $k = 0; |
2018 | | | $parts = array(); |
2019 | | | $matches = array(); |
2020 | | | $next_match_pointer = null; |
2021 | | | // we use a copy to unset delimiters on demand (when they are not found) |
2022 | | | $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; |
2023 | | | $i = 0; |
2024 | | | while ($i < $length) { |
2025 | | | $next_match_pos = $length + 1; // never true |
2026 | | | foreach ($delim_copy as $dk => $delimiters) { |
2027 | | | if(is_array($delimiters)) { |
2028 | | | foreach ($delimiters as $open => $close) { |
2029 | | | // make sure the cache is setup properly |
2030 | | | if (!isset($matches[$dk][$open])) { |
2031 | | | $matches[$dk][$open] = array( |
2032 | | | 'next_match' => -1, |
2033 | | | 'dk' => $dk, |
2034 | | | |
2035 | | | 'open' => $open, // needed for grouping of adjacent code blocks (see below) |
2036 | | | 'open_strlen' => strlen($open), |
2037 | | | |
2038 | | | 'close' => $close, |
2039 | | | 'close_strlen' => strlen($close), |
2040 | | | ); |
2041 | | | } |
2042 | | | // Get the next little bit for this opening string |
2043 | | | if ($matches[$dk][$open]['next_match'] < $i) { |
2044 | | | // only find the next pos if it was not already cached |
2045 | | | $open_pos = strpos($code, $open, $i); |
2046 | | | if ($open_pos === false) { |
2047 | | | // no match for this delimiter ever |
2048 | | | unset($delim_copy[$dk][$open]); |
2049 | | | continue; |
2050 | | | } |
2051 | | | $matches[$dk][$open]['next_match'] = $open_pos; |
2052 | | | } |
2053 | | | if ($matches[$dk][$open]['next_match'] < $next_match_pos) { |
2054 | | | //So we got a new match, update the close_pos |
2055 | | | $matches[$dk][$open]['close_pos'] = |
2056 | | | strpos($code, $close, $matches[$dk][$open]['next_match']+1); |
2057 | | | |
2058 | | | $next_match_pointer =& $matches[$dk][$open]; |
2059 | | | $next_match_pos = $matches[$dk][$open]['next_match']; |
2060 | | | } |
2061 | | | } |
2062 | | | } else { |
2063 | | | //So we should match an RegExp as Strict Block ... |
2064 | | | /** |
2065 | | | * The value in $delimiters is expected to be an RegExp |
2066 | | | * containing exactly 2 matching groups: |
2067 | | | * - Group 1 is the opener |
2068 | | | * - Group 2 is the closer |
2069 | | | */ |
2070 | | | if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work. |
2071 | | | preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) { |
2072 | | | //We got a match ... |
2073 | | | $matches[$dk] = array( |
2074 | | | 'next_match' => $matches_rx[1][1], |
2075 | | | 'dk' => $dk, |
2076 | | | |
2077 | | | 'close_strlen' => strlen($matches_rx[2][0]), |
2078 | | | 'close_pos' => $matches_rx[2][1], |
2079 | | | ); |
2080 | | | } else { |
2081 | | | // no match for this delimiter ever |
2082 | | | unset($delim_copy[$dk]); |
2083 | | | continue; |
2084 | | | } |
2085 | | | |
2086 | | | if ($matches[$dk]['next_match'] <= $next_match_pos) { |
2087 | | | $next_match_pointer =& $matches[$dk]; |
2088 | | | $next_match_pos = $matches[$dk]['next_match']; |
2089 | | | } |
2090 | | | } |
2091 | | | } |
2092 | | | // non-highlightable text |
2093 | | | $parts[$k] = array( |
2094 | | | 1 => substr($code, $i, $next_match_pos - $i) |
2095 | | | ); |
2096 | | | ++$k; |
2097 | | | |
2098 | | | if ($next_match_pos > $length) { |
2099 | | | // out of bounds means no next match was found |
2100 | | | break; |
2101 | | | } |
2102 | | | |
2103 | | | // highlightable code |
2104 | | | $parts[$k][0] = $next_match_pointer['dk']; |
2105 | | | |
2106 | | | //Only combine for non-rx script blocks |
2107 | | | if(is_array($delim_copy[$next_match_pointer['dk']])) { |
2108 | | | // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three! |
2109 | | | $i = $next_match_pos + $next_match_pointer['open_strlen']; |
2110 | | | while (true) { |
2111 | | | $close_pos = strpos($code, $next_match_pointer['close'], $i); |
2112 | | | if ($close_pos == false) { |
2113 | | | break; |
2114 | | | } |
2115 | | | $i = $close_pos + $next_match_pointer['close_strlen']; |
2116 | | | if ($i == $length) { |
2117 | | | break; |
2118 | | | } |
2119 | | | if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 || |
2120 | | | substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) { |
2121 | | | // merge adjacent but make sure we don't merge things like <tag><!-- comment --> |
2122 | | | foreach ($matches as $submatches) { |
2123 | | | foreach ($submatches as $match) { |
2124 | | | if ($match['next_match'] == $i) { |
2125 | | | // a different block already matches here! |
2126 | | | break 3; |
2127 | | | } |
2128 | | | } |
2129 | | | } |
2130 | | | } else { |
2131 | | | break; |
2132 | | | } |
2133 | | | } |
2134 | | | } else { |
2135 | | | $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen']; |
2136 | | | $i = $close_pos; |
2137 | | | } |
2138 | | | |
2139 | | | if ($close_pos === false) { |
2140 | | | // no closing delimiter found! |
2141 | | | $parts[$k][1] = substr($code, $next_match_pos); |
2142 | | | ++$k; |
2143 | | | break; |
2144 | | | } else { |
2145 | | | $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos); |
2146 | | | ++$k; |
2147 | | | } |
2148 | | | } |
2149 | | | unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); |
2150 | | | $num_parts = $k; |
2151 | | | |
2152 | | | if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { |
2153 | | | // when we have only one part, we don't have anything to highlight at all. |
2154 | | | // if we have a "maybe" strict language, this should be handled as highlightable code |
2155 | | | $parts = array( |
2156 | | | |
2157 | | | |
2158 | | | 1 => '' |
2159 | | | ), |
2160 | | | 1 => array( |
2161 | | | |
2162 | | | 1 => $parts[0][1] |
2163 | | | ) |
2164 | | | ); |
2165 | | | $num_parts = 2; |
2166 | | | } |
2167 | | | |
2168 | | | } else { |
2169 | | | // Not strict mode - simply dump the source into |
2170 | | | // the array at index 1 (the first highlightable block) |
2171 | | | $parts = array( |
2172 | | | |
2173 | | | |
2174 | | | 1 => '' |
2175 | | | ), |
2176 | | | 1 => array( |
2177 | | | |
2178 | | | 1 => $code |
2179 | | | ) |
2180 | | | ); |
2181 | | | $num_parts = 2; |
2182 | | | } |
2183 | | | |
2184 | | | //Unset variables we won't need any longer |
2185 | | | unset($code); |
2186 | | | |
2187 | | | //Preload some repeatedly used values regarding hardquotes ... |
2188 | | | $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false; |
2189 | | | $hq_strlen = strlen($hq); |
2190 | | | |
2191 | | | //Preload if line numbers are to be generated afterwards |
2192 | | | //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398 |
2193 | | | $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || |
2194 | | | !empty($this->highlight_extra_lines) || !$this->allow_multiline_span; |
2195 | | | |
2196 | | | //preload the escape char for faster checking ... |
2197 | | | $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); |
2198 | | | |
2199 | | | // this is used for single-line comments |
2200 | | | $sc_disallowed_before = ""; |
2201 | | | $sc_disallowed_after = ""; |
2202 | | | |
2203 | | | if (isset($this->language_data['PARSER_CONTROL'])) { |
2204 | | | if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { |
2205 | | | if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) { |
2206 | | | $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE']; |
2207 | | | } |
2208 | | | if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) { |
2209 | | | $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER']; |
2210 | | | } |
2211 | | | } |
2212 | | | } |
2213 | | | |
2214 | | | //Fix for SF#1932083: Multichar Quotemarks unsupported |
2215 | | | $is_string_starter = array(); |
2216 | | | if ($this->lexic_permissions['STRINGS']) { |
2217 | | | foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { |
2218 | | | if (!isset($is_string_starter[$quotemark[0]])) { |
2219 | | | $is_string_starter[$quotemark[0]] = (string)$quotemark; |
2220 | | | } else if (is_string($is_string_starter[$quotemark[0]])) { |
2221 | | | $is_string_starter[$quotemark[0]] = array( |
2222 | | | $is_string_starter[$quotemark[0]], |
2223 | | | $quotemark); |
2224 | | | } else { |
2225 | | | $is_string_starter[$quotemark[0]][] = $quotemark; |
2226 | | | } |
2227 | | | } |
2228 | | | } |
2229 | | | |
2230 | | | // Now we go through each part. We know that even-indexed parts are |
2231 | | | // code that shouldn't be highlighted, and odd-indexed parts should |
2232 | | | // be highlighted |
2233 | | | for ($key = 0; $key < $num_parts; ++$key) { |
2234 | | | $STRICTATTRS = ''; |
2235 | | | |
2236 | | | // If this block should be highlighted... |
2237 | | | if (!($key & 1)) { |
2238 | | | // Else not a block to highlight |
2239 | | | $endresult .= $this->hsc($parts[$key][1]); |
2240 | | | unset($parts[$key]); |
2241 | | | continue; |
2242 | | | } |
2243 | | | |
2244 | | | $result = ''; |
2245 | | | $part = $parts[$key][1]; |
2246 | | | |
2247 | | | $highlight_part = true; |
2248 | | | if ($this->strict_mode && !is_null($parts[$key][0])) { |
2249 | | | // get the class key for this block of code |
2250 | | | $script_key = $parts[$key][0]; |
2251 | | | $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]; |
2252 | | | if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' && |
2253 | | | $this->lexic_permissions['SCRIPT']) { |
2254 | | | // Add a span element around the source to |
2255 | | | // highlight the overall source block |
2256 | | | if (!$this->use_classes && |
2257 | | | $this->language_data['STYLES']['SCRIPT'][$script_key] != '') { |
2258 | | | $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"'; |
2259 | | | } else { |
2260 | | | $attributes = ' class="sc' . $script_key . '"'; |
2261 | | | } |
2262 | | | $result .= "<span$attributes>"; |
2263 | | | $STRICTATTRS = $attributes; |
2264 | | | } |
2265 | | | } |
2266 | | | |
2267 | | | if ($highlight_part) { |
2268 | | | // Now, highlight the code in this block. This code |
2269 | | | // is really the engine of GeSHi (along with the method |
2270 | | | // parse_non_string_part). |
2271 | | | |
2272 | | | // cache comment regexps incrementally |
2273 | | | $next_comment_regexp_key = ''; |
2274 | | | $next_comment_regexp_pos = -1; |
2275 | | | $next_comment_multi_pos = -1; |
2276 | | | $next_comment_single_pos = -1; |
2277 | | | $comment_regexp_cache_per_key = array(); |
2278 | | | $comment_multi_cache_per_key = array(); |
2279 | | | $comment_single_cache_per_key = array(); |
2280 | | | $next_open_comment_multi = ''; |
2281 | | | $next_comment_single_key = ''; |
2282 | | | $escape_regexp_cache_per_key = array(); |
2283 | | | $next_escape_regexp_key = ''; |
2284 | | | $next_escape_regexp_pos = -1; |
2285 | | | |
2286 | | | $length = strlen($part); |
2287 | | | for ($i = 0; $i < $length; ++$i) { |
2288 | | | // Get the next char |
2289 | | | $char = $part[$i]; |
2290 | | | $char_len = 1; |
2291 | | | |
2292 | | | $string_started = false; |
2293 | | | |
2294 | | | if (isset($is_string_starter[$char])) { |
2295 | | | // Possibly the start of a new string ... |
2296 | | | |
2297 | | | //Check which starter it was ... |
2298 | | | //Fix for SF#1932083: Multichar Quotemarks unsupported |
2299 | | | if (is_array($is_string_starter[$char])) { |
2300 | | | $char_new = ''; |
2301 | | | foreach ($is_string_starter[$char] as $testchar) { |
2302 | | | if ($testchar === substr($part, $i, strlen($testchar)) && |
2303 | | | strlen($testchar) > strlen($char_new)) { |
2304 | | | $char_new = $testchar; |
2305 | | | $string_started = true; |
2306 | | | } |
2307 | | | } |
2308 | | | if ($string_started) { |
2309 | | | $char = $char_new; |
2310 | | | } |
2311 | | | } else { |
2312 | | | $testchar = $is_string_starter[$char]; |
2313 | | | if ($testchar === substr($part, $i, strlen($testchar))) { |
2314 | | | $char = $testchar; |
2315 | | | $string_started = true; |
2316 | | | } |
2317 | | | } |
2318 | | | $char_len = strlen($char); |
2319 | | | } |
2320 | | | |
2321 | | | if ($string_started) { |
2322 | | | // Hand out the correct style information for this string |
2323 | | | $string_key = array_search($char, $this->language_data['QUOTEMARKS']); |
2324 | | | if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) || |
2325 | | | !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) { |
2326 | | | $string_key = 0; |
2327 | | | } |
2328 | | | |
2329 | | | // parse the stuff before this |
2330 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2331 | | | $stuff_to_parse = ''; |
2332 | | | |
2333 | | | if (!$this->use_classes) { |
2334 | | | $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"'; |
2335 | | | } else { |
2336 | | | $string_attributes = ' class="st'.$string_key.'"'; |
2337 | | | } |
2338 | | | |
2339 | | | // now handle the string |
2340 | | | $string = "<span$string_attributes>" . GeSHi::hsc($char); |
2341 | | | $start = $i + $char_len; |
2342 | | | $string_open = true; |
2343 | | | |
2344 | | | if(empty($this->language_data['ESCAPE_REGEXP'])) { |
2345 | | | $next_escape_regexp_pos = $length; |
2346 | | | } |
2347 | | | |
2348 | | | do { |
2349 | | | //Get the regular ending pos ... |
2350 | | | $close_pos = strpos($part, $char, $start); |
2351 | | | if(false === $close_pos) { |
2352 | | | $close_pos = $length; |
2353 | | | } |
2354 | | | |
2355 | | | if($this->lexic_permissions['ESCAPE_CHAR']) { |
2356 | | | // update escape regexp cache if needed |
2357 | | | if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) { |
2358 | | | $next_escape_regexp_pos = $length; |
2359 | | | foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) { |
2360 | | | $match_i = false; |
2361 | | | if (isset($escape_regexp_cache_per_key[$escape_key]) && |
2362 | | | ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start || |
2363 | | | $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) { |
2364 | | | // we have already matched something |
2365 | | | if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) { |
2366 | | | // this comment is never matched |
2367 | | | continue; |
2368 | | | } |
2369 | | | $match_i = $escape_regexp_cache_per_key[$escape_key]['pos']; |
2370 | | | } else if ( |
2371 | | | //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible |
2372 | | | (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) || |
2373 | | | (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) |
2374 | | | ) { |
2375 | | | $match_i = $match[0][1]; |
2376 | | | if (GESHI_PHP_PRE_433) { |
2377 | | | $match_i += $start; |
2378 | | | } |
2379 | | | |
2380 | | | $escape_regexp_cache_per_key[$escape_key] = array( |
2381 | | | 'key' => $escape_key, |
2382 | | | 'length' => strlen($match[0][0]), |
2383 | | | 'pos' => $match_i |
2384 | | | ); |
2385 | | | } else { |
2386 | | | $escape_regexp_cache_per_key[$escape_key]['pos'] = false; |
2387 | | | continue; |
2388 | | | } |
2389 | | | |
2390 | | | if ($match_i !== false && $match_i < $next_escape_regexp_pos) { |
2391 | | | $next_escape_regexp_pos = $match_i; |
2392 | | | $next_escape_regexp_key = $escape_key; |
2393 | | | if ($match_i === $start) { |
2394 | | | break; |
2395 | | | } |
2396 | | | } |
2397 | | | } |
2398 | | | } |
2399 | | | |
2400 | | | //Find the next simple escape position |
2401 | | | if('' != $this->language_data['ESCAPE_CHAR']) { |
2402 | | | $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start); |
2403 | | | if(false === $simple_escape) { |
2404 | | | $simple_escape = $length; |
2405 | | | } |
2406 | | | } else { |
2407 | | | $simple_escape = $length; |
2408 | | | } |
2409 | | | } else { |
2410 | | | $next_escape_regexp_pos = $length; |
2411 | | | $simple_escape = $length; |
2412 | | | } |
2413 | | | |
2414 | | | if($simple_escape < $next_escape_regexp_pos && |
2415 | | | $simple_escape < $length && |
2416 | | | $simple_escape < $close_pos) { |
2417 | | | //The nexxt escape sequence is a simple one ... |
2418 | | | $es_pos = $simple_escape; |
2419 | | | |
2420 | | | //Add the stuff not in the string yet ... |
2421 | | | $string .= $this->hsc(substr($part, $start, $es_pos - $start)); |
2422 | | | |
2423 | | | //Get the style for this escaped char ... |
2424 | | | if (!$this->use_classes) { |
2425 | | | $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"'; |
2426 | | | } else { |
2427 | | | $escape_char_attributes = ' class="es0"'; |
2428 | | | } |
2429 | | | |
2430 | | | //Add the style for the escape char ... |
2431 | | | $string .= "<span$escape_char_attributes>" . |
2432 | | | GeSHi::hsc($this->language_data['ESCAPE_CHAR']); |
2433 | | | |
2434 | | | //Get the byte AFTER the ESCAPE_CHAR we just found |
2435 | | | $es_char = $part[$es_pos + 1]; |
2436 | | | if ($es_char == "\n") { |
2437 | | | // don't put a newline around newlines |
2438 | | | $string .= "</span>\n"; |
2439 | | | $start = $es_pos + 2; |
2440 | | | } else if (ord($es_char) >= 128) { |
2441 | | | //This is an non-ASCII char (UTF8 or single byte) |
2442 | | | //This code tries to work around SF#2037598 ... |
2443 | | | if(function_exists('mb_substr')) { |
2444 | | | $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding); |
2445 | | | $string .= $es_char_m . '</span>'; |
2446 | | | } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) { |
2447 | | | if(preg_match("/[\xC2-\xDF][\x80-\xBF]". |
2448 | | | "|\xE0[\xA0-\xBF][\x80-\xBF]". |
2449 | | | "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}". |
2450 | | | "|\xED[\x80-\x9F][\x80-\xBF]". |
2451 | | | "|\xF0[\x90-\xBF][\x80-\xBF]{2}". |
2452 | | | "|[\xF1-\xF3][\x80-\xBF]{3}". |
2453 | | | "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s", |
2454 | | | $part, $es_char_m, null, $es_pos + 1)) { |
2455 | | | $es_char_m = $es_char_m[0]; |
2456 | | | } else { |
2457 | | | $es_char_m = $es_char; |
2458 | | | } |
2459 | | | $string .= $this->hsc($es_char_m) . '</span>'; |
2460 | | | } else { |
2461 | | | $es_char_m = $this->hsc($es_char); |
2462 | | | } |
2463 | | | $start = $es_pos + strlen($es_char_m) + 1; |
2464 | | | } else { |
2465 | | | $string .= $this->hsc($es_char) . '</span>'; |
2466 | | | $start = $es_pos + 2; |
2467 | | | } |
2468 | | | } else if ($next_escape_regexp_pos < $length && |
2469 | | | $next_escape_regexp_pos < $close_pos) { |
2470 | | | $es_pos = $next_escape_regexp_pos; |
2471 | | | //Add the stuff not in the string yet ... |
2472 | | | $string .= $this->hsc(substr($part, $start, $es_pos - $start)); |
2473 | | | |
2474 | | | //Get the key and length of this match ... |
2475 | | | $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key]; |
2476 | | | $escape_str = substr($part, $es_pos, $escape['length']); |
2477 | | | $escape_key = $escape['key']; |
2478 | | | |
2479 | | | //Get the style for this escaped char ... |
2480 | | | if (!$this->use_classes) { |
2481 | | | $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"'; |
2482 | | | } else { |
2483 | | | $escape_char_attributes = ' class="es' . $escape_key . '"'; |
2484 | | | } |
2485 | | | |
2486 | | | //Add the style for the escape char ... |
2487 | | | $string .= "<span$escape_char_attributes>" . |
2488 | | | $this->hsc($escape_str) . '</span>'; |
2489 | | | |
2490 | | | $start = $es_pos + $escape['length']; |
2491 | | | } else { |
2492 | | | //Copy the remainder of the string ... |
2493 | | | $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>'; |
2494 | | | $start = $close_pos + $char_len; |
2495 | | | $string_open = false; |
2496 | | | } |
2497 | | | } while($string_open); |
2498 | | | |
2499 | | | if ($check_linenumbers) { |
2500 | | | // Are line numbers used? If, we should end the string before |
2501 | | | // the newline and begin it again (so when <li>s are put in the source |
2502 | | | // remains XHTML compliant) |
2503 | | | // note to self: This opens up possibility of config files specifying |
2504 | | | // that languages can/cannot have multiline strings??? |
2505 | | | $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); |
2506 | | | } |
2507 | | | |
2508 | | | $result .= $string; |
2509 | | | $string = ''; |
2510 | | | $i = $start - 1; |
2511 | | | continue; |
2512 | | | } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char && |
2513 | | | substr($part, $i, $hq_strlen) == $hq) { |
2514 | | | // The start of a hard quoted string |
2515 | | | if (!$this->use_classes) { |
2516 | | | $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"'; |
2517 | | | $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"'; |
2518 | | | } else { |
2519 | | | $string_attributes = ' class="st_h"'; |
2520 | | | $escape_char_attributes = ' class="es_h"'; |
2521 | | | } |
2522 | | | // parse the stuff before this |
2523 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2524 | | | $stuff_to_parse = ''; |
2525 | | | |
2526 | | | // now handle the string |
2527 | | | $string = ''; |
2528 | | | |
2529 | | | // look for closing quote |
2530 | | | $start = $i + $hq_strlen; |
2531 | | | while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) { |
2532 | | | $start = $close_pos + 1; |
2533 | | | if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) { |
2534 | | | // make sure this quote is not escaped |
2535 | | | foreach ($this->language_data['HARDESCAPE'] as $hardescape) { |
2536 | | | if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) { |
2537 | | | // check wether this quote is escaped or if it is something like '\\' |
2538 | | | $escape_char_pos = $close_pos - 1; |
2539 | | | while ($escape_char_pos > 0 |
2540 | | | && $part[$escape_char_pos - 1] == $this->language_data['ESCAPE_CHAR']) { |
2541 | | | --$escape_char_pos; |
2542 | | | } |
2543 | | | if (($close_pos - $escape_char_pos) & 1) { |
2544 | | | // uneven number of escape chars => this quote is escaped |
2545 | | | continue 2; |
2546 | | | } |
2547 | | | } |
2548 | | | } |
2549 | | | } |
2550 | | | |
2551 | | | // found closing quote |
2552 | | | break; |
2553 | | | } |
2554 | | | |
2555 | | | //Found the closing delimiter? |
2556 | | | if (!$close_pos) { |
2557 | | | // span till the end of this $part when no closing delimiter is found |
2558 | | | $close_pos = $length; |
2559 | | | } |
2560 | | | |
2561 | | | //Get the actual string |
2562 | | | $string = substr($part, $i, $close_pos - $i + 1); |
2563 | | | $i = $close_pos; |
2564 | | | |
2565 | | | // handle escape chars and encode html chars |
2566 | | | // (special because when we have escape chars within our string they may not be escaped) |
2567 | | | if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) { |
2568 | | | $start = 0; |
2569 | | | $new_string = ''; |
2570 | | | while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) { |
2571 | | | // hmtl escape stuff before |
2572 | | | $new_string .= $this->hsc(substr($string, $start, $es_pos - $start)); |
2573 | | | // check if this is a hard escape |
2574 | | | foreach ($this->language_data['HARDESCAPE'] as $hardescape) { |
2575 | | | if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) { |
2576 | | | // indeed, this is a hardescape |
2577 | | | $new_string .= "<span$escape_char_attributes>" . |
2578 | | | $this->hsc($hardescape) . '</span>'; |
2579 | | | $start = $es_pos + strlen($hardescape); |
2580 | | | continue 2; |
2581 | | | } |
2582 | | | } |
2583 | | | // not a hard escape, but a normal escape |
2584 | | | // they come in pairs of two |
2585 | | | $c = 0; |
2586 | | | while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1]) |
2587 | | | && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR'] |
2588 | | | && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) { |
2589 | | | $c += 2; |
2590 | | | } |
2591 | | | if ($c) { |
2592 | | | $new_string .= "<span$escape_char_attributes>" . |
2593 | | | str_repeat($escaped_escape_char, $c) . |
2594 | | | '</span>'; |
2595 | | | $start = $es_pos + $c; |
2596 | | | } else { |
2597 | | | // this is just a single lonely escape char... |
2598 | | | $new_string .= $escaped_escape_char; |
2599 | | | $start = $es_pos + 1; |
2600 | | | } |
2601 | | | } |
2602 | | | $string = $new_string . $this->hsc(substr($string, $start)); |
2603 | | | } else { |
2604 | | | $string = $this->hsc($string); |
2605 | | | } |
2606 | | | |
2607 | | | if ($check_linenumbers) { |
2608 | | | // Are line numbers used? If, we should end the string before |
2609 | | | // the newline and begin it again (so when <li>s are put in the source |
2610 | | | // remains XHTML compliant) |
2611 | | | // note to self: This opens up possibility of config files specifying |
2612 | | | // that languages can/cannot have multiline strings??? |
2613 | | | $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); |
2614 | | | } |
2615 | | | |
2616 | | | $result .= "<span$string_attributes>" . $string . '</span>'; |
2617 | | | $string = ''; |
2618 | | | continue; |
2619 | | | } else { |
2620 | | | // update regexp comment cache if needed |
2621 | | | if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) { |
2622 | | | $next_comment_regexp_pos = $length; |
2623 | | | foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) { |
2624 | | | $match_i = false; |
2625 | | | if (isset($comment_regexp_cache_per_key[$comment_key]) && |
2626 | | | ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i || |
2627 | | | $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) { |
2628 | | | // we have already matched something |
2629 | | | if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) { |
2630 | | | // this comment is never matched |
2631 | | | continue; |
2632 | | | } |
2633 | | | $match_i = $comment_regexp_cache_per_key[$comment_key]['pos']; |
2634 | | | } else if ( |
2635 | | | //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible |
2636 | | | (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) || |
2637 | | | (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) |
2638 | | | ) { |
2639 | | | $match_i = $match[0][1]; |
2640 | | | if (GESHI_PHP_PRE_433) { |
2641 | | | $match_i += $i; |
2642 | | | } |
2643 | | | |
2644 | | | $comment_regexp_cache_per_key[$comment_key] = array( |
2645 | | | 'key' => $comment_key, |
2646 | | | 'length' => strlen($match[0][0]), |
2647 | | | 'pos' => $match_i |
2648 | | | ); |
2649 | | | } else { |
2650 | | | $comment_regexp_cache_per_key[$comment_key]['pos'] = false; |
2651 | | | continue; |
2652 | | | } |
2653 | | | |
2654 | | | if ($match_i !== false && $match_i < $next_comment_regexp_pos) { |
2655 | | | $next_comment_regexp_pos = $match_i; |
2656 | | | $next_comment_regexp_key = $comment_key; |
2657 | | | if ($match_i === $i) { |
2658 | | | break; |
2659 | | | } |
2660 | | | } |
2661 | | | } |
2662 | | | } |
2663 | | | //Have a look for regexp comments |
2664 | | | if ($i == $next_comment_regexp_pos) { |
2665 | | | $COMMENT_MATCHED = true; |
2666 | | | $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key]; |
2667 | | | $test_str = $this->hsc(substr($part, $i, $comment['length'])); |
2668 | | | |
2669 | | | //@todo If remove important do remove here |
2670 | | | if ($this->lexic_permissions['COMMENTS']['MULTI']) { |
2671 | | | if (!$this->use_classes) { |
2672 | | | $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"'; |
2673 | | | } else { |
2674 | | | $attributes = ' class="co' . $comment['key'] . '"'; |
2675 | | | } |
2676 | | | |
2677 | | | $test_str = "<span$attributes>" . $test_str . "</span>"; |
2678 | | | |
2679 | | | // Short-cut through all the multiline code |
2680 | | | if ($check_linenumbers) { |
2681 | | | // strreplace to put close span and open span around multiline newlines |
2682 | | | $test_str = str_replace( |
2683 | | | "\n", "</span>\n<span$attributes>", |
2684 | | | str_replace("\n ", "\n ", $test_str) |
2685 | | | ); |
2686 | | | } |
2687 | | | } |
2688 | | | |
2689 | | | $i += $comment['length'] - 1; |
2690 | | | |
2691 | | | // parse the rest |
2692 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2693 | | | $stuff_to_parse = ''; |
2694 | | | } |
2695 | | | |
2696 | | | // If we haven't matched a regexp comment, try multi-line comments |
2697 | | | if (!$COMMENT_MATCHED) { |
2698 | | | // Is this a multiline comment? |
2699 | | | if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) { |
2700 | | | $next_comment_multi_pos = $length; |
2701 | | | foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) { |
2702 | | | $match_i = false; |
2703 | | | if (isset($comment_multi_cache_per_key[$open]) && |
2704 | | | ($comment_multi_cache_per_key[$open] >= $i || |
2705 | | | $comment_multi_cache_per_key[$open] === false)) { |
2706 | | | // we have already matched something |
2707 | | | if ($comment_multi_cache_per_key[$open] === false) { |
2708 | | | // this comment is never matched |
2709 | | | continue; |
2710 | | | } |
2711 | | | $match_i = $comment_multi_cache_per_key[$open]; |
2712 | | | } else if (($match_i = stripos($part, $open, $i)) !== false) { |
2713 | | | $comment_multi_cache_per_key[$open] = $match_i; |
2714 | | | } else { |
2715 | | | $comment_multi_cache_per_key[$open] = false; |
2716 | | | continue; |
2717 | | | } |
2718 | | | if ($match_i !== false && $match_i < $next_comment_multi_pos) { |
2719 | | | $next_comment_multi_pos = $match_i; |
2720 | | | $next_open_comment_multi = $open; |
2721 | | | if ($match_i === $i) { |
2722 | | | break; |
2723 | | | } |
2724 | | | } |
2725 | | | } |
2726 | | | } |
2727 | | | if ($i == $next_comment_multi_pos) { |
2728 | | | $open = $next_open_comment_multi; |
2729 | | | $close = $this->language_data['COMMENT_MULTI'][$open]; |
2730 | | | $open_strlen = strlen($open); |
2731 | | | $close_strlen = strlen($close); |
2732 | | | $COMMENT_MATCHED = true; |
2733 | | | $test_str_match = $open; |
2734 | | | //@todo If remove important do remove here |
2735 | | | if ($this->lexic_permissions['COMMENTS']['MULTI'] || |
2736 | | | $open == GESHI_START_IMPORTANT) { |
2737 | | | if ($open != GESHI_START_IMPORTANT) { |
2738 | | | if (!$this->use_classes) { |
2739 | | | $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"'; |
2740 | | | } else { |
2741 | | | $attributes = ' class="coMULTI"'; |
2742 | | | } |
2743 | | | $test_str = "<span$attributes>" . $this->hsc($open); |
2744 | | | } else { |
2745 | | | if (!$this->use_classes) { |
2746 | | | $attributes = ' style="' . $this->important_styles . '"'; |
2747 | | | } else { |
2748 | | | $attributes = ' class="imp"'; |
2749 | | | } |
2750 | | | |
2751 | | | // We don't include the start of the comment if it's an |
2752 | | | // "important" part |
2753 | | | $test_str = "<span$attributes>"; |
2754 | | | } |
2755 | | | } else { |
2756 | | | $test_str = $this->hsc($open); |
2757 | | | } |
2758 | | | |
2759 | | | $close_pos = strpos( $part, $close, $i + $open_strlen ); |
2760 | | | |
2761 | | | if ($close_pos === false) { |
2762 | | | $close_pos = $length; |
2763 | | | } |
2764 | | | |
2765 | | | // Short-cut through all the multiline code |
2766 | | | $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen)); |
2767 | | | if (($this->lexic_permissions['COMMENTS']['MULTI'] || |
2768 | | | $test_str_match == GESHI_START_IMPORTANT) && |
2769 | | | $check_linenumbers) { |
2770 | | | |
2771 | | | // strreplace to put close span and open span around multiline newlines |
2772 | | | $test_str .= str_replace( |
2773 | | | "\n", "</span>\n<span$attributes>", |
2774 | | | str_replace("\n ", "\n ", $rest_of_comment) |
2775 | | | ); |
2776 | | | } else { |
2777 | | | $test_str .= $rest_of_comment; |
2778 | | | } |
2779 | | | |
2780 | | | if ($this->lexic_permissions['COMMENTS']['MULTI'] || |
2781 | | | $test_str_match == GESHI_START_IMPORTANT) { |
2782 | | | $test_str .= '</span>'; |
2783 | | | } |
2784 | | | |
2785 | | | $i = $close_pos + $close_strlen - 1; |
2786 | | | |
2787 | | | // parse the rest |
2788 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2789 | | | $stuff_to_parse = ''; |
2790 | | | } |
2791 | | | } |
2792 | | | |
2793 | | | // If we haven't matched a multiline comment, try single-line comments |
2794 | | | if (!$COMMENT_MATCHED) { |
2795 | | | // cache potential single line comment occurances |
2796 | | | if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) { |
2797 | | | $next_comment_single_pos = $length; |
2798 | | | foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) { |
2799 | | | $match_i = false; |
2800 | | | if (isset($comment_single_cache_per_key[$comment_key]) && |
2801 | | | ($comment_single_cache_per_key[$comment_key] >= $i || |
2802 | | | $comment_single_cache_per_key[$comment_key] === false)) { |
2803 | | | // we have already matched something |
2804 | | | if ($comment_single_cache_per_key[$comment_key] === false) { |
2805 | | | // this comment is never matched |
2806 | | | continue; |
2807 | | | } |
2808 | | | $match_i = $comment_single_cache_per_key[$comment_key]; |
2809 | | | } else if ( |
2810 | | | // case sensitive comments |
2811 | | | ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && |
2812 | | | ($match_i = stripos($part, $comment_mark, $i)) !== false) || |
2813 | | | // non case sensitive |
2814 | | | (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && |
2815 | | | (($match_i = strpos($part, $comment_mark, $i)) !== false))) { |
2816 | | | $comment_single_cache_per_key[$comment_key] = $match_i; |
2817 | | | } else { |
2818 | | | $comment_single_cache_per_key[$comment_key] = false; |
2819 | | | continue; |
2820 | | | } |
2821 | | | if ($match_i !== false && $match_i < $next_comment_single_pos) { |
2822 | | | $next_comment_single_pos = $match_i; |
2823 | | | $next_comment_single_key = $comment_key; |
2824 | | | if ($match_i === $i) { |
2825 | | | break; |
2826 | | | } |
2827 | | | } |
2828 | | | } |
2829 | | | } |
2830 | | | if ($next_comment_single_pos == $i) { |
2831 | | | $comment_key = $next_comment_single_key; |
2832 | | | $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key]; |
2833 | | | $com_len = strlen($comment_mark); |
2834 | | | |
2835 | | | // This check will find special variables like $# in bash |
2836 | | | // or compiler directives of Delphi beginning {$ |
2837 | | | if ((empty($sc_disallowed_before) || ($i == 0) || |
2838 | | | (false === strpos($sc_disallowed_before, $part[$i-1]))) && |
2839 | | | (empty($sc_disallowed_after) || ($length <= $i + $com_len) || |
2840 | | | (false === strpos($sc_disallowed_after, $part[$i + $com_len])))) |
2841 | | | { |
2842 | | | // this is a valid comment |
2843 | | | $COMMENT_MATCHED = true; |
2844 | | | if ($this->lexic_permissions['COMMENTS'][$comment_key]) { |
2845 | | | if (!$this->use_classes) { |
2846 | | | $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"'; |
2847 | | | } else { |
2848 | | | $attributes = ' class="co' . $comment_key . '"'; |
2849 | | | } |
2850 | | | $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark)); |
2851 | | | } else { |
2852 | | | $test_str = $this->hsc($comment_mark); |
2853 | | | } |
2854 | | | |
2855 | | | //Check if this comment is the last in the source |
2856 | | | $close_pos = strpos($part, "\n", $i); |
2857 | | | $oops = false; |
2858 | | | if ($close_pos === false) { |
2859 | | | $close_pos = $length; |
2860 | | | $oops = true; |
2861 | | | } |
2862 | | | $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len)); |
2863 | | | if ($this->lexic_permissions['COMMENTS'][$comment_key]) { |
2864 | | | $test_str .= "</span>"; |
2865 | | | } |
2866 | | | |
2867 | | | // Take into account that the comment might be the last in the source |
2868 | | | if (!$oops) { |
2869 | | | $test_str .= "\n"; |
2870 | | | } |
2871 | | | |
2872 | | | $i = $close_pos; |
2873 | | | |
2874 | | | // parse the rest |
2875 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2876 | | | $stuff_to_parse = ''; |
2877 | | | } |
2878 | | | } |
2879 | | | } |
2880 | | | } |
2881 | | | |
2882 | | | // Where are we adding this char? |
2883 | | | if (!$COMMENT_MATCHED) { |
2884 | | | $stuff_to_parse .= $char; |
2885 | | | } else { |
2886 | | | $result .= $test_str; |
2887 | | | unset($test_str); |
2888 | | | $COMMENT_MATCHED = false; |
2889 | | | } |
2890 | | | } |
2891 | | | // Parse the last bit |
2892 | | | $result .= $this->parse_non_string_part($stuff_to_parse); |
2893 | | | $stuff_to_parse = ''; |
2894 | | | } else { |
2895 | | | $result .= $this->hsc($part); |
2896 | | | } |
2897 | | | // Close the <span> that surrounds the block |
2898 | | | if ($STRICTATTRS != '') { |
2899 | | | $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result); |
2900 | | | $result .= '</span>'; |
2901 | | | } |
2902 | | | |
2903 | | | $endresult .= $result; |
2904 | | | unset($part, $parts[$key], $result); |
2905 | | | } |
2906 | | | |
2907 | | | //This fix is related to SF#1923020, but has to be applied regardless of |
2908 | | | //actually highlighting symbols. |
2909 | | | /** NOTE: memorypeak #3 */ |
2910 | | | $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult); |
2911 | | | |
2912 | | | // // Parse the last stuff (redundant?) |
2913 | | | // $result .= $this->parse_non_string_part($stuff_to_parse); |
2914 | | | |
2915 | | | // Lop off the very first and last spaces |
2916 | | | // $result = substr($result, 1, -1); |
2917 | | | |
2918 | | | // We're finished: stop timing |
2919 | | | $this->set_time($start_time, microtime()); |
2920 | | | |
2921 | | | $this->finalise($endresult); |
2922 | | | return $endresult; |
2923 | | | } |
2924 | | | |
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Works on already-highlighted HTML: characters inside <...> tags are
 * copied through untouched and do not advance the column counter, and an
 * HTML entity is counted as a single column. Each tab is expanded up to the
 * next multiple of the real tab width using "&nbsp;" and plain spaces.
 * Afterwards a space at the start of a line and the second space of every
 * pair of spaces become "&nbsp;". When line numbers are disabled, newlines
 * additionally get a <br /> (nl2br) or are replaced by the configured
 * line ending.
 *
 * @param string The source to indent (reference!)
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    // Entity emitted wherever a plain space would be collapsed by the browser.
    // The substr() offsets below are derived from its length instead of being
    // hard-coded, so the literal and the offsets can never drift apart.
    $nbsp = '&nbsp;';
    $nbsp_len = strlen($nbsp);

    /// Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // Template for one tab: the entity followed by $tab_width plain spaces.
        // Slices of this string are used as the replacement for each tab.
        $tab_string = $nbsp . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                // Nothing to expand on this line
                continue;
            }

            // $pos is the column of the rendered text (tags excluded)
            $pos = 0;
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // Tag characters are copied without touching $pos, so the
                // tab replacement is worked out only from the actual content
                // of the line.
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' === $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' === $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' === $char) {
                    // Start of an HTML entity. Look for the terminating ';'
                    // in the 5 bytes starting 3 bytes after the '&'. When it
                    // is found, $pos is reduced up front so that the entity's
                    // remaining characters (each counted as one column by the
                    // default branch) add up to a single column in total.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi + 2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" === $char) {
                    // Number of columns up to the next tab stop
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // Plain spaces only: skip the leading entity
                        $str = substr($tab_string, $nbsp_len, $tab_end_width);
                    } else {
                        // The entity followed by ($tab_end_width - 1) spaces
                        $str = substr($tab_string, 0, $tab_end_width + $nbsp_len - 1);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No further tabs: copy the remainder verbatim
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' === $char) {
                    // A space in column 0 would be dropped by the browser
                    $lines[$key] .= $nbsp;
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }

    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', $nbsp, $result);
    $result = str_replace('  ', ' ' . $nbsp, $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3024 | | | |
/**
 * Applies the case policy stored in the language data's CASE_KEYWORDS
 * entry to the given string.
 *
 * GESHI_CAPS_UPPER upper-cases the string, GESHI_CAPS_LOWER lower-cases
 * it; any other setting returns the string untouched.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case($instr) {
    $case_mode = $this->language_data['CASE_KEYWORDS'];

    // Loose comparison on purpose: it mirrors the semantics of a switch.
    if ($case_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($case_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // No case change requested
    return $instr;
}
3043 | | | |
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * The matched keyword is wrapped in the internal '<|/GROUP/>' ... '|>'
 * placeholder, where GROUP is the value of $this->_kw_replace_group. When
 * keyword links are enabled and the group has a non-empty URL template, the
 * placeholder is additionally surrounded by a '<|UR1|"...">' ... '</a>'
 * link placeholder built from that template.
 *
 * @note this is a callback, the keyword group is read from
 *       $this->_kw_replace_group
 *
 * @param array The matches array; element 0 is the keyword to add the markup to
 * @return string The HTML for the match found
 * @since 1.0.8
 * @access private
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    // Keyword links are enabled and there is a base URL for this group
    if ($this->keyword_links &&
        isset($this->language_data['URLS'][$k]) &&
        $this->language_data['URLS'][$k] != '') {
        $url = $this->language_data['URLS'][$k];

        // Default to the text as matched. Previously a lookup that found
        // nothing left $word as the last keyword of the group (or undefined
        // for an empty group), which produced a link to the wrong keyword.
        $word = $keyword;

        // For case-insensitive groups whose URL uses {FNAME}, take the
        // spelling from the language file to get the correct case
        if (!$this->language_data['CASE_SENSITIVE'][$k] &&
            strpos($url, '{FNAME}') !== false) {
            foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                if (strcasecmp($candidate, $keyword) == 0) {
                    $word = $candidate;
                    break;
                }
            }
        }

        // Fill in the template; dots are masked as <DOT> placeholders
        $before = '<|UR1|"' .
            str_replace(
                array('{FNAME}', '{FNAMEL}', '{FNAMEU}', '.'),
                array(
                    $this->hsc($word),
                    $this->hsc(strtolower($word)),
                    $this->hsc(strtoupper($word)),
                    '<DOT>'
                ),
                $url
            ) . '">';
        $after = '</a>';
    }

    return $before . '<|/' . $k . '/>' . $this->change_case($keyword) . '|>' . $after;
}
3095 | | | |
/**
 * Fills in the style attribute of a REGEXPS placeholder by asking a callback
 *
 * The callable stored under STYLES/REGEXPS for the key held in
 * $this->_rx_key is invoked with the captured text; whatever it returns is
 * used as the content of the style attribute.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array; element 1 is the captured text
 * @return string The style attribute, followed by the captured text and the "|>" end marker
 * @since 1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
    $styler = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $style = call_user_func($styler, $matches[1]);
    $captured = $matches[1];

    return ' style="' . $style . '"' . $captured . '|>';
}
3110 | | | |
/**
 * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * Reads $this->_hmr_before, $this->_hmr_after, $this->_hmr_replace and
 * $this->_hmr_key. When _hmr_replace is set, "\N" references in the before,
 * after and replace templates are substituted with the corresponding entry
 * of the matches array; otherwise the whole match is used as the replacement.
 * The replacement is wrapped in <|!REG3XPkey!>...|> markers, and the marker
 * pair is closed and reopened around every newline.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string
 * @since 1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;
    if ($this->_hmr_replace) {
        // Map "\0", "\1", ... to the captured texts
        $pairs = array();
        foreach ($matches as $k => $text) {
            $pairs['\\' . $k] = $text;
        }

        // strtr() substitutes in a single pass, trying the longest key first.
        // The former sequential str_replace() rescanned text it had already
        // inserted (so matched source containing a literal "\1" was replaced
        // again) and consumed "\10" as "\1" followed by "0".
        $before = strtr((string) $before, $pairs);
        $after = strtr((string) $after, $pairs);
        $replace = strtr((string) $this->_hmr_replace, $pairs);
    } else {
        $replace = $matches[0];
    }

    $opener = '<|!REG3XP' . $this->_hmr_key . '!>';

    return $before
        . $opener
        . str_replace("\n", "|>\n" . $opener, $replace)
        . '|>'
        . $after;
}
3144 | | | |
/**
 * Takes a string that has no strings or comments in it, and highlights
 * stuff like keywords, numbers and methods.
 *
 * The text is first escaped with hsc() and then run through a fixed series
 * of passes: REGEXPS, numbers, keywords, object methods, brackets and
 * symbols. The passes only insert placeholder markers ("<|...>", "|>",
 * "<|UR1|", "<DOT>", "!REG3XPkey!", "/NUM!id/", "<|/group/>"); the markers
 * are turned into real attributes, <span> and <a> tags at the end so that
 * markup inserted by one pass is not highlighted again by a later pass.
 *
 * @param string The string to parse for keyword, numbers etc.
 * @return string The escaped input with highlighting markup added
 * @since 1.0.0
 * @access private
 * @todo BUGGY! Why? Why not build string and return?
 */
function parse_non_string_part($stuff_to_parse) {
    // A single space is prepended here and stripped again by the final
    // substr(); presumably so the look-behind patterns below always have a
    // character to inspect at the very start - TODO confirm
    $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

    // Regular expressions
    foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            if (is_array($regexp)) {
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    $this->_hmr_replace = $regexp[GESHI_REPLACE];
                    $this->_hmr_before = $regexp[GESHI_BEFORE];
                    $this->_hmr_key = $key;
                    $this->_hmr_after = $regexp[GESHI_AFTER];
                    $stuff_to_parse = preg_replace_callback(
                        "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                        array($this, 'handle_multiline_regexps'),
                        $stuff_to_parse);
                    $this->_hmr_replace = false;
                    $this->_hmr_before = '';
                    $this->_hmr_after = '';
                } else {
                    $stuff_to_parse = preg_replace(
                        '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                        $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                        $stuff_to_parse);
                }
            } else {
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    $this->_hmr_key = $key;
                    $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                        array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                    $this->_hmr_key = '';
                } else {
                    $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                }
            }
        }
    }

    // Highlight numbers. As of 1.0.8 we support different types of numbers
    $numbers_found = false;
    if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
        $numbers_found = true;

        //For each of the formats ...
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
            //Check if it should be highlighted ...
            $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
        }
    }

    // Highlight keywords
    $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
    $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
    if ($this->lexic_permissions['STRINGS']) {
        $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
        $disallowed_before .= $quotemarks;
        $disallowed_after .= $quotemarks;
    }
    $disallowed_before .= "])";
    $disallowed_after .= "])";

    $parser_control_pergroup = false;
    if (isset($this->language_data['PARSER_CONTROL'])) {
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
            $x = 0; // check whether per-keyword-group parser_control is enabled
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                ++$x;
            }
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                ++$x;
            }
            // Any entry besides the two global overrides counted in $x is
            // treated as a per-group setting
            $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
        }
    }

    // if this is changed, don't forget to change it below
    //        if (!empty($disallowed_before)) {
    //            $disallowed_before = "(?<![$disallowed_before])";
    //        }
    //        if (!empty($disallowed_after)) {
    //            $disallowed_after = "(?![$disallowed_after])";
    //        }

    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
            $this->lexic_permissions['KEYWORDS'][$k]) {

            $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
            $modifiers = $case_sensitive ? '' : 'i';

            // NEW in 1.0.8 - per-keyword-group parser control
            $disallowed_before_local = $disallowed_before;
            $disallowed_after_local = $disallowed_after;
            if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                    $disallowed_before_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                }

                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                    $disallowed_after_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                }
            }

            // handle_keyword_replace() reads the current group from here
            $this->_kw_replace_group = $k;

            //NEW in 1.0.8, the cached regexp list
            // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
            for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
                $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                // Might make a more unique string for putting the number in soon
                // Basically, we don't put the styles in yet because then the styles themselves will
                // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                $stuff_to_parse = preg_replace_callback(
                    "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
                    array($this, 'handle_keyword_replace'),
                    $stuff_to_parse
                    );
            }
        }
    }

    //
    // Now that's all done, replace /[number]/ with the correct styles
    //
    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        if (!$this->use_classes) {
            $attributes = ' style="' .
                (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
        } else {
            $attributes = ' class="kw' . $k . '"';
        }
        $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
    }

    if ($numbers_found) {
        // Put number styles in
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
            //Commented out for now, as this needs some review ...
            //                if ($numbers_permissions & $id) {
            //Get the appropriate style ...
            //Checking for unset styles is done by the style cache builder ...
            if (!$this->use_classes) {
                $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
            } else {
                $attributes = ' class="nu'.$id.'"';
            }

            //Set in the correct styles ...
            $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
            //                }
        }
    }

    // Highlight methods and fields in objects
    if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
        $oolang_spaces = "[\s]*";
        $oolang_before = "";
        $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                    $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                    $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                    $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                }
            }
        }

        foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
            if (false !== strpos($stuff_to_parse, $splitter)) {
                if (!$this->use_classes) {
                    // Guarded like the keyword styles above: a splitter
                    // without a matching style yields an empty style
                    // instead of an undefined-index notice
                    $attributes = ' style="' .
                        (isset($this->language_data['STYLES']['METHODS'][$key]) ?
                        $this->language_data['STYLES']['METHODS'][$key] : '') . '"';
                } else {
                    $attributes = ' class="me' . $key . '"';
                }
                $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
            }
        }
    }

    //
    // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
    // You try it, and see what happens ;)
    // TODO: Fix lexic permissions not converting entities if shouldn't
    // be highlighting regardless
    //
    if ($this->lexic_permissions['BRACKETS']) {
        $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
            $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
    }


    //FIX for symbol highlighting ...
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
        $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        $global_offset = 0;
        for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
            $symbol_match = $pot_symbols[$s_id][0][0];
            if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                // already highlighted blocks _must_ include either < or >
                // so if this conditional applies, we have to skip this match
                // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                if(strpos($symbol_match, '<SEMI>') === false &&
                    strpos($symbol_match, '<PIPE>') === false) {
                    continue;
                }
            }

            // if we reach this point, we have a valid match which needs to be highlighted

            $symbol_length = strlen($symbol_match);
            $symbol_offset = $pot_symbols[$s_id][0][1];
            unset($pot_symbols[$s_id]);
            $symbol_hl = "";

            // if we have multiple styles, we have to handle them properly
            if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                $old_sym = -1;
                // Split the current stuff to replace into its atomic symbols ...
                preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                foreach ($sym_match_syms[0] as $sym_ms) {
                    //Check if consecutive symbols belong to the same group to save output ...
                    if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                        && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                        if (-1 != $old_sym) {
                            $symbol_hl .= "|>";
                        }
                        $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                        if (!$this->use_classes) {
                            $symbol_hl .= '<| style="' .
                                (isset($this->language_data['STYLES']['SYMBOLS'][$old_sym]) ?
                                $this->language_data['STYLES']['SYMBOLS'][$old_sym] : '') . '">';
                        } else {
                            $symbol_hl .= '<| class="sy' . $old_sym . '">';
                        }
                    }
                    $symbol_hl .= $sym_ms;
                }
                unset($sym_match_syms);

                //Close remaining tags and insert the replacement at the right position ...
                //Take caution if symbol_hl is empty to avoid doubled closing spans.
                if (-1 != $old_sym) {
                    $symbol_hl .= "|>";
                }
            } else {
                if (!$this->use_classes) {
                    $symbol_hl = '<| style="' .
                        (isset($this->language_data['STYLES']['SYMBOLS'][0]) ?
                        $this->language_data['STYLES']['SYMBOLS'][0] : '') . '">';
                } else {
                    $symbol_hl = '<| class="sy0">';
                }
                $symbol_hl .= $symbol_match . '|>';
            }

            $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

            // since we replace old text with something of different size,
            // we'll have to keep track of the differences
            $global_offset += strlen($symbol_hl) - $symbol_length;
        }
    }
    //FIX for symbol highlighting ...

    // Add class/style for regexps
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            // A regexp without a style entry is treated as having an empty
            // style rather than raising an undefined-index notice
            $regexp_style = isset($this->language_data['STYLES']['REGEXPS'][$key]) ?
                $this->language_data['STYLES']['REGEXPS'][$key] : '';
            if (is_callable($regexp_style)) {
                // handle_regexps_callback() looks the callable up via _rx_key
                $this->_rx_key = $key;
                $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                    array($this, 'handle_regexps_callback'),
                    $stuff_to_parse);
            } else {
                if (!$this->use_classes) {
                    $attributes = ' style="' . $regexp_style . '"';
                } else {
                    if (is_array($this->language_data['REGEXPS'][$key]) &&
                        array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                        $attributes = ' class="' .
                            $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                    } else {
                        $attributes = ' class="re' . $key . '"';
                    }
                }
                $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
            }
        }
    }

    // Replace <DOT> with . for urls
    $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
    // Replace <|UR1| with <a href= for urls also. An inline style is only
    // emitted when a link style is set and classes are not in use.
    if (isset($this->link_styles[GESHI_LINK]) && !$this->use_classes) {
        $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
    } else {
        $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
    }

    //
    // NOW we add the span thingy ;)
    //

    $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
    $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
    // Drop the space that was prepended at the top of this method
    return substr($stuff_to_parse, 1);
}
3474 | | | |
/**
 * Stores the elapsed parse time in $this->time
 *
 * Both arguments are split on a single space and the two parts of each are
 * summed, i.e. they are expected in the two-part "usec sec" string form.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    list($start_usec, $start_sec) = explode(' ', $start_time);
    list($end_usec, $end_sec) = explode(' ', $end_time);

    // Operand order is kept as-is so the floating point result is unchanged
    $this->time = $end_usec + $end_sec - $start_usec - $start_sec;
}
3488 | | | |
/**
 * Returns the parse duration held in $this->time (as written by set_time())
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time() {
    $elapsed = $this->time;

    return $elapsed;
}
3498 | | | |
/**
 * Recursively merges any number of arrays; values from later arrays win
 *
 * Takes a variable number of array arguments. Where a key exists in the
 * result so far and the incoming value is an array, the two are merged
 * recursively; in every other case the incoming value replaces the
 * existing one.
 *
 * @return array|false The merged array, or false (after raising an
 *                     E_USER_WARNING) if any argument is not an array
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $inputs = func_get_args();

    // Validate the arguments up front.
    // Remove this check if more performance is necessary (the merge loop
    // below would then raise a warning for a non-array argument instead).
    foreach ($inputs as $position => $input) {
        if (!is_array($input)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array always forms the basis of the result
    $merged = $inputs[0];

    // Layer every remaining array on top of it
    foreach (array_slice($inputs, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            // If $merged[$key] is set but not an array, the recursive call
            // raises the E_USER_WARNING above and the entry becomes false
            // (incompatible arrays - the result is not defined).
            $merged[$key] = (is_array($value) && isset($merged[$key]))
                ? $this->merge_arrays($merged[$key], $value)
                : $value;
        }
    }

    return $merged;
}
3537 | | | |
/**
 * Gets language information and stores it for later use
 *
 * Requires the given language file, which is expected to fill the local
 * $language_data array, stores that array on the object and initialises the
 * lexic permissions derived from it. If a readable file with the same name
 * but the last four characters replaced by ".style.php" exists, it is
 * included and its $style_data array is merged over the language's STYLES.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // Nothing to do when this very file is the one currently loaded
    if ($file_name == $this->loaded_language) {
        return;
    }

    // Reset per-language state before the language file is pulled in
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    // The language file runs in this scope and populates $language_data
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Strict mode follows whatever the language declares
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Keyword groups are highlighted by default unless they are empty
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        $this->lexic_permissions['KEYWORDS'][$key] = !empty($this->language_data['KEYWORDS'][$key]);
    }

    // Single-line comments and regexps are all enabled by default
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // empty() is safe here: it covers a missing PARSER_CONTROL entry as well
    // as a false/null/empty ENABLE_FLAGS value without triggering a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Anything other than GESHI_NEVER leaves the lexic enabled
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach (array_keys($this->lexic_permissions[$flag]) as $key) {
                        $this->lexic_permissions[$flag][$key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
            // flags naming an unknown lexic permission are ignored
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    //Clear any style_data that could have been set before ...
    unset($style_data);

    //Read the Style Information from the style file
    include $style_filename;

    //Apply the new styles to our current language styles
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3630 | | | |
3631 | | | /** |
3632 | | | * Takes the parsed code and various options, and creates the HTML |
3633 | | | * surrounding it to make it look nice. |
3634 | | | * |
3635 | | | * @param string The code already parsed (reference!) |
3636 | | | * @since 1.0.0 |
3637 | | | * @access private |
3638 | | | */ |
3639 | | | function finalise(&$parsed_code) { |
3640 | | | // Remove end parts of important declarations |
3641 | | | // This is BUGGY!! My fault for bad code: fix coming in 1.2 |
3642 | | | // @todo Remove this crap |
3643 | | | if ($this->enable_important_blocks && |
3644 | | | (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) { |
3645 | | | $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code); |
3646 | | | } |
3647 | | | |
3648 | | | // Add HTML whitespace stuff if we're using the <div> header |
3649 | | | if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) { |
3650 | | | $this->indent($parsed_code); |
3651 | | | } |
3652 | | | |
3653 | | | // purge some unnecessary stuff |
3654 | | | /** NOTE: memorypeak #1 */ |
3655 | | | $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code); |
3656 | | | |
3657 | | | // If we are using IDs for line numbers, there needs to be an overall |
3658 | | | // ID set to prevent collisions. |
3659 | | | if ($this->add_ids && !$this->overall_id) { |
3660 | | | $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4); |
3661 | | | } |
3662 | | | |
3663 | | | // Get code into lines |
3664 | | | /** NOTE: memorypeak #2 */ |
3665 | | | $code = explode("\n", $parsed_code); |
3666 | | | $parsed_code = $this->header(); |
3667 | | | |
3668 | | | // If we're using line numbers, we insert <li>s and appropriate |
3669 | | | // markup to style them (otherwise we don't need to do anything) |
3670 | | | if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) { |
3671 | | | // If we're using the <pre> header, we shouldn't add newlines because |
3672 | | | // the <pre> will line-break them (and the <li>s already do this for us) |
3673 | | | $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : ''; |
3674 | | | |
3675 | | | // Set vars to defaults for following loop |
3676 | | | $i = 0; |
3677 | | | |
3678 | | | // Foreach line... |
3679 | | | for ($i = 0, $n = count($code); $i < $n;) { |
3680 | | | //Reset the attributes for a new line ... |
3681 | | | $attrs = array(); |
3682 | | | |
3683 | | | // Make lines have at least one space in them if they're empty |
3684 | | | // BenBE: Checking emptiness using trim instead of relying on blanks |
3685 | | | if ('' == trim($code[$i])) { |
3686 | | | $code[$i] = ' '; |
3687 | | | } |
3688 | | | |
3689 | | | // If this is a "special line"... |
3690 | | | if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && |
3691 | | | $i % $this->line_nth_row == ($this->line_nth_row - 1)) { |
3692 | | | // Set the attributes to style the line |
3693 | | | if ($this->use_classes) { |
3694 | | | //$attr = ' class="li2"'; |
3695 | | | $attrs['class'][] = 'li2'; |
3696 | | | $def_attr = ' class="de2"'; |
3697 | | | } else { |
3698 | | | //$attr = ' style="' . $this->line_style2 . '"'; |
3699 | | | $attrs['style'][] = $this->line_style2; |
3700 | | | // This style "covers up" the special styles set for special lines |
3701 | | | // so that styles applied to special lines don't apply to the actual |
3702 | | | // code on that line |
3703 | | | $def_attr = ' style="' . $this->code_style . '"'; |
3704 | | | } |
3705 | | | } else { |
3706 | | | if ($this->use_classes) { |
3707 | | | //$attr = ' class="li1"'; |
3708 | | | $attrs['class'][] = 'li1'; |
3709 | | | $def_attr = ' class="de1"'; |
3710 | | | } else { |
3711 | | | //$attr = ' style="' . $this->line_style1 . '"'; |
3712 | | | $attrs['style'][] = $this->line_style1; |
3713 | | | $def_attr = ' style="' . $this->code_style . '"'; |
3714 | | | } |
3715 | | | } |
3716 | | | |
3717 | | | //Check which type of tag to insert for this line |
3718 | | | if ($this->header_type == GESHI_HEADER_PRE_VALID) { |
3719 | | | $start = "<pre$def_attr>"; |
3720 | | | $end = '</pre>'; |
3721 | | | } else { |
3722 | | | // Span or div? |
3723 | | | $start = "<div$def_attr>"; |
3724 | | | $end = '</div>'; |
3725 | | | } |
3726 | | | |
3727 | | | ++$i; |
3728 | | | |
3729 | | | // Are we supposed to use ids? If so, add them |
3730 | | | if ($this->add_ids) { |
3731 | | | $attrs['id'][] = "$this->overall_id-$i"; |
3732 | | | } |
3733 | | | |
3734 | | | //Is this some line with extra styles??? |
3735 | | | if (in_array($i, $this->highlight_extra_lines)) { |
3736 | | | if ($this->use_classes) { |
3737 | | | if (isset($this->highlight_extra_lines_styles[$i])) { |
3738 | | | $attrs['class'][] = "lx$i"; |
3739 | | | } else { |
3740 | | | $attrs['class'][] = "ln-xtra"; |
3741 | | | } |
3742 | | | } else { |
3743 | | | array_push($attrs['style'], $this->get_line_style($i)); |
3744 | | | } |
3745 | | | } |
3746 | | | |
3747 | | | // Add in the line surrounded by appropriate list HTML |
3748 | | | $attr_string = ''; |
3749 | | | foreach ($attrs as $key => $attr) { |
3750 | | | $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"'; |
3751 | | | } |
3752 | | | |
3753 | | | $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls"; |
3754 | | | unset($code[$i - 1]); |
3755 | | | } |
3756 | | | } else { |
3757 | | | $n = count($code); |
3758 | | | if ($this->use_classes) { |
3759 | | | $attributes = ' class="de1"'; |
3760 | | | } else { |
3761 | | | $attributes = ' style="'. $this->code_style .'"'; |
3762 | | | } |
3763 | | | if ($this->header_type == GESHI_HEADER_PRE_VALID) { |
3764 | | | $parsed_code .= '<pre'. $attributes .'>'; |
3765 | | | } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) { |
3766 | | | if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { |
3767 | | | if ($this->use_classes) { |
3768 | | | $attrs = ' class="ln"'; |
3769 | | | } else { |
3770 | | | $attrs = ' style="'. $this->table_linenumber_style .'"'; |
3771 | | | } |
3772 | | | $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>'; |
3773 | | | // get linenumbers |
3774 | | | // we don't merge it with the for below, since it should be better for |
3775 | | | // memory consumption this way |
3776 | | | // @todo: but... actually it would still be somewhat nice to merge the two loops |
3777 | | | // the mem peaks are at different positions |
3778 | | | for ($i = 0; $i < $n; ++$i) { |
3779 | | | $close = 0; |
3780 | | | // fancy lines |
3781 | | | if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && |
3782 | | | $i % $this->line_nth_row == ($this->line_nth_row - 1)) { |
3783 | | | // Set the attributes to style the line |
3784 | | | if ($this->use_classes) { |
3785 | | | $parsed_code .= '<span class="xtra li2"><span class="de2">'; |
3786 | | | } else { |
3787 | | | // This style "covers up" the special styles set for special lines |
3788 | | | // so that styles applied to special lines don't apply to the actual |
3789 | | | // code on that line |
3790 | | | $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">' |
3791 | | | .'<span style="' . $this->code_style .'">'; |
3792 | | | } |
3793 | | | $close += 2; |
3794 | | | } |
3795 | | | //Is this some line with extra styles??? |
3796 | | | if (in_array($i + 1, $this->highlight_extra_lines)) { |
3797 | | | if ($this->use_classes) { |
3798 | | | if (isset($this->highlight_extra_lines_styles[$i])) { |
3799 | | | $parsed_code .= "<span class=\"xtra lx$i\">"; |
3800 | | | } else { |
3801 | | | $parsed_code .= "<span class=\"xtra ln-xtra\">"; |
3802 | | | } |
3803 | | | } else { |
3804 | | | $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">"; |
3805 | | | } |
3806 | | | ++$close; |
3807 | | | } |
3808 | | | $parsed_code .= $this->line_numbers_start + $i; |
3809 | | | if ($close) { |
3810 | | | $parsed_code .= str_repeat('</span>', $close); |
3811 | | | } else if ($i != $n) { |
3812 | | | $parsed_code .= "\n"; |
3813 | | | } |
3814 | | | } |
3815 | | | $parsed_code .= '</pre></td><td'.$attributes.'>'; |
3816 | | | } |
3817 | | | $parsed_code .= '<pre'. $attributes .'>'; |
3818 | | | } |
3819 | | | // No line numbers, but still need to handle highlighting lines extra. |
3820 | | | // Have to use divs so the full width of the code is highlighted |
3821 | | | $close = 0; |
3822 | | | for ($i = 0; $i < $n; ++$i) { |
3823 | | | // Make lines have at least one space in them if they're empty |
3824 | | | // BenBE: Checking emptiness using trim instead of relying on blanks |
3825 | | | if ('' == trim($code[$i])) { |
3826 | | | $code[$i] = ' '; |
3827 | | | } |
3828 | | | // fancy lines |
3829 | | | if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && |
3830 | | | $i % $this->line_nth_row == ($this->line_nth_row - 1)) { |
3831 | | | // Set the attributes to style the line |
3832 | | | if ($this->use_classes) { |
3833 | | | $parsed_code .= '<span class="xtra li2"><span class="de2">'; |
3834 | | | } else { |
3835 | | | // This style "covers up" the special styles set for special lines |
3836 | | | // so that styles applied to special lines don't apply to the actual |
3837 | | | // code on that line |
3838 | | | $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">' |
3839 | | | .'<span style="' . $this->code_style .'">'; |
3840 | | | } |
3841 | | | $close += 2; |
3842 | | | } |
3843 | | | //Is this some line with extra styles??? |
3844 | | | if (in_array($i + 1, $this->highlight_extra_lines)) { |
3845 | | | if ($this->use_classes) { |
3846 | | | if (isset($this->highlight_extra_lines_styles[$i])) { |
3847 | | | $parsed_code .= "<span class=\"xtra lx$i\">"; |
3848 | | | } else { |
3849 | | | $parsed_code .= "<span class=\"xtra ln-xtra\">"; |
3850 | | | } |
3851 | | | } else { |
3852 | | | $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">"; |
3853 | | | } |
3854 | | | ++$close; |
3855 | | | } |
3856 | | | |
3857 | | | $parsed_code .= $code[$i]; |
3858 | | | |
3859 | | | if ($close) { |
3860 | | | $parsed_code .= str_repeat('</span>', $close); |
3861 | | | $close = 0; |
3862 | | | } |
3863 | | | elseif ($i + 1 < $n) { |
3864 | | | $parsed_code .= "\n"; |
3865 | | | } |
3866 | | | unset($code[$i]); |
3867 | | | } |
3868 | | | |
3869 | | | if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) { |
3870 | | | $parsed_code .= '</pre>'; |
3871 | | | } |
3872 | | | if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) { |
3873 | | | $parsed_code .= '</td>'; |
3874 | | | } |
3875 | | | } |
3876 | | | |
3877 | | | $parsed_code .= $this->footer(); |
3878 | | | } |
3879 | | | |
/**
 * Builds the opening HTML for the highlighted block.
 *
 * The result consists of the (optional) user supplied header content,
 * wrapped in its own element, plus the opening tag(s) of the container
 * selected by the header type and the line-number setting. The matching
 * closing tags are produced by footer().
 *
 * @return string The header for the code block
 * @since  1.0.0
 * @access private
 */
function header() {
    /**
     * @todo Document behaviour change - class is outputted regardless of whether
     *       we're using classes or not. Same with style
     */
    $class_names = $this->language;
    if ($this->overall_class != '') {
        $class_names .= ' ' . $this->overall_class;
    }
    $attributes = ' class="' . $class_names . '"';

    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    // The ordered list only needs a start attribute when it does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Prepare the user supplied header content, if there is any
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // In the table layout the header spans both columns
            $header = '<thead><tr><td colspan="2" ' . $attr . '>' . $header . '</td></tr></thead>';
        } else {
            $header = '<div' . $attr . '>' . $header . '</div>';
        }
    }

    $type     = $this->header_type;
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    if (!$numbered) {
        // Without line numbers there is no list; optionally open a code block div
        $block_open = $this->force_code_block ? '<div>' : '';
        if (GESHI_HEADER_NONE == $type) {
            return $header . $block_open;
        }
        $tag = ($type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $attributes . '>' . $header . $block_open;
    }

    // Line numbers are on: pick the container that goes with the header type
    switch ($type) {
        case GESHI_HEADER_NONE:
            return $header . '<ol' . $attributes . $ol_attributes . '>';
        case GESHI_HEADER_PRE:
            return '<pre' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
        case GESHI_HEADER_DIV:
        case GESHI_HEADER_PRE_VALID:
            return '<div' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
        case GESHI_HEADER_PRE_TABLE:
            return '<table' . $attributes . '>' . $header . '<tbody><tr class="li1">';
    }
    // Any other header type falls through without an explicit return value
}
3959 | | | |
/**
 * Returns the footer for the code block.
 *
 * The result consists of the closing tag(s) matching what header() opened
 * for the current header type and line-number setting, with the
 * (optional) user supplied footer content placed just before the
 * outermost closing tag.
 *
 * @return string The footer for the code block
 * @since  1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        // The property is called line_numbers. Testing a misspelt (undefined)
        // property always compared null against GESHI_NO_LINE_NUMBERS, so the
        // table footer was never produced and a <div> ended up between
        // </tbody> and </table>.
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Carry the footer class/style on the cell, as header() does for <thead>
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // Without line numbers header() opened a plain <div> for this type
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4013 | | | |
/**
 * Substitutes the placeholders that may appear in header and footer
 * content with values taken from the current highlighting run.
 *
 * Each placeholder is recognised in an angle-bracket and a curly-brace
 * spelling: TIME (elapsed time, three decimals), LANGUAGE (the language
 * name), VERSION (GESHI_VERSION) and SPEED (source bytes per second, or
 * 'N/A' when the elapsed time is not positive).
 *
 * @param  string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since  1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $elapsed      = $this->get_time();
    $elapsed_text = number_format($elapsed, 3);
    $lang_name    = $this->language_data['LANG_NAME'];

    // Throughput is only meaningful when some time has actually passed
    if ($elapsed <= 0) {
        $rate = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $rate = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Placeholder => value; the order is the order str_replace() applies them in
    $substitutions = array(
        '<TIME>'     => $elapsed_text,
        '{TIME}'     => $elapsed_text,
        '<LANGUAGE>' => $lang_name,
        '{LANGUAGE}' => $lang_name,
        '<VERSION>'  => GESHI_VERSION,
        '{VERSION}'  => GESHI_VERSION,
        '<SPEED>'    => $rate,
        '{SPEED}'    => $rate
    );

    return str_replace(array_keys($substitutions), array_values($substitutions), $instr);
}
4054 | | | |
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are replaced by
 * the internal markers '<SEMI>' and '<PIPE>' (see the comments in the
 * translation table below).
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // init: base table, corresponds to ENT_COMPAT
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );

    // Adjust a per-call copy. The static table is shared between calls, so
    // modifying it directly would make one ENT_NOQUOTES / ENT_QUOTES call
    // change the result of every later call.
    $aTrans = $aTransSpecchar;
    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
4138 | | | |
/**
 * Generates a CSS stylesheet for the highlighted code.
 *
 * With $economy_mode enabled only the declarations that matter for the
 * current configuration (line numbers, enabled lexic groups, ...) are
 * written; otherwise every non-empty style is written. An empty string is
 * returned when the object is in an error state.
 *
 * @param  boolean Whether to use economy mode or not
 * @return string  A stylesheet built on the data for the current language
 * @since  1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // After an error the language data is probably not populated, so do
    // not even try to build a stylesheet from it
    if ($this->error) {
        return '';
    }

    // Have the style rearrangements been processed yet? If not, do it now
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Every rule is prefixed with the id if there is one, otherwise with
    // the language class (plus the overall class when set)
    if ($this->overall_id) {
        $sel = '#' . $this->overall_id . ' ';
    } elseif ($this->overall_class) {
        $sel = '.' . $this->language . '.' . $this->overall_class . ' ';
    } else {
        $sel = '.' . $this->language . ' ';
    }

    $full     = !$economy_mode;
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // Rules are collected here and glued together at the very end
    $css = array();

    // Leading comment of the stylesheet
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n"
        . " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($full) {
        $css[] = "/**\n"
            . " * GeSHi Dynamically Generated Stylesheet\n"
            . " * --------------------------------------\n"
            . ' * Dynamically generated stylesheet for ' . $this->language . "\n"
            . ' * CSS class: ' . $this->overall_class . ', CSS id: ' . $this->overall_id . "\n"
            . $credits
            . " * --------------------------------------\n"
            . " */\n";
    } else {
        $css[] = "/**\n" . $credits . " */\n";
    }

    // Default styles for the code on each line
    if ($full || $numbered) {
        //$css[] = "$sel, {$sel}ol, {$sel}ol li {margin: 0;}\n";
        $css[] = $sel . '.de1, ' . $sel . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall style (empty styles are meaningless, whatever the mode)
    if ($this->overall_style != '') {
        $css[] = $sel . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: economy mode makes no difference, a style is either set or not
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        if ($key == GESHI_LINK) {
            $state = 'link';
        } elseif ($key == GESHI_HOVER) {
            $state = 'hover';
        } elseif ($key == GESHI_ACTIVE) {
            $state = 'active';
        } elseif ($key == GESHI_VISITED) {
            $state = 'visited';
        } else {
            continue;
        }
        $css[] = $sel . 'a:' . $state . ' {' . $style . "}\n";
    }

    // Header and footer content
    if ($this->header_content_style != '') {
        $css[] = $sel . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $css[] = $sel . '.foot {' . $this->footer_content_style . "}\n";
    }

    // "Important" sections
    if ($this->important_styles != '') {
        $css[] = $sel . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles: normal lines, the table line number column and
    // the fancy (every nth) lines
    if ($full || $numbered) {
        if ($this->line_style1 != '') {
            $css[] = $sel . 'li, ' . $sel . '.li1 {' . $this->line_style1 . "}\n";
        }
        if ($this->table_linenumber_style != '') {
            $css[] = $sel . '.ln {' . $this->table_linenumber_style . "}\n";
        }
    }
    if (($full || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $css[] = $sel . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: in economy mode only groups that are switched on
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($full || (isset($this->lexic_permissions['KEYWORDS'][$group])
            && $this->lexic_permissions['KEYWORDS'][$group])) {
            $css[] = $sel . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups: switched on, or backed by a comment regexp
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($full
            || (isset($this->lexic_permissions['COMMENTS'][$group])
                && $this->lexic_permissions['COMMENTS'][$group])
            || (!empty($this->language_data['COMMENT_REGEXP'])
                && !empty($this->language_data['COMMENT_REGEXP'][$group]))) {
            $css[] = $sel . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Sections that are switched on or off as a whole.
    // section => array(class prefix, whether the 'HARD' group is written as '_h')
    $sections = array(
        'ESCAPE_CHAR' => array('es', true),  // since 1.0.8: hardescapes
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),  // since 1.0.8: hardquotes
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false)
    );
    foreach ($sections as $section => $info) {
        foreach ($this->language_data['STYLES'][$section] as $group => $styles) {
            if ($styles == '') {
                continue;
            }
            if ($economy_mode && !$this->lexic_permissions[$section]) {
                continue;
            }
            if ($info[1] && $group === 'HARD') {
                $group = '_h';
            }
            $css[] = $sel . '.' . $info[0] . $group . ' {' . $styles . "}\n";
        }
    }

    // Script delimiters: written whenever a style is set
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $css[] = $sel . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regular expressions, which may define their own class name
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && !(isset($this->lexic_permissions['REGEXPS'][$group])
            && $this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $css[] = $sel . '.' . $regexp[GESHI_CLASS] . ' {' . $styles . "}\n";
        } else {
            $css[] = $sel . '.re' . $group . ' {' . $styles . "}\n";
        }
    }

    // Extra highlighted lines: the shared default style is written unless
    // (in economy mode) the per-line style count equals the line count
    if ($full || (count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles))) {
        $css[] = $sel . '.ln-xtra, ' . $sel . 'li.ln-xtra, ' . $sel . 'div.ln-xtra {'
            . $this->highlight_extra_lines_style . "}\n";
    }
    $css[] = $sel . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $css[] = $sel . '.lx' . $lineid . ', ' . $sel . 'li.lx' . $lineid . ', '
            . $sel . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return implode('', $css);
}
4344 | | | |
/**
 * Gets the style that is used for the specified extra-highlighted line.
 *
 * A style registered for that particular line wins; otherwise the general
 * style for extra-highlighted lines is returned.
 *
 * @param  int The line number information is requested for
 * @return mixed The per-line style if one is set, else the default extra-line style
 * @access private
 * @since  1.0.7.21
 */
function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // no "extra" style assigned to this line
    return $this->highlight_extra_lines_style;
}
4363 | | | |
/**
 * Creates an optimized list of regular expressions out of an array of
 * strings by merging common prefixes.
 *
 * Example:
 * <code>$list = array('faa', 'foo', 'foobar');
 *          => 'f(?:aa|oo(?:bar)?)'</code>
 *
 * The result is an array because the alternatives are spread over several
 * expressions once GESHI_MAX_PCRE_LENGTH or GESHI_MAX_PCRE_SUBPATTERNS
 * would be exceeded.
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of regular expression strings (without delimiters)
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // Characters preg_quote() escapes with a backslash. A key must never be
    // split so that one half starts with such a character, otherwise the
    // backslash and the character it escapes could end up on different sides
    // of a "(?:" group. '-' and '#' belong to this set as well: without them
    // e.g. 'a-b' and 'a.b' were merged into the invalid 'a\(?:-b|.b)'.
    $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', '#', $regexp_delimiter);
    // sorting puts entries with a common prefix next to each other
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // the key used on each nesting level for the previous entry
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        // $pointer walks down the token tree by reference
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character:
                            // keep the entry as a plain alternative instead of splitting
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            // the next pass sees the entry as a duplicate of the
                            // key just stored and moves on to the next list item
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    // too many subpatterns for the current expression: start a new one
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        // processed entries are no longer needed
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        $regexp_list[++$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
/**
 * Creates the regexp alternation string for an array (tree) of tokens.
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of $tokens is appended verbatim (nothing is quoted here); its
 * value is an array of sub-tokens of the same shape. Non-empty sub-tokens
 * are rendered recursively inside a non-capturing group "(?:...)". A
 * sub-token key of '' marks that the token may also end at this point, in
 * which case the group is made optional with a trailing "?".
 *
 * @param array &$tokens  array of tokens (token => array of sub-tokens)
 * @param bool  $recursed whether this is a recursive call; the
 *                        post-processing optimizations are only applied
 *                        by the outermost (non-recursed) call
 * @return string the alternatives joined by "|", without a trailing pipe
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // an empty-string key means "the entry may end right here"
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        // every entry is terminated by a pipe; the last one is cut off on return
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations
        // NOTE: an optimization for common trailing strings used to live here
        // but was disabled because it was buggy.

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);

        // (?:a|b|c|d|...) => [abcd...]
        // TODO: a|bb|c => [ac]|bb
        // This is done with preg_split() instead of preg_replace_callback():
        // the callback used to be built with create_function(), which was
        // deprecated in PHP 7.2 and removed in PHP 8.0.
        $pieces = preg_split('#\(\?\:((?:.\|)+.)\)#', $list, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces !== false) {
            $list = '';
            foreach ($pieces as $index => $piece) {
                if ($index % 2) {
                    // odd indices hold the captured "a|b|c" part of a matched group
                    $list .= '[' . str_replace('|', '', $piece) . ']';
                } else {
                    // even indices hold the untouched text between two matches
                    $list .= $piece;
                }
            }
        }
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}
4533 | | | } // End Class GeSHi |
4534 | | | |
4535 | | | |
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper for one-off highlighting. Behaves just like
     * highlight_string: the result is wrapped in <code> tags and is either
     * returned or echoed.
     *
     * @param string  The code to highlight
     * @param string  The language to highlight the code in
     * @param string  The path to the language files. May be left blank; as
     *                from version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result (true) or to echo it (false)
     * @return mixed  The highlighted code if $return is true. Otherwise the
     *                code is echoed and the result is false when the GeSHi
     *                object reports an error, true if it does not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        // the markup is the same for both modes, so build it once
        $markup = '<code>' . $highlighter->parse_code() . '</code>';

        if ($return) {
            return $markup;
        }

        echo $markup;

        // the error state is only reported in echo mode
        return !$highlighter->error();
    }
}
4564 | | | |
4565 | | | ?> |