jablonka.czprosek.czf

websvn

Subversion Repositories:
[/] [lib/] [geshi.php] - Blame information for rev 2

 

Line No. Rev Author Line
11simandl<?php
2/**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
11 *
12 * This file is part of GeSHi.
13 *
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 *
28 * @package geshi
29 * @subpackage core
30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
33 *
34 */
35 
36//
37// GeSHi Constants
38// You should use these constant names in your programs instead of
39// their values - you never know when a value may change in a future
40// version
41//
42 
43/** The version of this GeSHi file */
44define('GESHI_VERSION', '1.0.8.1');
45 
46// Define the root directory for the GeSHi code tree
47if (!defined('GESHI_ROOT')) {
48 /** The root directory for GeSHi */
49 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
50}
51/** The language file directory for GeSHi
52 @access private */
53define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
54 
55// Define if GeSHi should be paranoid about security
56if (!defined('GESHI_SECURITY_PARANOID')) {
57 /** Tells GeSHi to be paranoid about security settings */
58 define('GESHI_SECURITY_PARANOID', false);
59}
60 
61// Line numbers - use with enable_line_numbers()
62/** Use no line numbers when building the result */
63define('GESHI_NO_LINE_NUMBERS', 0);
64/** Use normal line numbers when building the result */
65define('GESHI_NORMAL_LINE_NUMBERS', 1);
66/** Use fancy line numbers when building the result */
67define('GESHI_FANCY_LINE_NUMBERS', 2);
68 
69// Container HTML type
70/** Use nothing to surround the source */
71define('GESHI_HEADER_NONE', 0);
72/** Use a "div" to surround the source */
73define('GESHI_HEADER_DIV', 1);
74/** Use a "pre" to surround the source */
75define('GESHI_HEADER_PRE', 2);
76/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
77define('GESHI_HEADER_PRE_VALID', 3);
78/**
79 * Use a "table" to surround the source:
80 *
81 * <table>
82 * <thead><tr><td colspan="2">$header</td></tr></thead>
83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
85 * </table>
86 *
87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
90 */
91define('GESHI_HEADER_PRE_TABLE', 4);
92 
93// Capitalisation constants
94/** Leave keywords found as the case that they are */
95define('GESHI_CAPS_NO_CHANGE', 0);
96/** Uppercase keywords found */
97define('GESHI_CAPS_UPPER', 1);
98/** Lowercase keywords found */
99define('GESHI_CAPS_LOWER', 2);
100 
101// Link style constants
102/** Links in the source in the :link state */
103define('GESHI_LINK', 0);
104/** Links in the source in the :hover state */
105define('GESHI_HOVER', 1);
106/** Links in the source in the :active state */
107define('GESHI_ACTIVE', 2);
108/** Links in the source in the :visited state */
109define('GESHI_VISITED', 3);
110 
111// Important string starter/finisher
112// Note that if you change these, they should be as-is: i.e., don't
113// write them as if they had been run through htmlentities()
114/** The starter for important parts of the source */
115define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116/** The ender for important parts of the source */
117define('GESHI_END_IMPORTANT', '<END GeSHi>');
118 
119/**#@+
120 * @access private
121 */
122// When strict mode applies for a language
123/** Strict mode never applies (this is the most common) */
124define('GESHI_NEVER', 0);
125/** Strict mode *might* apply, and can be enabled or
126 disabled by {@link GeSHi->enable_strict_mode()} */
127define('GESHI_MAYBE', 1);
128/** Strict mode always applies */
129define('GESHI_ALWAYS', 2);
130 
131// Advanced regexp handling constants, used in language files
132/** The key of the regex array defining what to search for */
133define('GESHI_SEARCH', 0);
134/** The key of the regex array defining what bracket group in a
135 matched search to use as a replacement */
136define('GESHI_REPLACE', 1);
137/** The key of the regex array defining any modifiers to the regular expression */
138define('GESHI_MODIFIERS', 2);
139/** The key of the regex array defining what bracket group in a
140 matched search to put before the replacement */
141define('GESHI_BEFORE', 3);
142/** The key of the regex array defining what bracket group in a
143 matched search to put after the replacement */
144define('GESHI_AFTER', 4);
145/** The key of the regex array defining a custom keyword to use
146 for this regexp's html tag class */
147define('GESHI_CLASS', 5);
148 
149/** Used in language files to mark comments */
150define('GESHI_COMMENTS', 0);
151 
152/** Used to work around missing PHP features **/
153define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
154 
/**
 * Make sure stripos() can be called: it is only defined here when the
 * running PHP does not already provide it.
 *
 * The fallback finds the first case-insensitive occurrence of $needle in
 * $haystack and returns its byte position in the ORIGINAL haystack, or
 * false when there is no match.
 */
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // Normalise the offset to a non-negative start position
            $start = (int) $offset;
            if ($start < 0) {
                $start = max(0, strlen($haystack) + $start);
            }
            if ($start > 0) {
                // Emulate the missing offset parameter by cutting the haystack
                $haystack = (string) substr($haystack, $start);
            }
            // The "i" modifier is what makes this stripos() and not strpos()
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                // Translate the position back into the uncut haystack
                return $match[0][1] + $start;
            }
            return false;
        }
    }
    else {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // preg_match() expects an integer offset; null means "from the start"
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
184 
185/** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
186 regular expressions. Set this to false if your PCRE lib is up to date
187 @see GeSHi->optimize_regexp_list()
188 **/
189define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190/** it's also important not to generate too long regular expressions
191 be generous here... but keep in mind, that when reaching this limit we
192 still have to close open patterns. 12k should do just fine on a 16k limit.
193 @see GeSHi->optimize_regexp_list()
194 **/
195define('GESHI_MAX_PCRE_LENGTH', 12288);
196 
197//Number format specification
198/** Basic number format for integers */
199define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200/** Enhanced number format for integers like seen in C */
201define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202/** Number format to highlight binary numbers with a suffix "b" */
203define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204/** Number format to highlight binary numbers with a prefix % */
205define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206/** Number format to highlight binary numbers with a prefix 0b (C) */
207define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208/** Number format to highlight octal numbers with a leading zero */
209define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210/** Number format to highlight octal numbers with a suffix of o */
211define('GESHI_NUMBER_OCT_SUFFIX', 512); //[0-7]+[oO]
212/** Number format to highlight hex numbers with a prefix 0x */
213define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
214/** Number format to highlight hex numbers with a suffix of h */
215define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h
216/** Number format to highlight floating-point numbers without support for scientific notation */
217define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
218/** Number format to highlight floating-point numbers with an "f" suffix, without support for scientific notation */
219define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
220/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
221define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
222/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
223define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
224//Custom formats are passed by RX array
225 
226// Error detection - use these to analyse faults
227/** No sourcecode to highlight was specified
228 * @deprecated
229 */
230define('GESHI_ERROR_NO_INPUT', 1);
231/** The language specified does not exist */
232define('GESHI_ERROR_NO_SUCH_LANG', 2);
233/** GeSHi could not open a file for reading (generally a language file) */
234define('GESHI_ERROR_FILE_NOT_READABLE', 3);
235/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
236define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
237/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
238define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
239/**#@-*/
240 
241 
242/**
243 * The GeSHi Class.
244 *
245 * Please refer to the documentation for GeSHi 1.0.X that is available
246 * at http://qbnz.com/highlighter/documentation.php for more information
247 * about how to use this class.
248 *
249 * @package geshi
250 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
251 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
252 */
253class GeSHi {
254 /**#@+
255 * @access private
256 */
257 /**
258 * The source code to highlight
259 * @var string
260 */
261 var $source = '';
262 
263 /**
264 * The language to use when highlighting
265 * @var string
266 */
267 var $language = '';
268 
269 /**
270 * The data for the language used
271 * @var array
272 */
273 var $language_data = array();
274 
275 /**
276 * The path to the language files
277 * @var string
278 */
279 var $language_path = GESHI_LANG_ROOT;
280 
281 /**
282 * The error message associated with an error
283 * @var string
284 * @todo check err reporting works
285 */
286 var $error = false;
287 
288 /**
289 * Possible error messages
290 * @var array
291 */
292 var $error_messages = array(
293 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
294 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
295 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
296 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
297 );
298 
299 /**
300 * Whether highlighting is strict or not
301 * @var boolean
302 */
303 var $strict_mode = false;
304 
305 /**
306 * Whether to use CSS classes in output
307 * @var boolean
308 */
309 var $use_classes = false;
310 
311 /**
312 * The type of header to use. Can be one of the following
313 * values:
314 *
315 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
316 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
317 * - GESHI_HEADER_NONE: No header is outputted.
318 *
319 * @var int
320 */
321 var $header_type = GESHI_HEADER_PRE;
322 
323 /**
324 * Array of permissions for which lexics should be highlighted
325 * @var array
326 */
327 var $lexic_permissions = array(
328 'KEYWORDS' => array(),
329 'COMMENTS' => array('MULTI' => true),
330 'REGEXPS' => array(),
331 'ESCAPE_CHAR' => true,
332 'BRACKETS' => true,
333 'SYMBOLS' => false,
334 'STRINGS' => true,
335 'NUMBERS' => true,
336 'METHODS' => true,
337 'SCRIPT' => true
338 );
339 
340 /**
341 * The time it took to parse the code
342 * @var double
343 */
344 var $time = 0;
345 
346 /**
347 * The content of the header block
348 * @var string
349 */
350 var $header_content = '';
351 
352 /**
353 * The content of the footer block
354 * @var string
355 */
356 var $footer_content = '';
357 
358 /**
359 * The style of the header block
360 * @var string
361 */
362 var $header_content_style = '';
363 
364 /**
365 * The style of the footer block
366 * @var string
367 */
368 var $footer_content_style = '';
369 
370 /**
371 * Tells if a block around the highlighted source should be forced
372 * if not using line numbering
373 * @var boolean
374 */
375 var $force_code_block = false;
376 
377 /**
378 * The styles for hyperlinks in the code
379 * @var array
380 */
381 var $link_styles = array();
382 
383 /**
384 * Whether important blocks should be recognised or not
385 * @var boolean
386 * @deprecated
387 * @todo REMOVE THIS FUNCTIONALITY!
388 */
389 var $enable_important_blocks = false;
390 
391 /**
392 * Styles for important parts of the code
393 * @var string
394 * @deprecated
395 * @todo As above - rethink the whole idea of important blocks as it is buggy and
396 * will be hard to implement in 1.2
397 */
398 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
399 
400 /**
401 * Whether CSS IDs should be added to the code
402 * @var boolean
403 */
404 var $add_ids = false;
405 
406 /**
407 * Lines that should be highlighted extra
408 * @var array
409 */
410 var $highlight_extra_lines = array();
411 
412 /**
413 * Styles of lines that should be highlighted extra
414 * @var array
415 */
416 var $highlight_extra_lines_styles = array();
417 
418 /**
419 * Styles of extra-highlighted lines
420 * @var string
421 */
422 var $highlight_extra_lines_style = 'background-color: #ffc;';
423 
424 /**
425 * The line ending
426 * If null, nl2br() will be used on the result string.
427 * Otherwise, all instances of \n will be replaced with $line_ending
428 * @var string
429 */
430 var $line_ending = null;
431 
432 /**
433 * Number at which line numbers should start at
434 * @var int
435 */
436 var $line_numbers_start = 1;
437 
438 /**
439 * The overall style for this code block
440 * @var string
441 */
442 var $overall_style = 'font-family:monospace;';
443 
444 /**
445 * The style for the actual code
446 * @var string
447 */
448 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
449 
450 /**
451 * The overall class for this code block
452 * @var string
453 */
454 var $overall_class = '';
455 
456 /**
457 * The overall ID for this code block
458 * @var string
459 */
460 var $overall_id = '';
461 
462 /**
463 * Line number styles
464 * @var string
465 */
466 var $line_style1 = 'font-weight: normal; vertical-align:top;';
467 
468 /**
469 * Line number styles for fancy lines
470 * @var string
471 */
472 var $line_style2 = 'font-weight: bold; vertical-align:top;';
473 
474 /**
475 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
476 * @var string
477 */
478 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
479 
480 /**
481 * Flag for how line numbers are displayed
482 * @var int
483 */
484 var $line_numbers = GESHI_NO_LINE_NUMBERS;
485 
486 /**
487 * Flag to decide if multi line spans are allowed. Set it to false to make sure
488 * each tag is closed before and reopened after each linefeed.
489 * @var boolean
490 */
491 var $allow_multiline_span = true;
492 
493 /**
494 * The "nth" value for fancy line highlighting
495 * @var int
496 */
497 var $line_nth_row = 0;
498 
499 /**
500 * The size of tab stops
501 * @var int
502 */
503 var $tab_width = 8;
504 
505 /**
506 * Should we use language-defined tab stop widths?
507 * @var boolean
508 */
509 var $use_language_tab_width = false;
510 
511 /**
512 * Default target for keyword links
513 * @var string
514 */
515 var $link_target = '';
516 
517 /**
518 * The encoding to use for entity encoding
519 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
520 * @var string
521 */
522 var $encoding = 'utf-8';
523 
524 /**
525 * Should keywords be linked?
526 * @var boolean
527 */
528 var $keyword_links = true;
529 
530 /**
531 * Currently loaded language file
532 * @var string
533 * @since 1.0.7.22
534 */
535 var $loaded_language = '';
536 
537 /**
538 * Whether the caches needed for parsing are built or not
539 *
540 * @var bool
541 * @since 1.0.8
542 */
543 var $parse_cache_built = false;
544 
545 /**
546 * Work around for Suhosin Patch with disabled /e modifier
547 *
548 * Note from suhosins author in config file:
549 * <blockquote>
550 * The /e modifier inside <code>preg_replace()</code> allows code execution.
551 * Often it is the cause for remote code execution exploits. It is wise to
552 * deactivate this feature and test where in the application it is used.
553 * The developer using the /e modifier should be made aware that he should
554 * use <code>preg_replace_callback()</code> instead
555 * </blockquote>
556 *
557 * @var array
558 * @since 1.0.8
559 */
560 var $_kw_replace_group = 0;
561 var $_rx_key = 0;
562 
563 /**
564 * some "callback parameters" for handle_multiline_regexps
565 *
566 * @since 1.0.8
567 * @access private
568 * @var string
569 */
570 var $_hmr_before = '';
571 var $_hmr_replace = '';
572 var $_hmr_after = '';
573 var $_hmr_key = 0;
574 
575 /**#@-*/
576 
/**
 * Creates a new GeSHi object, optionally with source and language.
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory.
 *                         <b>This is deprecated!</b> It is handed to
 *                         {@link GeSHi->set_language_path()}, which leaves
 *                         the default path untouched for an empty value.
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    // empty('0') is true, so a source consisting of the single character
    // "0" has to be let through explicitly.
    if ('0' === $source || !empty($source)) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor.
 *
 * Kept so that PHP 4 still constructs the object and so that existing
 * callers of GeSHi() (e.g. parent::GeSHi() in subclasses) keep working;
 * it simply forwards to {@link GeSHi->__construct()}.
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
600 
/**
 * Reports the error recorded by the last GeSHi operation.
 *
 * The message template for the stored error code is looked up in
 * $error_messages and its {LANGUAGE} / {PATH} placeholders are filled in
 * from the current language and language path.
 *
 * @return string|false An HTML-formatted error message, or false if no
 *                      error is recorded
 * @since 1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    // Placeholders that message templates may use for debugging
    $placeholders = array('{LANGUAGE}', '{PATH}');
    $values = array($this->language, $this->language_path);
    $text = str_replace($placeholders, $values, $this->error_messages[$this->error]);

    return '<br /><strong>GeSHi Error:</strong> ' . $text . ' (code ' . $this->error . ')<br />';
}
624 
/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * Uses the LANG_NAME entry of the loaded language data. When that entry
 * is missing (no language file has been loaded) the requested language
 * identifier is used instead, so no undefined index is read. If the last
 * error was GESHI_ERROR_NO_SUCH_LANG, " (Unknown Language)" is appended.
 *
 * @return string The name for the current language
 * @since 1.0.2
 */
function get_language_name() {
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : $this->language;

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
638 
/**
 * Replaces the source code held by this object.
 *
 * The list of extra-highlighted lines is cleared as well, since it
 * referred to the previous source.
 *
 * @param string $source The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
649 
/**
 * Selects the language used for highlighting and loads its language file.
 *
 * The name is stripped of everything except letters, digits, "-" and "_"
 * and lower-cased before the file name is built from the language path.
 * If that file is the one already loaded, nothing happens; pass
 * $force_reset = true to have it loaded again regardless.
 *
 * On an unreadable file the error GESHI_ERROR_NO_SUCH_LANG is recorded.
 *
 * @param string  $language    The name of the language to use
 * @param boolean $force_reset Whether to reload even if already loaded
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        // Forget what was loaded so the comparison below cannot match
        $this->loaded_language = false;
    }

    // Sanitise the name to prevent malicious code injection via the path
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));
    $file_name = $this->language_path . $language . '.php';

    if ($file_name != $this->loaded_language) {
        $this->language = $language;
        $this->error = false;
        $this->strict_mode = GESHI_NEVER;

        if (is_readable($file_name)) {
            // Load the language for parsing
            $this->load_language($file_name);
        } else {
            $this->error = GESHI_ERROR_NO_SUCH_LANG;
        }
    }
}
690 
/**
 * Sets the path to the directory containing the language files.
 *
 * The path is silently ignored (the current path is kept) when it
 * - contains a colon, unless the directory separator is a backslash and
 *   the only colon is the one of a leading drive letter ("C:"),
 * - contains a character outside the accepted set, or
 * - contains "/." or ".." while GESHI_SECURITY_PARANOID is enabled.
 *
 * An accepted, non-empty path gets a trailing "/" if it has none and the
 * current language is set again so that the new path takes effect.
 *
 * @param string $path The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed.
 */
function set_language_path($path) {
    // Compare against false explicitly: strpos() returns 0 for a colon in
    // the first position, which a plain truthiness test would let through.
    if (false !== strpos($path, ':')) {
        //Security Fix to prevent external directories using fopen wrappers.
        if (DIRECTORY_SEPARATOR == "\\") {
            // Only a single colon directly after a drive letter is acceptable
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        } else {
            return;
        }
    }
    // Reject any character outside the accepted set.
    // NOTE(review): in this single-quoted string "\\\s" unescapes to "\\s",
    // i.e. a literal backslash plus the letter s, so whitespace is rejected
    // - confirm whether "\s" (whitespace allowed) was intended.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }
    // Paranoid mode: no path component may start with a dot ...
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
        return;
    }
    // ... and no ".." sequence may appear anywhere in the path
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
        return;
    }
    if ($path) {
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
731 
/**
 * Sets the type of container ("header") used around the output.
 *
 * Accepted values are GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 * GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE.
 * Any other value leaves the current type unchanged and records the
 * error GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * @param int $type The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    $known_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $known_types)) {
        // Valid: remember the new header type
        $this->header_type = $type;
    } else {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
757 
/**
 * Sets the stylesheet declarations applied to the whole code block.
 *
 * @param string  $style             The overall style for the outputted code block
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   current one, otherwise it replaces it
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->overall_style .= $style;
        return;
    }
    $this->overall_style = $style;
}
774 
/**
 * Stores the overall CSS class name for this block of code, so that a
 * stylesheet can address this object's output.
 *
 * @param string $class The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
786 
/**
 * Stores the overall CSS ID for this block of code, so that a
 * stylesheet can address this object's output.
 *
 * @param string $id The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
797 
/**
 * Switches the use of CSS classes for highlighting on or off. Calling
 * this method without arguments switches it on.
 *
 * @param boolean $flag Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    // Store a real boolean whatever truthy/falsy value was passed
    $this->use_classes = (bool) $flag;
}
808 
/**
 * Sets the stylesheet declarations used for the actual code.
 *
 * @param string  $style             The style to use for actual code
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   current one, otherwise it replaces it
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }
    $this->code_style = $style;
}
831 
/**
 * Sets the styles for the line numbers.
 *
 * May be called with two arguments as ($style1, $preserve_defaults): a
 * boolean second argument is taken as the merge flag and the "fancy"
 * style is then treated as an empty string.
 *
 * @param string         $style1            The style for "normal" line numbers
 * @param string|boolean $style2            The style for "fancy" line numbers,
 *                                          or the merge flag (see above)
 * @param boolean        $preserve_defaults If true the styles are appended to
 *                                          the current ones, otherwise they
 *                                          replace them
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: the second argument is really the merge flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
    } else {
        $this->line_style1 = $style1;
        $this->line_style2 = $style2;
    }
}
860 
/**
 * Sets whether and how line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * Any other value records the error GESHI_ERROR_INVALID_LINE_NUMBER_TYPE
 * and leaves the current settings untouched.
 *
 * For fancy line numbers, the second parameter signals which lines are
 * to be fancy, e.g. 5 makes every 5th line fancy.
 *
 * @param int $flag    How line numbers should be displayed
 * @param int $nth_row Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    $valid_flags = array(
        GESHI_NO_LINE_NUMBERS,
        GESHI_NORMAL_LINE_NUMBERS,
        GESHI_FANCY_LINE_NUMBERS
    );

    if (!in_array($flag, $valid_flags)) {
        // Do not store a value that was just reported as invalid
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        return;
    }

    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
886 
/**
 * Sets whether spans and other HTML markup generated by GeSHi may
 * extend over multiple lines. The property defaults to true; pass false
 * to have each tag closed before and reopened after every linefeed.
 *
 * @param boolean $flag Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    // Store a real boolean whatever truthy/falsy value was passed
    $this->allow_multiline_span = ($flag) ? true : false;
}
899 
/**
 * Returns the current multiline span setting.
 *
 * @see enable_multiline_span
 * @return bool Whether multiline spans are allowed
 */
function get_multiline_span() {
    $allowed = $this->allow_multiline_span;
    return $allowed;
}
909 
/**
 * Sets the style for a keyword group and makes sure the group has a
 * highlighting permission entry (a new entry is switched on).
 *
 * @param int     $key               The key of the keyword group to change the styles of
 * @param string  $style             The style to make the keywords
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   group's current style, otherwise it
 *                                   replaces it
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Only append when there is a style to append to; this avoids reading
    // an undefined array key for groups without a style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
934 
/**
 * Turns highlighting on/off for a keyword group.
 *
 * @param int     $key  The key of the keyword group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    // Store a real boolean whatever truthy/falsy value was passed
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
945 
/**
 * Sets the style for a comment group.
 *
 * @param int     $key               The key of the comment group to change the styles of
 * @param string  $style             The style to make the comments
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   group's current style, otherwise it
 *                                   replaces it
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    // Only append when there is a style to append to; this avoids reading
    // an undefined array key for groups without a style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
964 
/**
 * Turns highlighting on/off for a comment group.
 *
 * @param int     $key  The key of the comment group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true) {
    // Store a real boolean whatever truthy/falsy value was passed
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
975 
/**
 * Sets the style for escaped characters.
 *
 * @param string  $style             The style to make the escape characters
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   current style, otherwise it replaces it
 * @since 1.0.0
 */
function set_escape_characters_style($style, $preserve_defaults = false) {
    // Only append when there is a style to append to; this avoids reading
    // an undefined array key when no style has been set yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][0])) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
    } else {
        $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
    }
}
993 
/**
 * Turns highlighting on/off for escaped characters. Calling this method
 * without arguments turns it on.
 *
 * @param boolean $flag Whether to turn highlighting for escape characters on or off
 * @since 1.0.0
 */
function set_escape_characters_highlighting($flag = true) {
    // Store a real boolean whatever truthy/falsy value was passed
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1003 
/**
 * Sets the style for brackets.
 *
 * If $preserve_defaults is true the new style is appended to the style
 * already stored, with the user defined styles having priority; otherwise
 * the stored style is replaced. Merging when no style is stored yet simply
 * stores the new style.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string  The style to make the brackets
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
1025 
/**
 * Switches highlighting for brackets on or off.
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @since 1.0.0
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1039 
/**
 * Sets the style for a group of symbols.
 *
 * If $preserve_defaults is true the new style is appended to the style
 * already stored for the group, with the user defined styles having
 * priority; otherwise the stored style is replaced. Merging into a group
 * that has no stored style simply stores the new style.
 *
 * For backward compatibility, changing group 0 also applies the same
 * change to the bracket style.
 *
 * @param string  The style to make the symbols
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @param int     Tells the group of symbols for which style should be set.
 * @since 1.0.1
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of symbols
    if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // For backward compatibility
    if (0 == $group) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
1064 
/**
 * Switches highlighting for symbols on or off.
 *
 * For backward compatibility the same flag is also applied to bracket
 * highlighting.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 * @since 1.0.0
 */
function set_symbols_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // Keep the deprecated bracket switch in step with the symbol switch
    $this->set_brackets_highlighting($flag);
}
1078 
/**
 * Sets the style for strings.
 *
 * If $preserve_defaults is true the new style is appended to the style
 * already stored, with the user defined styles having priority; otherwise
 * the stored style is replaced. Merging when no style is stored yet simply
 * stores the new style.
 *
 * @param string  The style to make the strings
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_strings_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][0])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['STRINGS'][0] = $style;
    }
}
1096 
/**
 * Switches highlighting for strings on or off.
 *
 * @param boolean Whether to turn highlighting for strings on or off
 * @since 1.0.0
 */
function set_strings_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1106 
/**
 * Sets the style for numbers.
 *
 * If $preserve_defaults is true the new style is appended to the style
 * already stored, with the user defined styles having priority; otherwise
 * the stored style is replaced. Merging when no style is stored yet simply
 * stores the new style.
 *
 * @param string  The style to make the numbers
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_numbers_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][0])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['NUMBERS'][0] = $style;
    }
}
1124 
/**
 * Switches highlighting for numbers on or off.
 *
 * @param boolean Whether to turn highlighting for numbers on or off
 * @since 1.0.0
 */
function set_numbers_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
1134 
/**
 * Sets the style for methods.
 *
 * $key is a number that references the appropriate "object splitter" -
 * see the language file for the language you are highlighting to get this
 * number. If $preserve_defaults is true the new style is appended to the
 * style already stored for that splitter, with the user defined styles
 * having priority; otherwise the stored style is replaced. Merging into a
 * splitter that has no stored style simply stores the new style.
 *
 * @param int     The key of the object splitter to change the styles of
 * @param string  The style to make the methods
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
1155 
/**
 * Switches highlighting for methods on or off.
 *
 * @param boolean Whether to turn highlighting for methods on or off
 * @since 1.0.0
 */
function set_methods_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
1165 
/**
 * Sets the style for a regular expression group.
 *
 * If $preserve_defaults is true the new style is appended to the style
 * already stored for the group, with the user defined styles having
 * priority; otherwise the stored style is replaced. Merging into a group
 * that has no stored style simply stores the new style.
 *
 * @param int     The key of the regular expression group to change the styles of
 * @param string  The style to make the regular expression matches
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
        // Append so that the user's declarations come after the defaults
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        // Overwrite, or nothing stored yet to merge with
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
1183 
/**
 * Switches highlighting for one regular expression group on or off.
 *
 * @param int     The key of the regular expression group to turn on or off
 * @param boolean Whether to turn highlighting for the regular expression group on or off
 * @since 1.0.0
 */
function set_regexps_highlighting($key, $flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
1194 
/**
 * Sets whether the keywords of a group are matched case sensitively.
 *
 * @param int     The key of the keyword group to change the case sensitivity of
 * @param boolean Whether to check in a case sensitive manner or not
 * @since 1.0.0
 */
function set_case_sensitivity($key, $case) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
1205 
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value leaves the current setting untouched.
 *
 * @param int A constant specifying what to do with matched keywords
 * @since 1.0.1
 */
function set_case_keywords($case) {
    $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
    if (!in_array($case, $allowed)) {
        // Not one of the known modes: keep what is configured
        return;
    }
    $this->language_data['CASE_KEYWORDS'] = $case;
}
1222 
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted to an integer; a width smaller than 1 is
 * replaced by the default width of 8.
 *
 * @param int The tab width
 * @since 1.0.0
 */
function set_tab_width($width) {
    $width = intval($width);

    // A tab has to cover at least one column, otherwise use the default
    $this->tab_width = ($width < 1) ? 8 : $width;
}
1240 
/**
 * Sets whether or not to use the tab-stop width specified by the language.
 *
 * @param boolean Whether to use language-specific tab-stop widths
 * @since 1.0.7.20
 */
function set_use_language_tab_width($use) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->use_language_tab_width = $use ? true : false;
}
1250 
/**
 * Returns the tab width to use, based on the current language and user
 * preference.
 *
 * The language's TAB_WIDTH is returned only when language-specific widths
 * are enabled and the language defines one; otherwise the user's width is
 * returned.
 *
 * @return int Tab width
 * @since 1.0.7.20
 */
function get_real_tab_width() {
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
1266 
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The call only has an effect when the language's STRICT_MODE_APPLIES
 * setting is GESHI_MAYBE.
 *
 * @param boolean Whether to enable strict mode or not
 * @since 1.0.0
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        // The language does not leave the decision to the caller
        return;
    }

    $this->strict_mode = $mode ? GESHI_ALWAYS : GESHI_NEVER;
}
1280 
/**
 * Disables all highlighting.
 *
 * Shorthand for enable_highlighting(false).
 *
 * @since 1.0.0
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    // Delegate to the combined switch with the flag cleared
    $this->enable_highlighting(false);
}
1291 
/**
 * Enables (or disables) all highlighting.
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting. Every entry of the
 * lexic permissions, including each entry of the per-group lists, is set
 * to the flag, as is the switch for important blocks.
 *
 * @since 1.0.0
 * @param boolean A flag specifying whether to enable or disable all highlighting
 */
function enable_highlighting($flag = true) {
    $flag = (bool) $flag;

    foreach ($this->lexic_permissions as $type => $permission) {
        if (!is_array($permission)) {
            // Plain on/off switch
            $this->lexic_permissions[$type] = $flag;
            continue;
        }

        // One switch per group
        foreach (array_keys($permission) as $group) {
            $this->lexic_permissions[$type][$group] = $flag;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $flag;
}
1317 
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found.
 *
 * The lookup maps a language name to the extensions it handles; the first
 * language listing the extension wins. A language may list its extensions
 * as an array or, for a single extension, as a plain string. When $lookup
 * is empty or not an array the built-in table is used.
 *
 * @param string The extension to get a language name for
 * @param array  A lookup array to use instead of the default one
 * @return string The language name, or '' if the extension is unknown
 * @since 1.0.5
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 * @todo static?
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    if ( !is_array($lookup) || empty($lookup)) {
        $lookup = array(
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'h', 'hpp'),
            'csharp' => array(),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'dos' => array('bat', 'cmd'),
            'gettext' => array('po', 'pot'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'pascal' => array(),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'sas' => array('sas'),
            'scilab' => array('sci'),
            'smarty' => array(),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'xml' => array('xml')
        );
    }

    foreach ($lookup as $lang => $extensions) {
        // Tolerate a custom table that gives a single extension as a scalar
        if (in_array($extension, (array) $extensions)) {
            return $lang;
        }
    }
    return '';
}
1386 
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used.
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * If the name does not refer to a readable regular file, or reading it
 * fails, the error is set to GESHI_ERROR_FILE_NOT_READABLE and neither
 * the source nor the language is changed.
 *
 * @param string The filename to load the source from
 * @param array  A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file($file_name, $lookup = array()) {
    // is_readable() alone also accepts directories, so insist on a regular file
    if (!is_file($file_name) || !is_readable($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $contents = file_get_contents($file_name);
    if (false === $contents) {
        // The read failed despite the checks above: do not use false as source
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source($contents);

    // Take the extension from the file's base name only, so a dot in a
    // directory name is never mistaken for the start of the extension
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
1412 
/**
 * Adds a keyword to a keyword group for highlighting.
 *
 * A word that is already in the group is not added again. If the group
 * has no word list yet, an empty one is created first. When the parse
 * cache has already been built, the group's cached expression is updated
 * as well: the word is appended to the last cached expression, or the
 * group is compiled from scratch if nothing is cached for it.
 *
 * @param int    The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword($key, $word) {
    // Adding to a group that does not exist yet starts a new word list
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        // The cache is compiled later from the complete word list
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // Nothing cached for this group, so there is no expression to extend
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
1431 
/**
 * Removes a keyword from a keyword group.
 *
 * @param int    The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @param bool   Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
 *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the other hand
 *               it might be too expensive to recompile the regexp list for every removal if you want to
 *               remove a lot of keywords.
 * @since 1.0.0
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if (false === $position) {
        // The word is not part of this group
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($this->parse_cache_built && $recompile) {
        $this->optimize_keyword_group($key);
    }
}
1456 
/**
 * Creates a new keyword group.
 *
 * The group is registered with its words, style and case sensitivity and
 * is enabled for highlighting. If the parse cache has already been built,
 * the group's keyword regexp is compiled immediately.
 *
 * @param int     The key of the keyword group to create
 * @param string  The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array   The words to use for the keyword group
 * @return mixed  false if the word list is empty (no group is created),
 *                otherwise no value
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $words = (array) $words;
    if (!$words) {
        // empty word lists mess up highlighting
        return false;
    }

    // Register the group's data and switch it on
    $this->language_data['KEYWORDS'][$key] = $words;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
    $this->lexic_permissions['KEYWORDS'][$key] = true;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
1484 
/**
 * Removes a keyword group, together with its highlighting switch, case
 * sensitivity setting, style and cached keyword regexp.
 *
 * @param int The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key) {
    // Drop everything stored under this group's key
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        //NEW in 1.0.8
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1501 
/**
 * Compiles the optimized regexp list for a keyword group and stores it
 * in the keyword cache.
 *
 * @param int The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
function optimize_keyword_group($key) {
    $compiled = $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] = $compiled;
}
1512 
/**
 * Stores the content to use for the header block.
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content($content) {
    // Stored as given
    $this->header_content = $content;
}
1522 
/**
 * Stores the content to use for the footer block.
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content) {
    // Stored as given
    $this->footer_content = $content;
}
1532 
/**
 * Stores the style to use for the header content.
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style) {
    // Stored as given
    $this->header_content_style = $style;
}
1542 
/**
 * Stores the style to use for the footer content.
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style) {
    // Stored as given
    $this->footer_content_style = $style;
}
1552 
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not.
 *
 * @param boolean Tells whether to enable or disable this feature
 * @since 1.0.7.20
 */
function enable_inner_code_block($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->force_code_block = $flag ? true : false;
}
1563 
/**
 * Sets the base URL to be used for the keywords of a group.
 *
 * @param int    The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *               the url somewhere, it is replaced by the keyword
 *               that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    // Stored as given, under the group's key
    $this->language_data['URLS'][$group] = $url;
}
1576 
/**
 * Sets the styles for links in code, for one link state.
 *
 * @param int    A constant that specifies what state the style is being
 *               set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    // One style entry per link state
    $this->link_styles[$type] = $styles;
}
1588 
/**
 * Sets the target for links in code.
 *
 * The target is stored as a ready-made attribute string; an empty target
 * stores an empty string, i.e. no attribute.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '" ' : '';
}
1602 
/**
 * Stores the styles to use for important parts of the code.
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    // Stored as given
    $this->important_styles = $styles;
}
1612 
/**
 * Sets whether context-important blocks are highlighted.
 *
 * @param boolean Tells whether to enable or disable highlighting of important blocks
 * @todo Remove this feature from GeSHi
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->enable_important_blocks = (bool) $flag;
}
1624 
/**
 * Sets whether CSS IDs should be added to each line.
 *
 * @param boolean If true, IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->add_ids = (bool) $flag;
}
1634 
/**
 * Specifies which lines to highlight extra.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  An array of line numbers to highlight, or just a line
 *               number on its own.
 * @param string A string specifying the style to use for this line.
 *               If null is specified, the default style is used.
 *               If false is specified, the line will be removed from
 *               special highlighting
 * @since 1.0.2
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        // Handle a list by applying the same style to each entry in turn
        foreach ($lines as $line) {
            $this->highlight_lines_extra($line, $style);
        }
        return;
    }

    // Mark the line as being highlighted specially
    $line = intval($lines);
    $this->highlight_extra_lines[$line] = $line;

    if (false === $style) {
        // Take the line out of special highlighting again
        unset($this->highlight_extra_lines[$line]);
        unset($this->highlight_extra_lines_styles[$line]);
        return;
    }

    if (null === $style) {
        // Default style: drop any line-specific style
        unset($this->highlight_extra_lines_styles[$line]);
        return;
    }

    $this->highlight_extra_lines_styles[$line] = $style;
}
1671 
/**
 * Stores the style to use for extra-highlighted lines.
 *
 * @param string The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    // Stored as given
    $this->highlight_extra_lines_style = $styles;
}
1681 
/**
 * Sets the line-ending.
 *
 * @param string The new line-ending
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    // Always stored as a string
    $this->line_ending = strval($line_ending);
}
1691 
/**
 * Sets what number line numbers should start at. The value is converted
 * to an integer and its sign is dropped.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $start = intval($number);
    $this->line_numbers_start = abs($start);
}
1710 
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The encoding is stored in lower case; an empty value leaves
 * the current encoding unchanged.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        // Nothing given: keep the current encoding
        return;
    }

    $this->encoding = strtolower($encoding);
}
1728 
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean If true, links will be added to keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->keyword_links = $enable ? true : false;
}
1738 
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed.
 *
 * Only the number highlighting data is prepared here: the language's
 * NUMBERS setting is turned into NUMBERS_CACHE, which maps a style group
 * to the number formats highlighted with that group's style.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    // Nothing to prepare when numbers are not highlighted
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    // A language without a NUMBERS entry is treated like NUMBERS = 0
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    // An array is taken over as the cache unchanged
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    // Otherwise NUMBERS is a bitfield of number format flags
    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        // No flags given: fall back to basic integers and plain floats
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC |
            GESHI_NUMBER_FLT_NONSCI;
    }

    // Walk the bitfield from the lowest bit upwards
    $bit = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $mask = 1 << $bit;

        // Styles may be keyed by flag value; re-key them by bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$mask])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$mask];
            unset($this->language_data['STYLES']['NUMBERS'][$mask]);
        }

        if ($remaining & 1) {
            // This format is to be highlighted
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                // It has a style of its own, so it forms its own group
                $this->language_data['NUMBERS_CACHE'][$bit] = $mask;
            } else {
                // No own style: collect it in group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $mask;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
1791 
/**
 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
 * This function makes stylesheet generators much faster as they do not need these caches.
 *
 * Four caches are prepared: the symbol lookup table and symbol search
 * expression, the optimized keyword lists, the bracket search/replace
 * lists and the regular expressions for number highlighting. Finally the
 * parse cache is flagged as built.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // Symbols: building the expression is costly, so one expression is
    // built for all symbol groups at once and SYMBOL_DATA remembers which
    // group each (escaped) symbol belongs to.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char = array();  // quoted symbols of more than one character
        $single_char = array(); // quoted symbols of exactly one character

        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A list of symbols forms the group $key, a lone symbol goes to group 0
            if (is_array($symbols)) {
                $group = $key;
                $members = $symbols;
            } else {
                $group = 0;
                $members = array($symbols);
            }

            foreach ($members as $raw) {
                $sym = $this->hsc($raw);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // The first group to list a symbol keeps it
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $multi_char[] = preg_quote($sym, '/');
                } else if ($sym == '-') {
                    // don't trigger range out of order error
                    $single_char[] = '\-';
                } else { // single char
                    $single_char[] = preg_quote($sym, '/');
                }
            }
        }

        // Combine both lists into the search expression: the multi character
        // symbols as alternatives, the single characters as one character class
        $alternatives = array();
        if (!empty($multi_char)) {
            rsort($multi_char);
            $alternatives[] = implode('|', $multi_char);
        }
        if (!empty($single_char)) {
            rsort($single_char);
            $alternatives[] = '[' . implode('', $single_char) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // Keywords: drop the old cache and recompile every group that is not
    // explicitly switched off
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        if (isset($this->lexic_permissions['KEYWORDS'][$key]) &&
            !$this->lexic_permissions['KEYWORDS'][$key]) {
            continue;
        }
        $this->optimize_keyword_group($key);
    }

    // Brackets: each bracket is replaced by its character entity wrapped in
    // a marker carrying either the inline bracket style or the class br0
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $attribute = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $attribute = ' class="br0"';
        }

        // Entities in the same order as the brackets in CACHE_BRACKET_MATCH
        $replacements = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
            $replacements[] = '<|' . $attribute . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
    }

    //Build the parse cache needed to highlight numbers appropriate
    if ($this->lexic_permissions['NUMBERS']) {
        // NUMBERS_CACHE comes from the style cache; build it if that has not happened yet
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All this formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
            );

        // Each cache entry is either a ready-made regexp (string) or a
        // bitfield of the number formats to highlight for that style group
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                // Collect the expression of every flag set in the bitfield
                // and join them as alternatives
                $selected = array();
                for ($flag = 1; $flag <= $rxdata; $flag <<= 1) {
                    if ($rxdata & $flag) {
                        $selected[] = $numbers_format[$flag];
                    }
                }
                $regexp = implode("|", $selected);
            }

            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)/i";
        }
    }

    $this->parse_cache_built = true;
}
1960 
1961 /**
1962 * Returns the code in $this->source, highlighted and surrounded by the
1963 * necessary HTML.
1964 *
1965 * This should only be called ONCE, because it's SLOW! If you want to highlight
1966 * the same source multiple times, you're better off doing a whole lot of
1967 * str_replaces to replace the &lt;span&gt;s
1968 *
1969 * @since 1.0.0
1970 */
1971 function parse_code () {
1972 // Start the timer
1973 $start_time = microtime();
1974 
1975 // Firstly, if there is an error, we won't highlight
1976 if ($this->error) {
1977 //Escape the source for output
1978 $result = $this->hsc($this->source);
1979 
1980 //This fix is related to SF#1923020, but has to be applied regardless of
1981 //actually highlighting symbols.
1982 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
1983 
1984 // Timing is irrelevant
1985 $this->set_time($start_time, $start_time);
1986 $this->finalise($result);
1987 return $result;
1988 }
1989 
1990 // make sure the parse cache is up2date
1991 if (!$this->parse_cache_built) {
1992 $this->build_parse_cache();
1993 }
1994 
1995 // Replace all newlines to a common form.
1996 $code = str_replace("\r\n", "\n", $this->source);
1997 $code = str_replace("\r", "\n", $code);
1998 
1999 // Add spaces for regular expression matching and line numbers
2000// $code = "\n" . $code . "\n";
2001 
2002 // Initialise various stuff
2003 $length = strlen($code);
2004 $COMMENT_MATCHED = false;
2005 $stuff_to_parse = '';
2006 $endresult = '';
2007 
2008 // "Important" selections are handled like multiline comments
2009 // @todo GET RID OF THIS SHIZ
2010 if ($this->enable_important_blocks) {
2011 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2012 }
2013 
2014 if ($this->strict_mode) {
2015 // Break the source into bits. Each bit will be a portion of the code
2016 // within script delimiters - for example, HTML between < and >
2017 $k = 0;
2018 $parts = array();
2019 $matches = array();
2020 $next_match_pointer = null;
2021 // we use a copy to unset delimiters on demand (when they are not found)
2022 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2023 $i = 0;
2024 while ($i < $length) {
2025 $next_match_pos = $length + 1; // never true
2026 foreach ($delim_copy as $dk => $delimiters) {
2027 if(is_array($delimiters)) {
2028 foreach ($delimiters as $open => $close) {
2029 // make sure the cache is setup properly
2030 if (!isset($matches[$dk][$open])) {
2031 $matches[$dk][$open] = array(
2032 'next_match' => -1,
2033 'dk' => $dk,
2034 
2035 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2036 'open_strlen' => strlen($open),
2037 
2038 'close' => $close,
2039 'close_strlen' => strlen($close),
2040 );
2041 }
2042 // Get the next little bit for this opening string
2043 if ($matches[$dk][$open]['next_match'] < $i) {
2044 // only find the next pos if it was not already cached
2045 $open_pos = strpos($code, $open, $i);
2046 if ($open_pos === false) {
2047 // no match for this delimiter ever
2048 unset($delim_copy[$dk][$open]);
2049 continue;
2050 }
2051 $matches[$dk][$open]['next_match'] = $open_pos;
2052 }
2053 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2054 //So we got a new match, update the close_pos
2055 $matches[$dk][$open]['close_pos'] =
2056 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2057 
2058 $next_match_pointer =& $matches[$dk][$open];
2059 $next_match_pos = $matches[$dk][$open]['next_match'];
2060 }
2061 }
2062 } else {
2063 //So we should match an RegExp as Strict Block ...
2064 /**
2065 * The value in $delimiters is expected to be an RegExp
2066 * containing exactly 2 matching groups:
2067 * - Group 1 is the opener
2068 * - Group 2 is the closer
2069 */
2070 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2071 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2072 //We got a match ...
2073 $matches[$dk] = array(
2074 'next_match' => $matches_rx[1][1],
2075 'dk' => $dk,
2076 
2077 'close_strlen' => strlen($matches_rx[2][0]),
2078 'close_pos' => $matches_rx[2][1],
2079 );
2080 } else {
2081 // no match for this delimiter ever
2082 unset($delim_copy[$dk]);
2083 continue;
2084 }
2085 
2086 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2087 $next_match_pointer =& $matches[$dk];
2088 $next_match_pos = $matches[$dk]['next_match'];
2089 }
2090 }
2091 }
2092 // non-highlightable text
2093 $parts[$k] = array(
2094 1 => substr($code, $i, $next_match_pos - $i)
2095 );
2096 ++$k;
2097 
2098 if ($next_match_pos > $length) {
2099 // out of bounds means no next match was found
2100 break;
2101 }
2102 
2103 // highlightable code
2104 $parts[$k][0] = $next_match_pointer['dk'];
2105 
2106 //Only combine for non-rx script blocks
2107 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2108 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2109 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2110 while (true) {
2111 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2112 if ($close_pos == false) {
2113 break;
2114 }
2115 $i = $close_pos + $next_match_pointer['close_strlen'];
2116 if ($i == $length) {
2117 break;
2118 }
2119 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2120 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2121 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2122 foreach ($matches as $submatches) {
2123 foreach ($submatches as $match) {
2124 if ($match['next_match'] == $i) {
2125 // a different block already matches here!
2126 break 3;
2127 }
2128 }
2129 }
2130 } else {
2131 break;
2132 }
2133 }
2134 } else {
2135 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2136 $i = $close_pos;
2137 }
2138 
2139 if ($close_pos === false) {
2140 // no closing delimiter found!
2141 $parts[$k][1] = substr($code, $next_match_pos);
2142 ++$k;
2143 break;
2144 } else {
2145 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2146 ++$k;
2147 }
2148 }
2149 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2150 $num_parts = $k;
2151 
2152 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2153 // when we have only one part, we don't have anything to highlight at all.
2154 // if we have a "maybe" strict language, this should be handled as highlightable code
2155 $parts = array(
2156 
2157 
2158 1 => ''
2159 ),
2160 1 => array(
2161 
2162 1 => $parts[0][1]
2163 )
2164 );
2165 $num_parts = 2;
2166 }
2167 
2168 } else {
2169 // Not strict mode - simply dump the source into
2170 // the array at index 1 (the first highlightable block)
2171 $parts = array(
2172 
2173 
2174 1 => ''
2175 ),
2176 1 => array(
2177 
2178 1 => $code
2179 )
2180 );
2181 $num_parts = 2;
2182 }
2183 
2184 //Unset variables we won't need any longer
2185 unset($code);
2186 
2187 //Preload some repeatedly used values regarding hardquotes ...
2188 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2189 $hq_strlen = strlen($hq);
2190 
2191 //Preload if line numbers are to be generated afterwards
2192 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2193 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2194 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2195 
2196 //preload the escape char for faster checking ...
2197 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2198 
2199 // this is used for single-line comments
2200 $sc_disallowed_before = "";
2201 $sc_disallowed_after = "";
2202 
2203 if (isset($this->language_data['PARSER_CONTROL'])) {
2204 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2205 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2206 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2207 }
2208 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2209 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2210 }
2211 }
2212 }
2213 
2214 //Fix for SF#1932083: Multichar Quotemarks unsupported
2215 $is_string_starter = array();
2216 if ($this->lexic_permissions['STRINGS']) {
2217 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2218 if (!isset($is_string_starter[$quotemark[0]])) {
2219 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2220 } else if (is_string($is_string_starter[$quotemark[0]])) {
2221 $is_string_starter[$quotemark[0]] = array(
2222 $is_string_starter[$quotemark[0]],
2223 $quotemark);
2224 } else {
2225 $is_string_starter[$quotemark[0]][] = $quotemark;
2226 }
2227 }
2228 }
2229 
2230 // Now we go through each part. We know that even-indexed parts are
2231 // code that shouldn't be highlighted, and odd-indexed parts should
2232 // be highlighted
2233 for ($key = 0; $key < $num_parts; ++$key) {
2234 $STRICTATTRS = '';
2235 
2236 // If this block should be highlighted...
2237 if (!($key & 1)) {
2238 // Else not a block to highlight
2239 $endresult .= $this->hsc($parts[$key][1]);
2240 unset($parts[$key]);
2241 continue;
2242 }
2243 
2244 $result = '';
2245 $part = $parts[$key][1];
2246 
2247 $highlight_part = true;
2248 if ($this->strict_mode && !is_null($parts[$key][0])) {
2249 // get the class key for this block of code
2250 $script_key = $parts[$key][0];
2251 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2252 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2253 $this->lexic_permissions['SCRIPT']) {
2254 // Add a span element around the source to
2255 // highlight the overall source block
2256 if (!$this->use_classes &&
2257 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2258 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2259 } else {
2260 $attributes = ' class="sc' . $script_key . '"';
2261 }
2262 $result .= "<span$attributes>";
2263 $STRICTATTRS = $attributes;
2264 }
2265 }
2266 
2267 if ($highlight_part) {
2268 // Now, highlight the code in this block. This code
2269 // is really the engine of GeSHi (along with the method
2270 // parse_non_string_part).
2271 
2272 // cache comment regexps incrementally
2273 $next_comment_regexp_key = '';
2274 $next_comment_regexp_pos = -1;
2275 $next_comment_multi_pos = -1;
2276 $next_comment_single_pos = -1;
2277 $comment_regexp_cache_per_key = array();
2278 $comment_multi_cache_per_key = array();
2279 $comment_single_cache_per_key = array();
2280 $next_open_comment_multi = '';
2281 $next_comment_single_key = '';
2282 $escape_regexp_cache_per_key = array();
2283 $next_escape_regexp_key = '';
2284 $next_escape_regexp_pos = -1;
2285 
2286 $length = strlen($part);
2287 for ($i = 0; $i < $length; ++$i) {
2288 // Get the next char
2289 $char = $part[$i];
2290 $char_len = 1;
2291 
2292 $string_started = false;
2293 
2294 if (isset($is_string_starter[$char])) {
2295 // Possibly the start of a new string ...
2296 
2297 //Check which starter it was ...
2298 //Fix for SF#1932083: Multichar Quotemarks unsupported
2299 if (is_array($is_string_starter[$char])) {
2300 $char_new = '';
2301 foreach ($is_string_starter[$char] as $testchar) {
2302 if ($testchar === substr($part, $i, strlen($testchar)) &&
2303 strlen($testchar) > strlen($char_new)) {
2304 $char_new = $testchar;
2305 $string_started = true;
2306 }
2307 }
2308 if ($string_started) {
2309 $char = $char_new;
2310 }
2311 } else {
2312 $testchar = $is_string_starter[$char];
2313 if ($testchar === substr($part, $i, strlen($testchar))) {
2314 $char = $testchar;
2315 $string_started = true;
2316 }
2317 }
2318 $char_len = strlen($char);
2319 }
2320 
2321 if ($string_started) {
2322 // Hand out the correct style information for this string
2323 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2324 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2325 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2326 $string_key = 0;
2327 }
2328 
2329 // parse the stuff before this
2330 $result .= $this->parse_non_string_part($stuff_to_parse);
2331 $stuff_to_parse = '';
2332 
2333 if (!$this->use_classes) {
2334 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2335 } else {
2336 $string_attributes = ' class="st'.$string_key.'"';
2337 }
2338 
2339 // now handle the string
2340 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2341 $start = $i + $char_len;
2342 $string_open = true;
2343 
2344 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2345 $next_escape_regexp_pos = $length;
2346 }
2347 
2348 do {
2349 //Get the regular ending pos ...
2350 $close_pos = strpos($part, $char, $start);
2351 if(false === $close_pos) {
2352 $close_pos = $length;
2353 }
2354 
2355 if($this->lexic_permissions['ESCAPE_CHAR']) {
2356 // update escape regexp cache if needed
2357 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2358 $next_escape_regexp_pos = $length;
2359 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2360 $match_i = false;
2361 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2362 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2363 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2364 // we have already matched something
2365 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2366 // this comment is never matched
2367 continue;
2368 }
2369 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2370 } else if (
2371 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2372 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2373 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2374 ) {
2375 $match_i = $match[0][1];
2376 if (GESHI_PHP_PRE_433) {
2377 $match_i += $start;
2378 }
2379 
2380 $escape_regexp_cache_per_key[$escape_key] = array(
2381 'key' => $escape_key,
2382 'length' => strlen($match[0][0]),
2383 'pos' => $match_i
2384 );
2385 } else {
2386 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2387 continue;
2388 }
2389 
2390 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2391 $next_escape_regexp_pos = $match_i;
2392 $next_escape_regexp_key = $escape_key;
2393 if ($match_i === $start) {
2394 break;
2395 }
2396 }
2397 }
2398 }
2399 
2400 //Find the next simple escape position
2401 if('' != $this->language_data['ESCAPE_CHAR']) {
2402 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2403 if(false === $simple_escape) {
2404 $simple_escape = $length;
2405 }
2406 } else {
2407 $simple_escape = $length;
2408 }
2409 } else {
2410 $next_escape_regexp_pos = $length;
2411 $simple_escape = $length;
2412 }
2413 
2414 if($simple_escape < $next_escape_regexp_pos &&
2415 $simple_escape < $length &&
2416 $simple_escape < $close_pos) {
2417 //The nexxt escape sequence is a simple one ...
2418 $es_pos = $simple_escape;
2419 
2420 //Add the stuff not in the string yet ...
2421 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2422 
2423 //Get the style for this escaped char ...
2424 if (!$this->use_classes) {
2425 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2426 } else {
2427 $escape_char_attributes = ' class="es0"';
2428 }
2429 
2430 //Add the style for the escape char ...
2431 $string .= "<span$escape_char_attributes>" .
2432 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2433 
2434 //Get the byte AFTER the ESCAPE_CHAR we just found
2435 $es_char = $part[$es_pos + 1];
2436 if ($es_char == "\n") {
2437 // don't put a newline around newlines
2438 $string .= "</span>\n";
2439 $start = $es_pos + 2;
2440 } else if (ord($es_char) >= 128) {
2441 //This is an non-ASCII char (UTF8 or single byte)
2442 //This code tries to work around SF#2037598 ...
2443 if(function_exists('mb_substr')) {
2444 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2445 $string .= $es_char_m . '</span>';
2446 } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2447 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2448 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2449 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2450 "|\xED[\x80-\x9F][\x80-\xBF]".
2451 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2452 "|[\xF1-\xF3][\x80-\xBF]{3}".
2453 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2454 $part, $es_char_m, null, $es_pos + 1)) {
2455 $es_char_m = $es_char_m[0];
2456 } else {
2457 $es_char_m = $es_char;
2458 }
2459 $string .= $this->hsc($es_char_m) . '</span>';
2460 } else {
2461 $es_char_m = $this->hsc($es_char);
2462 }
2463 $start = $es_pos + strlen($es_char_m) + 1;
2464 } else {
2465 $string .= $this->hsc($es_char) . '</span>';
2466 $start = $es_pos + 2;
2467 }
2468 } else if ($next_escape_regexp_pos < $length &&
2469 $next_escape_regexp_pos < $close_pos) {
2470 $es_pos = $next_escape_regexp_pos;
2471 //Add the stuff not in the string yet ...
2472 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2473 
2474 //Get the key and length of this match ...
2475 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2476 $escape_str = substr($part, $es_pos, $escape['length']);
2477 $escape_key = $escape['key'];
2478 
2479 //Get the style for this escaped char ...
2480 if (!$this->use_classes) {
2481 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2482 } else {
2483 $escape_char_attributes = ' class="es' . $escape_key . '"';
2484 }
2485 
2486 //Add the style for the escape char ...
2487 $string .= "<span$escape_char_attributes>" .
2488 $this->hsc($escape_str) . '</span>';
2489 
2490 $start = $es_pos + $escape['length'];
2491 } else {
2492 //Copy the remainder of the string ...
2493 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2494 $start = $close_pos + $char_len;
2495 $string_open = false;
2496 }
2497 } while($string_open);
2498 
2499 if ($check_linenumbers) {
2500 // Are line numbers used? If, we should end the string before
2501 // the newline and begin it again (so when <li>s are put in the source
2502 // remains XHTML compliant)
2503 // note to self: This opens up possibility of config files specifying
2504 // that languages can/cannot have multiline strings???
2505 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2506 }
2507 
2508 $result .= $string;
2509 $string = '';
2510 $i = $start - 1;
2511 continue;
2512 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2513 substr($part, $i, $hq_strlen) == $hq) {
2514 // The start of a hard quoted string
2515 if (!$this->use_classes) {
2516 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2517 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2518 } else {
2519 $string_attributes = ' class="st_h"';
2520 $escape_char_attributes = ' class="es_h"';
2521 }
2522 // parse the stuff before this
2523 $result .= $this->parse_non_string_part($stuff_to_parse);
2524 $stuff_to_parse = '';
2525 
2526 // now handle the string
2527 $string = '';
2528 
2529 // look for closing quote
2530 $start = $i + $hq_strlen;
2531 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2532 $start = $close_pos + 1;
2533 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
2534 // make sure this quote is not escaped
2535 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2536 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2537 // check wether this quote is escaped or if it is something like '\\'
2538 $escape_char_pos = $close_pos - 1;
2539 while ($escape_char_pos > 0
2540 && $part[$escape_char_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
2541 --$escape_char_pos;
2542 }
2543 if (($close_pos - $escape_char_pos) & 1) {
2544 // uneven number of escape chars => this quote is escaped
2545 continue 2;
2546 }
2547 }
2548 }
2549 }
2550 
2551 // found closing quote
2552 break;
2553 }
2554 
2555 //Found the closing delimiter?
2556 if (!$close_pos) {
2557 // span till the end of this $part when no closing delimiter is found
2558 $close_pos = $length;
2559 }
2560 
2561 //Get the actual string
2562 $string = substr($part, $i, $close_pos - $i + 1);
2563 $i = $close_pos;
2564 
2565 // handle escape chars and encode html chars
2566 // (special because when we have escape chars within our string they may not be escaped)
2567 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2568 $start = 0;
2569 $new_string = '';
2570 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2571 // hmtl escape stuff before
2572 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2573 // check if this is a hard escape
2574 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2575 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2576 // indeed, this is a hardescape
2577 $new_string .= "<span$escape_char_attributes>" .
2578 $this->hsc($hardescape) . '</span>';
2579 $start = $es_pos + strlen($hardescape);
2580 continue 2;
2581 }
2582 }
2583 // not a hard escape, but a normal escape
2584 // they come in pairs of two
2585 $c = 0;
2586 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2587 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2588 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2589 $c += 2;
2590 }
2591 if ($c) {
2592 $new_string .= "<span$escape_char_attributes>" .
2593 str_repeat($escaped_escape_char, $c) .
2594 '</span>';
2595 $start = $es_pos + $c;
2596 } else {
2597 // this is just a single lonely escape char...
2598 $new_string .= $escaped_escape_char;
2599 $start = $es_pos + 1;
2600 }
2601 }
2602 $string = $new_string . $this->hsc(substr($string, $start));
2603 } else {
2604 $string = $this->hsc($string);
2605 }
2606 
2607 if ($check_linenumbers) {
2608 // Are line numbers used? If, we should end the string before
2609 // the newline and begin it again (so when <li>s are put in the source
2610 // remains XHTML compliant)
2611 // note to self: This opens up possibility of config files specifying
2612 // that languages can/cannot have multiline strings???
2613 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2614 }
2615 
2616 $result .= "<span$string_attributes>" . $string . '</span>';
2617 $string = '';
2618 continue;
2619 } else {
2620 // update regexp comment cache if needed
2621 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2622 $next_comment_regexp_pos = $length;
2623 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2624 $match_i = false;
2625 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2626 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2627 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2628 // we have already matched something
2629 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2630 // this comment is never matched
2631 continue;
2632 }
2633 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2634 } else if (
2635 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2636 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2637 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2638 ) {
2639 $match_i = $match[0][1];
2640 if (GESHI_PHP_PRE_433) {
2641 $match_i += $i;
2642 }
2643 
2644 $comment_regexp_cache_per_key[$comment_key] = array(
2645 'key' => $comment_key,
2646 'length' => strlen($match[0][0]),
2647 'pos' => $match_i
2648 );
2649 } else {
2650 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2651 continue;
2652 }
2653 
2654 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2655 $next_comment_regexp_pos = $match_i;
2656 $next_comment_regexp_key = $comment_key;
2657 if ($match_i === $i) {
2658 break;
2659 }
2660 }
2661 }
2662 }
2663 //Have a look for regexp comments
2664 if ($i == $next_comment_regexp_pos) {
2665 $COMMENT_MATCHED = true;
2666 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2667 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2668 
2669 //@todo If remove important do remove here
2670 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2671 if (!$this->use_classes) {
2672 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2673 } else {
2674 $attributes = ' class="co' . $comment['key'] . '"';
2675 }
2676 
2677 $test_str = "<span$attributes>" . $test_str . "</span>";
2678 
2679 // Short-cut through all the multiline code
2680 if ($check_linenumbers) {
2681 // strreplace to put close span and open span around multiline newlines
2682 $test_str = str_replace(
2683 "\n", "</span>\n<span$attributes>",
2684 str_replace("\n ", "\n&nbsp;", $test_str)
2685 );
2686 }
2687 }
2688 
2689 $i += $comment['length'] - 1;
2690 
2691 // parse the rest
2692 $result .= $this->parse_non_string_part($stuff_to_parse);
2693 $stuff_to_parse = '';
2694 }
2695 
2696 // If we haven't matched a regexp comment, try multi-line comments
2697 if (!$COMMENT_MATCHED) {
2698 // Is this a multiline comment?
2699 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2700 $next_comment_multi_pos = $length;
2701 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2702 $match_i = false;
2703 if (isset($comment_multi_cache_per_key[$open]) &&
2704 ($comment_multi_cache_per_key[$open] >= $i ||
2705 $comment_multi_cache_per_key[$open] === false)) {
2706 // we have already matched something
2707 if ($comment_multi_cache_per_key[$open] === false) {
2708 // this comment is never matched
2709 continue;
2710 }
2711 $match_i = $comment_multi_cache_per_key[$open];
2712 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2713 $comment_multi_cache_per_key[$open] = $match_i;
2714 } else {
2715 $comment_multi_cache_per_key[$open] = false;
2716 continue;
2717 }
2718 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2719 $next_comment_multi_pos = $match_i;
2720 $next_open_comment_multi = $open;
2721 if ($match_i === $i) {
2722 break;
2723 }
2724 }
2725 }
2726 }
2727 if ($i == $next_comment_multi_pos) {
2728 $open = $next_open_comment_multi;
2729 $close = $this->language_data['COMMENT_MULTI'][$open];
2730 $open_strlen = strlen($open);
2731 $close_strlen = strlen($close);
2732 $COMMENT_MATCHED = true;
2733 $test_str_match = $open;
2734 //@todo If remove important do remove here
2735 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2736 $open == GESHI_START_IMPORTANT) {
2737 if ($open != GESHI_START_IMPORTANT) {
2738 if (!$this->use_classes) {
2739 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2740 } else {
2741 $attributes = ' class="coMULTI"';
2742 }
2743 $test_str = "<span$attributes>" . $this->hsc($open);
2744 } else {
2745 if (!$this->use_classes) {
2746 $attributes = ' style="' . $this->important_styles . '"';
2747 } else {
2748 $attributes = ' class="imp"';
2749 }
2750 
2751 // We don't include the start of the comment if it's an
2752 // "important" part
2753 $test_str = "<span$attributes>";
2754 }
2755 } else {
2756 $test_str = $this->hsc($open);
2757 }
2758 
2759 $close_pos = strpos( $part, $close, $i + $open_strlen );
2760 
2761 if ($close_pos === false) {
2762 $close_pos = $length;
2763 }
2764 
2765 // Short-cut through all the multiline code
2766 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2767 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2768 $test_str_match == GESHI_START_IMPORTANT) &&
2769 $check_linenumbers) {
2770 
2771 // strreplace to put close span and open span around multiline newlines
2772 $test_str .= str_replace(
2773 "\n", "</span>\n<span$attributes>",
2774 str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2775 );
2776 } else {
2777 $test_str .= $rest_of_comment;
2778 }
2779 
2780 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2781 $test_str_match == GESHI_START_IMPORTANT) {
2782 $test_str .= '</span>';
2783 }
2784 
2785 $i = $close_pos + $close_strlen - 1;
2786 
2787 // parse the rest
2788 $result .= $this->parse_non_string_part($stuff_to_parse);
2789 $stuff_to_parse = '';
2790 }
2791 }
2792 
2793 // If we haven't matched a multiline comment, try single-line comments
2794 if (!$COMMENT_MATCHED) {
2795 // cache potential single line comment occurances
2796 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2797 $next_comment_single_pos = $length;
2798 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2799 $match_i = false;
2800 if (isset($comment_single_cache_per_key[$comment_key]) &&
2801 ($comment_single_cache_per_key[$comment_key] >= $i ||
2802 $comment_single_cache_per_key[$comment_key] === false)) {
2803 // we have already matched something
2804 if ($comment_single_cache_per_key[$comment_key] === false) {
2805 // this comment is never matched
2806 continue;
2807 }
2808 $match_i = $comment_single_cache_per_key[$comment_key];
2809 } else if (
2810 // case sensitive comments
2811 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2812 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2813 // non case sensitive
2814 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2815 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2816 $comment_single_cache_per_key[$comment_key] = $match_i;
2817 } else {
2818 $comment_single_cache_per_key[$comment_key] = false;
2819 continue;
2820 }
2821 if ($match_i !== false && $match_i < $next_comment_single_pos) {
2822 $next_comment_single_pos = $match_i;
2823 $next_comment_single_key = $comment_key;
2824 if ($match_i === $i) {
2825 break;
2826 }
2827 }
2828 }
2829 }
2830 if ($next_comment_single_pos == $i) {
2831 $comment_key = $next_comment_single_key;
2832 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2833 $com_len = strlen($comment_mark);
2834 
2835 // This check will find special variables like $# in bash
2836 // or compiler directives of Delphi beginning {$
2837 if ((empty($sc_disallowed_before) || ($i == 0) ||
2838 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2839 (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2840 (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2841 {
2842 // this is a valid comment
2843 $COMMENT_MATCHED = true;
2844 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2845 if (!$this->use_classes) {
2846 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2847 } else {
2848 $attributes = ' class="co' . $comment_key . '"';
2849 }
2850 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2851 } else {
2852 $test_str = $this->hsc($comment_mark);
2853 }
2854 
2855 //Check if this comment is the last in the source
2856 $close_pos = strpos($part, "\n", $i);
2857 $oops = false;
2858 if ($close_pos === false) {
2859 $close_pos = $length;
2860 $oops = true;
2861 }
2862 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2863 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2864 $test_str .= "</span>";
2865 }
2866 
2867 // Take into account that the comment might be the last in the source
2868 if (!$oops) {
2869 $test_str .= "\n";
2870 }
2871 
2872 $i = $close_pos;
2873 
2874 // parse the rest
2875 $result .= $this->parse_non_string_part($stuff_to_parse);
2876 $stuff_to_parse = '';
2877 }
2878 }
2879 }
2880 }
2881 
2882 // Where are we adding this char?
2883 if (!$COMMENT_MATCHED) {
2884 $stuff_to_parse .= $char;
2885 } else {
2886 $result .= $test_str;
2887 unset($test_str);
2888 $COMMENT_MATCHED = false;
2889 }
2890 }
2891 // Parse the last bit
2892 $result .= $this->parse_non_string_part($stuff_to_parse);
2893 $stuff_to_parse = '';
2894 } else {
2895 $result .= $this->hsc($part);
2896 }
2897 // Close the <span> that surrounds the block
2898 if ($STRICTATTRS != '') {
2899 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2900 $result .= '</span>';
2901 }
2902 
2903 $endresult .= $result;
2904 unset($part, $parts[$key], $result);
2905 }
2906 
2907 //This fix is related to SF#1923020, but has to be applied regardless of
2908 //actually highlighting symbols.
2909 /** NOTE: memorypeak #3 */
2910 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2911 
2912// // Parse the last stuff (redundant?)
2913// $result .= $this->parse_non_string_part($stuff_to_parse);
2914 
2915 // Lop off the very first and last spaces
2916// $result = substr($result, 1, -1);
2917 
2918 // We're finished: stop timing
2919 $this->set_time($start_time, microtime());
2920 
2921 $this->finalise($endresult);
2922 return $endresult;
2923 }
2924 
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * The string is modified in place:
 *  - tabs are expanded to the next tab stop; the column is counted on the
 *    visible text only (anything between < and > is skipped, and an HTML
 *    entity advances the column by one);
 *  - a space at the start of a line and the second space of every pair of
 *    spaces are turned into &nbsp;
 *  - when line numbers are off, newlines are passed through nl2br() or
 *    replaced by the configured line ending.
 *
 * @param string The source to indent (reference!)
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // "&nbsp;" followed by $tab_width plain spaces. The substr() calls below cut
        // either spaces only (offset 6) or "&nbsp;" plus spaces (offset 0) out of it.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            $pos = 0; // visible column reached so far
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we don't modify $pos. This is so we ignore HTML
                // in the line and only work out the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Possible entity: look for its ';' a few characters ahead.
                    // If found, $pos is moved back far enough that, once the
                    // remaining entity characters have each incremented it,
                    // the entity as a whole has advanced the column by one.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    // OPTIMISE - the replacement strings could be precomputed
                    // per width instead of being cut out of $tab_string each time
                    $tab_end_width = $tab_width - ($pos % $tab_width); // Moved out of the loop as it doesn't change within the loop
                    if (($pos & 1) || 1 == $tab_end_width) {
                        $lines[$key] .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        $lines[$key] .= substr($tab_string, 0, $tab_end_width+5);
                    }
                    $pos += $tab_end_width;

                    // No more tabs on this line: copy the rest verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }
    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only pairs of spaces are rewritten. Searching for a single space would also
    // hit the space inside generated tags (e.g. "<span class=") and break the markup.
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3024 
/**
 * Applies the keyword case setting of the current language to a string
 *
 * The setting is read from $this->language_data['CASE_KEYWORDS']; any value
 * other than GESHI_CAPS_UPPER or GESHI_CAPS_LOWER leaves the string untouched.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case($instr) {
    $case_mode = $this->language_data['CASE_KEYWORDS'];

    if ($case_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($case_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // No case change requested
    return $instr;
}
3043 
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * The keyword group being processed is read from $this->_kw_replace_group.
 * The returned text still contains the internal placeholders (<|/group/>,
 * |>, <|UR1|, <DOT>), not final HTML.
 *
 * @param array The match array; element 0 (the full match) is used as the keyword
 * @return string The placeholder markup for the match found
 * @since 1.0.8
 * @access private
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Default to the text as matched. This also covers the case where the
            // lookup below finds nothing: previously $word was then left as the
            // last keyword of the group (or undefined for an empty group).
            $word = $keyword;

            // Get the keyword from the language file to get the correct case
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Dots are masked as <DOT> inside the URL placeholder
            $before = '<|UR1|"' .
                str_replace(
                    array('{FNAME}', '{FNAMEL}', '{FNAMEU}', '.'),
                    array($this->hsc($word), $this->hsc(strtolower($word)),
                        $this->hsc(strtoupper($word)), '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3095 
/**
 * Builds the style attribute for a regexp match whose style is a callback
 *
 * The callable stored in STYLES/REGEXPS for the key in $this->_rx_key is
 * called with the first captured group and its result is used as the style.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string The style attribute, followed by the captured text and the closing placeholder
 * @since 1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $style = call_user_func($style_callback, $matches[1]);

    return ' style="' . $style . '"' . $matches[1] . '|>';
}
3110 
/**
 * Wraps a REGEXPS match in placeholders, closing and reopening the
 * placeholder at every newline. Set the _hmr_* vars before calling this
 *
 * When $this->_hmr_replace is set, the references \0, \1, ... in the before,
 * after and replace templates are substituted with the captured groups;
 * otherwise the whole match is wrapped as it is.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string
 * @since 1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $prefix = $this->_hmr_before;
    $suffix = $this->_hmr_after;

    if ($this->_hmr_replace) {
        // One "\N" reference per entry of the matches array
        $references = array();
        foreach (array_keys($matches) as $index) {
            $references[] = '\\' . $index;
        }

        $prefix = str_replace($references, $matches, $prefix);
        $suffix = str_replace($references, $matches, $suffix);
        $content = str_replace($references, $matches, $this->_hmr_replace);
    } else {
        $content = $matches[0];
    }

    $opening = '<|!REG3XP' . $this->_hmr_key . '!>';

    // End the placeholder before each newline and start a new one after it
    $content = str_replace("\n", "|>\n" . $opening, $content);

    return $prefix . $opening . $content . '|>' . $suffix;
}
3144 
3145 /**
3146 * Takes a string that has no strings or comments in it, and highlights
3147 * stuff like keywords, numbers and methods.
     *
     * The work is done in passes over the same string (regexps, numbers,
     * keywords, methods, brackets, symbols). Most passes only insert
     * placeholders such as <|...> and |>; these are turned into <span...>
     * and </span> at the very end of the method.
3148 *
3149 * @param string The string to parse for keyword, numbers etc.
     * @return string The marked-up version of the given string
3150 * @since 1.0.0
3151 * @access private
3152 * @todo BUGGY! Why? Why not build string and return?
3153 */
3154 function parse_non_string_part($stuff_to_parse) {
     // The space prepended here is removed again by the substr() in the return statement
3155 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3156 
3157 // Regular expressions
3158 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3159 if ($this->lexic_permissions['REGEXPS'][$key]) {
3160 if (is_array($regexp)) {
3161 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3162 // produce valid HTML when we match multiple lines
3163 $this->_hmr_replace = $regexp[GESHI_REPLACE];
3164 $this->_hmr_before = $regexp[GESHI_BEFORE];
3165 $this->_hmr_key = $key;
3166 $this->_hmr_after = $regexp[GESHI_AFTER];
3167 $stuff_to_parse = preg_replace_callback(
3168 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3169 array($this, 'handle_multiline_regexps'),
3170 $stuff_to_parse);
3171 $this->_hmr_replace = false;
3172 $this->_hmr_before = '';
3173 $this->_hmr_after = '';
3174 } else {
3175 $stuff_to_parse = preg_replace(
3176 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3177 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3178 $stuff_to_parse);
3179 }
3180 } else {
3181 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3182 // produce valid HTML when we match multiple lines
3183 $this->_hmr_key = $key;
3184 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3185 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3186 $this->_hmr_key = '';
3187 } else {
3188 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3189 }
3190 }
3191 }
3192 }
3193 
3194 // Highlight numbers. As of 1.0.8 we support different types of numbers
3195 $numbers_found = false;
3196 if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3197 $numbers_found = true;
3198 
3199 //For each of the formats ...
3200 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3201 //Check if it should be highlighted ...
3202 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3203 }
3204 }
3205 
3206 // Highlight keywords
     // Default lookbehind / lookahead character classes; both are closed with "])" below
3207 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3208 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3209 if ($this->lexic_permissions['STRINGS']) {
3210 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3211 $disallowed_before .= $quotemarks;
3212 $disallowed_after .= $quotemarks;
3213 }
3214 $disallowed_before .= "])";
3215 $disallowed_after .= "])";
3216 
3217 $parser_control_pergroup = false;
3218 if (isset($this->language_data['PARSER_CONTROL'])) {
3219 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3220 $x = 0; // check whether per-keyword-group parser_control is enabled
3221 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3222 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3223 ++$x;
3224 }
3225 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3226 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3227 ++$x;
3228 }
     // Entries other than the two global ones counted in $x are treated as per-group settings
3229 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3230 }
3231 }
3232 
3233 // if this is changed, don't forget to change it below
3234// if (!empty($disallowed_before)) {
3235// $disallowed_before = "(?<![$disallowed_before])";
3236// }
3237// if (!empty($disallowed_after)) {
3238// $disallowed_after = "(?![$disallowed_after])";
3239// }
3240 
3241 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3242 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3243 $this->lexic_permissions['KEYWORDS'][$k]) {
3244 
3245 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3246 $modifiers = $case_sensitive ? '' : 'i';
3247 
3248 // NEW in 1.0.8 - per-keyword-group parser control
3249 $disallowed_before_local = $disallowed_before;
3250 $disallowed_after_local = $disallowed_after;
3251 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3252 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3253 $disallowed_before_local =
3254 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3255 }
3256 
3257 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3258 $disallowed_after_local =
3259 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3260 }
3261 }
3262 
     // handle_keyword_replace() reads the current group from this property
3263 $this->_kw_replace_group = $k;
3264 
3265 //NEW in 1.0.8, the cached regexp list
3266 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3267 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3268 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3269 // Might make a more unique string for putting the number in soon
3270 // Basically, we don't put the styles in yet because then the styles themselves will
3271 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3272 $stuff_to_parse = preg_replace_callback(
3273 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3274 array($this, 'handle_keyword_replace'),
3275 $stuff_to_parse
3276 );
3277 }
3278 }
3279 }
3280 
3281 //
3282 // Now that's all done, replace /[number]/ with the correct styles
3283 //
3284 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3285 if (!$this->use_classes) {
3286 $attributes = ' style="' .
3287 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3288 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3289 } else {
3290 $attributes = ' class="kw' . $k . '"';
3291 }
3292 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3293 }
3294 
3295 if ($numbers_found) {
3296 // Put number styles in
3297 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3298//Commented out for now, as this needs some review ...
3299// if ($numbers_permissions & $id) {
3300 //Get the appropriate style ...
3301 //Checking for unset styles is done by the style cache builder ...
3302 if (!$this->use_classes) {
3303 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3304 } else {
3305 $attributes = ' class="nu'.$id.'"';
3306 }
3307 
3308 //Set in the correct styles ...
3309 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3310// }
3311 }
3312 }
3313 
3314 // Highlight methods and fields in objects
3315 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3316 $oolang_spaces = "[\s]*";
3317 $oolang_before = "";
3318 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3319 if (isset($this->language_data['PARSER_CONTROL'])) {
3320 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3321 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3322 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3323 }
3324 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3325 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3326 }
3327 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3328 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3329 }
3330 }
3331 }
3332 
3333 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3334 if (false !== strpos($stuff_to_parse, $splitter)) {
3335 if (!$this->use_classes) {
3336 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3337 } else {
3338 $attributes = ' class="me' . $key . '"';
3339 }
3340 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3341 }
3342 }
3343 }
3344 
3345 //
3346 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3347 // You try it, and see what happens ;)
3348 // TODO: Fix lexic permissions not converting entities if shouldn't
3349 // be highlighting regardless
3350 //
3351 if ($this->lexic_permissions['BRACKETS']) {
3352 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3353 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3354 }
3355 
3356 
3357 //FIX for symbol highlighting ...
3358 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3359 //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3360 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
     // The offsets captured above refer to the string before any replacement of this loop
3361 $global_offset = 0;
3362 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3363 $symbol_match = $pot_symbols[$s_id][0][0];
3364 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3365 // already highlighted blocks _must_ include either < or >
3366 // so if this conditional applies, we have to skip this match
3367 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3368 if(strpos($symbol_match, '<SEMI>') === false &&
3369 strpos($symbol_match, '<PIPE>') === false) {
3370 continue;
3371 }
3372 }
3373 
3374 // if we reach this point, we have a valid match which needs to be highlighted
3375 
3376 $symbol_length = strlen($symbol_match);
3377 $symbol_offset = $pot_symbols[$s_id][0][1];
3378 unset($pot_symbols[$s_id]);
3379 $symbol_end = $symbol_length + $symbol_offset;
3380 $symbol_hl = "";
3381 
3382 // if we have multiple styles, we have to handle them properly
3383 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3384 $old_sym = -1;
3385 // Split the current stuff to replace into its atomic symbols ...
3386 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3387 foreach ($sym_match_syms[0] as $sym_ms) {
3388 //Check if consecutive symbols belong to the same group to save output ...
3389 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3390 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3391 if (-1 != $old_sym) {
3392 $symbol_hl .= "|>";
3393 }
3394 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3395 if (!$this->use_classes) {
3396 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3397 } else {
3398 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3399 }
3400 }
3401 $symbol_hl .= $sym_ms;
3402 }
3403 unset($sym_match_syms);
3404 
3405 //Close remaining tags and insert the replacement at the right position ...
3406 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3407 if (-1 != $old_sym) {
3408 $symbol_hl .= "|>";
3409 }
3410 } else {
3411 if (!$this->use_classes) {
3412 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3413 } else {
3414 $symbol_hl = '<| class="sy0">';
3415 }
3416 $symbol_hl .= $symbol_match . '|>';
3417 }
3418 
3419 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3420 
3421 // since we replace old text with something of different size,
3422 // we'll have to keep track of the differences
3423 $global_offset += strlen($symbol_hl) - $symbol_length;
3424 }
3425 }
3426 //FIX for symbol highlighting ...
3427 
3428 // Add class/style for regexps
3429 foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3430 if ($this->lexic_permissions['REGEXPS'][$key]) {
3431 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
     // handle_regexps_callback() reads the regexp key from this property
3432 $this->_rx_key = $key;
3433 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3434 array($this, 'handle_regexps_callback'),
3435 $stuff_to_parse);
3436 } else {
3437 if (!$this->use_classes) {
3438 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3439 } else {
3440 if (is_array($this->language_data['REGEXPS'][$key]) &&
3441 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3442 $attributes = ' class="' .
3443 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3444 } else {
3445 $attributes = ' class="re' . $key . '"';
3446 }
3447 }
3448 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3449 }
3450 }
3451 }
3452 
3453 // Replace <DOT> with . for urls
3454 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3455 // Replace <|UR1| with <a href= for urls also
3456 if (isset($this->link_styles[GESHI_LINK])) {
3457 if ($this->use_classes) {
3458 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3459 } else {
3460 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3461 }
3462 } else {
3463 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3464 }
3465 
3466 //
3467 // NOW we add the span thingy ;)
3468 //
3469 
3470 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3471 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
     // Drop the space that was prepended at the top of this method
3472 return substr($stuff_to_parse, 1);
3473 }
3474 
/**
 * Stores the time taken to parse the code in $this->time
 *
 * Both arguments are split at the space into two parts, which are then
 * combined as end minus start.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    $started = explode(' ', $start_time);
    $finished = explode(' ', $end_time);

    // Same order of operations as before, so the float result is unchanged
    $elapsed = $finished[0] + $finished[1] - $started[0] - $started[1];

    $this->time = $elapsed;
}
3488 
/**
 * Returns the parse time stored in $this->time
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
3498 
/**
 * Merges arrays recursively, overwriting values of the first array with values of later arrays
 *
 * Takes any number of arrays. Where both sides hold a value under the same
 * key and the later one is an array, the two are merged recursively; in all
 * other cases the later value replaces the earlier one.
 *
 * @return array|false The merged array, or false (with an E_USER_WARNING) if an argument is not an array
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $arrays = func_get_args();

    // Validate the arguments first. Without this check the foreach below
    // would trigger a warning for an argument that is not an array.
    foreach ($arrays as $position => $candidate) {
        if (!is_array($candidate)) {
            trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array is the base of the result in every case
    $ret = $arrays[0];

    // Lay every further array over the result, in order
    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            // If $ret[$key] is a scalar here, the recursive call warns and
            // $ret[$key] becomes false (incompatible arrays).
            $ret[$key] = (is_array($value) && isset($ret[$key]))
                ? $this->merge_arrays($ret[$key], $value)
                : $value;
        }
    }

    return $ret;
}
3537 
/**
 * Gets language information and stores it for later use
 *
 * The language file is require'd and is expected to fill $language_data,
 * which is then stored in $this->language_data. After that the strict mode
 * flag, the lexic permissions and optional style overrides from a
 * "*.style.php" file next to the language file are set up.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // Nothing to do if this file is already loaded
    if ($file_name == $this->loaded_language) {
        return;
    }

    // Reset the language-dependent state before loading the file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    // The language file fills $language_data in this scope
    require $file_name;

    // No validation of $language_data is done here (yet)
    $this->language_data = $language_data;

    // Strict mode follows the setting of the language file
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Keyword groups are highlighted by default, unless the group is empty
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        $this->lexic_permissions['KEYWORDS'][$key] = !empty($this->language_data['KEYWORDS'][$key]);
    }

    // Single line comments and regexps are highlighted by default
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // empty() is fine here: it copes with PARSER_CONTROL being undefined,
    // false or null without triggering a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Everything except GESHI_NEVER enables the lexic
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission, ignore it
                continue;
            } elseif (is_array($this->lexic_permissions[$flag])) {
                // Apply the setting to every entry of the group
                foreach (array_keys($this->lexic_permissions[$flag]) as $key) {
                    $this->lexic_permissions[$flag][$key] = $perm;
                }
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (is_readable($style_filename)) {
        // Make sure no $style_data set earlier (e.g. by the language file) is left over
        unset($style_data);

        // The style file is expected to define $style_data
        include $style_filename;

        // Lay the new styles over the styles of the language file
        if (isset($style_data) && is_array($style_data)) {
            $this->language_data['STYLES'] =
                $this->merge_arrays($this->language_data['STYLES'], $style_data);
        }
    }
}
3630 
3631 /**
3632 * Takes the parsed code and various options, and creates the HTML
3633 * surrounding it to make it look nice.
3634 *
3635 * @param string The code already parsed (reference!)
3636 * @since 1.0.0
3637 * @access private
3638 */
3639 function finalise(&$parsed_code) {
3640 // Remove end parts of important declarations
3641 // This is BUGGY!! My fault for bad code: fix coming in 1.2
3642 // @todo Remove this crap
3643 if ($this->enable_important_blocks &&
3644 (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3645 $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3646 }
3647 
3648 // Add HTML whitespace stuff if we're using the <div> header
3649 if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3650 $this->indent($parsed_code);
3651 }
3652 
3653 // purge some unnecessary stuff
3654 /** NOTE: memorypeak #1 */
3655 $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3656 
3657 // If we are using IDs for line numbers, there needs to be an overall
3658 // ID set to prevent collisions.
3659 if ($this->add_ids && !$this->overall_id) {
3660 $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3661 }
3662 
3663 // Get code into lines
3664 /** NOTE: memorypeak #2 */
3665 $code = explode("\n", $parsed_code);
3666 $parsed_code = $this->header();
3667 
3668 // If we're using line numbers, we insert <li>s and appropriate
3669 // markup to style them (otherwise we don't need to do anything)
3670 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3671 // If we're using the <pre> header, we shouldn't add newlines because
3672 // the <pre> will line-break them (and the <li>s already do this for us)
3673 $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3674 
3675 // Set vars to defaults for following loop
3676 $i = 0;
3677 
3678 // Foreach line...
3679 for ($i = 0, $n = count($code); $i < $n;) {
3680 //Reset the attributes for a new line ...
3681 $attrs = array();
3682 
3683 // Make lines have at least one space in them if they're empty
3684 // BenBE: Checking emptiness using trim instead of relying on blanks
3685 if ('' == trim($code[$i])) {
3686 $code[$i] = '&nbsp;';
3687 }
3688 
3689 // If this is a "special line"...
3690 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3691 $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3692 // Set the attributes to style the line
3693 if ($this->use_classes) {
3694 //$attr = ' class="li2"';
3695 $attrs['class'][] = 'li2';
3696 $def_attr = ' class="de2"';
3697 } else {
3698 //$attr = ' style="' . $this->line_style2 . '"';
3699 $attrs['style'][] = $this->line_style2;
3700 // This style "covers up" the special styles set for special lines
3701 // so that styles applied to special lines don't apply to the actual
3702 // code on that line
3703 $def_attr = ' style="' . $this->code_style . '"';
3704 }
3705 } else {
3706 if ($this->use_classes) {
3707 //$attr = ' class="li1"';
3708 $attrs['class'][] = 'li1';
3709 $def_attr = ' class="de1"';
3710 } else {
3711 //$attr = ' style="' . $this->line_style1 . '"';
3712 $attrs['style'][] = $this->line_style1;
3713 $def_attr = ' style="' . $this->code_style . '"';
3714 }
3715 }
3716 
3717 //Check which type of tag to insert for this line
3718 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3719 $start = "<pre$def_attr>";
3720 $end = '</pre>';
3721 } else {
3722 // Span or div?
3723 $start = "<div$def_attr>";
3724 $end = '</div>';
3725 }
3726 
3727 ++$i;
3728 
3729 // Are we supposed to use ids? If so, add them
3730 if ($this->add_ids) {
3731 $attrs['id'][] = "$this->overall_id-$i";
3732 }
3733 
3734 //Is this some line with extra styles???
3735 if (in_array($i, $this->highlight_extra_lines)) {
3736 if ($this->use_classes) {
3737 if (isset($this->highlight_extra_lines_styles[$i])) {
3738 $attrs['class'][] = "lx$i";
3739 } else {
3740 $attrs['class'][] = "ln-xtra";
3741 }
3742 } else {
3743 array_push($attrs['style'], $this->get_line_style($i));
3744 }
3745 }
3746 
3747 // Add in the line surrounded by appropriate list HTML
3748 $attr_string = '';
3749 foreach ($attrs as $key => $attr) {
3750 $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3751 }
3752 
3753 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3754 unset($code[$i - 1]);
3755 }
3756 } else {
3757 $n = count($code);
3758 if ($this->use_classes) {
3759 $attributes = ' class="de1"';
3760 } else {
3761 $attributes = ' style="'. $this->code_style .'"';
3762 }
3763 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3764 $parsed_code .= '<pre'. $attributes .'>';
3765 } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3766 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3767 if ($this->use_classes) {
3768 $attrs = ' class="ln"';
3769 } else {
3770 $attrs = ' style="'. $this->table_linenumber_style .'"';
3771 }
3772 $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3773 // get linenumbers
3774 // we don't merge it with the for below, since it should be better for
3775 // memory consumption this way
3776 // @todo: but... actually it would still be somewhat nice to merge the two loops
3777 // the mem peaks are at different positions
3778 for ($i = 0; $i < $n; ++$i) {
3779 $close = 0;
3780 // fancy lines
3781 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3782 $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3783 // Set the attributes to style the line
3784 if ($this->use_classes) {
3785 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3786 } else {
3787 // This style "covers up" the special styles set for special lines
3788 // so that styles applied to special lines don't apply to the actual
3789 // code on that line
3790 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3791 .'<span style="' . $this->code_style .'">';
3792 }
3793 $close += 2;
3794 }
3795 //Is this some line with extra styles???
3796 if (in_array($i + 1, $this->highlight_extra_lines)) {
3797 if ($this->use_classes) {
3798 if (isset($this->highlight_extra_lines_styles[$i])) {
3799 $parsed_code .= "<span class=\"xtra lx$i\">";
3800 } else {
3801 $parsed_code .= "<span class=\"xtra ln-xtra\">";
3802 }
3803 } else {
3804 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3805 }
3806 ++$close;
3807 }
3808 $parsed_code .= $this->line_numbers_start + $i;
3809 if ($close) {
3810 $parsed_code .= str_repeat('</span>', $close);
3811 } else if ($i != $n) {
3812 $parsed_code .= "\n";
3813 }
3814 }
3815 $parsed_code .= '</pre></td><td'.$attributes.'>';
3816 }
3817 $parsed_code .= '<pre'. $attributes .'>';
3818 }
3819 // No line numbers, but still need to handle highlighting lines extra.
3820 // Have to use divs so the full width of the code is highlighted
3821 $close = 0;
3822 for ($i = 0; $i < $n; ++$i) {
3823 // Make lines have at least one space in them if they're empty
3824 // BenBE: Checking emptiness using trim instead of relying on blanks
3825 if ('' == trim($code[$i])) {
3826 $code[$i] = '&nbsp;';
3827 }
3828 // fancy lines
3829 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3830 $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3831 // Set the attributes to style the line
3832 if ($this->use_classes) {
3833 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3834 } else {
3835 // This style "covers up" the special styles set for special lines
3836 // so that styles applied to special lines don't apply to the actual
3837 // code on that line
3838 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3839 .'<span style="' . $this->code_style .'">';
3840 }
3841 $close += 2;
3842 }
3843 //Is this some line with extra styles???
3844 if (in_array($i + 1, $this->highlight_extra_lines)) {
3845 if ($this->use_classes) {
3846 if (isset($this->highlight_extra_lines_styles[$i])) {
3847 $parsed_code .= "<span class=\"xtra lx$i\">";
3848 } else {
3849 $parsed_code .= "<span class=\"xtra ln-xtra\">";
3850 }
3851 } else {
3852 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3853 }
3854 ++$close;
3855 }
3856 
3857 $parsed_code .= $code[$i];
3858 
3859 if ($close) {
3860 $parsed_code .= str_repeat('</span>', $close);
3861 $close = 0;
3862 }
3863 elseif ($i + 1 < $n) {
3864 $parsed_code .= "\n";
3865 }
3866 unset($code[$i]);
3867 }
3868 
3869 if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
3870 $parsed_code .= '</pre>';
3871 }
3872 if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3873 $parsed_code .= '</td>';
3874 }
3875 }
3876 
3877 $parsed_code .= $this->footer();
3878 }
3879 
/**
 * Builds the opening markup of the highlighted code block.
 *
 * The container element (<pre>, <div>, <table> or a bare <ol>) is chosen from
 * the configured header type and the line numbering mode. The container
 * carries the language name (plus the overall class, if any) as its CSS
 * class, and the overall id / overall style when those are set. Custom
 * header content, when present, is keyword-expanded and wrapped in an
 * element of its own.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header() {
    /**
     * @todo Document behaviour change - class is outputted regardless of whether
     * we're using classes or not. Same with style
     */
    $css_class = $this->language;
    if ($this->overall_class != '') {
        $css_class .= ' ' . $this->overall_class;
    }
    $attributes = ' class="' . $css_class . '"';
    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    // A start attribute is only needed when numbering does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // Prepare the user supplied header content, if there is any
    $header = $this->header_content;
    if ($header) {
        $inside_pre = ($this->header_type == GESHI_HEADER_PRE
            || $this->header_type == GESHI_HEADER_PRE_VALID);
        if ($inside_pre) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : " style=\"{$this->header_content_style}\"";

        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $numbered) {
            $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
        } else {
            $header = "<div$attr>$header</div>";
        }
    }

    // No container at all: only the list (or the optional inner div) is opened
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($numbered) {
            return "$header<ol$attributes$ol_attributes>";
        }
        return $header . ($this->force_code_block ? '<div>' : '');
    }

    // Without line numbers everything except <pre> mode uses a <div> container
    if (!$numbered) {
        $tag = ($this->header_type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return "<$tag$attributes>$header" .
            ($this->force_code_block ? '<div>' : '');
    }

    // With line numbers the container depends on the header type; an
    // unrecognised header type falls out of the switch and yields no value
    switch ($this->header_type) {
        case GESHI_HEADER_PRE:
            return "<pre$attributes>$header<ol$ol_attributes>";
        case GESHI_HEADER_DIV:
        case GESHI_HEADER_PRE_VALID:
            return "<div$attributes>$header<ol$ol_attributes>";
        case GESHI_HEADER_PRE_TABLE:
            return "<table$attributes>$header<tbody><tr class=\"li1\">";
    }
}
3959 
/**
 * Returns the footer for the code block.
 *
 * Closes whatever container header() opened for the current header type and
 * line numbering mode. Custom footer content, when present, is
 * keyword-expanded and wrapped either in a <tfoot> row (table layout with
 * line numbers) or in a <div>; in both cases the footer class/style is
 * applied to the wrapping element.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        // The property is called line_numbers; the previous spelling
        // ("linenumbers") read an undefined property, so the <tfoot> branch
        // was never taken and a <div> ended up directly inside the <table>.
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // Without line numbers header() opens a plain <div> for this type
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4013 
/**
 * Expands the placeholders allowed in header and footer content.
 *
 * Each placeholder exists in two spellings, <NAME> and {NAME}:
 * TIME (result of get_time(), three decimals), LANGUAGE (the language
 * name from the language data), VERSION (GESHI_VERSION) and SPEED (source
 * length divided by the time, or 'N/A' when the time is not positive).
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $time = $this->get_time();
    $time_text = number_format($time, 3);

    // Throughput is only meaningful for a positive duration
    if ($time <= 0) {
        $speed_text = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $time;
        $speed_text = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Placeholder => value; insertion order is the replacement order
    $map = array(
        '<TIME>'     => $time_text,
        '{TIME}'     => $time_text,
        '<LANGUAGE>' => $this->language_data['LANG_NAME'],
        '{LANGUAGE}' => $this->language_data['LANG_NAME'],
        '<VERSION>'  => GESHI_VERSION,
        '{VERSION}'  => GESHI_VERSION,
        '<SPEED>'    => $speed_text,
        '{SPEED}'    => $speed_text
    );

    return str_replace(array_keys($map), array_values($map), $instr);
}
4054 
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are replaced by
 * the internal placeholders '<SEMI>' and '<PIPE>' (see the comments on the
 * translation table below).
 *
 * The quote style only affects the current call: the shared translation
 * table is copied before it is adjusted.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // init
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );                      // ENT_COMPAT set

    // Adjust a per-call copy only. Modifying the static table itself would
    // make an ENT_NOQUOTES / ENT_QUOTES call change the result of every
    // later call, whatever quote style that later call asks for.
    $aTrans = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
4138 
/**
 * Builds the CSS rules that accompany the highlighted output.
 *
 * In economy mode only the rules that matter for the current configuration
 * (enabled lexic groups, active line numbering mode, ...) are written;
 * otherwise every non-empty style known for the language is emitted.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 *                (an empty string when an error is flagged)
 * @since 1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // With an error flagged the language data is probably not populated,
    // so do not even try to build a stylesheet
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements / usable style groups are cached
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    $full     = !$economy_mode;
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // Selector prefix: the id wins, otherwise language class (+ overall class)
    if ($this->overall_id) {
        $sel = '#' . $this->overall_id;
    } else {
        $sel = '.' . $this->language;
        if ($this->overall_class) {
            $sel .= '.' . $this->overall_class;
        }
    }
    $sel .= ' ';

    // Comment header of the stylesheet (longer form outside economy mode)
    $css = "/**\n";
    if ($full) {
        $css .= " * GeSHi Dynamically Generated Stylesheet\n"
            . " * --------------------------------------\n"
            . " * Dynamically generated stylesheet for {$this->language}\n"
            . " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n";
    }
    $css .= " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n"
        . " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($full) {
        $css .= " * --------------------------------------\n";
    }
    $css .= " */\n";

    // Default style for the code inside each line
    if ($full || $numbered) {
        $css .= $sel . '.de1, ' . $sel . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall style (empty styles are never worth a rule)
    if ($this->overall_style != '') {
        $css .= $sel . ' {' . $this->overall_style . "}\n";
    }

    // Link styles; economy mode is irrelevant here
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        if ($key == GESHI_LINK) {
            $css .= $sel . 'a:link {' . $style . "}\n";
        } elseif ($key == GESHI_HOVER) {
            $css .= $sel . 'a:hover {' . $style . "}\n";
        } elseif ($key == GESHI_ACTIVE) {
            $css .= $sel . 'a:active {' . $style . "}\n";
        } elseif ($key == GESHI_VISITED) {
            $css .= $sel . 'a:visited {' . $style . "}\n";
        }
    }

    // Header, footer and "important" sections
    if ($this->header_content_style != '') {
        $css .= $sel . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $css .= $sel . '.foot {' . $this->footer_content_style . "}\n";
    }
    if ($this->important_styles != '') {
        $css .= $sel . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles: normal lines, table line number cell, fancy lines
    if (($full || $numbered) && $this->line_style1 != '') {
        $css .= $sel . 'li, ' . $sel . '.li1 {' . $this->line_style1 . "}\n";
    }
    if (($full || $numbered) && $this->table_linenumber_style != '') {
        $css .= $sel . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    if (($full || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $css .= $sel . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: permission is tracked per group
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['KEYWORDS'][$group])) {
            continue;
        }
        $css .= $sel . '.kw' . $group . ' {' . $styles . "}\n";
    }

    // Comment groups: permitted groups, or groups backed by a comment regexp
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode
            && empty($this->lexic_permissions['COMMENTS'][$group])
            && (empty($this->language_data['COMMENT_REGEXP'])
                || empty($this->language_data['COMMENT_REGEXP'][$group]))) {
            continue;
        }
        $css .= $sel . '.co' . $group . ' {' . $styles . "}\n";
    }

    // Style types with a single on/off permission:
    // type => (class prefix, whether the 'HARD' group is written as '_h').
    // The 'HARD' group exists since 1.0.8 (hard escapes / hard quotes).
    $switched_types = array(
        'ESCAPE_CHAR' => array('es', true),
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false)
    );
    foreach ($switched_types as $type => $spec) {
        list($prefix, $maps_hard) = $spec;
        foreach ($this->language_data['STYLES'][$type] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$type])) {
                continue;
            }
            if ($maps_hard && $group === 'HARD') {
                $group = '_h';
            }
            $css .= $sel . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    // Script styles are written regardless of economy mode
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $css .= $sel . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexp groups may name their own CSS class through GESHI_CLASS
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $css .= $sel . '.' . $regexp[GESHI_CLASS] . ' {' . $styles . "}\n";
        } else {
            $css .= $sel . '.re' . $group . ' {' . $styles . "}\n";
        }
    }

    // Lines highlighted extra: the shared default rule, the block display
    // helper, and then one rule per individually styled line
    if ($full || count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles)) {
        $css .= $sel . '.ln-xtra, ' . $sel . 'li.ln-xtra, ' . $sel . 'div.ln-xtra {'
            . $this->highlight_extra_lines_style . "}\n";
    }
    $css .= $sel . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $css .= $sel . '.lx' . $lineid . ', ' . $sel . 'li.lx' . $lineid . ', '
            . $sel . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return $css;
}
4344 
/**
 * Looks up the style used for an extra-highlighted line.
 *
 * @param int The line number information is requested for
 * @return string The style registered for that particular line, or the
 *                general extra-line style when the line has none of its own
 * @access private
 * @since 1.0.7.21
 */
function get_line_style($line) {
    // A style assigned to this specific line takes precedence
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // No "extra" style assigned: use the shared one
    return $this->highlight_extra_lines_style;
}
4363 
/**
 * Creates optimized regular expression alternations from an array of
 * strings by folding common prefixes into nested groups.
 *
 * The list is sorted, every entry is escaped with preg_quote() and inserted
 * into a nested token array in which entries sharing a prefix share a
 * branch. Finished branches are converted to pattern text with
 * _optimize_regexp_list_tokens_to_string(). A new pattern string is started
 * whenever the accumulated token length exceeds GESHI_MAX_PCRE_LENGTH or the
 * number of '(?:' groups would exceed GESHI_MAX_PCRE_SUBPATTERNS, which is
 * why an array of patterns is returned rather than a single string.
 *
 * Example (illustrative; groups are written as non-capturing '(?:...)'):
 * <code>$list = array('faa', 'foo', 'foobar');
 * => 'f(aa|oo(bar)?)'</code>
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of regular expression strings (at least one element)
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters that must not become the first character of a split-off key
    $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    // sorting places entries with a common prefix next to each other
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // keys of the previously inserted entry, one per nesting level
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly:
            // flush the current tokens into a new pattern string
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        // reference into the token array; descends one level per matched key
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    // (continues the outer for loop with the next list entry)
                    continue 2;
                }
                // count the leading characters shared with the previous key
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character:
                            // do not split the key, store the entry as a sibling instead
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            // the next pass of the while loop now sees the entry as
                            // its own previous key and leaves via the duplicate check
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    // go one level deeper with the unmatched rest of the entry
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall trough, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    // too many groups for the current pattern: start a new one
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    // append to the current pattern as a further alternative
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            // (the '' => true element marks that an entry ends at this key)
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        // the entry has been processed and is no longer needed
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        $regexp_list[++$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
/**
 * this function creates the appropriate regexp string of an token array
 * you should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of the token array is written out, followed by a non-capturing
 * group holding its sub-tokens (if any); the group is made optional when the
 * key itself also terminates an entry (marked by an '' element). On the
 * outermost call two simplifications are applied to the finished string:
 * "(?:p)?" becomes "p?" and "(?:a|b|c)" becomes "[abc]".
 *
 * @param &$tokens array of tokens
 * @param $recursed bool to know wether we recursed or not
 * @return string
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // an '' element marks that an entry ends at this token
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations
        // (an earlier attempt at factoring out common trailing strings was
        // buggy and is intentionally not applied)
        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // A method callback is used instead of create_function(), which no
        // longer exists as of PHP 8.0.
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#',
            array($this, '_optimize_regexp_list_merge_chars'), $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}

/**
 * preg_replace_callback() helper for _optimize_regexp_list_tokens_to_string():
 * turns the single-character alternatives captured in $matches[1]
 * ("a|b|c") into a character class ("[abc]").
 *
 * @param $matches array match data as passed by preg_replace_callback()
 * @return string the character class
 * @access private
 */
function _optimize_regexp_list_merge_chars($matches) {
    return "[" . str_replace("|", "", $matches[1]) . "]";
}
4533} // End Class GeSHi
4534 
4535 
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper around the GeSHi class, modelled on
     * highlight_string(): highlights a piece of code without any container
     * header and wraps the result in a <code> element.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. You can leave this blank if you need
     *               as from version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return string The code highlighted (if $return is true); otherwise the
     *                markup is echoed and a boolean is returned: false if
     *                GeSHi reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';

        // Hand the markup back to the caller when asked to
        if ($return) {
            return $markup;
        }

        // Otherwise print it and report whether highlighting went fine
        echo $markup;
        return !$highlighter->error();
    }
}
4564 
4565?>

Powered by WebSVN 2.2.1