Source code for file /geshi/geshi.php
Documentation is available at geshi.php
* GeSHi - Generic Syntax Highlighter
* The GeSHi class for Generic Syntax Highlighting. Please refer to the documentation
* at http://qbnz.com/highlighter/documentation.php for more information about how to
* use this class.
* For changes, release notes, TODOs etc, see the relevant files in the docs/ directory
* This file is part of GeSHi.
* GeSHi is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* GeSHi is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with GeSHi; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* @copyright Copyright © 2004, 2005, Nigel McNie
* @license http://gnu.org/copyleft/gpl.html GNU GPL
* @version $Id: geshi.php,v 1.32 2006/03/23 07:24:17 oracleshinoda Exp $
// You should use these constant names in your programs instead of
// their values - you never know when a value may change in a future version
/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.7.8');
/** Set the correct directory separator */
// '\\' when PHP_OS starts with 'WIN', '/' on every other platform
define('GESHI_DIR_SEPARATOR', ('WIN' != substr(PHP_OS, 0, 3)) ? '/' : '\\');
// Define the root directory for the GeSHi code tree
/** The root directory for GeSHi (directory of this file, with trailing separator) */
define('GESHI_ROOT', dirname(__FILE__) . GESHI_DIR_SEPARATOR);
// NOTE: the doc comment below must be closed on its own line; left open it
// swallows the define() that follows and GESHI_LANG_ROOT is never declared.
/** The language file directory for GeSHi (GESHI_ROOT/geshi/, with trailing separator) */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . GESHI_DIR_SEPARATOR);
// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);
// Container HTML type - pass one of these to GeSHi::set_header_type()
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);
/** Links in the source in the :link state */
/** Links in the source in the :hover state */
/** Links in the source in the :active state */
/** Links in the source in the :visited state */
// Important string starter/finisher
// Note that if you change these, write them as the literal characters that
// appear in the source: i.e., do NOT write them as if they had already been
// run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
/** Strict mode *might* apply, and can be enabled or
disabled by {@link GeSHi::enable_strict_mode()} */
/** Strict mode always applies */
// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
/** The key of the regex array defining what bracket group in a
matched search to use as a replacement */
/** The key of the regex array defining any modifiers to the regular expression */
/** The key of the regex array defining what bracket group in a
matched search to put before the replacement */
/** The key of the regex array defining what bracket group in a
matched search to put after the replacement */
/** Used in language files to mark comments */
// Error detection - use these to analyse faults
// NOTE: each doc comment must be closed on its own line; an unterminated
// comment here swallows the define() that follows it.
/** No sourcecode to highlight was specified */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi::set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi::enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
* Please refer to the documentation for GeSHi 1.0.X that is available
* at http://qbnz.com/highlighter/documentation.php for more information
* about how to use this class.
* @copyright Copyright © 2004, 2005 Nigel McNie
* The source code to highlight
* The language to use when highlighting
* The data for the language used
var $language_data =
array();
* The path to the language files
var $language_path =
GESHI_LANG_ROOT;
* The error message associated with an error
* @todo check err reporting works
* Possible error messages
var $error_messages =
array(
//GESHI_ERROR_NO_INPUT => 'No source code inputted',
GESHI_ERROR_NO_SUCH_LANG =>
'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
GESHI_ERROR_FILE_NOT_READABLE =>
'The file specified for load_from_file was not readable',
GESHI_ERROR_INVALID_HEADER_TYPE =>
'The header type specified is invalid',
GESHI_ERROR_INVALID_LINE_NUMBER_TYPE =>
'The line number type specified is invalid'
* Whether highlighting is strict or not
var $strict_mode =
false;
* Whether to use CSS classes in output
var $use_classes =
false;
* The type of header to use. Can be one of the following
* <li><b>GESHI_HEADER_PRE</b>: Source is outputted in
* a <pre> HTML element.</li>
* <li><b>GESHI_HEADER_DIV</b>: Source is outputted in
* a <div> HTML element.</li>
* <li><b>GESHI_HEADER_NONE</b>: No header is outputted.</li>
var $header_type =
GESHI_HEADER_PRE;
* Array of permissions for which lexics should be highlighted
var $lexic_permissions =
array(
'COMMENTS' =>
array('MULTI' =>
true),
* The time it took to parse the code
* The content of the header block
var $header_content =
'';
* The content of the footer block
var $footer_content =
'';
* The style of the header block
var $header_content_style =
'';
* The style of the footer block
var $footer_content_style =
'';
* The styles for hyperlinks in the code
var $link_styles =
array();
* Whether important blocks should be recognised or not
* @todo REMOVE THIS FUNCTIONALITY!
var $enable_important_blocks =
false;
* Styles for important parts of the code
* @todo As above - rethink the whole idea of important blocks as it is buggy and
* will be hard to implement in 1.2
var $important_styles =
'font-weight: bold; color: red;'; // Styles for important parts of the code
* Whether CSS IDs should be added to the code
* Lines that should be highlighted extra
var $highlight_extra_lines =
array();
* Styles of extra-highlighted lines
var $highlight_extra_lines_style =
'color: #cc0; background-color: #ffc;';
* Number at which line numbers should start at
* @todo Warning documentation about XHTML compliance
var $line_numbers_start =
1;
* The overall style for this code block
* The style for the actual code
var $code_style =
'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
* The overall class for this code block
* The overall ID for this code block
var $line_style1 =
'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
* Line number styles for fancy lines
var $line_style2 =
'font-weight: bold;';
* Flag for how line numbers are displayed
var $line_numbers =
GESHI_NO_LINE_NUMBERS;
* The "nth" value for fancy line highlighting
* Default target for keyword links
* The encoding to use for entity encoding
var $encoding =
'ISO-8859-1';
* Creates a new GeSHi object, with source and language
* @param string The source code to highlight
* @param string The language to highlight the source with
* @param string The path to the language file directory. <b>This
* is deprecated!</b> I've backported the auto path
* detection from the 1.1.X dev branch, so now it
* should be automatically set correctly. If you have
* renamed the language directory however, you will
* still need to set the path using this parameter or
* {@link GeSHi::set_language_path()}
function GeSHi ($source, $language, $path =
'')
* Returns an error message associated with the last GeSHi operation,
* or false if no error has occurred
* @return string|false An error message if there has been an error, else false
$msg =
$this->error_messages[$this->error];
'{LANGUAGE}' =>
$this->language,
'{PATH}' =>
$this->language_path
foreach ($debug_tpl_vars as $tpl =>
$var) {
return "<br /><strong>GeSHi Error:</strong> $msg (code $this->error)<br />";
* Gets a human-readable language name (thanks to Simon Patterson)
* @return string The name for the current language
if (GESHI_ERROR_NO_SUCH_LANG ==
$this->_error) {
return $this->language_data['LANG_NAME'] .
' (Unknown Language)';
return $this->language_data['LANG_NAME'];
* Sets the source code for this object
* @param string The source code to highlight
* Sets the language for this object
* @param string The name of the language to use
$this->strict_mode =
GESHI_NEVER;
$language =
preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
$file_name =
$this->language_path .
$this->language .
'.php';
$this->error =
GESHI_ERROR_NO_SUCH_LANG;
// Load the language for parsing
$this->load_language($file_name);
* Sets the path to the directory containing the language files. Note
* that this path is relative to the directory of the script that included
* geshi.php, NOT geshi.php itself.
* @param string The path to the language directory
* @deprecated The path to the language files should now be automatically
* detected, so this method should no longer be needed. The
* 1.1.X branch handles manual setting of the path differently
* so this method will disappear in 1.2.0.
$this->language_path =
('/' ==
substr($path, strlen($path) -
1, 1)) ?
$path :
$path .
'/';
$this->set_language($this->language); // otherwise set_language_path has no effect
* Sets the type of header to be used.
* If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
* means more source code but more control over tab width and line-wrapping.
* GESHI_HEADER_PRE means that a "pre" is used - less source, but less
* control. Default is GESHI_HEADER_PRE.
* From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
* is outputted.
* @param int The type of header to be used
$this->error =
GESHI_ERROR_INVALID_HEADER_TYPE;
$this->header_type =
$type;
* Sets the styles for the code that will be outputted
* when this object is parsed. The style should be a
* string of valid stylesheet declarations
* @param string The overall style for the outputted code block
* @param boolean Whether to merge the styles with the current styles or not
if (!$preserve_defaults) {
$this->overall_style =
$style;
$this->overall_style .=
$style;
* Sets the overall classname for this block of code. This
* class can then be used in a stylesheet to style this object's output
* @param string The class name to use for this block of code
$this->overall_class =
$class;
* Sets the overall id for this block of code. This id can then
* be used in a stylesheet to style this object's output
* @param string The ID to use for this block of code
* Sets whether CSS classes should be used to highlight the source. Default
* is off, calling this method with no arguments will turn it on
* @param boolean Whether to turn classes on or not
$this->use_classes =
($flag) ?
true :
false;
* Sets the style for the actual code. This should be a string
* containing valid stylesheet declarations. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* Note: Use this method to override any style changes you made to
* the line numbers if you are using line numbers, else the line of
* code will have the same style as the line number! Consult the
* GeSHi documentation for more information about this.
* @param string The style to use for actual code
* @param boolean Whether to merge the current styles with the new styles
if (!$preserve_defaults) {
$this->code_style =
$style;
$this->code_style .=
$style;
* Sets the styles for the line numbers.
* @param string The style for the line numbers that are "normal"
* @param string|boolean If a string, this is the style of the line
* numbers that are "fancy", otherwise if boolean then this
* defines whether the normal styles should be merged with the
* new normal styles or not
* @param boolean If set, is the flag for whether to merge the "fancy"
* styles with the current styles or not
function set_line_style ($style1, $style2 =
'', $preserve_defaults =
false)
$preserve_defaults =
$style2;
if (!$preserve_defaults) {
$this->line_style1 =
$style1;
$this->line_style2 =
$style2;
$this->line_style1 .=
$style1;
$this->line_style2 .=
$style2;
* Sets whether line numbers should be displayed.
* Valid values for the first parameter are:
* <li><b>GESHI_NO_LINE_NUMBERS</b>: Line numbers will not be displayed</li>
* <li><b>GESHI_NORMAL_LINE_NUMBERS</b>: Line numbers will be displayed</li>
* <li><b>GESHI_FANCY_LINE_NUMBERS</b>: Fancy line numbers will be displayed</li>
* For fancy line numbers, the second parameter is used to signal which lines
* are to be fancy. For example, if the value of this parameter is 5 then every
* 5th line will be fancy.
* @param int How line numbers should be displayed
* @param int Defines which lines are fancy
$this->error =
GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
$this->line_numbers =
$flag;
$this->line_nth_row =
$nth_row;
* Sets the style for a keyword group. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param int The key of the keyword group to change the styles of
* @param string The style to make the keywords
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['KEYWORDS'][$key] =
$style;
$this->language_data['STYLES']['KEYWORDS'][$key] .=
$style;
* Turns highlighting on/off for a keyword group
* @param int The key of the keyword group to turn on or off
* @param boolean Whether to turn highlighting for that group on or off
$this->lexic_permissions['KEYWORDS'][$key] =
($flag) ?
true :
false;
* Sets the styles for comment groups. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param int The key of the comment group to change the styles of
* @param string The style to make the comments
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['COMMENTS'][$key] =
$style;
$this->language_data['STYLES']['COMMENTS'][$key] .=
$style;
* Turns highlighting on/off for comment groups
* @param int The key of the comment group to turn on or off
* @param boolean Whether to turn highlighting for that group on or off
$this->lexic_permissions['COMMENTS'][$key] =
($flag) ?
true :
false;
* Sets the styles for escaped characters. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param string The style to make the escape characters
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['ESCAPE_CHAR'][0] =
$style;
$this->language_data['STYLES']['ESCAPE_CHAR'][0] .=
$style;
* Turns highlighting on/off for escaped characters
* @param boolean Whether to turn highlighting for escape characters on or off
$this->lexic_permissions['ESCAPE_CHAR'] =
($flag) ?
true :
false;
* Sets the styles for brackets. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* This method is DEPRECATED: use set_symbols_style instead.
* This method will be removed in 1.2.X
* @param string The style to make the brackets
* @param boolean Whether to merge the new styles with the old or just overwrite them
* @deprecated In favour of set_symbols_style
if (!$preserve_defaults) {
$this->language_data['STYLES']['BRACKETS'][0] =
$style;
$this->language_data['STYLES']['BRACKETS'][0] .=
$style;
* Turns highlighting on/off for brackets
* This method is DEPRECATED: use set_symbols_highlighting instead.
* This method will be removed in 1.2.X
* @param boolean Whether to turn highlighting for brackets on or off
* @deprecated In favour of set_symbols_highlighting
$this->lexic_permissions['BRACKETS'] =
($flag) ?
true :
false;
* Sets the styles for symbols. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param string The style to make the symbols
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['SYMBOLS'][0] =
$style;
$this->language_data['STYLES']['SYMBOLS'][0] .=
$style;
// For backward compatibility
* Turns highlighting on/off for symbols
* @param boolean Whether to turn highlighting for symbols on or off
$this->lexic_permissions['SYMBOLS'] =
($flag) ?
true :
false;
// For backward compatibility
* Sets the styles for strings. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param string The style to make the strings
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['STRINGS'][0] =
$style;
$this->language_data['STYLES']['STRINGS'][0] .=
$style;
* Turns highlighting on/off for strings
* @param boolean Whether to turn highlighting for strings on or off
$this->lexic_permissions['STRINGS'] =
($flag) ?
true :
false;
* Sets the styles for numbers. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param string The style to make the numbers
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['NUMBERS'][0] =
$style;
$this->language_data['STYLES']['NUMBERS'][0] .=
$style;
* Turns highlighting on/off for numbers
* @param boolean Whether to turn highlighting for numbers on or off
$this->lexic_permissions['NUMBERS'] =
($flag) ?
true :
false;
* Sets the styles for methods. $key is a number that references the
* appropriate "object splitter" - see the language file for the language
* you are highlighting to get this number. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param int The key of the object splitter to change the styles of
* @param string The style to make the methods
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['METHODS'][$key] =
$style;
$this->language_data['STYLES']['METHODS'][$key] .=
$style;
* Turns highlighting on/off for methods
* @param boolean Whether to turn highlighting for methods on or off
$this->lexic_permissions['METHODS'] =
($flag) ?
true :
false;
* Sets the styles for regexps. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
* @param string The style to make the regular expression matches
* @param boolean Whether to merge the new styles with the old or just overwrite them
if (!$preserve_defaults) {
$this->language_data['STYLES']['REGEXPS'][$key] =
$style;
$this->language_data['STYLES']['REGEXPS'][$key] .=
$style;
* Turns highlighting on/off for regexps
* @param int The key of the regular expression group to turn on or off
* @param boolean Whether to turn highlighting for the regular expression group on or off
$this->lexic_permissions['REGEXPS'][$key] =
($flag) ?
true :
false;
* Sets whether a set of keywords are checked for in a case sensitive manner
* @param int The key of the keyword group to change the case sensitivity of
* @param boolean Whether to check in a case sensitive manner or not
$this->language_data['CASE_SENSITIVE'][$key] =
($case) ?
true :
false;
* Sets the case that keywords should use when found. Use the constants:
* <li><b>GESHI_CAPS_NO_CHANGE</b>: leave keywords as-is</li>
* <li><b>GESHI_CAPS_UPPER</b>: convert all keywords to uppercase where found</li>
* <li><b>GESHI_CAPS_LOWER</b>: convert all keywords to lowercase where found</li>
* @param int A constant specifying what to do with matched keywords
* @todo Error check the passed value
$this->language_data['CASE_KEYWORDS'] =
$case;
* Sets how many spaces a tab is substituted for
* Widths below zero are ignored
* @param int The tab width
$this->tab_width =
intval($width);
* Enables/disables strict highlighting. Default is off, calling this
* method without parameters will turn it on. See documentation
* for more details on strict mode and where to use it.
* @param boolean Whether to enable strict mode or not
if (GESHI_MAYBE ==
$this->language_data['STRICT_MODE_APPLIES']) {
$this->strict_mode =
($mode) ?
true :
false;
* Disables all highlighting
* @todo Rewrite with an array traversal
foreach ($this->lexic_permissions as $key =>
$value) {
foreach ($value as $k =>
$v) {
$this->lexic_permissions[$key][$k] =
false;
$this->lexic_permissions[$key] =
false;
$this->enable_important_blocks =
false;
* Enables all highlighting
* @todo Rewrite with array traversal
foreach ($this->lexic_permissions as $key =>
$value) {
foreach ($value as $k =>
$v) {
$this->lexic_permissions[$key][$k] =
true;
$this->lexic_permissions[$key] =
true;
$this->enable_important_blocks =
true;
* Given a file extension, this method returns either a valid geshi language
* name, or the empty string if it couldn't be found
* @param string The extension to get a language name for
* @param array A lookup array to use instead of the default
* @todo Re-think about how this method works (maybe make it private and/or make it
* a extension->lang lookup?)
'actionscript' =>
array('as'),
'ada' =>
array('a', 'ada', 'adb', 'ads'),
'apache' =>
array('conf'),
'asm' =>
array('ash', 'asm'),
'delphi' =>
array('dpk', 'dpr'),
'html4strict' =>
array('html', 'htm'),
'javascript' =>
array('js'),
'pascal' =>
array('pas'),
'perl' =>
array('pl', 'pm'),
'php' =>
array('php', 'php5', 'phtml', 'phps'),
'visualfoxpro' =>
array(),
foreach ($lookup as $lang =>
$extensions) {
foreach ($extensions as $ext) {
if ($ext ==
$extension) {
* Given a file name, this method loads its contents in, and attempts
* to set the language automatically. An optional lookup table can be
* passed for looking up the language name. If not specified a default
* table is used.
* The language table is in the form
* 'lang_name' => array('extension', 'extension', ...),
* @todo Complete rethink of this and above method
$this->error =
GESHI_ERROR_FILE_NOT_READABLE;
* Adds a keyword to a keyword group for highlighting
* @param int The key of the keyword group to add the keyword to
* @param string The word to add to the keyword group
$this->language_data['KEYWORDS'][$key][] =
$word;
* Removes a keyword from a keyword group
* @param int The key of the keyword group to remove the keyword from
* @param string The word to remove from the keyword group
$this->language_data['KEYWORDS'][$key] =
array_diff($this->language_data['KEYWORDS'][$key], array($word));
* Creates a new keyword group
* @param int The key of the keyword group to create
* @param string The styles for the keyword group
* @param boolean Whether the keyword group is case sensitive or not
* @param array The words to use for the keyword group
function add_keyword_group ( $key, $styles, $case_sensitive =
true, $words =
array() )
$this->language_data['KEYWORDS'][$key] =
$words;
$this->lexic_permissions['KEYWORDS'][$key] =
true;
$this->language_data['CASE_SENSITIVE'][$key] =
$case_sensitive;
$this->language_data['STYLES']['KEYWORDS'][$key] =
$styles;
* Removes a keyword group
* @param int The key of the keyword group to remove
unset
($this->language_data['KEYWORDS'][$key]);
unset
($this->lexic_permissions['KEYWORDS'][$key]);
unset
($this->language_data['CASE_SENSITIVE'][$key]);
unset
($this->language_data['STYLES']['KEYWORDS'][$key]);
* Sets the content of the header block
* @param string The content of the header block
$this->header_content =
$content;
* Sets the content of the footer block
* @param string The content of the footer block
$this->footer_content =
$content;
* Sets the style for the header content
* @param string The style for the header content
$this->header_content_style =
$style;
* Sets the style for the footer content
* @param string The style for the footer content
$this->footer_content_style =
$style;
* Sets the base URL to be used for keywords
* @param int The key of the keyword group to set the URL for
* @param string The URL to set for the group. If {FNAME} is in
* the url somewhere, it is replaced by the keyword
* that the URL is being made for
$this->language_data['URLS'][$group] =
$url;
* Sets styles for links in code
* @param int A constant that specifies what state the style is being
* set for - e.g. :hover or :visited
* @param string The styles to use for that state
$this->link_styles[$type] =
$styles;
* Sets the target for links in code
* @param string The target for links in the code, e.g. _blank
$this->link_target =
' target="' .
$target .
'" ';
* Sets styles for important parts of the code
* @param string The styles to use on important parts of the code
$this->important_styles =
$styles;
* Sets whether context-important blocks are highlighted
* @todo REMOVE THIS SHIZ FROM GESHI!
$this->enable_important_blocks =
( $flag ) ?
true :
false;
* Whether CSS IDs should be added to each line
* @param boolean If true, IDs will be added to each line.
$this->add_ids =
($flag) ?
true :
false;
* Specifies which lines to highlight extra
* @param mixed An array of line numbers to highlight, or just a line number
* @todo Some data replication here that could be cut down on
foreach ($lines as $line) {
$this->highlight_extra_lines[intval($line)] =
intval($line);
$this->highlight_extra_lines[intval($lines)] =
intval($lines);
* Sets the style for extra-highlighted lines
* @param string The style for extra-highlighted lines
$this->highlight_extra_lines_style =
$styles;
* Sets what number line numbers should start at. Should
* be a positive integer, and will be converted to one.
* <b>Warning:</b> Using this method will add the "start"
* attribute to the <ol> that is used for line numbering.
* This is <b>not</b> valid XHTML strict, so if that's what you
* care about then don't use this method. Firefox is getting
* support for the CSS method of doing this in 1.1 and Opera
* has support for the CSS method, but (of course) IE doesn't
* so it's not worth doing it the CSS way yet.
* @param int The number to start line numbers at
$this->line_numbers_start =
abs(intval($number));
* Sets the encoding used for htmlspecialchars(), for international support
* @param string The encoding to use for the source
$this->encoding =
$encoding;
* Returns the code in $this->source, highlighted and surrounded by the
* necessary HTML.
* This should only be called ONCE, cos it's SLOW! If you want to highlight
* the same source multiple times, you're better off doing a whole lot of
* str_replaces to replace the <span>s
// Firstly, if there is an error, we won't highlight
$this->set_time($start_time, $start_time);
return $this->finalise($result);
// Add spaces for regular expression matching and line numbers
$code =
' ' .
$this->source .
' ';
// Replace all newlines to a common form.
// Initialise various stuff
$ESCAPE_CHAR_OPEN =
false;
$COMMENT_MATCHED =
false;
// Turn highlighting on if strict mode doesn't apply to this language
$HIGHLIGHTING_ON =
( !$this->strict_mode ) ?
true :
'';
// Whether to highlight inside a block of code
$HIGHLIGHT_INSIDE_STRICT =
false;
// "Important" selections are handled like multiline comments
// @todo GET RID OF THIS SHIZ
if ($this->enable_important_blocks) {
if ($this->strict_mode) {
// Break the source into bits. Each bit will be a portion of the code
// within script delimiters - for example, HTML between < and >
$parts =
array(0 =>
array(0 =>
''));
for ($i =
0; $i <
$length; $i++
) {
foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key =>
$delimiters) {
foreach ($delimiters as $open =>
$close) {
// Get the next little bit for this opening string
// We start a new block with the highlightable
$HIGHLIGHTING_ON =
$open;
// No point going around again...
foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key =>
$delimiters) {
foreach ($delimiters as $open =>
$close) {
if ($open ==
$HIGHLIGHTING_ON) {
// We check code from our current position BACKWARDS. This is so
// the ending string for highlighting can be included in the block
// Add the string to the rest of the string for this part
$parts[$k][1] =
( isset
($parts[$k][1]) ) ?
$parts[$k][1] .
$char :
$char;
$parts[$k][1] =
( isset
($parts[$k][1]) ) ?
$parts[$k][1] .
$char :
$char;
// Not strict mode - simply dump the source into
// the array at index 1 (the first highlightable block)
// Now we go through each part. We know that even-indexed parts are
// code that shouldn't be highlighted, and odd-indexed parts should be
foreach ($parts as $key =>
$data) {
// If this block should be highlighted...
if ($this->strict_mode) {
// Find the class key for this block of code
foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key =>
$script_data) {
foreach ($script_data as $open =>
$close) {
if ($this->language_data['STYLES']['SCRIPT'][$script_key] !=
'' &&
$this->lexic_permissions['SCRIPT']) {
// Add a span element around the source to
// highlight the overall source block
if (!$this->use_classes &&
$this->language_data['STYLES']['SCRIPT'][$script_key] !=
'') {
$attributes =
' style="' .
$this->language_data['STYLES']['SCRIPT'][$script_key] .
'"';
$attributes =
' class="sc' .
$script_key .
'"';
$result .=
"<span$attributes>";
if (!$this->strict_mode ||
$this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]) {
// Now, highlight the code in this block. This code
// is really the engine of GeSHi (along with the method
// parse_non_string_part).
for ($i =
0; $i <
$length; $i++
) {
$hq = isset
($this->language_data['HARDQUOTE']) ?
$this->language_data['HARDQUOTE'][0] :
false;
// Is this char the newline and line numbers being used?
||
count($this->highlight_extra_lines) >
0)
// If so, is there a string open? If there is, we should end it before
// the newline and begin it again (so when <li>s are put in the source
// remains XHTML compliant)
// note to self: This opens up possibility of config files specifying
// that languages can/cannot have multiline strings???
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['STRINGS'][0] .
'"';
$attributes =
' class="st0"';
$char =
'</span>' .
$char .
"<span$attributes>";
} elseif ($char ==
$STRING_OPEN) {
// A match of a string delimiter
if (($this->lexic_permissions['ESCAPE_CHAR'] &&
$ESCAPE_CHAR_OPEN) ||
($this->lexic_permissions['STRINGS'] &&
!$ESCAPE_CHAR_OPEN)) {
foreach ($this->language_data['HARDESCAPE'] as $hardesc)
if (!$ESCAPE_CHAR_OPEN) {
$ESCAPE_CHAR_OPEN =
false;
} elseif (in_array($char, $this->language_data['QUOTEMARKS']) &&
($STRING_OPEN ==
'') &&
$this->lexic_permissions['STRINGS']) {
// The start of a new string
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['STRINGS'][0] .
'"';
$attributes =
' class="st0"';
$char =
"<span$attributes>" .
$char;
$result .=
$this->parse_non_string_part( $stuff_to_parse );
($STRING_OPEN ==
'') &&
$this->lexic_permissions['STRINGS']
// The start of a hard quoted string
$STRING_OPEN =
$this->language_data['HARDQUOTE'][1];
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['STRINGS'][0] .
'"';
$attributes =
' class="st0"';
$char =
"<span$attributes>" .
$hq;
$result .=
$this->parse_non_string_part( $stuff_to_parse );
} elseif ($char ==
$this->language_data['ESCAPE_CHAR'] &&
$STRING_OPEN !=
'')
if (!$ESCAPE_CHAR_OPEN) {
$ESCAPE_CHAR_OPEN =
!$HARDQUOTE_OPEN; // true unless $HARDQUOTE_OPEN
foreach ($this->language_data['HARDESCAPE'] as $hard)
$ESCAPE_CHAR_OPEN =
true;
if ($ESCAPE_CHAR_OPEN &&
$this->lexic_permissions['ESCAPE_CHAR']) {
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['ESCAPE_CHAR'][0] .
'"';
$attributes =
' class="es0"';
$char =
"<span$attributes>" .
$char;
if (substr($code, $i +
1, 1) ==
"\n") {
// escaping a newline, what's the point in putting the span around
// the newline? It only causes hassles when inserting line numbers
$ESCAPE_CHAR_OPEN =
false;
$ESCAPE_CHAR_OPEN =
false;
if ($this->lexic_permissions['ESCAPE_CHAR']) {
} elseif ($ESCAPE_CHAR_OPEN) {
if ($this->lexic_permissions['ESCAPE_CHAR']) {
$ESCAPE_CHAR_OPEN =
false;
} elseif ($STRING_OPEN ==
'') {
// Is this a multiline comment?
foreach ($this->language_data['COMMENT_MULTI'] as $open =>
$close) {
$test_str =
substr( $part, $i, $com_len );
$test_str_match =
$test_str;
if ($open ==
$test_str) {
//@todo If remove important do remove here
if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['COMMENTS']['MULTI'] .
'"';
$attributes =
' class="coMULTI"';
$test_str =
"<span$attributes>" .
@htmlspecialchars($test_str, ENT_COMPAT, $this->encoding);
if (!$this->use_classes) {
$attributes =
' style="' .
$this->important_styles .
'"';
$attributes =
' class="imp"';
// We don't include the start of the comment if it's an
$test_str =
"<span$attributes>";
if ($close_pos ===
false) {
// Short-cut through all the multiline code
$rest_of_comment =
@htmlspecialchars(substr($part, $i +
$com_len, $close_pos -
$i), ENT_COMPAT, $this->encoding);
if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
count($this->highlight_extra_lines) >
0)) {
// strreplace to put close span and open span around multiline newlines
$test_str .=
str_replace("\n", "</span>\n<span$attributes>", $rest_of_comment);
$test_str .=
$rest_of_comment;
if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
$i =
$close_pos +
$com_len -
1;
$result .=
$this->parse_non_string_part($stuff_to_parse);
// If we haven't matched a multiline comment, try single-line comments
foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key =>
$comment_mark) {
$com_len =
strlen($comment_mark);
$test_str =
substr($part, $i, $com_len);
if ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS]) {
$match =
($comment_mark ==
$test_str);
if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['COMMENTS'][$comment_key] .
'"';
$attributes =
' class="co' .
$comment_key .
'"';
$test_str =
"<span$attributes>" .
@htmlspecialchars($this->change_case($test_str), ENT_COMPAT, $this->encoding);
$close_pos =
strpos($part, "\n", $i);
if ($close_pos ===
false) {
$test_str .=
@htmlspecialchars(substr($part, $i +
$com_len, $close_pos -
$i -
$com_len), ENT_COMPAT, $this->encoding);
if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
// Take into account that the comment might be the last in the source
$result .=
$this->parse_non_string_part($stuff_to_parse);
} elseif ($STRING_OPEN !=
'') {
// Otherwise, convert it to HTML form
//only escape <128 (we don't want to break multibyte chars)
// Where are we adding this char?
if (($STRING_OPEN ==
'') &&
!$CLOSE_STRING) {
$stuff_to_parse .=
$char;
$COMMENT_MATCHED =
false;
$result .=
$this->parse_non_string_part($stuff_to_parse);
// Close the <span> that surrounds the block
if ($this->strict_mode &&
$this->language_data['STYLES']['SCRIPT'][$script_key] !=
'' &&
$this->lexic_permissions['SCRIPT']) {
// Else not a block to highlight
// Parse the last stuff (redundant?)
$result .=
$this->parse_non_string_part($stuff_to_parse);
// Lop off the very first and last spaces
// Are we still in a string?
// We're finished: stop timing
return $this->finalise($result);
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * @param string The source to indent
 * @return string The source with HTML indenting applied
 */
function indent ($result)
// What the visible code does: when $result contains a tab character, each line is
// walked character by character while tracking whether we are inside an HTML tag,
// and every tab found outside a tag is replaced by filler up to the next tab stop.
// The last visible statement passes $result through nl2br().
//
// NOTE(review): this listing appears to have lost statements. $lines, $length,
// $char, $IN_TAG, $pos, $str and $result_line are read below but never assigned
// here, the braces do not balance, and no return is visible. Restore the method
// from a pristine copy before relying on it.
/// Replace tabs with the correct number of spaces
if (false !==
strpos($result, "\t")) {
foreach ($lines as $key =>
$line) {
// Lines without a tab need no expansion
if (false ===
strpos($line, "\t")) {
}//echo 'checking line ' . $key . '<br />';
$tab_width =
$this->tab_width;
//echo '<pre>line: ' . htmlspecialchars($line) . '</pre>';
for ($i =
0; $i <
$length; $i++
) {
// Simple engine to work out whether we're in a tag.
// If we are we modify $pos. This is so we ignore HTML
// in the line and only workout the tab replacement
// via the actual content of the string
// This test could be improved to include strings in the
// html so that < or > would be allowed in user's styles
// (e.g. quotes: '<' '>'; or similar)
if ($IN_TAG &&
'>' ==
$char) {
} elseif (!$IN_TAG &&
'<' ==
$char) {
} elseif (!$IN_TAG &&
'&' ==
$char) {
//echo "matched & in line... ";
// Look at the four characters starting three past the ampersand
// (presumably to locate the ';' that ends an entity -- confirm)
$substr =
substr($line, $i +
3, 4);
//$substr_5 = substr($line, 5, 1);
//echo "found entity at $posi\n";
} elseif (!$IN_TAG &&
"\t" ==
$char) {
// OPTIMISE - move $strs out. Make an array:
// 3 => ' ' etc etc
// to use instead of building a string every time
// NOTE(review): both entries look identical in this listing; presumably one
// of them was an HTML entity that got decoded -- confirm against the original
$strs =
array(0 =>
' ', 1 =>
' ');
//echo "building (pos=$pos i=$i) (" . ($i - $pos) . ") " . ($tab_width - (($i - $pos) % $tab_width)) . " spaces\n";
// Append tab_width - ((i - pos) % tab_width) filler characters, alternating
// between the two entries of $strs
for ($k =
0; $k <
($tab_width -
(($i -
$pos) %
$tab_width)); $k++
) $str .=
$strs[$k %
2];
// No further tab on this line: append the remainder of the line verbatim
if (false ===
strpos($line, "\t", $i +
1)) {
//$lines[$key] = $result_line;
$result_line .=
substr($line, $i +
1);
// Store the rebuilt line back into the line array
$lines[$key] =
$result_line;
// Convert newlines to <br /> markup
$result =
nl2br($result);
/**
 * Changes the case of a keyword for those languages where a change is asked for
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 */
function change_case ($instr)
// NOTE(review): the body of this method is absent from this listing. Callers in
// this file use its return value as the string to emit for $instr, so it must
// return a string. Restore the body from a pristine copy.
/**
 * Adds a URL to a keyword where needed.
 *
 * @param string The keyword to add the URL HTML to
 * @param int What group the keyword is from
 * @param boolean Whether to get the HTML for the start or end
 * @return string The HTML for either the start or end of the HTML <a> tag
 */
function add_url_to_keyword ($keyword, $group, $start_or_end)
// Produces link markup for $keyword only when a non-empty URL template exists in
// language_data['URLS'][$group]. $start_or_end is compared against 'BEGIN' to pick
// between the opening branch and the closing branch.
//
// NOTE(review): no return statement survives in this listing and the braces do not
// balance, so the exact strings returned cannot be determined from here.
// NOTE(review): substr($keyword, 0, 5) is compared with a 2-character literal and
// substr($word, 0, 4) with a 1-character literal, so neither can ever be equal.
// Presumably the literals were HTML entities that got decoded in this listing --
// confirm against the original.
if (isset
($this->language_data['URLS'][$group]) &&
$this->language_data['URLS'][$group] !=
'' &&
substr($keyword, 0, 5) !=
'</') {
// There is a base group for this keyword
if ($start_or_end ==
'BEGIN') {
// HTML workaround... not good form (tm) but should work for 1.0.X
// Old system: strtolower
//$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
// New system: get keyword from language file to get correct case
foreach ($this->language_data['KEYWORDS'][$group] as $word) {
// Drop a 4-character prefix from the word when it matches the literal below
$word =
( substr($word, 0, 4) ==
'<' ) ?
substr($word, 4) :
$word;
$this->language_data['URLS'][$group]
// HTML fix. Again, dirty hackage...
} elseif (!($this->language ==
'html4strict' &&
'>' ==
$keyword)) {
/**
 * Takes a string that has no strings or comments in it, and highlights
 * stuff like keywords, numbers and methods.
 *
 * @param string The string to parse for keywords, numbers etc.
 * @todo BUGGY! Why? Why not build string and return?
 */
function parse_non_string_part (&$stuff_to_parse)
// Highlights regexps, numbers, keywords, object members and brackets in a chunk of
// source. $stuff_to_parse is taken by reference and rewritten in place.
//
// The method works in two phases. First it wraps matches in placeholder markers
// ("<|" ... "|>") whose attribute slot holds a token such as /NUM!/, /<group>/ or
// !REG3XP<key>!. Only afterwards are those tokens replaced by the real class/style
// attribute and the markers turned into <span ...> / </span>. Deferring the styles
// keeps later passes from matching text inside the styles themselves.
//
// NOTE(review): this listing appears to have lost statements: the call heads that
// should surround several pattern/replacement string pairs, the definition of
// $styles, the "else" arms between paired $attributes assignments, closing braces
// and the final return are not visible. Restore from a pristine copy before use.
// A single space is prepended here; the substr() further down removes one character
// from the end of the string -- NOTE(review): confirm which end was intended.
$stuff_to_parse =
' ' .
@htmlspecialchars($stuff_to_parse, ENT_COMPAT, $this->encoding);
// Escaped copy, used below only for the cheap "might this keyword occur?" test
$stuff_to_parse_pregquote =
preg_quote($stuff_to_parse, '/');
// These vars will disappear in the future
// They hold method-call source text that is spliced into the /e replacement code
$func =
'$this->change_case';
$func2 =
'$this->add_url_to_keyword';
foreach ($this->language_data['REGEXPS'] as $key =>
$regexp) {
if ($this->lexic_permissions['REGEXPS'][$key]) {
"/{$regexp[GESHI_MODIFIERS]}",
"{
$regexp[GESHI_BEFORE]}<|!
REG3XP$key!>{
$regexp[GESHI_REPLACE]}|>{
$regexp[GESHI_AFTER]}",
// Plain-string regexp: wrap the whole match in a !REG3XP<key>! placeholder
$stuff_to_parse =
preg_replace( "/(" .
str_replace('/', '\/', $regexp) .
")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
// Highlight numbers. This regexp sucks... anyone with a regexp that WORKS
// here wins a cookie if they send it to me. At the moment there's two doing
// almost exactly the same thing, except the second one prevents a number
// being highlighted twice (eg <span...><span...>5</span></span>)
// Put /NUM!/ in for the styles, which gets replaced at the end.
// NEW ONE: Brice Bernard
// $stuff_to_parse = preg_replace('/([^(\\w|#|\\\|"|\')])(\\d+)/', '\\1<|/NUM!/>\\2|>', $stuff_to_parse);
//$stuff_to_parse = preg_replace('/([-+]?\\b(?:[0-9]*\\.)?[0-9]+\\b)/', '<|/NUM!/>\\1|>', $stuff_to_parse);
if ($this->lexic_permissions['NUMBERS'] &&
preg_match('#[0-9]#', $stuff_to_parse )) {
//$stuff_to_parse = preg_replace('#([^a-zA-Z0-9_\#])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
//$stuff_to_parse = preg_replace('#([^a-zA-Z0-9_\#>])([0-9]+)([^a-zA-Z0-9])#', "\\1<|/NUM!/>\\2|>\\3", $stuff_to_parse);
$stuff_to_parse =
preg_replace('/([-+]?\\b(?:[0-9]*\\.)?[0-9]+\\b)/', '<|/NUM!/>\\1|>', $stuff_to_parse);
// if there is a couple of alpha symbols there *might* be a keyword
if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
foreach ($this->language_data['KEYWORDS'] as $k =>
$keywordset) {
if ($this->lexic_permissions['KEYWORDS'][$k]) {
foreach ($keywordset as $keyword) {
// This replacement checks the word is on it's own (except if brackets etc
// are next to it), then highlights it. We don't put the color=" for the span
// in just yet - otherwise languages with the keywords "color" or "or" have
// Cheap substring pre-check before running the regexp for this keyword
if (false !==
stristr($stuff_to_parse_pregquote, $keyword )) {
// Might make a more unique string for putting the number in soon
// Basically, we don't put the styles in yet because then the styles themselves will
// get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
// NOTE(review): the patterns below use the /e modifier, which evaluates the
// replacement as PHP code; it was deprecated in PHP 5.5 and removed in PHP 7.0,
// so this needs preg_replace_callback() on current PHP.
if ($this->language_data['CASE_SENSITIVE'][$k]) {
"/([^a-zA-Z0-9\$_\|\#;>|^])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])/e",
"'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
// Change the case of the word.
// hackage again... must... release... 1.2...
if ('smarty' ==
$this->language) { $hackage =
'\/'; } else { $hackage =
''; }
"/([^a-zA-Z0-9\$_\|\#;>$hackage|^])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])/ie",
"'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
// Drop the final character of the working string
$stuff_to_parse =
substr($stuff_to_parse, 0, strlen($stuff_to_parse) -
1);
// Now that's all done, replace /[number]/ with the correct styles
foreach ($this->language_data['KEYWORDS'] as $k =>
$kws) {
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['KEYWORDS'][$k] .
'"';
$attributes =
' class="kw' .
$k .
'"';
$stuff_to_parse =
str_replace("/$k/", $attributes, $stuff_to_parse);
if (!$this->use_classes &&
$this->lexic_permissions['NUMBERS']) {
$attributes =
' style="' .
$this->language_data['STYLES']['NUMBERS'][0] .
'"';
$attributes =
' class="nu0"';
$stuff_to_parse =
str_replace('/NUM!/', $attributes, $stuff_to_parse);
// Highlight methods and fields in objects
if ($this->lexic_permissions['METHODS'] &&
$this->language_data['OOLANG']) {
foreach ($this->language_data['OBJECT_SPLITTERS'] as $key =>
$splitter) {
if (false !==
stristr($stuff_to_parse, $splitter)) {
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['METHODS'][$key] .
'"';
$attributes =
' class="me' .
$key .
'"';
// Wrap the identifier that follows the splitter (and optional whitespace)
$stuff_to_parse =
preg_replace("/(" .
preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], 1) .
"[\s]*)([a-zA-Z\*\(][a-zA-Z0-9_\*]*)/", "\\1<|$attributes>\\2|>", $stuff_to_parse);
// Highlight brackets. Yes, I've tried adding a semi-colon to this list.
// You try it, and see what happens ;)
// TODO: Fix lexic permissions not converting entities if shouldn't
// be highlighting regardless
if ($this->lexic_permissions['BRACKETS']) {
$code_entities_match =
array('[', ']', '(', ')', '{', '}');
if (!$this->use_classes) {
$code_entities_replace =
array(
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">[|>',
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">]|>',
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">(|>',
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">)|>',
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">{|>',
'<| style="' .
$this->language_data['STYLES']['BRACKETS'][0] .
'">}|>',
$code_entities_replace =
array(
'<| class="br0">[|>',
'<| class="br0">]|>',
'<| class="br0">(|>',
'<| class="br0">)|>',
'<| class="br0">{|>',
'<| class="br0">}|>',
$stuff_to_parse =
str_replace( $code_entities_match, $code_entities_replace, $stuff_to_parse );
// Add class/style for regexps
foreach ($this->language_data['REGEXPS'] as $key =>
$regexp) {
if ($this->lexic_permissions['REGEXPS'][$key]) {
if (!$this->use_classes) {
$attributes =
' style="' .
$this->language_data['STYLES']['REGEXPS'][$key] .
'"';
$attributes =
' class="re' .
$key .
'"';
$stuff_to_parse =
str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
// Replace <DOT> with . for urls
$stuff_to_parse =
str_replace('<DOT>', '.', $stuff_to_parse);
// Replace <|UR1| with <a href= for urls also
if ($this->use_classes) {
$stuff_to_parse =
str_replace('<|UR1|', '<a' .
$this->link_target .
' href=', $stuff_to_parse);
$stuff_to_parse =
str_replace('<|UR1|', '<a' .
$this->link_target .
' style="' .
$this->link_styles[GESHI_LINK] .
'" href=', $stuff_to_parse);
$stuff_to_parse =
str_replace('<|UR1|', '<a' .
$this->link_target .
' href=', $stuff_to_parse);
// NOW we add the span thingy ;)
// Every remaining placeholder marker becomes a real span open/close tag
$stuff_to_parse =
str_replace('<|', '<span', $stuff_to_parse);
$stuff_to_parse =
str_replace ( '|>', '</span>', $stuff_to_parse );
/**
 * Sets the time taken to parse the code
 *
 * Each argument is a space-separated pair of numbers (the string form
 * returned by microtime()). The two parts of each value are added together
 * and the difference between end and start is stored in $this->time.
 *
 * @param string The time when parsing started
 * @param string The time when parsing ended
 */
function set_time ($start_time, $end_time)
{
    $start = explode(' ', $start_time);
    // $end has to be split out of $end_time in the same way; it was being
    // read below without ever having been assigned
    $end = explode(' ', $end_time);
    $this->time = $end[0] + $end[1] - $start[0] - $start[1];
}
/**
 * Gets the time taken to parse the code
 *
 * Returns the value stored by set_time(); replace_keywords() formats it
 * for display in the header/footer.
 *
 * @return double The time taken to parse the code
 */
function get_time ()
{
    return $this->time;
}
/**
 * Gets language information and stores it for later use
 *
 * The language file is executed in this method's scope and is expected to
 * fill in $language_data; that array then becomes $this->language_data.
 * Strict mode is switched on when the language demands it, every keyword
 * group, single-line comment type and regexp is enabled for highlighting,
 * and the overall CSS class defaults to the language name.
 *
 * @param string The filename of the language file you want to load
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language ($file_name)
{
    $language_data = array();
    // Without this the array stays empty and every lookup below hits an
    // undefined index. The caller is responsible for passing a trusted path.
    require $file_name;
    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    if ($this->language_data['STRICT_MODE_APPLIES'] == GESHI_ALWAYS) {
        $this->strict_mode = true;
    }

    // Set permissions for all lexics to true
    // so they'll be highlighted by default
    foreach ($this->language_data['KEYWORDS'] as $key => $words) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
    foreach ($this->language_data['COMMENT_SINGLE'] as $key => $comment) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // Set default class for CSS
    $this->overall_class = $this->language;
}
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * @param string The code already parsed
 * @return string The code nicely finalised
 */
function finalise ($parsed_code)
// Post-processes highlighted code. The visible steps are: run indent(), split the
// code into lines and wrap each one in <li> (with a <div> carrying the code style)
// or, in the second loop, in a highlight <div>; strip spans/divs that contain only
// whitespace; and return header() . chop(code) . footer().
//
// NOTE(review): this listing appears to have lost statements. The conditions that
// select between the line-number path and the plain path, the initialisation of
// $i, $ls, $end and of $parsed_code before it is rebuilt, the "else" arms between
// paired assignments and most closing braces are not visible. Restore from a
// pristine copy before relying on it.
// Remove end parts of important declarations
// This is BUGGY!! My fault for bad code: fix coming in 1.2
// @todo Remove this crap
if ($this->enable_important_blocks &&
// Add HTML whitespace stuff if we're using the <div> header
$parsed_code =
$this->indent($parsed_code);
// If we're using line numbers, we insert <li>s and appropriate
// markup to style them (otherwise we don't need to do anything)
// If we're using the <pre> header, we shouldn't add newlines because
// the <pre> will line-break them (and the <li>s already do this for us)
$code =
explode("\n", $parsed_code);
// Set vars to defaults for following loop
foreach ($code as $line) {
// Empty lines become a single space
$line =
( $line ) ?
$line :
' ';
// If this is a "special line"...
// i.e. the last line of each run of line_nth_row lines
$i %
$this->line_nth_row ==
($this->line_nth_row -
1)) {
// Set the attributes to style the line
if ($this->use_classes) {
$def_attr =
' class="de2"';
$attr =
' style="' .
$this->line_style2 .
'"';
// This style "covers up" the special styles set for special lines
// so that styles applied to special lines don't apply to the actual
$def_attr =
' style="' .
$this->code_style .
'"';
$start =
"<div$def_attr>";
if ($this->use_classes) {
$def_attr =
' class="de1"';
$attr =
' style="' .
$this->line_style1 .
'"';
$def_attr =
' style="' .
$this->code_style .
'"';
$start =
"<div$def_attr>";
// Are we supposed to use ids? If so, add them
// NOTE(review): the interpolation below is split across lines in this listing, so
// the literal braces and newlines would end up inside the attribute -- confirm
$attr .=
" id=\"{$this->overall_id}-{
$i}\"
";
if ($this->use_classes &&
in_array($i, $this->highlight_extra_lines)) {
$attr .= " class=\"ln-xtra\"";
if (!$this->use_classes &&
in_array($i, $this->highlight_extra_lines)) {
$attr .= " style=\"{
$this->highlight_extra_lines_style}\"
";
// Add in the line surrounded by appropriate list HTML
$parsed_code .= "<li$attr>$start$line$end</li>$ls";
// No line numbers, but still need to handle highlighting lines extra.
// Have to use divs so the full width of the code is highlighted
$code = explode("\n", $parsed_code);
// Make lines have at least one space in them if they're empty
$line = ($line) ? $line : ' ';
// $i is pre-incremented, so the first line is tested as line 1
if (in_array(++$i, $this->highlight_extra_lines)) {
if ($this->use_classes) {
$parsed_code .= '<div class="ln-xtra">';
$parsed_code .= "<div style=\"{
$this->highlight_extra_lines_style}\">
";
$parsed_code .= $line . "</div>\n";
$parsed_code .= $line . "\n";
// purge some unnecessary stuff
$parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
$parsed_code = preg_replace('#<div[^>]+>(\s*)</div>#', '\\1', $parsed_code);
if ($this->header_type ==
GESHI_HEADER_PRE) {
// enforce line numbers when using pre
$parsed_code = str_replace('<li></li>', '<li> </li>', $parsed_code);
// chop() strips trailing whitespace before the footer is appended
return $this->header() .
chop($parsed_code) .
$this->footer();
/**
 * Creates the header for the code block (with correct attributes)
 *
 * The result is the opening tag of the container selected by the header
 * type (<pre>, <div>, or no container for GESHI_HEADER_NONE), followed by
 * the formatted header content and, when line numbers are enabled, the
 * opening <ol> tag (with a start attribute if numbering does not begin at 1).
 *
 * @return string The header for the code block
 */
function header ()
{
    $attributes = $this->get_attributes();

    // Must start out empty: it is only ever appended to, and it is
    // interpolated into every <ol> emitted below
    $ol_attributes = '';
    if ($this->line_numbers_start != 1) {
        $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
    }

    $header = $this->format_header_content();
    $list_open = ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? "<ol$ol_attributes>" : '';

    if (GESHI_HEADER_NONE == $this->header_type) {
        return $header . $list_open;
    }

    // Work out what to return and do it
    if ($this->header_type == GESHI_HEADER_PRE) {
        return "<pre$attributes>" . $header . $list_open;
    }
    if ($this->header_type == GESHI_HEADER_DIV) {
        return "<div$attributes>" . $header . $list_open;
    }

    // Unknown header type: contribute nothing rather than returning null
    return '';
}
/**
 * Returns the header content, formatted for output
 *
 * Newlines are stripped when the <pre> header is in use, placeholder
 * keywords are substituted through replace_keywords(), and the result is
 * wrapped in a <div>. The div carries the "head" class when classes are in
 * use (matching the ".head" rule emitted by get_stylesheet()) and the inline
 * header style otherwise. An empty header yields an empty string.
 *
 * @return string The header content, formatted for output
 */
function format_header_content ()
{
    $header = $this->header_content;
    if ('' == $header) {
        // Nothing to show: do not emit an empty <div>
        return '';
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $header = str_replace("\n", '', $header);
    }
    $header = $this->replace_keywords($header);

    if ($this->use_classes) {
        $attr = ' class="head"';
    } else {
        $attr = " style=\"{$this->header_content_style}\"";
    }
    return "<div$attr>$header</div>";
}
/**
 * Returns the footer for the code block.
 *
 * Mirrors header(): closes the <ol> when line numbers are enabled, appends
 * the formatted footer content, then closes the container. The container is
 * </div> for GESHI_HEADER_DIV, nothing for GESHI_HEADER_NONE and </pre>
 * otherwise.
 *
 * @return string The footer for the code block
 */
function footer ()
{
    $footer_content = $this->format_footer_content();
    $list_close = ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' : '';

    if (GESHI_HEADER_NONE == $this->header_type) {
        return $list_close . $footer_content;
    }

    if ($this->header_type == GESHI_HEADER_DIV) {
        return $list_close . $footer_content . '</div>';
    }

    return $list_close . $footer_content . '</pre>';
}
/**
 * Returns the footer content, formatted for output
 *
 * Newlines are stripped when the <pre> header is in use, placeholder
 * keywords are substituted through replace_keywords(), and the result is
 * wrapped in a <div>. The div carries the "foot" class when classes are in
 * use (matching the ".foot" rule emitted by get_stylesheet()) and the inline
 * footer style otherwise. An empty footer yields an empty string.
 *
 * @return string The footer content, formatted for output
 */
function format_footer_content ()
{
    $footer = $this->footer_content;
    if ('' == $footer) {
        // Nothing to show: do not emit an empty <div>
        return '';
    }

    if ($this->header_type == GESHI_HEADER_PRE) {
        $footer = str_replace("\n", '', $footer);
    }
    $footer = $this->replace_keywords($footer);

    if ($this->use_classes) {
        $attr = ' class="foot"';
    } else {
        $attr = " style=\"{$this->footer_content_style}\"";
    }
    return "<div$attr>$footer</div>";
}
/**
 * Replaces certain keywords in the header and footer with
 * certain configuration values
 *
 * Supported placeholders: <TIME> (parse time to three decimals),
 * <LANGUAGE> (the current language name) and <VERSION> (GESHI_VERSION).
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 */
function replace_keywords ($instr)
{
    // The two lists must stay strictly parallel: str_replace() pairs them by
    // position, so a missing keyword shifts every later replacement
    $keywords = array('<TIME>', '<LANGUAGE>', '<VERSION>');
    $replacements = array(
        number_format($this->get_time(), 3),
        $this->language,
        GESHI_VERSION
    );

    return str_replace($keywords, $replacements, $instr);
}
/**
 * Gets the CSS attributes for this code
 *
 * Builds the attribute string for the outer container: class, id and
 * style, each included only when the corresponding overall_* value is
 * non-empty. Every attribute is prefixed with a single space so the result
 * can be appended directly to a tag name.
 *
 * @return string The CSS attributes for this code
 * @todo Document behaviour change - class is outputted regardless of whether we're using classes or not.
 */
function get_attributes ()
{
    $attributes = '';

    if ($this->overall_class != '') {
        $attributes .= " class=\"{$this->overall_class}\"";
    }
    if ($this->overall_id != '') {
        $attributes .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    return $attributes;
}
/**
 * Returns a stylesheet for the highlighted code. If $economy mode
 * is true, we only return the stylesheet declarations that matter for
 * this code block instead of the whole thing
 *
 * In economy mode a rule is written only when its style is non-empty and
 * (where a lexic permission exists for it) that lexic is enabled. Outside
 * economy mode every rule is written, preceded by a verbose comment header.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 */
function get_stylesheet ($economy_mode = true)
{
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    // NOTE(review): guarded on the data itself; confirm whether the class also
    // exposes an explicit error flag that should be checked here.
    if (!isset($this->language_data['STYLES'])) {
        return '';
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class. Otherwise, a selector
    // of '' means that these styles will be applied anywhere
    if ($this->overall_id != '') {
        $selector = '#' . $this->overall_id . ' ';
    } elseif ($this->overall_class != '') {
        $selector = '.' . $this->overall_class . ' ';
    } else {
        $selector = '';
    }

    // Header of the stylesheet: verbose in full mode, one line in economy mode
    if (!$economy_mode) {
        $stylesheet = "/**\n * GeSHi Dynamically Generated Stylesheet\n"
            . " * --------------------------------------\n"
            . " * Dynamically generated stylesheet for {$this->language}\n"
            . " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n"
            . " * GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter)\n */\n";
    } else {
        $stylesheet = '/* GeSHi (c) Nigel McNie 2004 (http://qbnz.com/highlighter) */' . "\n";
    }

    // Set the <ol> to have no effect at all if there are line numbers
    // (<ol>s have margins that should be destroyed so all layout is
    // controlled by the set_overall_style method, which works on the
    // <pre> or <div> container). Additionally, set default styles for lines
    if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        $stylesheet .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall container styles
    if (!$economy_mode || $this->overall_style != '') {
        $stylesheet .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: each key maps to exactly one pseudo-class. Testing every
    // pseudo-class against every key made full mode emit all four rules for
    // each key, pairing styles with the wrong pseudo-classes.
    $link_pseudo_classes = array(
        GESHI_LINK    => 'link',
        GESHI_HOVER   => 'hover',
        GESHI_ACTIVE  => 'active',
        GESHI_VISITED => 'visited'
    );
    foreach ($this->link_styles as $key => $style) {
        if (isset($link_pseudo_classes[$key]) && (!$economy_mode || $style != '')) {
            $stylesheet .= $selector . 'a:' . $link_pseudo_classes[$key] . ' {' . $style . "}\n";
        }
    }

    // Header and footer content
    if (!$economy_mode || $this->header_content_style != '') {
        $stylesheet .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if (!$economy_mode || $this->footer_content_style != '') {
        $stylesheet .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }

    // Styles for important stuff
    if (!$economy_mode || $this->important_styles != '') {
        $stylesheet .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || count($this->highlight_extra_lines)) {
        $stylesheet .= $selector . '.ln-xtra {' . $this->highlight_extra_lines_style . "}\n";
    }

    // Simple line number styles
    if (!$economy_mode || ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->line_style1 != '')) {
        $stylesheet .= $selector . 'li {' . $this->line_style1 . "}\n";
    }

    // If there is a style set for fancy line numbers, echo it out
    if (!$economy_mode || ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && $this->line_style2 != '')) {
        $stylesheet .= $selector . 'li.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: permission is tracked per group
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['KEYWORDS'][$group])) {
            $stylesheet .= $selector . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment types: permission is tracked per type
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['COMMENTS'][$group])) {
            $stylesheet .= $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Lexics with one permission flag for the whole lexic. Bracket rules are
    // read from STYLES.BRACKETS so class-based output uses the same styles as
    // the inline style="..." path in parse_non_string_part().
    $flat_lexics = array(
        'ESCAPE_CHAR' => 'es',
        'BRACKETS'    => 'br',
        'STRINGS'     => 'st',
        'NUMBERS'     => 'nu',
        'METHODS'     => 'me'
    );
    foreach ($flat_lexics as $lexic => $prefix) {
        foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
            if (!$economy_mode || ($styles != '' && $this->lexic_permissions[$lexic])) {
                $stylesheet .= $selector . '.' . $prefix . $group . ' {' . $styles . "}\n";
            }
        }
    }

    // Script blocks have no permission flag of their own
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if (!$economy_mode || $styles != '') {
            $stylesheet .= $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexps: permission is tracked per regexp
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if (!$economy_mode || ($styles != '' && $this->lexic_permissions['REGEXPS'][$group])) {
            $stylesheet .= $selector . '.re' . $group . ' {' . $styles . "}\n";
        }
    }

    return $stylesheet;
}
if (!function_exists('geshi_highlight')) {
* Easy way to highlight stuff. Behaves just like highlight_string
* @param string The code to highlight
* @param string The language to highlight the code in
* @param string The path to the language files. You can leave this blank if you need
* as from version 1.0.7 the path should be automatically detected
* @param boolean Whether to return the result or to echo
* @return string The code highlighted (if $return is true)
function geshi_highlight ($string, $language, $path, $return = false)
$geshi = new GeSHi($string, $language, $path);
$geshi->set_header_type(GESHI_HEADER_NONE);
return '<code>' . $geshi->parse_code() .
'</code>';
echo '<code>' . $geshi->parse_code() .
'</code>';