Source for file WordParser.inc
Documentation is available at WordParser.inc
* phpDocumentor :: automatic documentation generator
* Copyright (c) 2000-2006 Joshua Eichorn
* This library is free software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* @subpackage WordParsers
* @copyright 2000-2006 Joshua Eichorn
* @license http://www.opensource.org/licenses/lgpl-license.php LGPL
* @version CVS: $Id: WordParser.inc,v 1.3 2006/04/30 22:18:14 cellog Exp $
* @link http://www.phpdoc.org
* @link http://pear.php.net/PhpDocumentor
* Retrieves tokens from source code for use by the Parser
* @version $Id: WordParser.inc,v 1.3 2006/04/30 22:18:14 cellog Exp $
* @subpackage WordParsers
New lines around the world
* List of text that separates tokens, used to retrieve tokens
var $wordseperators = array();
* Position within input of the cursor pointing to the next text to be
* Size of the input source code
* Position the cursor was at the last time line numbers were counted, used
* to guarantee that line numbers are incremented
* Used for {@}source} tag, contains currently parsed function source
* flag, determines whether tokens are added to {@link $source}
* If true, then white space is returned as a part of tokens, otherwise
var $returnWhiteSpace = false;
* Initialize the WordParser
* @param string source code
//$this->word = WORD_PARSER_RET_WORD;
* Retrieve source code for the last function/method
$this->getsource = false;
* Used to tell the WordParser to start retrieving source code
function retrievesource($word = '')
* Retrieve a token from the token list
* The {@link Parser} class relies upon this method to retrieve the next
* token. The {@link $wordseperators} array is a collection of strings
* that delineate tokens for the current parser state. $wordseperators
* is set by the parser with a call to {@link Parser::configWordParser()}
* every time a new parser state is reached.
* For example, while parsing the source code for a class, the word
* <code>var</code> is a token, and <code>global</code> is not,
* but inside a function, the reverse is true. The parser state
* {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
* code delimiters like ; and {}, and comment/DocBlock indicators
* If the whitespace option has been turned off using
* {@link setWhitespace()}, then no whitespace is returned with tokens
* In the first segment of the function, the code attempts to find the next
* token. A cache is used to speed repetitious tasks. The $tpos variable
* is used to hold the position of the next token. $npos is used to
* hold the end of the token, and so $npos - $tpos will give the length
* of the token. This is used to allow tokens that contain whitespace,
* should that option be desired.
* {@link $data} is of course the string containing the PHP code to be
* parsed, and {@link $pos} is the cursor, or current location within the
* parsed data.
* @return string|false the next token, an empty string if there are no
* token separators in the $wordseperators array,
* or false if the end of input has been reached
if ($this->size == $this->pos)
// assume, for starting, that the token is from $this->pos to the end
//$this->wordseperators = array();
foreach($this->wordseperators as $sep)
// cache is set if this separator has been tested
if (isset ($this->cache[$sep]))
$tpos = $this->cache[$sep];
if ($tpos < $this->pos || !is_int($tpos))
// find the position of the next token separator
$tpos = strpos($this->data,$sep,$this->pos);
// was a token separator found that is closer to the current
if ( ($tpos < $npos) && !($tpos === false))
//echo trim($sep) . "=$tpos\n";
// set the length of the token to be from $this->pos to
// the next token separator
else if (!($tpos === false))
$this->cache[$sep] = $tpos;
// no token separators, tell the parser to choose a new state
$len = $npos - $this->pos;
$word = substr($this->data,$this->pos,$len);
// Change random other os newlines to the unix one
if ($word == "\r" || $word == "\r\n")
if ($this->linenumpos <= $this->pos)
$this->linenumpos = $this->pos + $len;
$this->pos = $this->pos + $len;
//$this->word = WORD_PARSER_RET_SEP;
// Things like // comments rely on the newline to find their end, so I'm going to have to return them
// never return worthless white space such as \t or ' '
if ($this->returnWhiteSpace == false)
//$this->time3 = $this->time3 + ($this->mtime() - $st3);
//$this->time = $this->time + ($this->mtime() - $st);
* Returns the current pointer position, or 1 character after the end of the word
* @param integer starting position
* @param integer length of block to retrieve
return substr($this->data,$start,$len);
* @param array array of strings that separate tokens
$this->wordseperators = &$seps;
* Set the internal cursor within the source code
* Backup to the previous token so that it can be retrieved again in a new
* Occasionally, a word will be passed to an event handler that should be
* handled by another event handler. This method allows that to happen.
* @param string token to back up to
if ($this->getsource) $this->source = substr($this->source,0,strlen($this->source) - 1);
$this->pos = $this->pos - strlen($word);
* set parser to return or strip whitespace
$this->returnWhiteSpace = $val;
|