MediaWiki  REL1_20
StringUtils.php
Go to the documentation of this file.
00001 <?php
/**
 * A collection of static string-manipulation helpers.
 */
class StringUtils {
	/**
	 * Replace delimited spans of $subject with a fixed string, using plain
	 * string functions only (no regular expressions).
	 *
	 * The subject is split on every occurrence of $startDelim. Within each
	 * piece that follows a start delimiter, everything up to and including
	 * the first $endDelim is replaced by $replace. A piece that contains no
	 * end delimiter is copied through unchanged, start delimiter included.
	 *
	 * @param $startDelim String: opening delimiter
	 * @param $endDelim String: closing delimiter
	 * @param $replace String: literal text substituted for each delimited span
	 * @param $subject String: text to process
	 * @return String
	 */
	public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
		$pieces = explode( $startDelim, $subject );
		// Whatever precedes the first start delimiter is never replaced.
		$result = array_shift( $pieces );
		foreach ( $pieces as $piece ) {
			$closeAt = strpos( $piece, $endDelim );
			$result .= ( $closeAt === false )
				// Unterminated: restore the delimiter that explode() consumed.
				? $startDelim . $piece
				// Terminated: drop everything through the end delimiter.
				: $replace . substr( $piece, $closeAt + strlen( $endDelim ) );
		}
		return $result;
	}

	/**
	 * Scan $subject for START ... END spans and replace each one with the
	 * return value of $callback.
	 *
	 * The callback is invoked through call_user_func() with a single array:
	 *   [0] the whole span, delimiters included
	 *   [1] the text between the delimiters
	 *
	 * Once a start delimiter has been seen, further start delimiters are
	 * skipped until an end delimiter is found, so START START END is handled
	 * as one span beginning at the first START. An end delimiter with no
	 * pending start is copied to the output unchanged, and so is an
	 * unterminated start at the end of the subject.
	 *
	 * @param $startDelim String: opening delimiter (matched literally)
	 * @param $endDelim String: closing delimiter (matched literally)
	 * @param $callback Callback: receives the match array, returns the replacement
	 * @param $subject String: text to process
	 * @param $flags String: regular expression modifiers appended to the
	 *   internal pattern; if it contains "i" the delimiters are also compared
	 *   case-insensitively
	 * @throws MWException when the pattern matches but neither delimiter
	 *   group captured a non-empty string
	 * @return String
	 */
	public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
		$inputPos = 0;      // where the next regex search begins
		$outputPos = 0;     // first byte of $subject not yet accounted for in $output
		$output = '';
		$foundStart = false;
		$encStart = preg_quote( $startDelim, '!' );
		$encEnd = preg_quote( $endDelim, '!' );
		$pattern = "!($encStart)|($encEnd)!S$flags";
		$strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
		$endLength = strlen( $endDelim );
		$subjectLength = strlen( $subject );
		$m = array();

		while ( $inputPos < $subjectLength &&
			preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) )
		{
			$tokenOffset = $m[0][1];
			$tokenLength = strlen( $m[0][0] );

			// Classify the token. The start alternative is tried first by the
			// regex, so when a span is open we must check whether an end
			// delimiter also sits at this offset (e.g. identical delimiters).
			if ( $m[1][0] != '' ) {
				$isEnd = $foundStart &&
					$strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0;
				if ( $isEnd ) {
					$tokenLength = $endLength;
				}
			} elseif ( $m[2][0] != '' ) {
				$isEnd = true;
			} else {
				throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
			}

			if ( !$isEnd ) {
				if ( $foundStart ) {
					// A span is already open: step past only the first byte of
					// this START so an END overlapping it can still be found.
					$inputPos = $tokenOffset + 1;
					continue;
				}
				// Opening a new span: flush the plain text that precedes it.
				$output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
				$outputPos = $tokenOffset;
				$inputPos = $contentPos = $tokenOffset + $tokenLength;
				$foundStart = true;
				continue;
			}

			if ( $foundStart ) {
				// Complete span: hand it to the callback.
				$output .= call_user_func( $callback, array(
					substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
					substr( $subject, $contentPos, $tokenOffset - $contentPos )
				) );
				$foundStart = false;
			} else {
				// Stray END: copy it, and the text before it, through verbatim.
				// $inputPos and $outputPos are equal whenever no span is open.
				$output .= substr( $subject, $inputPos, $tokenOffset + $tokenLength - $outputPos );
			}
			$inputPos = $outputPos = $tokenOffset + $tokenLength;
		}

		// Append whatever remains, including an unterminated START.
		if ( $outputPos < strlen( $subject ) ) {
			$output .= substr( $subject, $outputPos );
		}
		return $output;
	}

	/**
	 * Replace START ... END spans with a replacement template.
	 *
	 * Delegates to delimiterReplaceCallback() using a RegexlikeReplacer built
	 * from $replace as the callback.
	 *
	 * @param $startDelim String: opening delimiter
	 * @param $endDelim String: closing delimiter
	 * @param $replace String: replacement template passed to RegexlikeReplacer
	 * @param $subject String: text to process
	 * @param $flags String: regular expression modifiers, passed through
	 * @return String
	 */
	public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
		$replacer = new RegexlikeReplacer( $replace );
		$callback = $replacer->cb();
		return self::delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags );
	}

	/**
	 * Split $text on $separator, ignoring separators that occur between
	 * "<" and ">".
	 *
	 * NUL bytes are stripped from the input first because a NUL byte is used
	 * internally as the temporary stand-in for protected separators.
	 *
	 * @param $separator String
	 * @param $text String
	 * @return Array of strings
	 */
	public static function explodeMarkup( $separator, $text ) {
		$placeholder = "\x00";

		// Make sure the placeholder cannot already occur in the text.
		$text = str_replace( $placeholder, '', $text );

		// Hide separators that sit inside <...> behind the placeholder.
		$replacer = new DoubleReplacer( $separator, $placeholder );
		$cleaned = self::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

		// Split on the remaining separators, then restore the hidden ones.
		$items = explode( $separator, $cleaned );
		foreach ( $items as $key => $item ) {
			$items[$key] = str_replace( $placeholder, $separator, $item );
		}

		return $items;
	}

	/**
	 * Escape a string so that it can be used literally as a regular
	 * expression replacement: every backslash is doubled and every "$" is
	 * prefixed with a backslash.
	 *
	 * @param $string String
	 * @return String
	 */
	public static function escapeRegexReplacement( $string ) {
		// Array arguments are applied in order: backslashes first, then "$",
		// so the backslash added in front of "$" is not doubled again.
		return str_replace( array( '\\', '$' ), array( '\\\\', '\\$' ), $string );
	}

	/**
	 * Split a string and return the pieces as an iterator.
	 *
	 * When the separator occurs more than 1000 times an ExplodeIterator is
	 * returned; otherwise the result of explode() wrapped in an ArrayIterator.
	 *
	 * @param $separator String
	 * @param $subject String
	 * @return ArrayIterator|ExplodeIterator
	 */
	public static function explode( $separator, $subject ) {
		return substr_count( $subject, $separator ) > 1000
			? new ExplodeIterator( $separator, $subject )
			: new ArrayIterator( explode( $separator, $subject ) );
	}
}
00226 
/**
 * Base class for "replacers": objects whose replace() method can be handed
 * to callback-taking functions such as call_user_func().
 *
 * This class does not define replace() itself; cb() simply names it, so it
 * must be provided by whatever object cb() is called on.
 */
class Replacer {

	/**
	 * Build a callback that invokes replace() on this object.
	 *
	 * The object is placed in the array by value. Objects are handles, so
	 * the callback still targets this very instance, and taking a reference
	 * to $this (a PHP 4 habit) is avoided because by-reference use of $this
	 * is unreliable on newer PHP versions.
	 *
	 * @return Array: callable of the form array( object, 'replace' )
	 */
	function cb() {
		return array( $this, 'replace' );
	}
}
00240 
/**
 * Replacer that fills a template containing $0, $1, ... placeholders with
 * the corresponding elements of a match array.
 */
class RegexlikeReplacer extends Replacer {
	/** Replacement template containing "$<key>" placeholders */
	public $r;

	/**
	 * @param $r String: replacement template
	 */
	public function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * Substitute every "$<key>" in the template with $matches[<key>].
	 *
	 * The substitution is done with strtr(), so all placeholders are
	 * replaced in a single pass over the template.
	 *
	 * @param $matches Array: match array, keyed by group index
	 * @return String
	 */
	public function replace( $matches ) {
		$substitutions = array();
		foreach ( $matches as $key => $value ) {
			$substitutions['$' . $key] = $value;
		}
		return strtr( $this->r, $substitutions );
	}

}
00267 
/**
 * Replacer that performs a plain str_replace() on one element of the match
 * array.
 */
class DoubleReplacer extends Replacer {
	/** String to search for */
	public $from;

	/** String to substitute for $from */
	public $to;

	/** Key of the match-array element that is processed */
	public $index;

	/**
	 * The three fields are declared explicitly above rather than being
	 * created on first assignment, so the class does not rely on dynamic
	 * properties.
	 *
	 * @param $from String: text to search for
	 * @param $to String: text to insert instead
	 * @param $index Integer: which element of the match array to process
	 */
	function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * Replace every occurrence of $from with $to in the selected match.
	 *
	 * @param $matches Array: match array
	 * @return String
	 */
	function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}
00292 
/**
 * Replacer that looks the matched text up in an associative array and
 * returns the stored value.
 */
class HashtableReplacer extends Replacer {
	/** Lookup table: matched text => replacement */
	public $table;

	/** Key of the match-array element used as the lookup key */
	public $index;

	/**
	 * @param $table Array: lookup table
	 * @param $index Integer: which element of the match array to look up
	 */
	public function __construct( $table, $index = 0 ) {
		$this->index = $index;
		$this->table = $table;
	}

	/**
	 * Return the table entry for the selected match.
	 *
	 * No existence check is made: the selected match is used directly as
	 * the array key.
	 *
	 * @param $matches Array: match array
	 * @return Mixed: the table entry
	 */
	public function replace( $matches ) {
		$lookupKey = $matches[$this->index];
		return $this->table[$lookupKey];
	}
}
00316 
/**
 * A table of from => to string pairs that can be applied to a subject in
 * one call, with a lazily built search structure that is discarded
 * whenever the table changes.
 */
class ReplacementArray {
	/** The from => to pairs (mostly private) */
	var $data = false;

	/**
	 * Prepared search structure returned by fss_prep_replace(), or false
	 * when it has not been built or is out of date (mostly private)
	 */
	var $fss = false;

	/**
	 * @param $data Array: initial from => to pairs
	 */
	function __construct( $data = array() ) {
		$this->data = $data;
	}

	/**
	 * Serialize only the pair table; the prepared search structure is
	 * rebuilt on demand.
	 *
	 * @return Array: names of the properties to serialize
	 */
	function __sleep() {
		return array( 'data' );
	}

	/**
	 * Mark the prepared search structure as not built after unserializing.
	 */
	function __wakeup() {
		$this->fss = false;
	}

	/**
	 * Replace the whole pair table.
	 *
	 * @param $data Array: from => to pairs
	 */
	function setArray( $data ) {
		$this->data = $data;
		$this->fss = false;
	}

	/**
	 * @return Array: the current from => to pairs
	 */
	function getArray() {
		return $this->data;
	}

	/**
	 * Add or overwrite a single pair.
	 *
	 * @param $from String
	 * @param $to String
	 */
	function setPair( $from, $to ) {
		$this->data[$from] = $to;
		$this->fss = false;
	}

	/**
	 * Add the given pairs, overwriting existing entries with the same key.
	 *
	 * array_replace() is used instead of array_merge() because array_merge()
	 * renumbers integer-like keys, which would turn a pair such as
	 * '1' => 'one' into 0 => 'one' and corrupt the table.
	 *
	 * @param $data Array: from => to pairs
	 */
	function mergeArray( $data ) {
		$this->data = array_replace( $this->data, $data );
		$this->fss = false;
	}

	/**
	 * Add all pairs of another ReplacementArray, overwriting existing
	 * entries with the same key. Keys are preserved exactly; see
	 * mergeArray() for why array_merge() is not used.
	 *
	 * @param $other ReplacementArray
	 */
	function merge( $other ) {
		$this->data = array_replace( $this->data, $other->data );
		$this->fss = false;
	}

	/**
	 * Remove the pair with the given key, if present.
	 *
	 * @param $from String
	 */
	function removePair( $from ) {
		unset( $this->data[$from] );
		$this->fss = false;
	}

	/**
	 * Remove every pair whose key appears as a key of $data. The values of
	 * $data are ignored.
	 *
	 * @param $data Array: keys to remove
	 */
	function removeArray( $data ) {
		foreach ( $data as $from => $to ) {
			$this->removePair( $from );
		}
		$this->fss = false;
	}

	/**
	 * Apply all pairs to $subject.
	 *
	 * When the function fss_prep_replace() exists, the table is prepared
	 * once with it and applied with fss_exec_replace(); otherwise strtr()
	 * is used. Each path is wrapped in wfProfileIn()/wfProfileOut().
	 *
	 * @param $subject String
	 * @return String
	 */
	function replace( $subject ) {
		if ( function_exists( 'fss_prep_replace' ) ) {
			wfProfileIn( __METHOD__.'-fss' );
			if ( $this->fss === false ) {
				$this->fss = fss_prep_replace( $this->data );
			}
			$result = fss_exec_replace( $this->fss, $subject );
			wfProfileOut( __METHOD__.'-fss' );
		} else {
			wfProfileIn( __METHOD__.'-strtr' );
			$result = strtr( $subject, $this->data );
			wfProfileOut( __METHOD__.'-strtr' );
		}
		return $result;
	}
}
00424 
/**
 * Iterator that walks the pieces of a delimited string one at a time,
 * locating each delimiter with strpos() as it goes instead of building the
 * full array of pieces up front.
 *
 * The key of each element is the byte offset at which the piece starts.
 */
class ExplodeIterator implements Iterator {
	// The string being split, and its length
	public $subject, $subjectLength;

	// The delimiter, and its length
	public $delim, $delimLength;

	// Offset where the current piece starts; false once iteration is over
	public $curPos;

	// Offset of the delimiter that ends the current piece; false if none
	public $endPos;

	// The current piece
	public $current;

	/**
	 * Set up the iterator and position it on the first piece.
	 *
	 * @param $delim String: delimiter
	 * @param $s String: string to split
	 */
	public function __construct( $delim, $s ) {
		$this->subject = $s;
		$this->subjectLength = strlen( $s );

		$this->delim = $delim;
		$this->delimLength = strlen( $delim );

		$this->rewind();
	}

	/**
	 * Go back to the first piece.
	 */
	public function rewind() {
		$this->curPos = 0;
		$this->endPos = strpos( $this->subject, $this->delim );
		$this->refreshCurrent();
	}

	/**
	 * Recompute the current piece from curPos and endPos.
	 */
	public function refreshCurrent() {
		$start = $this->curPos;

		if ( $start === false ) {
			// Iteration has finished.
			$this->current = false;
			return;
		}
		if ( $start >= $this->subjectLength ) {
			// Empty subject, or the subject ended with a delimiter.
			$this->current = '';
			return;
		}
		$this->current = ( $this->endPos === false )
			// Last piece: runs to the end of the subject.
			? substr( $this->subject, $start )
			: substr( $this->subject, $start, $this->endPos - $start );
	}

	/**
	 * @return String|false: the current piece, or false past the end
	 */
	public function current() {
		return $this->current;
	}

	/**
	 * @return Integer|false: start offset of the current piece
	 */
	public function key() {
		return $this->curPos;
	}

	/**
	 * Advance to the next piece.
	 *
	 * @return String|false: the new current piece, or false past the end
	 */
	public function next() {
		if ( $this->endPos !== false ) {
			$nextStart = $this->endPos + $this->delimLength;
			$this->curPos = $nextStart;
			// Do not search when the delimiter was the last thing in the subject.
			$this->endPos = ( $nextStart < $this->subjectLength )
				? strpos( $this->subject, $this->delim, $nextStart )
				: false;
		} else {
			// The piece just consumed was the last one.
			$this->curPos = false;
		}
		$this->refreshCurrent();
		return $this->current;
	}

	/**
	 * @return Boolean: true while there is a current piece
	 */
	public function valid() {
		return $this->curPos !== false;
	}
}