MediaWiki  REL1_19
StringUtils.php
Go to the documentation of this file.
00001 <?php
/**
 * A collection of static string helpers built around delimiter-based
 * replacement and splitting.
 */
class StringUtils {
    /**
     * Replace START...END regions of $subject with $replace.
     *
     * The subject is split on $startDelim; in every piece that follows a
     * start delimiter, everything up to and including the first $endDelim is
     * replaced by $replace. A start delimiter that has no end delimiter
     * before the next start delimiter (or before the end of the string) is
     * copied to the output unchanged.
     *
     * Because the whole subject is exploded up front, this is simple but
     * keeps a second copy of the subject in memory.
     *
     * @param $startDelim String: literal start delimiter
     * @param $endDelim String: literal end delimiter
     * @param $replace String: literal replacement text
     * @param $subject String: text to process
     * @return String
     */
    public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
        $segments = explode( $startDelim, $subject );
        // The first segment precedes any start delimiter, so it is literal text
        $output = array_shift( $segments );
        $endDelimLength = strlen( $endDelim );
        foreach ( $segments as $s ) {
            $endDelimPos = strpos( $s, $endDelim );
            if ( $endDelimPos === false ) {
                // Unterminated start delimiter: put it back untouched
                $output .= $startDelim . $s;
            } else {
                $output .= $replace . substr( $s, $endDelimPos + $endDelimLength );
            }
        }
        return $output;
    }

    /**
     * Replace START...END regions of $subject with the result of a callback.
     *
     * The subject is scanned once with a regex that matches either delimiter.
     * When several start delimiters precede an end delimiter, the outermost
     * (first) start is the one that is paired with the end. End delimiters
     * that have no preceding start, and a trailing start without an end, are
     * copied to the output unchanged.
     *
     * The callback receives one array argument:
     *   - [0] the matched region including both delimiters
     *   - [1] the text between the delimiters
     * and its return value is appended to the output in place of the region.
     *
     * @param $startDelim String: literal start delimiter
     * @param $endDelim String: literal end delimiter
     * @param $callback Callback: function to call on each match
     * @param $subject String: text to process
     * @param $flags String: regex modifiers appended to the delimiter pattern;
     *   if it contains 'i' the end delimiter is also compared
     *   case-insensitively
     * @return String
     * @throws MWException if a match captures neither delimiter (for
     *   instance when a delimiter is the empty string)
     */
    public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
        $inputPos = 0;    // where the next regex search begins
        $outputPos = 0;   // first byte of $subject not yet copied to $output
        $contentPos = 0;  // first byte after the currently open start delimiter
        $output = '';
        $foundStart = false;
        $encStart = preg_quote( $startDelim, '!' );
        $encEnd = preg_quote( $endDelim, '!' );
        $regex = "!($encStart)|($encEnd)!S$flags";
        $strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
        $endLength = strlen( $endDelim );
        $subjectLength = strlen( $subject );
        $m = array();

        while ( $inputPos < $subjectLength &&
            preg_match( $regex, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
        ) {
            $tokenOffset = $m[0][1];
            if ( $m[1][0] !== '' ) {
                if ( $foundStart &&
                    $strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) === 0
                ) {
                    # An end match is present at the same location
                    $tokenType = 'end';
                    $tokenLength = $endLength;
                } else {
                    $tokenType = 'start';
                    $tokenLength = strlen( $m[0][0] );
                }
            } elseif ( isset( $m[2] ) && $m[2][0] !== '' ) {
                # preg_match() omits trailing groups that did not take part in
                # the match, hence the isset() guard
                $tokenType = 'end';
                $tokenLength = strlen( $m[0][0] );
            } else {
                throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
            }

            if ( $tokenType == 'start' ) {
                # Only move the start position if we haven't already found a start
                # This means that START START END matches outer pair
                if ( !$foundStart ) {
                    # Found start
                    $inputPos = $tokenOffset + $tokenLength;
                    # Write out the non-matching section
                    $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
                    $outputPos = $tokenOffset;
                    $contentPos = $inputPos;
                    $foundStart = true;
                } else {
                    # Move the input position past the *first character* of START,
                    # to protect against missing END when it overlaps with START
                    $inputPos = $tokenOffset + 1;
                }
            } else {
                if ( $foundStart ) {
                    # Found match
                    $output .= call_user_func( $callback, array(
                        substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
                        substr( $subject, $contentPos, $tokenOffset - $contentPos )
                    ) );
                    $foundStart = false;
                } else {
                    # Non-matching end, write it out together with the literal
                    # text before it. With no start open, $inputPos and
                    # $outputPos are equal here; use $outputPos for both the
                    # offset and the length so the two cannot drift apart.
                    $output .= substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos );
                }
                $inputPos = $outputPos = $tokenOffset + $tokenLength;
            }
        }
        # Copy whatever is left, including an unterminated START region
        if ( $outputPos < $subjectLength ) {
            $output .= substr( $subject, $outputPos );
        }
        return $output;
    }

    /**
     * Replace START...END regions of $subject with a replacement template.
     *
     * Works like delimiterReplaceCallback(), with the callback supplied by a
     * RegexlikeReplacer built from $replace, so "$0" and "$1" in $replace
     * stand for the full region and the text between the delimiters.
     *
     * @param $startDelim String: literal start delimiter
     * @param $endDelim String: literal end delimiter
     * @param $replace String: replacement template
     * @param $subject String: text to process
     * @param $flags String: regex modifiers, see delimiterReplaceCallback()
     * @return String
     */
    public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
        $replacer = new RegexlikeReplacer( $replace );
        return self::delimiterReplaceCallback( $startDelim, $endDelim,
            $replacer->cb(), $subject, $flags );
    }

    /**
     * Split $text on $separator, ignoring separators that appear between
     * '<' and '>'.
     *
     * NUL bytes are used internally as a placeholder and are therefore
     * stripped from $text before processing.
     *
     * @param $separator String
     * @param $text String
     * @return array of strings
     */
    public static function explodeMarkup( $separator, $text ) {
        $placeholder = "\x00";

        // Remove placeholder instances
        $text = str_replace( $placeholder, '', $text );

        // Replace instances of the separator inside HTML-like tags with the placeholder
        $replacer = new DoubleReplacer( $separator, $placeholder );
        $cleaned = self::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

        // Explode, then put the replaced separators back in
        $items = explode( $separator, $cleaned );
        foreach ( $items as $i => $str ) {
            $items[$i] = str_replace( $placeholder, $separator, $str );
        }

        return $items;
    }

    /**
     * Escape a string so it can be used literally as a regex replacement:
     * backslashes are doubled and '$' is prefixed with a backslash.
     *
     * @param $string String
     * @return String
     */
    public static function escapeRegexReplacement( $string ) {
        // Backslashes must be handled first so the ones added for '$' are not doubled
        $string = str_replace( '\\', '\\\\', $string );
        $string = str_replace( '$', '\\$', $string );
        return $string;
    }

    /**
     * Split $subject on $separator and return the pieces as an iterator.
     *
     * Subjects containing more than 1000 separators are wrapped in an
     * ExplodeIterator, which produces the pieces one at a time; smaller ones
     * are exploded immediately into an ArrayIterator. Note that the keys
     * differ: ArrayIterator yields list indexes while ExplodeIterator::key()
     * yields the position of the piece in $subject.
     *
     * @param $separator String
     * @param $subject String
     * @return ArrayIterator|ExplodeIterator
     */
    public static function explode( $separator, $subject ) {
        if ( substr_count( $subject, $separator ) > 1000 ) {
            return new ExplodeIterator( $separator, $subject );
        } else {
            return new ArrayIterator( explode( $separator, $subject ) );
        }
    }
}
00204 
/**
 * Base class for "replacers": objects that provide a replace( $matches )
 * method intended for use as a preg_replace_callback()-style callback.
 * Subclasses supply replace(); this class only builds the callable.
 */
class Replacer {
    /**
     * Get a callable that invokes $this->replace().
     *
     * The object is stored by value rather than as a reference to $this:
     * objects are handles, so a reference is unnecessary, and by-reference
     * use of $this is problematic on PHP 7.1 and later.
     *
     * @return array callable of the form array( object, 'replace' )
     */
    public function cb() {
        return array( $this, 'replace' );
    }
}
00214 
/**
 * Replacer that fills a template string in the manner of a regex
 * replacement: "$0", "$1", ... in the template are substituted with the
 * corresponding entries of the match array.
 */
class RegexlikeReplacer extends Replacer {
    /** Replacement template */
    public $r;

    /**
     * @param $r String: replacement template containing "$N" placeholders
     */
    public function __construct( $r ) {
        $this->r = $r;
    }

    /**
     * Expand the template for one match.
     *
     * @param $matches array: match array; each key N is exposed as "$N"
     * @return String
     */
    public function replace( $matches ) {
        $substitutions = array();
        foreach ( $matches as $group => $text ) {
            $placeholder = '$' . $group;
            $substitutions[$placeholder] = $text;
        }
        // All placeholders are substituted in a single strtr() pass
        return strtr( $this->r, $substitutions );
    }
}
00233 
/**
 * Replacer that performs a plain string replacement inside one element of
 * the match array.
 */
class DoubleReplacer extends Replacer {
    // Declared explicitly so the constructor does not create dynamic
    // properties (deprecated as of PHP 8.2).

    /** String to search for */
    public $from;

    /** String to substitute for $from */
    public $to;

    /** Key of the match array element to operate on */
    public $index;

    /**
     * @param $from String: text to search for
     * @param $to String: text to replace it with
     * @param $index Integer: which element of the match array to process
     */
    public function __construct( $from, $to, $index = 0 ) {
        $this->from = $from;
        $this->to = $to;
        $this->index = $index;
    }

    /**
     * @param $matches array: match array
     * @return String: $matches[$index] with every $from replaced by $to
     */
    public function replace( $matches ) {
        return str_replace( $this->from, $this->to, $matches[$this->index] );
    }
}
00248 
/**
 * Replacer that looks the matched text up in an associative array and
 * returns the stored value.
 */
class HashtableReplacer extends Replacer {
    /** Lookup table, keyed by matched text */
    public $table;

    /** Key of the match array element used as the lookup key */
    public $index;

    /**
     * @param $table array: lookup table
     * @param $index Integer: which element of the match array to look up
     */
    public function __construct( $table, $index = 0 ) {
        $this->index = $index;
        $this->table = $table;
    }

    /**
     * @param $matches array: match array
     * @return mixed the table entry stored under $matches[$index]
     */
    public function replace( $matches ) {
        $key = $matches[$this->index];
        return $this->table[$key];
    }
}
00264 
/**
 * A set of from => to replacement pairs that can be applied to a string.
 *
 * replace() uses fss_prep_replace()/fss_exec_replace() when those functions
 * exist (an optional extension, presumably FastStringSearch) and falls back
 * to strtr() otherwise. The prepared fss handle is cached in $fss and is
 * discarded whenever the pairs change.
 */
class ReplacementArray {
    /** Replacement pairs (from => to); mostly private */
    public $data = false;

    /** Cached handle from fss_prep_replace(), or false; mostly private */
    public $fss = false;

    /**
     * @param $data array: replacement pairs, from => to
     */
    public function __construct( $data = array() ) {
        $this->data = $data;
    }

    /**
     * Serialize only the pairs; the fss handle is rebuilt on demand.
     *
     * @return array
     */
    public function __sleep() {
        return array( 'data' );
    }

    public function __wakeup() {
        $this->fss = false;
    }

    /**
     * Replace the whole set of pairs.
     *
     * @param $data array: replacement pairs, from => to
     */
    public function setArray( $data ) {
        $this->data = $data;
        $this->fss = false;
    }

    /**
     * @return array the current replacement pairs
     */
    public function getArray() {
        return $this->data;
    }

    /**
     * Add or overwrite a single pair.
     *
     * @param $from String
     * @param $to String
     */
    public function setPair( $from, $to ) {
        $this->data[$from] = $to;
        $this->fss = false;
    }

    /**
     * Add the given pairs, overwriting existing pairs with the same key.
     *
     * The pairs are assigned one by one rather than with array_merge():
     * array_merge() renumbers integer-like keys, which would silently turn a
     * pair such as '1' => 'one' into '0' => 'one'.
     *
     * @param $data array: replacement pairs, from => to
     */
    public function mergeArray( $data ) {
        foreach ( $data as $from => $to ) {
            $this->data[$from] = $to;
        }
        $this->fss = false;
    }

    /**
     * Add all pairs of another ReplacementArray, overwriting existing pairs
     * with the same key.
     *
     * @param $other ReplacementArray
     */
    public function merge( $other ) {
        $this->mergeArray( $other->data );
    }

    /**
     * Remove the pair with the given key, if present.
     *
     * @param $from String
     */
    public function removePair( $from ) {
        unset( $this->data[$from] );
        $this->fss = false;
    }

    /**
     * Remove every pair whose key appears as a key of $data; the values of
     * $data are ignored.
     *
     * @param $data array
     */
    public function removeArray( $data ) {
        foreach ( $data as $from => $to ) {
            $this->removePair( $from );
        }
        $this->fss = false;
    }

    /**
     * Apply all replacement pairs to $subject.
     *
     * @param $subject String
     * @return String
     */
    public function replace( $subject ) {
        if ( function_exists( 'fss_prep_replace' ) ) {
            wfProfileIn( __METHOD__.'-fss' );
            if ( $this->fss === false ) {
                // Prepare once and reuse until the pairs change
                $this->fss = fss_prep_replace( $this->data );
            }
            $result = fss_exec_replace( $this->fss, $subject );
            wfProfileOut( __METHOD__.'-fss' );
        } else {
            wfProfileIn( __METHOD__.'-strtr' );
            $result = strtr( $subject, $this->data );
            wfProfileOut( __METHOD__.'-strtr' );
        }
        return $result;
    }
}
00346 
/**
 * An iterator that yields the same pieces as explode(), but extracts them
 * from the subject one at a time instead of building the whole array.
 *
 * The key of each piece is its position in the subject string, not a
 * sequential index.
 *
 * The #[\ReturnTypeWillChange] markers silence the return-type deprecation
 * notices for Iterator methods on PHP 8.1+; older PHP versions read each of
 * those lines as an ordinary "#" comment.
 */
class ExplodeIterator implements Iterator {
    // The subject string
    public $subject, $subjectLength;

    // The delimiter
    public $delim, $delimLength;

    // The position of the start of the line, or false once iteration is over
    public $curPos;

    // The position of the next delimiter, or false if there is none left
    public $endPos;

    // The current token
    public $current;

    /**
     * Construct the iterator and position it on the first piece.
     *
     * @param $delim String: delimiter to split on
     * @param $s String: subject string
     */
    public function __construct( $delim, $s ) {
        $this->subject = $s;
        $this->delim = $delim;

        // Micro-optimisation (theoretical)
        $this->subjectLength = strlen( $s );
        $this->delimLength = strlen( $delim );

        $this->rewind();
    }

    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->curPos = 0;
        $this->endPos = strpos( $this->subject, $this->delim );
        $this->refreshCurrent();
    }

    /**
     * Recompute $current from $curPos and $endPos.
     */
    public function refreshCurrent() {
        if ( $this->curPos === false ) {
            // Past the last piece
            $this->current = false;
        } elseif ( $this->curPos >= $this->subjectLength ) {
            // Empty subject or trailing delimiter: the final piece is empty
            $this->current = '';
        } elseif ( $this->endPos === false ) {
            // No further delimiter: the piece runs to the end of the subject
            $this->current = substr( $this->subject, $this->curPos );
        } else {
            $this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
        }
    }

    /**
     * @return String|false the current piece, or false when iteration is over
     */
    #[\ReturnTypeWillChange]
    public function current() {
        return $this->current;
    }

    /**
     * @return Integer|false position of the current piece in the subject
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->curPos;
    }

    /**
     * Advance to the next piece.
     *
     * @return String|false the new current piece, or false when iteration is over
     */
    #[\ReturnTypeWillChange]
    public function next() {
        if ( $this->endPos === false ) {
            // The piece just consumed was the last one
            $this->curPos = false;
        } else {
            $this->curPos = $this->endPos + $this->delimLength;
            if ( $this->curPos >= $this->subjectLength ) {
                $this->endPos = false;
            } else {
                $this->endPos = strpos( $this->subject, $this->delim, $this->curPos );
            }
        }
        $this->refreshCurrent();
        return $this->current;
    }

    /**
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        return $this->curPos !== false;
    }
}
00432