MediaWiki
REL1_21
|
<?php

/**
 * Static helpers for validating, splitting and rewriting strings.
 */
class StringUtils {

	/**
	 * Test whether a string is valid UTF-8.
	 *
	 * Strings without any byte in the range 0x80-0xff are accepted
	 * immediately. Otherwise the check is delegated to mb_check_encoding()
	 * when that function exists, and to a regular expression when it does
	 * not (or when $disableMbstring is true).
	 *
	 * Note that the regular expression fallback only checks lead/continuation
	 * byte structure: it also accepts 5-byte forms and 6-byte forms starting
	 * with 0xfc, so its verdict may differ from mb_check_encoding().
	 *
	 * @param string $value String to check
	 * @param bool $disableMbstring Force the regular expression code path
	 *   even if mb_check_encoding() is available
	 * @return bool Whether the given $value is considered valid UTF-8
	 */
	public static function isUtf8( $value, $disableMbstring = false ) {

		if ( preg_match( '/[\x80-\xff]/', $value ) === 0 ) {
			# no high bit set, this is pure ASCII which is de facto
			# valid UTF-8
			return true;
		}

		if ( !$disableMbstring && function_exists( 'mb_check_encoding' ) ) {
			return mb_check_encoding( $value, 'UTF-8' );
		} else {
			$hasUtf8 = preg_match( '/^(?>
				  [\x00-\x7f]
				| [\xc0-\xdf][\x80-\xbf]
				| [\xe0-\xef][\x80-\xbf]{2}
				| [\xf0-\xf7][\x80-\xbf]{3}
				| [\xf8-\xfb][\x80-\xbf]{4}
				| \xfc[\x84-\xbf][\x80-\xbf]{4}
			)+$/x', $value );
			return ( $hasUtf8 > 0 );
		}
	}

	/**
	 * Replace delimited sections of a string, pairing every start delimiter
	 * with the first end delimiter that follows it.
	 *
	 * The subject is split on $startDelim. In each following segment,
	 * everything up to and including the first $endDelim is replaced with
	 * $replace. A segment without an end delimiter is written back unchanged,
	 * including its start delimiter.
	 *
	 * @param string $startDelim Start delimiter
	 * @param string $endDelim End delimiter
	 * @param string $replace Literal replacement text
	 * @param string $subject String to search
	 * @return string The string with the matched sections replaced
	 */
	public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
		$segments = explode( $startDelim, $subject );
		$output = array_shift( $segments );
		foreach ( $segments as $s ) {
			$endDelimPos = strpos( $s, $endDelim );
			if ( $endDelimPos === false ) {
				$output .= $startDelim . $s;
			} else {
				$output .= $replace . substr( $s, $endDelimPos + strlen( $endDelim ) );
			}
		}
		return $output;
	}

	/**
	 * Replace every section delimited by $startDelim ... $endDelim with the
	 * return value of a callback.
	 *
	 * When several start delimiters precede an end delimiter, the outermost
	 * (first) start delimiter is the one that is paired with it. An end
	 * delimiter with no pending start delimiter is copied to the output as is.
	 *
	 * The callback is invoked with a single array argument:
	 *   - index 0: the whole matched section, delimiters included
	 *   - index 1: the text between the delimiters
	 *
	 * @param string $startDelim Start delimiter (taken literally)
	 * @param string $endDelim End delimiter (taken literally)
	 * @param callable $callback Function returning the replacement text
	 * @param string $subject String to search
	 * @param string $flags Regular expression modifiers appended to the
	 *   delimiter pattern; if it contains "i" the delimiters are compared
	 *   case-insensitively
	 * @throws MWException If a match yields neither a start nor an end token
	 * @return string The string with the matched sections replaced
	 */
	public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
		$inputPos = 0;
		$outputPos = 0;
		# Only meaningful while $foundStart is true; initialised so that it is
		# always defined.
		$contentPos = 0;
		$output = '';
		$foundStart = false;
		$encStart = preg_quote( $startDelim, '!' );
		$encEnd = preg_quote( $endDelim, '!' );
		$strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
		$endLength = strlen( $endDelim );
		$m = array();

		while ( $inputPos < strlen( $subject ) &&
			preg_match( "!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) )
		{
			$tokenOffset = $m[0][1];
			if ( $m[1][0] !== '' ) {
				if ( $foundStart &&
					$strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0 )
				{
					# An end match is present at the same location
					$tokenType = 'end';
					$tokenLength = $endLength;
				} else {
					$tokenType = 'start';
					$tokenLength = strlen( $m[0][0] );
				}
			} elseif ( $m[2][0] !== '' ) {
				$tokenType = 'end';
				$tokenLength = strlen( $m[0][0] );
			} else {
				throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
			}

			if ( $tokenType == 'start' ) {
				# Only move the start position if we haven't already found a start
				# This means that START START END matches outer pair
				if ( !$foundStart ) {
					# Found start
					$inputPos = $tokenOffset + $tokenLength;
					# Write out the non-matching section
					$output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
					$outputPos = $tokenOffset;
					$contentPos = $inputPos;
					$foundStart = true;
				} else {
					# Move the input position past the *first character* of START,
					# to protect against missing END when it overlaps with START
					$inputPos = $tokenOffset + 1;
				}
			} elseif ( $tokenType == 'end' ) {
				if ( $foundStart ) {
					# Found match
					$output .= call_user_func( $callback, array(
						substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
						substr( $subject, $contentPos, $tokenOffset - $contentPos )
					) );
					$foundStart = false;
				} else {
					# Non-matching end, write it out.
					# $inputPos and $outputPos are equal whenever no start is
					# pending, so mixing them here is harmless.
					$output .= substr( $subject, $inputPos, $tokenOffset + $tokenLength - $outputPos );
				}
				$inputPos = $outputPos = $tokenOffset + $tokenLength;
			} else {
				throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
			}
		}
		# Copy whatever follows the last processed token (this includes an
		# unterminated start delimiter and the text after it).
		if ( $outputPos < strlen( $subject ) ) {
			$output .= substr( $subject, $outputPos );
		}
		return $output;
	}

	/**
	 * Replace every section delimited by $startDelim ... $endDelim with a
	 * replacement template.
	 *
	 * In $replace, "$0" stands for the whole matched section including its
	 * delimiters and "$1" for the text between the delimiters.
	 *
	 * @param string $startDelim Start delimiter (taken literally)
	 * @param string $endDelim End delimiter (taken literally)
	 * @param string $replace Replacement template
	 * @param string $subject String to search
	 * @param string $flags Regular expression modifiers, see
	 *   delimiterReplaceCallback()
	 * @return string The string with the matched sections replaced
	 */
	public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
		$replacer = new RegexlikeReplacer( $replace );
		return self::delimiterReplaceCallback( $startDelim, $endDelim,
			$replacer->cb(), $subject, $flags );
	}

	/**
	 * Split a string on a separator, ignoring separators that occur between
	 * "<" and ">".
	 *
	 * NUL bytes are used internally as a placeholder and are therefore
	 * removed from $text before it is processed.
	 *
	 * @param string $separator Separator to split on
	 * @param string $text Text to split
	 * @return array The pieces of $text
	 */
	public static function explodeMarkup( $separator, $text ) {
		$placeholder = "\x00";

		// Remove placeholder instances
		$text = str_replace( $placeholder, '', $text );

		// Replace instances of the separator inside HTML-like tags with the placeholder
		$replacer = new DoubleReplacer( $separator, $placeholder );
		$cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

		// Explode, then put the replaced separators back in
		$items = explode( $separator, $cleaned );
		foreach ( $items as $i => $str ) {
			$items[$i] = str_replace( $placeholder, $separator, $str );
		}

		return $items;
	}

	/**
	 * Escape a string so that it can be used literally as the replacement
	 * argument of preg_replace(): backslashes are doubled and dollar signs
	 * are prefixed with a backslash.
	 *
	 * @param string $string String to escape
	 * @return string Escaped string
	 */
	public static function escapeRegexReplacement( $string ) {
		// Backslashes first, otherwise the ones added for "$" would be doubled
		$string = str_replace( '\\', '\\\\', $string );
		$string = str_replace( '$', '\\$', $string );
		return $string;
	}

	/**
	 * Split a string on a separator and return the pieces as an iterator.
	 *
	 * When the separator occurs more than 1000 times an ExplodeIterator is
	 * returned, which extracts the pieces one at a time; otherwise the result
	 * of PHP's explode() is wrapped in an ArrayIterator.
	 *
	 * @param string $separator Separator to split on
	 * @param string $subject String to split
	 * @return ArrayIterator|ExplodeIterator
	 */
	public static function explode( $separator, $subject ) {
		if ( substr_count( $subject, $separator ) > 1000 ) {
			return new ExplodeIterator( $separator, $subject );
		} else {
			return new ArrayIterator( explode( $separator, $subject ) );
		}
	}
}

/**
 * Base class for objects that expose their replace() method as a callback.
 * Subclasses are expected to provide replace().
 */
class Replacer {

	/**
	 * Get a callable referring to the replace() method of this object.
	 *
	 * @return array Callable of the form array( object, 'replace' )
	 */
	public function cb() {
		// Objects are passed by handle, so no reference operator is needed
		return array( $this, 'replace' );
	}
}

/**
 * Replacer that substitutes "$0", "$1", ... in a template with the
 * corresponding entries of the match array.
 */
class RegexlikeReplacer extends Replacer {
	/** Replacement template */
	public $r;

	/**
	 * @param string $r Replacement template containing "$n" placeholders
	 */
	public function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * @param array $matches Match array; entry n replaces "$n" in the template
	 * @return string The template with the placeholders filled in
	 */
	public function replace( $matches ) {
		$pairs = array();
		foreach ( $matches as $i => $match ) {
			$pairs["\$$i"] = $match;
		}
		return strtr( $this->r, $pairs );
	}

}

/**
 * Replacer that performs a plain string replacement inside one entry of the
 * match array.
 */
class DoubleReplacer extends Replacer {
	/** String to search for within the selected match */
	public $from;

	/** String to put in place of $from */
	public $to;

	/** Index into the match array selecting the text to work on */
	public $index;

	/**
	 * @param string $from String to search for
	 * @param string $to Replacement string
	 * @param int $index Index of the match array entry to operate on
	 */
	public function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * @param array $matches Match array
	 * @return string The selected match with $from replaced by $to
	 */
	public function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}

/**
 * Replacer that looks up one entry of the match array in a hashtable.
 */
class HashtableReplacer extends Replacer {
	/** Lookup table mapping matched text to its replacement */
	public $table;

	/** Index into the match array selecting the lookup key */
	public $index;

	/**
	 * @param array $table Lookup table
	 * @param int $index Index of the match array entry used as lookup key
	 */
	public function __construct( $table, $index = 0 ) {
		$this->table = $table;
		$this->index = $index;
	}

	/**
	 * @param array $matches Match array
	 * @return mixed The table entry stored under the selected match
	 */
	public function replace( $matches ) {
		return $this->table[$matches[$this->index]];
	}
}

/**
 * A set of from => to string replacement pairs, applied with strtr() or,
 * when the fss_prep_replace() function is available, with fss_exec_replace().
 */
class ReplacementArray {
	/** Replacement pairs (from => to). Mostly private. */
	public $data = false;

	/**
	 * Prepared value returned by fss_prep_replace(), or false when it has to
	 * be (re)built. Mostly private.
	 */
	public $fss = false;

	/**
	 * @param array $data Initial replacement pairs (from => to)
	 */
	public function __construct( $data = array() ) {
		$this->data = $data;
	}

	/**
	 * Serialize the replacement pairs only, not the prepared fss value.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		return array( 'data' );
	}

	/**
	 * Mark the prepared fss value as missing after unserialization.
	 */
	public function __wakeup() {
		$this->fss = false;
	}

	/**
	 * Replace the whole set of pairs.
	 *
	 * @param array $data New replacement pairs (from => to)
	 */
	public function setArray( $data ) {
		$this->data = $data;
		$this->fss = false;
	}

	/**
	 * @return array The current replacement pairs (from => to)
	 */
	public function getArray() {
		return $this->data;
	}

	/**
	 * Add or overwrite a single pair.
	 *
	 * @param string $from String to search for
	 * @param string $to Replacement string
	 */
	public function setPair( $from, $to ) {
		$this->data[$from] = $to;
		$this->fss = false;
	}

	/**
	 * Merge an array of pairs into the current set using array_merge().
	 *
	 * @param array $data Replacement pairs (from => to) to merge in
	 */
	public function mergeArray( $data ) {
		$this->data = array_merge( $this->data, $data );
		$this->fss = false;
	}

	/**
	 * Merge the pairs of another object into this one using array_merge().
	 *
	 * @param ReplacementArray $other Object whose $data is merged in
	 */
	public function merge( $other ) {
		$this->data = array_merge( $this->data, $other->data );
		$this->fss = false;
	}

	/**
	 * Remove a single pair.
	 *
	 * @param string $from Search string of the pair to remove
	 */
	public function removePair( $from ) {
		unset( $this->data[$from] );
		$this->fss = false;
	}

	/**
	 * Remove several pairs. Only the keys of $data are used.
	 *
	 * @param array $data Array whose keys are the search strings to remove
	 */
	public function removeArray( $data ) {
		foreach ( $data as $from => $to ) {
			$this->removePair( $from );
		}
		$this->fss = false;
	}

	/**
	 * Apply the replacement pairs to a string.
	 *
	 * @param string $subject String to process
	 * @return string The processed string
	 */
	public function replace( $subject ) {
		if ( function_exists( 'fss_prep_replace' ) ) {
			wfProfileIn( __METHOD__.'-fss' );
			// Build the prepared value lazily and reuse it until the data changes
			if ( $this->fss === false ) {
				$this->fss = fss_prep_replace( $this->data );
			}
			$result = fss_exec_replace( $this->fss, $subject );
			wfProfileOut( __METHOD__.'-fss' );
		} else {
			wfProfileIn( __METHOD__.'-strtr' );
			$result = strtr( $subject, $this->data );
			wfProfileOut( __METHOD__.'-strtr' );
		}
		return $result;
	}
}

/**
 * Iterator that yields the pieces of a delimited string one at a time,
 * instead of building the complete array like explode() does.
 *
 * The key of each piece is the byte offset at which it starts in the subject.
 */
class ExplodeIterator implements Iterator {
	// The subject string
	public $subject, $subjectLength;

	// The delimiter
	public $delim, $delimLength;

	// The position of the start of the line
	public $curPos;

	// The position after the end of the next delimiter
	public $endPos;

	// The current token
	public $current;

	/**
	 * Construct the iterator and position it on the first piece.
	 *
	 * @param string $delim Delimiter to split on
	 * @param string $s Subject string
	 */
	public function __construct( $delim, $s ) {
		$this->subject = $s;
		$this->delim = $delim;

		// Micro-optimisation (theoretical)
		$this->subjectLength = strlen( $s );
		$this->delimLength = strlen( $delim );

		$this->rewind();
	}

	/**
	 * Move back to the first piece.
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		$this->curPos = 0;
		$this->endPos = strpos( $this->subject, $this->delim );
		$this->refreshCurrent();
	}

	/**
	 * Recompute the current piece from $curPos and $endPos.
	 */
	public function refreshCurrent() {
		if ( $this->curPos === false ) {
			// Iteration is finished
			$this->current = false;
		} elseif ( $this->curPos >= $this->subjectLength ) {
			// Empty piece at the very end of the subject
			$this->current = '';
		} elseif ( $this->endPos === false ) {
			// Last piece: no further delimiter
			$this->current = substr( $this->subject, $this->curPos );
		} else {
			$this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
		}
	}

	/**
	 * @return string|bool The current piece, or false past the end
	 */
	#[\ReturnTypeWillChange]
	public function current() {
		return $this->current;
	}

	/**
	 * @return int|bool Byte offset of the current piece, or false past the end
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->curPos;
	}

	/**
	 * Advance to the next piece.
	 *
	 * @return string|bool The new current piece, or false past the end
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		if ( $this->endPos === false ) {
			$this->curPos = false;
		} else {
			$this->curPos = $this->endPos + $this->delimLength;
			if ( $this->curPos >= $this->subjectLength ) {
				$this->endPos = false;
			} else {
				$this->endPos = strpos( $this->subject, $this->delim, $this->curPos );
			}
		}
		$this->refreshCurrent();
		return $this->current;
	}

	/**
	 * @return bool Whether the iterator currently points at a piece
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		return $this->curPos !== false;
	}
}