MediaWiki  REL1_24
LinkHolderArray.php
Go to the documentation of this file.
00001 <?php
00027 class LinkHolderArray {
00028     public $internals = array();
00029     public $interwikis = array();
00030     public $size = 0;
00031 
00035     public $parent;
00036     protected $tempIdOffset;
00037 
00041     public function __construct( $parent ) {
00042         $this->parent = $parent;
00043     }
00044 
00048     public function __destruct() {
00049         foreach ( $this as $name => $value ) {
00050             unset( $this->$name );
00051         }
00052     }
00053 
00062     public function __sleep() {
00063         foreach ( $this->internals as &$nsLinks ) {
00064             foreach ( $nsLinks as &$entry ) {
00065                 unset( $entry['title'] );
00066             }
00067         }
00068         unset( $nsLinks );
00069         unset( $entry );
00070 
00071         foreach ( $this->interwikis as &$entry ) {
00072             unset( $entry['title'] );
00073         }
00074         unset( $entry );
00075 
00076         return array( 'internals', 'interwikis', 'size' );
00077     }
00078 
00082     public function __wakeup() {
00083         foreach ( $this->internals as &$nsLinks ) {
00084             foreach ( $nsLinks as &$entry ) {
00085                 $entry['title'] = Title::newFromText( $entry['pdbk'] );
00086             }
00087         }
00088         unset( $nsLinks );
00089         unset( $entry );
00090 
00091         foreach ( $this->interwikis as &$entry ) {
00092             $entry['title'] = Title::newFromText( $entry['pdbk'] );
00093         }
00094         unset( $entry );
00095     }
00096 
00101     public function merge( $other ) {
00102         foreach ( $other->internals as $ns => $entries ) {
00103             $this->size += count( $entries );
00104             if ( !isset( $this->internals[$ns] ) ) {
00105                 $this->internals[$ns] = $entries;
00106             } else {
00107                 $this->internals[$ns] += $entries;
00108             }
00109         }
00110         $this->interwikis += $other->interwikis;
00111     }
00112 
00125     public function mergeForeign( $other, $texts ) {
00126         $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
00127         $maxId = 0;
00128 
00129         # Renumber internal links
00130         foreach ( $other->internals as $ns => $nsLinks ) {
00131             foreach ( $nsLinks as $key => $entry ) {
00132                 $newKey = $idOffset + $key;
00133                 $this->internals[$ns][$newKey] = $entry;
00134                 $maxId = $newKey > $maxId ? $newKey : $maxId;
00135             }
00136         }
00137         $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
00138             array( $this, 'mergeForeignCallback' ), $texts );
00139 
00140         # Renumber interwiki links
00141         foreach ( $other->interwikis as $key => $entry ) {
00142             $newKey = $idOffset + $key;
00143             $this->interwikis[$newKey] = $entry;
00144             $maxId = $newKey > $maxId ? $newKey : $maxId;
00145         }
00146         $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
00147             array( $this, 'mergeForeignCallback' ), $texts );
00148 
00149         # Set the parent link ID to be beyond the highest used ID
00150         $this->parent->setLinkID( $maxId + 1 );
00151         $this->tempIdOffset = null;
00152         return $texts;
00153     }
00154 
00159     protected function mergeForeignCallback( $m ) {
00160         return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
00161     }
00162 
00169     public function getSubArray( $text ) {
00170         $sub = new LinkHolderArray( $this->parent );
00171 
00172         # Internal links
00173         $pos = 0;
00174         while ( $pos < strlen( $text ) ) {
00175             if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/',
00176                 $text, $m, PREG_OFFSET_CAPTURE, $pos )
00177             ) {
00178                 break;
00179             }
00180             $ns = $m[1][0];
00181             $key = $m[2][0];
00182             $sub->internals[$ns][$key] = $this->internals[$ns][$key];
00183             $pos = $m[0][1] + strlen( $m[0][0] );
00184         }
00185 
00186         # Interwiki links
00187         $pos = 0;
00188         while ( $pos < strlen( $text ) ) {
00189             if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) {
00190                 break;
00191             }
00192             $key = $m[1][0];
00193             $sub->interwikis[$key] = $this->interwikis[$key];
00194             $pos = $m[0][1] + strlen( $m[0][0] );
00195         }
00196         return $sub;
00197     }
00198 
00203     public function isBig() {
00204         global $wgLinkHolderBatchSize;
00205         return $this->size > $wgLinkHolderBatchSize;
00206     }
00207 
00212     public function clear() {
00213         $this->internals = array();
00214         $this->interwikis = array();
00215         $this->size = 0;
00216     }
00217 
00231     public function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
00232         wfProfileIn( __METHOD__ );
00233         if ( !is_object( $nt ) ) {
00234             # Fail gracefully
00235             $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
00236         } else {
00237             # Separate the link trail from the rest of the link
00238             list( $inside, $trail ) = Linker::splitTrail( $trail );
00239 
00240             $entry = array(
00241                 'title' => $nt,
00242                 'text' => $prefix . $text . $inside,
00243                 'pdbk' => $nt->getPrefixedDBkey(),
00244             );
00245             if ( $query !== array() ) {
00246                 $entry['query'] = $query;
00247             }
00248 
00249             if ( $nt->isExternal() ) {
00250                 // Use a globally unique ID to keep the objects mergable
00251                 $key = $this->parent->nextLinkID();
00252                 $this->interwikis[$key] = $entry;
00253                 $retVal = "<!--IWLINK $key-->{$trail}";
00254             } else {
00255                 $key = $this->parent->nextLinkID();
00256                 $ns = $nt->getNamespace();
00257                 $this->internals[$ns][$key] = $entry;
00258                 $retVal = "<!--LINK $ns:$key-->{$trail}";
00259             }
00260             $this->size++;
00261         }
00262         wfProfileOut( __METHOD__ );
00263         return $retVal;
00264     }
00265 
00272     public function replace( &$text ) {
00273         wfProfileIn( __METHOD__ );
00274 
00276         $colours = $this->replaceInternal( $text );
00277         $this->replaceInterwiki( $text );
00278 
00279         wfProfileOut( __METHOD__ );
00280         return $colours;
00281     }
00282 
00287     protected function replaceInternal( &$text ) {
00288         if ( !$this->internals ) {
00289             return;
00290         }
00291 
00292         wfProfileIn( __METHOD__ );
00293         global $wgContLang, $wgContentHandlerUseDB;
00294 
00295         $colours = array();
00296         $linkCache = LinkCache::singleton();
00297         $output = $this->parent->getOutput();
00298 
00299         wfProfileIn( __METHOD__ . '-check' );
00300         $dbr = wfGetDB( DB_SLAVE );
00301         $threshold = $this->parent->getOptions()->getStubThreshold();
00302 
00303         # Sort by namespace
00304         ksort( $this->internals );
00305 
00306         $linkcolour_ids = array();
00307 
00308         # Generate query
00309         $queries = array();
00310         foreach ( $this->internals as $ns => $entries ) {
00311             foreach ( $entries as $entry ) {
00313                 $title = $entry['title'];
00314                 $pdbk = $entry['pdbk'];
00315 
00316                 # Skip invalid entries.
00317                 # Result will be ugly, but prevents crash.
00318                 if ( is_null( $title ) ) {
00319                     continue;
00320                 }
00321 
00322                 # Check if it's a static known link, e.g. interwiki
00323                 if ( $title->isAlwaysKnown() ) {
00324                     $colours[$pdbk] = '';
00325                 } elseif ( $ns == NS_SPECIAL ) {
00326                     $colours[$pdbk] = 'new';
00327                 } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
00328                     $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
00329                     $output->addLink( $title, $id );
00330                     $linkcolour_ids[$id] = $pdbk;
00331                 } elseif ( $linkCache->isBadLink( $pdbk ) ) {
00332                     $colours[$pdbk] = 'new';
00333                 } else {
00334                     # Not in the link cache, add it to the query
00335                     $queries[$ns][] = $title->getDBkey();
00336                 }
00337             }
00338         }
00339         if ( $queries ) {
00340             $where = array();
00341             foreach ( $queries as $ns => $pages ) {
00342                 $where[] = $dbr->makeList(
00343                     array(
00344                         'page_namespace' => $ns,
00345                         'page_title' => array_unique( $pages ),
00346                     ),
00347                     LIST_AND
00348                 );
00349             }
00350 
00351             $fields = array( 'page_id', 'page_namespace', 'page_title',
00352                 'page_is_redirect', 'page_len', 'page_latest' );
00353 
00354             if ( $wgContentHandlerUseDB ) {
00355                 $fields[] = 'page_content_model';
00356             }
00357 
00358             $res = $dbr->select(
00359                 'page',
00360                 $fields,
00361                 $dbr->makeList( $where, LIST_OR ),
00362                 __METHOD__
00363             );
00364 
00365             # Fetch data and form into an associative array
00366             # non-existent = broken
00367             foreach ( $res as $s ) {
00368                 $title = Title::makeTitle( $s->page_namespace, $s->page_title );
00369                 $pdbk = $title->getPrefixedDBkey();
00370                 $linkCache->addGoodLinkObjFromRow( $title, $s );
00371                 $output->addLink( $title, $s->page_id );
00372                 # @todo FIXME: Convoluted data flow
00373                 # The redirect status and length is passed to getLinkColour via the LinkCache
00374                 # Use formal parameters instead
00375                 $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
00376                 //add id to the extension todolist
00377                 $linkcolour_ids[$s->page_id] = $pdbk;
00378             }
00379             unset( $res );
00380         }
00381         if ( count( $linkcolour_ids ) ) {
00382             //pass an array of page_ids to an extension
00383             wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
00384         }
00385         wfProfileOut( __METHOD__ . '-check' );
00386 
00387         # Do a second query for different language variants of links and categories
00388         if ( $wgContLang->hasVariants() ) {
00389             $this->doVariants( $colours );
00390         }
00391 
00392         # Construct search and replace arrays
00393         wfProfileIn( __METHOD__ . '-construct' );
00394         $replacePairs = array();
00395         foreach ( $this->internals as $ns => $entries ) {
00396             foreach ( $entries as $index => $entry ) {
00397                 $pdbk = $entry['pdbk'];
00398                 $title = $entry['title'];
00399                 $query = isset( $entry['query'] ) ? $entry['query'] : array();
00400                 $key = "$ns:$index";
00401                 $searchkey = "<!--LINK $key-->";
00402                 $displayText = $entry['text'];
00403                 if ( isset( $entry['selflink'] ) ) {
00404                     $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
00405                     continue;
00406                 }
00407                 if ( $displayText === '' ) {
00408                     $displayText = null;
00409                 }
00410                 if ( !isset( $colours[$pdbk] ) ) {
00411                     $colours[$pdbk] = 'new';
00412                 }
00413                 $attribs = array();
00414                 if ( $colours[$pdbk] == 'new' ) {
00415                     $linkCache->addBadLinkObj( $title );
00416                     $output->addLink( $title, 0 );
00417                     $type = array( 'broken' );
00418                 } else {
00419                     if ( $colours[$pdbk] != '' ) {
00420                         $attribs['class'] = $colours[$pdbk];
00421                     }
00422                     $type = array( 'known', 'noclasses' );
00423                 }
00424                 $replacePairs[$searchkey] = Linker::link( $title, $displayText,
00425                         $attribs, $query, $type );
00426             }
00427         }
00428         $replacer = new HashtableReplacer( $replacePairs, 1 );
00429         wfProfileOut( __METHOD__ . '-construct' );
00430 
00431         # Do the thing
00432         wfProfileIn( __METHOD__ . '-replace' );
00433         $text = preg_replace_callback(
00434             '/(<!--LINK .*?-->)/',
00435             $replacer->cb(),
00436             $text
00437         );
00438 
00439         wfProfileOut( __METHOD__ . '-replace' );
00440         wfProfileOut( __METHOD__ );
00441     }
00442 
00447     protected function replaceInterwiki( &$text ) {
00448         if ( empty( $this->interwikis ) ) {
00449             return;
00450         }
00451 
00452         wfProfileIn( __METHOD__ );
00453         # Make interwiki link HTML
00454         $output = $this->parent->getOutput();
00455         $replacePairs = array();
00456         foreach ( $this->interwikis as $key => $link ) {
00457             $replacePairs[$key] = Linker::link( $link['title'], $link['text'] );
00458             $output->addInterwikiLink( $link['title'] );
00459         }
00460         $replacer = new HashtableReplacer( $replacePairs, 1 );
00461 
00462         $text = preg_replace_callback(
00463             '/<!--IWLINK (.*?)-->/',
00464             $replacer->cb(),
00465             $text );
00466         wfProfileOut( __METHOD__ );
00467     }
00468 
00473     protected function doVariants( &$colours ) {
00474         global $wgContLang, $wgContentHandlerUseDB;
00475         $linkBatch = new LinkBatch();
00476         $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
00477         $output = $this->parent->getOutput();
00478         $linkCache = LinkCache::singleton();
00479         $threshold = $this->parent->getOptions()->getStubThreshold();
00480         $titlesToBeConverted = '';
00481         $titlesAttrs = array();
00482 
00483         // Concatenate titles to a single string, thus we only need auto convert the
00484         // single string to all variants. This would improve parser's performance
00485         // significantly.
00486         foreach ( $this->internals as $ns => $entries ) {
00487             if ( $ns == NS_SPECIAL ) {
00488                 continue;
00489             }
00490             foreach ( $entries as $index => $entry ) {
00491                 $pdbk = $entry['pdbk'];
00492                 // we only deal with new links (in its first query)
00493                 if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
00494                     $titlesAttrs[] = array( $index, $entry['title'] );
00495                     // separate titles with \0 because it would never appears
00496                     // in a valid title
00497                     $titlesToBeConverted .= $entry['title']->getText() . "\0";
00498                 }
00499             }
00500         }
00501 
00502         // Now do the conversion and explode string to text of titles
00503         $titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
00504         $allVariantsName = array_keys( $titlesAllVariants );
00505         foreach ( $titlesAllVariants as &$titlesVariant ) {
00506             $titlesVariant = explode( "\0", $titlesVariant );
00507         }
00508 
00509         // Then add variants of links to link batch
00510         $parentTitle = $this->parent->getTitle();
00511         foreach ( $titlesAttrs as $i => $attrs ) {
00513             list( $index, $title ) = $attrs;
00514             $ns = $title->getNamespace();
00515             $text = $title->getText();
00516 
00517             foreach ( $allVariantsName as $variantName ) {
00518                 $textVariant = $titlesAllVariants[$variantName][$i];
00519                 if ( $textVariant === $text ) {
00520                     continue;
00521                 }
00522 
00523                 $variantTitle = Title::makeTitle( $ns, $textVariant );
00524                 if ( is_null( $variantTitle ) ) {
00525                     continue;
00526                 }
00527 
00528                 // Self-link checking for mixed/different variant titles. At this point, we
00529                 // already know the exact title does not exist, so the link cannot be to a
00530                 // variant of the current title that exists as a separate page.
00531                 if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
00532                     $this->internals[$ns][$index]['selflink'] = true;
00533                     continue 2;
00534                 }
00535 
00536                 $linkBatch->addObj( $variantTitle );
00537                 $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
00538             }
00539         }
00540 
00541         // process categories, check if a category exists in some variant
00542         $categoryMap = array(); // maps $category_variant => $category (dbkeys)
00543         $varCategories = array(); // category replacements oldDBkey => newDBkey
00544         foreach ( $output->getCategoryLinks() as $category ) {
00545             $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
00546             $linkBatch->addObj( $categoryTitle );
00547             $variants = $wgContLang->autoConvertToAllVariants( $category );
00548             foreach ( $variants as $variant ) {
00549                 if ( $variant !== $category ) {
00550                     $variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
00551                     if ( is_null( $variantTitle ) ) {
00552                         continue;
00553                     }
00554                     $linkBatch->addObj( $variantTitle );
00555                     $categoryMap[$variant] = array( $category, $categoryTitle );
00556                 }
00557             }
00558         }
00559 
00560         if ( !$linkBatch->isEmpty() ) {
00561             // construct query
00562             $dbr = wfGetDB( DB_SLAVE );
00563             $fields = array( 'page_id', 'page_namespace', 'page_title',
00564                 'page_is_redirect', 'page_len', 'page_latest' );
00565 
00566             if ( $wgContentHandlerUseDB ) {
00567                 $fields[] = 'page_content_model';
00568             }
00569 
00570             $varRes = $dbr->select( 'page',
00571                 $fields,
00572                 $linkBatch->constructSet( 'page', $dbr ),
00573                 __METHOD__
00574             );
00575 
00576             $linkcolour_ids = array();
00577 
00578             // for each found variants, figure out link holders and replace
00579             foreach ( $varRes as $s ) {
00580 
00581                 $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
00582                 $varPdbk = $variantTitle->getPrefixedDBkey();
00583                 $vardbk = $variantTitle->getDBkey();
00584 
00585                 $holderKeys = array();
00586                 if ( isset( $variantMap[$varPdbk] ) ) {
00587                     $holderKeys = $variantMap[$varPdbk];
00588                     $linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
00589                     $output->addLink( $variantTitle, $s->page_id );
00590                 }
00591 
00592                 // loop over link holders
00593                 foreach ( $holderKeys as $key ) {
00594                     list( $ns, $index ) = explode( ':', $key, 2 );
00595                     $entry =& $this->internals[$ns][$index];
00596                     $pdbk = $entry['pdbk'];
00597 
00598                     if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
00599                         // found link in some of the variants, replace the link holder data
00600                         $entry['title'] = $variantTitle;
00601                         $entry['pdbk'] = $varPdbk;
00602 
00603                         // set pdbk and colour
00604                         # @todo FIXME: Convoluted data flow
00605                         # The redirect status and length is passed to getLinkColour via the LinkCache
00606                         # Use formal parameters instead
00607                         $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold );
00608                         $linkcolour_ids[$s->page_id] = $pdbk;
00609                     }
00610                 }
00611 
00612                 // check if the object is a variant of a category
00613                 if ( isset( $categoryMap[$vardbk] ) ) {
00614                     list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
00615                     if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
00616                         $varCategories[$oldkey] = $vardbk;
00617                     }
00618                 }
00619             }
00620             wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
00621 
00622             // rebuild the categories in original order (if there are replacements)
00623             if ( count( $varCategories ) > 0 ) {
00624                 $newCats = array();
00625                 $originalCats = $output->getCategories();
00626                 foreach ( $originalCats as $cat => $sortkey ) {
00627                     // make the replacement
00628                     if ( array_key_exists( $cat, $varCategories ) ) {
00629                         $newCats[$varCategories[$cat]] = $sortkey;
00630                     } else {
00631                         $newCats[$cat] = $sortkey;
00632                     }
00633                 }
00634                 $output->setCategoryLinks( $newCats );
00635             }
00636         }
00637     }
00638 
00646     public function replaceText( $text ) {
00647         wfProfileIn( __METHOD__ );
00648 
00649         $text = preg_replace_callback(
00650             '/<!--(LINK|IWLINK) (.*?)-->/',
00651             array( &$this, 'replaceTextCallback' ),
00652             $text );
00653 
00654         wfProfileOut( __METHOD__ );
00655         return $text;
00656     }
00657 
00665     public function replaceTextCallback( $matches ) {
00666         $type = $matches[1];
00667         $key = $matches[2];
00668         if ( $type == 'LINK' ) {
00669             list( $ns, $index ) = explode( ':', $key, 2 );
00670             if ( isset( $this->internals[$ns][$index]['text'] ) ) {
00671                 return $this->internals[$ns][$index]['text'];
00672             }
00673         } elseif ( $type == 'IWLINK' ) {
00674             if ( isset( $this->interwikis[$key]['text'] ) ) {
00675                 return $this->interwikis[$key]['text'];
00676             }
00677         }
00678         return $matches[0];
00679     }
00680 }