MediaWiki  REL1_23
LinkHolderArray.php
Go to the documentation of this file.
00001 <?php
/**
 * Collection of link placeholders and the data needed to expand them.
 *
 * makeHolder() stores a link entry and hands back an HTML comment marker
 * ("<!--LINK ns:id-->" for internal links, "<!--IWLINK id-->" for interwiki
 * links). replace() later swaps those markers for rendered links, doing the
 * page existence lookups in batches.
 */
class LinkHolderArray {
    /** @var array Internal link entries, indexed as [namespace][link ID] */
    public $internals = array();

    /** @var array Interwiki link entries, indexed by link ID */
    public $interwikis = array();

    /** @var int Number of holders stored; compared against $wgLinkHolderBatchSize by isBig() */
    public $size = 0;

    /**
     * @var object Owner object. This class calls nextLinkID(), setLinkID(),
     *   getOutput(), getOptions() and getTitle() on it (presumably the Parser -
     *   confirm against callers).
     */
    public $parent;

    /** @var int|null ID offset used by mergeForeignCallback() while mergeForeign() runs */
    protected $tempIdOffset;

    /**
     * @param object $parent Owner object, see $parent
     */
    public function __construct( $parent ) {
        $this->parent = $parent;
    }

    /**
     * Drop every property explicitly, so that references held by this object
     * (including the one to the parent) are released on destruction.
     */
    public function __destruct() {
        foreach ( $this as $name => $value ) {
            unset( $this->$name );
        }
    }

    /**
     * Prepare for serialization: strip the 'title' objects from all entries
     * (they are rebuilt from 'pdbk' in __wakeup()) and leave out the parent.
     *
     * @return array Names of the properties to serialize
     */
    public function __sleep() {
        foreach ( $this->internals as &$nsLinks ) {
            foreach ( $nsLinks as &$entry ) {
                unset( $entry['title'] );
            }
        }
        // Break the references before the variable names are reused below
        unset( $nsLinks );
        unset( $entry );

        foreach ( $this->interwikis as &$entry ) {
            unset( $entry['title'] );
        }
        unset( $entry );

        return array( 'internals', 'interwikis', 'size' );
    }

    /**
     * Rebuild the 'title' objects dropped by __sleep() from each entry's 'pdbk'.
     */
    public function __wakeup() {
        foreach ( $this->internals as &$nsLinks ) {
            foreach ( $nsLinks as &$entry ) {
                $entry['title'] = Title::newFromText( $entry['pdbk'] );
            }
        }
        unset( $nsLinks );
        unset( $entry );

        foreach ( $this->interwikis as &$entry ) {
            $entry['title'] = Title::newFromText( $entry['pdbk'] );
        }
        unset( $entry );
    }

    /**
     * Merge another LinkHolderArray into this one. Keys are kept as they are;
     * on a key collision the entry already present in this object wins.
     *
     * @param LinkHolderArray $other
     */
    public function merge( $other ) {
        foreach ( $other->internals as $ns => $entries ) {
            $this->size += count( $entries );
            if ( !isset( $this->internals[$ns] ) ) {
                $this->internals[$ns] = $entries;
            } else {
                $this->internals[$ns] += $entries;
            }
        }
        // makeHolder() counts interwiki holders in $size, so merging has to as well
        $this->size += count( $other->interwikis );
        $this->interwikis += $other->interwikis;
    }

    /**
     * Merge a LinkHolderArray whose link IDs were not allocated from this
     * object's parent. Every entry is renumbered by a fresh offset obtained
     * from the parent, and the placeholders in $texts are renumbered to match.
     *
     * @param LinkHolderArray $other
     * @param string|array $texts Text (or array of texts) containing the
     *   placeholders that belong to $other
     * @return string|array $texts with renumbered placeholders
     */
    public function mergeForeign( $other, $texts ) {
        $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
        // Start at the offset itself: with an empty $other this keeps the
        // parent's counter where it is instead of rewinding it to 1.
        $maxId = $idOffset;

        # Renumber internal links
        foreach ( $other->internals as $ns => $nsLinks ) {
            foreach ( $nsLinks as $key => $entry ) {
                $newKey = $idOffset + $key;
                $this->internals[$ns][$newKey] = $entry;
                $this->size++;
                $maxId = max( $maxId, $newKey );
            }
        }
        $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
            array( $this, 'mergeForeignCallback' ), $texts );

        # Renumber interwiki links
        foreach ( $other->interwikis as $key => $entry ) {
            $newKey = $idOffset + $key;
            $this->interwikis[$newKey] = $entry;
            $this->size++;
            $maxId = max( $maxId, $newKey );
        }
        $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
            array( $this, 'mergeForeignCallback' ), $texts );

        # Set the parent link ID to be beyond the highest used ID
        $this->parent->setLinkID( $maxId + 1 );
        $this->tempIdOffset = null;
        return $texts;
    }

    /**
     * preg_replace_callback() helper for mergeForeign(): adds the current
     * offset to the link ID captured in group 2.
     *
     * @param array $m Match array: prefix, link ID, suffix
     * @return string Renumbered placeholder
     */
    protected function mergeForeignCallback( $m ) {
        return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
    }

    /**
     * Build a new LinkHolderArray holding only the entries whose placeholders
     * occur in $text. Placeholders without a matching entry are ignored.
     *
     * @param string $text
     * @return LinkHolderArray
     */
    public function getSubArray( $text ) {
        $sub = new LinkHolderArray( $this->parent );

        # Internal links
        if ( preg_match_all( '/<!--LINK (\d+):(\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
            foreach ( $matches as $m ) {
                $ns = $m[1];
                $key = $m[2];
                if ( !isset( $this->internals[$ns][$key] ) ) {
                    continue;
                }
                // Count each holder once, even if its placeholder is repeated
                if ( !isset( $sub->internals[$ns][$key] ) ) {
                    $sub->size++;
                }
                $sub->internals[$ns][$key] = $this->internals[$ns][$key];
            }
        }

        # Interwiki links
        if ( preg_match_all( '/<!--IWLINK (\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
            foreach ( $matches as $m ) {
                $key = $m[1];
                if ( !isset( $this->interwikis[$key] ) ) {
                    continue;
                }
                if ( !isset( $sub->interwikis[$key] ) ) {
                    $sub->size++;
                }
                $sub->interwikis[$key] = $this->interwikis[$key];
            }
        }
        return $sub;
    }

    /**
     * Whether more holders are stored than $wgLinkHolderBatchSize allows.
     *
     * @return bool
     */
    public function isBig() {
        global $wgLinkHolderBatchSize;
        return $this->size > $wgLinkHolderBatchSize;
    }

    /**
     * Remove all stored holders and reset the counter.
     */
    public function clear() {
        $this->internals = array();
        $this->interwikis = array();
        $this->size = 0;
    }

    /**
     * Store a link and return the placeholder that stands in for it.
     *
     * The part of $trail that Linker::splitTrail() assigns to the link is
     * appended to the link text; the remainder follows the placeholder.
     *
     * @param Title $nt Link target; a non-object yields an error marker
     * @param string $text Link text
     * @param array $query Stored with the entry when not empty
     * @param string $trail Text directly after the link
     * @param string $prefix Text directly before the link text
     * @return string Placeholder followed by the unused part of the trail
     */
    public function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
        wfProfileIn( __METHOD__ );
        if ( !is_object( $nt ) ) {
            # Fail gracefully
            $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
        } else {
            # Separate the link trail from the rest of the link
            list( $inside, $trail ) = Linker::splitTrail( $trail );

            $entry = array(
                'title' => $nt,
                'text' => $prefix . $text . $inside,
                'pdbk' => $nt->getPrefixedDBkey(),
            );
            if ( $query !== array() ) {
                $entry['query'] = $query;
            }

            // IDs come from the parent for both kinds of link, which keeps
            // them unique across holder arrays and so keeps the arrays mergeable
            $key = $this->parent->nextLinkID();
            if ( $nt->isExternal() ) {
                $this->interwikis[$key] = $entry;
                $retVal = "<!--IWLINK $key-->{$trail}";
            } else {
                $ns = $nt->getNamespace();
                $this->internals[$ns][$key] = $entry;
                $retVal = "<!--LINK $ns:$key-->{$trail}";
            }
            $this->size++;
        }
        wfProfileOut( __METHOD__ );
        return $retVal;
    }

    /**
     * Replace all link placeholders in $text with rendered links.
     *
     * @param string &$text Modified in place
     * @return array Link colours (CSS class, '' or 'new') indexed by prefixed DB key
     */
    public function replace( &$text ) {
        wfProfileIn( __METHOD__ );

        $colours = $this->replaceInternal( $text );
        $this->replaceInterwiki( $text );

        wfProfileOut( __METHOD__ );
        return $colours;
    }

    /**
     * Replace the internal link placeholders in $text.
     *
     * @param string &$text Modified in place
     * @return array Link colours indexed by prefixed DB key; empty when there
     *   are no internal links
     */
    protected function replaceInternal( &$text ) {
        if ( !$this->internals ) {
            return array();
        }

        wfProfileIn( __METHOD__ );
        global $wgContLang;

        $colours = array();
        $linkCache = LinkCache::singleton();
        $output = $this->parent->getOutput();

        wfProfileIn( __METHOD__ . '-check' );
        $dbr = wfGetDB( DB_SLAVE );
        $threshold = $this->parent->getOptions()->getStubThreshold();

        # Sort by namespace
        ksort( $this->internals );

        // page ID => prefixed DB key, handed to the GetLinkColours hook
        $linkcolour_ids = array();

        # Generate query
        $queries = array();
        foreach ( $this->internals as $ns => $entries ) {
            foreach ( $entries as $entry ) {
                $title = $entry['title'];
                $pdbk = $entry['pdbk'];

                # Skip invalid entries.
                # Result will be ugly, but prevents crash.
                if ( is_null( $title ) ) {
                    continue;
                }

                # Check if it's a static known link, e.g. interwiki
                if ( $title->isAlwaysKnown() ) {
                    $colours[$pdbk] = '';
                } elseif ( $ns == NS_SPECIAL ) {
                    $colours[$pdbk] = 'new';
                } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
                    $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
                    $output->addLink( $title, $id );
                    $linkcolour_ids[$id] = $pdbk;
                } elseif ( $linkCache->isBadLink( $pdbk ) ) {
                    $colours[$pdbk] = 'new';
                } else {
                    # Not in the link cache, add it to the query
                    $queries[$ns][] = $title->getDBkey();
                }
            }
        }
        if ( $queries ) {
            // One "namespace AND title list" condition per namespace, ORed together
            $where = array();
            foreach ( $queries as $ns => $pages ) {
                $where[] = $dbr->makeList(
                    array(
                        'page_namespace' => $ns,
                        'page_title' => $pages,
                    ),
                    LIST_AND
                );
            }

            $res = $dbr->select(
                'page',
                array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
                $dbr->makeList( $where, LIST_OR ),
                __METHOD__
            );

            # Fetch data and form into an associative array
            # non-existent = broken
            foreach ( $res as $s ) {
                $title = Title::makeTitle( $s->page_namespace, $s->page_title );
                $pdbk = $title->getPrefixedDBkey();
                $linkCache->addGoodLinkObjFromRow( $title, $s );
                $output->addLink( $title, $s->page_id );
                # @todo FIXME: Convoluted data flow
                # The redirect status and length is passed to getLinkColour via the LinkCache
                # Use formal parameters instead
                $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
                // add id to the extension todolist
                $linkcolour_ids[$s->page_id] = $pdbk;
            }
            unset( $res );
        }
        if ( count( $linkcolour_ids ) ) {
            // pass an array of page_ids to an extension
            wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
        }
        wfProfileOut( __METHOD__ . '-check' );

        # Do a second query for different language variants of links and categories
        if ( $wgContLang->hasVariants() ) {
            $this->doVariants( $colours );
        }

        # Construct search and replace arrays
        wfProfileIn( __METHOD__ . '-construct' );
        $replacePairs = array();
        foreach ( $this->internals as $ns => $entries ) {
            foreach ( $entries as $index => $entry ) {
                $pdbk = $entry['pdbk'];
                $title = $entry['title'];
                $query = isset( $entry['query'] ) ? $entry['query'] : array();
                $key = "$ns:$index";
                $searchkey = "<!--LINK $key-->";
                $displayText = $entry['text'];
                if ( isset( $entry['selflink'] ) ) {
                    // Flagged by doVariants(): the link points at the page itself
                    $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
                    continue;
                }
                if ( $displayText === '' ) {
                    $displayText = null;
                }
                if ( !isset( $colours[$pdbk] ) ) {
                    // Neither cached nor found by the queries above
                    $colours[$pdbk] = 'new';
                }
                $attribs = array();
                if ( $colours[$pdbk] == 'new' ) {
                    $linkCache->addBadLinkObj( $title );
                    $output->addLink( $title, 0 );
                    $type = array( 'broken' );
                } else {
                    if ( $colours[$pdbk] != '' ) {
                        $attribs['class'] = $colours[$pdbk];
                    }
                    $type = array( 'known', 'noclasses' );
                }
                $replacePairs[$searchkey] = Linker::link( $title, $displayText,
                        $attribs, $query, $type );
            }
        }
        $replacer = new HashtableReplacer( $replacePairs, 1 );
        wfProfileOut( __METHOD__ . '-construct' );

        # Do the thing
        wfProfileIn( __METHOD__ . '-replace' );
        $text = preg_replace_callback(
            '/(<!--LINK .*?-->)/',
            $replacer->cb(),
            $text
        );

        wfProfileOut( __METHOD__ . '-replace' );
        wfProfileOut( __METHOD__ );
        return $colours;
    }

    /**
     * Replace the interwiki link placeholders in $text and register each
     * target with the parent's output object.
     *
     * @param string &$text Modified in place
     */
    protected function replaceInterwiki( &$text ) {
        if ( empty( $this->interwikis ) ) {
            return;
        }

        wfProfileIn( __METHOD__ );
        # Make interwiki link HTML
        $output = $this->parent->getOutput();
        $replacePairs = array();
        foreach ( $this->interwikis as $key => $link ) {
            $replacePairs[$key] = Linker::link( $link['title'], $link['text'] );
            $output->addInterwikiLink( $link['title'] );
        }
        $replacer = new HashtableReplacer( $replacePairs, 1 );

        $text = preg_replace_callback(
            '/<!--IWLINK (.*?)-->/',
            $replacer->cb(),
            $text );
        wfProfileOut( __METHOD__ );
    }

    /**
     * Look for existing pages under language-variant spellings of links that
     * are so far unknown or 'new', and retarget those holders to the variant
     * found. Categories of the output are handled the same way.
     *
     * @param array &$colours Link colours indexed by prefixed DB key; entries
     *   for variants that were found are added
     */
    protected function doVariants( &$colours ) {
        global $wgContLang;
        $linkBatch = new LinkBatch();
        $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
        $output = $this->parent->getOutput();
        $linkCache = LinkCache::singleton();
        $threshold = $this->parent->getOptions()->getStubThreshold();
        $titlesToBeConverted = '';
        $titlesAttrs = array();

        // Concatenate titles to a single string, thus we only need auto convert the
        // single string to all variants. This would improve parser's performance
        // significantly.
        foreach ( $this->internals as $ns => $entries ) {
            if ( $ns == NS_SPECIAL ) {
                continue;
            }
            foreach ( $entries as $index => $entry ) {
                $pdbk = $entry['pdbk'];
                // we only deal with new links (in its first query)
                if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
                    $titlesAttrs[] = array( $index, $entry['title'] );
                    // separate titles with \0 because it would never appear
                    // in a valid title
                    $titlesToBeConverted .= $entry['title']->getText() . "\0";
                }
            }
        }

        // Now do the conversion and explode string to text of titles
        $titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
        $allVariantsName = array_keys( $titlesAllVariants );
        foreach ( $titlesAllVariants as &$titlesVariant ) {
            $titlesVariant = explode( "\0", $titlesVariant );
        }
        // Break the reference so a later write cannot alter the last element
        unset( $titlesVariant );

        // Then add variants of links to link batch
        $parentTitle = $this->parent->getTitle();
        foreach ( $titlesAttrs as $i => $attrs ) {
            list( $index, $title ) = $attrs;
            $ns = $title->getNamespace();
            $text = $title->getText();

            foreach ( $allVariantsName as $variantName ) {
                $textVariant = $titlesAllVariants[$variantName][$i];
                if ( $textVariant === $text ) {
                    continue;
                }

                $variantTitle = Title::makeTitle( $ns, $textVariant );
                if ( is_null( $variantTitle ) ) {
                    continue;
                }

                // Self-link checking for mixed/different variant titles. At this point, we
                // already know the exact title does not exist, so the link cannot be to a
                // variant of the current title that exists as a separate page.
                if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
                    $this->internals[$ns][$index]['selflink'] = true;
                    // No other variant of this link needs checking
                    continue 2;
                }

                $linkBatch->addObj( $variantTitle );
                $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
            }
        }

        // process categories, check if a category exists in some variant
        $categoryMap = array(); // maps $category_variant => $category (dbkeys)
        $varCategories = array(); // category replacements oldDBkey => newDBkey
        foreach ( $output->getCategoryLinks() as $category ) {
            $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
            $linkBatch->addObj( $categoryTitle );
            $variants = $wgContLang->autoConvertToAllVariants( $category );
            foreach ( $variants as $variant ) {
                if ( $variant !== $category ) {
                    $variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
                    if ( is_null( $variantTitle ) ) {
                        continue;
                    }
                    $linkBatch->addObj( $variantTitle );
                    $categoryMap[$variant] = array( $category, $categoryTitle );
                }
            }
        }

        if ( !$linkBatch->isEmpty() ) {
            // construct query
            $dbr = wfGetDB( DB_SLAVE );
            $varRes = $dbr->select( 'page',
                array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
                $linkBatch->constructSet( 'page', $dbr ),
                __METHOD__
            );

            $linkcolour_ids = array();

            // for each found variants, figure out link holders and replace
            foreach ( $varRes as $s ) {

                $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
                $varPdbk = $variantTitle->getPrefixedDBkey();
                $vardbk = $variantTitle->getDBkey();

                $holderKeys = array();
                if ( isset( $variantMap[$varPdbk] ) ) {
                    $holderKeys = $variantMap[$varPdbk];
                    $linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
                    $output->addLink( $variantTitle, $s->page_id );
                }

                // loop over link holders
                foreach ( $holderKeys as $key ) {
                    list( $ns, $index ) = explode( ':', $key, 2 );
                    $entry =& $this->internals[$ns][$index];
                    $pdbk = $entry['pdbk'];

                    if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
                        // found link in some of the variants, replace the link holder data
                        $entry['title'] = $variantTitle;
                        $entry['pdbk'] = $varPdbk;

                        // set pdbk and colour
                        # @todo FIXME: Convoluted data flow
                        # The redirect status and length is passed to getLinkColour via the LinkCache
                        # Use formal parameters instead
                        $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold );
                        $linkcolour_ids[$s->page_id] = $pdbk;
                    }
                }
                // Detach from the last holder touched
                unset( $entry );

                // check if the object is a variant of a category
                if ( isset( $categoryMap[$vardbk] ) ) {
                    list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
                    if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
                        $varCategories[$oldkey] = $vardbk;
                    }
                }
            }
            wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );

            // rebuild the categories in original order (if there are replacements)
            if ( count( $varCategories ) > 0 ) {
                $newCats = array();
                $originalCats = $output->getCategories();
                foreach ( $originalCats as $cat => $sortkey ) {
                    // make the replacement
                    if ( array_key_exists( $cat, $varCategories ) ) {
                        $newCats[$varCategories[$cat]] = $sortkey;
                    } else {
                        $newCats[$cat] = $sortkey;
                    }
                }
                $output->setCategoryLinks( $newCats );
            }
        }
    }

    /**
     * Replace link placeholders with the plain link text stored for them,
     * without rendering any HTML links.
     *
     * @param string $text
     * @return string
     */
    public function replaceText( $text ) {
        wfProfileIn( __METHOD__ );

        $text = preg_replace_callback(
            '/<!--(LINK|IWLINK) (.*?)-->/',
            array( $this, 'replaceTextCallback' ),
            $text );

        wfProfileOut( __METHOD__ );
        return $text;
    }

    /**
     * preg_replace_callback() helper for replaceText().
     *
     * @param array $matches Whole match, placeholder type, placeholder key
     * @return string Stored link text, or the untouched placeholder when no
     *   text is stored for it
     */
    public function replaceTextCallback( $matches ) {
        $type = $matches[1];
        $key = $matches[2];
        if ( $type == 'LINK' ) {
            // A key without a colon cannot name an internal link entry
            $parts = explode( ':', $key, 2 );
            if ( count( $parts ) === 2 ) {
                list( $ns, $index ) = $parts;
                if ( isset( $this->internals[$ns][$index]['text'] ) ) {
                    return $this->internals[$ns][$index]['text'];
                }
            }
        } elseif ( $type == 'IWLINK' ) {
            if ( isset( $this->interwikis[$key]['text'] ) ) {
                return $this->interwikis[$key]['text'];
            }
        }
        return $matches[0];
    }
}