MediaWiki  REL1_22
LinkHolderArray.php
Go to the documentation of this file.
00001 <?php
00027 class LinkHolderArray {
/**
 * Holders for internal links, stored as internals[namespace][link ID] => entry.
 * An entry is an array with the keys 'title', 'text' and 'pdbk', plus
 * 'query' when a non-empty query was supplied (see makeHolder()).
 */
public $internals = array();

/**
 * Holders for interwiki links, stored as interwikis[link ID] => entry.
 */
public $interwikis = array();

/**
 * Number of holders counted so far; isBig() compares it with
 * $wgLinkHolderBatchSize.
 */
public $size = 0;

/**
 * Owning object. This class calls nextLinkID(), setLinkID(), getOutput(),
 * getOptions() and getTitle() on it.
 * NOTE(review): presumably the Parser instance -- confirm against callers.
 */
public $parent;

/**
 * ID offset read by mergeForeignCallback() while mergeForeign() is running.
 */
protected $tempIdOffset;

/**
 * @param object $parent Owner that hands out link IDs and exposes the
 *   output object, options and current title.
 */
function __construct( $parent ) {
    $this->parent = $parent;
}
00036 
/**
 * Unset every property of this object when it is destroyed.
 * NOTE(review): presumably intended to release references (e.g. to the
 * parent) early -- confirm.
 */
function __destruct() {
    $propertyNames = array_keys( get_object_vars( $this ) );
    foreach ( $propertyNames as $propertyName ) {
        unset( $this->$propertyName );
    }
}
00045 
/**
 * Prepare the object for serialization.
 *
 * The 'title' member is removed from every stored entry (__wakeup() rebuilds
 * it from 'pdbk'), and only the link arrays and the size counter are
 * serialized; the parent and the temporary ID offset are left out.
 *
 * Note that the entries of the live object are modified, not a copy.
 *
 * @return array Names of the properties to serialize
 */
function __sleep() {
    // Internal links are nested one level deeper (by namespace)
    foreach ( $this->internals as $ns => $nsLinks ) {
        foreach ( $nsLinks as $id => $unused ) {
            unset( $this->internals[$ns][$id]['title'] );
        }
    }

    foreach ( array_keys( $this->interwikis ) as $id ) {
        unset( $this->interwikis[$id]['title'] );
    }

    return array( 'internals', 'interwikis', 'size' );
}
00070 
/**
 * Restore the object after unserialization: rebuild the 'title' member of
 * every entry from its stored 'pdbk' string via Title::newFromText().
 */
function __wakeup() {
    foreach ( $this->internals as $ns => $nsLinks ) {
        foreach ( $nsLinks as $id => $entry ) {
            $this->internals[$ns][$id]['title'] = Title::newFromText( $entry['pdbk'] );
        }
    }

    foreach ( $this->interwikis as $id => $entry ) {
        $this->interwikis[$id]['title'] = Title::newFromText( $entry['pdbk'] );
    }
}
00088 
/**
 * Merge another LinkHolderArray into this one.
 *
 * Entries are combined with the array union operator, so an entry that
 * already exists here under the same key is kept and the incoming one is
 * ignored.
 *
 * The size counter is advanced for both internal and interwiki entries,
 * matching makeHolder(), which counts both kinds.
 *
 * @param LinkHolderArray $other Holder array whose entries are added
 */
function merge( $other ) {
    foreach ( $other->internals as $ns => $entries ) {
        $this->size += count( $entries );
        if ( isset( $this->internals[$ns] ) ) {
            $this->internals[$ns] += $entries;
        } else {
            $this->internals[$ns] = $entries;
        }
    }

    # Interwiki holders count towards the size too, as in makeHolder()
    $this->size += count( $other->interwikis );
    $this->interwikis += $other->interwikis;
}
00104 
/**
 * Merge a LinkHolderArray whose link IDs were not allocated by this
 * object's parent, renumbering its entries so they cannot collide with
 * IDs used here.
 *
 * Every incoming ID is shifted by an offset obtained from
 * $this->parent->nextLinkID(); the same shift is applied to the
 * "<!--LINK ns:id-->" and "<!--IWLINK id-->" placeholders found in $texts.
 * Afterwards the parent's link ID is set to one past the highest ID in use.
 *
 * @param LinkHolderArray $other Holder array to import
 * @param string|array $texts Text(s) containing placeholders that refer to
 *   $other (passed straight to preg_replace_callback())
 * @return string|array $texts with the placeholders renumbered
 */
function mergeForeign( $other, $texts ) {
    $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();

    # Start from the offset itself rather than 0: if $other holds no links,
    # starting at 0 would end in setLinkID( 1 ) and rewind the parent's
    # counter below IDs that have already been handed out.
    $maxId = $idOffset;

    # Renumber internal links
    foreach ( $other->internals as $ns => $nsLinks ) {
        foreach ( $nsLinks as $key => $entry ) {
            $newKey = $idOffset + $key;
            $this->internals[$ns][$newKey] = $entry;
            $this->size++;
            $maxId = max( $maxId, $newKey );
        }
    }
    $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
        array( $this, 'mergeForeignCallback' ), $texts );

    # Renumber interwiki links
    foreach ( $other->interwikis as $key => $entry ) {
        $newKey = $idOffset + $key;
        $this->interwikis[$newKey] = $entry;
        $this->size++;
        $maxId = max( $maxId, $newKey );
    }
    $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
        array( $this, 'mergeForeignCallback' ), $texts );

    # Set the parent link ID to be beyond the highest used ID
    $this->parent->setLinkID( $maxId + 1 );
    $this->tempIdOffset = null;
    return $texts;
}
00146 
/**
 * preg_replace_callback() handler used by mergeForeign(): rebuilds a
 * placeholder with its numeric ID shifted by the current temporary offset.
 *
 * @param array $m Match groups: [1] text before the ID, [2] the ID,
 *   [3] text after the ID
 * @return string The placeholder with the shifted ID
 */
protected function mergeForeignCallback( $m ) {
    $shiftedId = $m[2] + $this->tempIdOffset;
    return $m[1] . $shiftedId . $m[3];
}
00150 
/**
 * Build a new LinkHolderArray containing only the holders that are
 * referenced by placeholders in the given text.
 *
 * Placeholders that have no matching holder in this object are skipped
 * instead of being copied as null entries (which also avoids an
 * undefined-index notice). The size counter of the returned object
 * reflects the number of holders copied.
 *
 * @param string $text Text containing "<!--LINK ns:id-->" and/or
 *   "<!--IWLINK id-->" placeholders
 * @return LinkHolderArray Sub-array sharing this object's parent
 */
function getSubArray( $text ) {
    $sub = new LinkHolderArray( $this->parent );

    # Internal links
    if ( preg_match_all( '/<!--LINK (\d+):(\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
        foreach ( $matches as $m ) {
            $ns = $m[1];
            $key = $m[2];
            # Copy each referenced holder once; ignore unknown placeholders
            if ( isset( $this->internals[$ns][$key] ) && !isset( $sub->internals[$ns][$key] ) ) {
                $sub->internals[$ns][$key] = $this->internals[$ns][$key];
                $sub->size++;
            }
        }
    }

    # Interwiki links
    if ( preg_match_all( '/<!--IWLINK (\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
        foreach ( $matches as $m ) {
            $key = $m[1];
            if ( isset( $this->interwikis[$key] ) && !isset( $sub->interwikis[$key] ) ) {
                $sub->interwikis[$key] = $this->interwikis[$key];
                $sub->size++;
            }
        }
    }

    return $sub;
}
00185 
/**
 * Tell whether the number of counted holders exceeds the configured
 * batch size ($wgLinkHolderBatchSize).
 *
 * @return bool True when size is strictly greater than the batch size
 */
function isBig() {
    global $wgLinkHolderBatchSize;

    $batchLimit = $wgLinkHolderBatchSize;
    return $batchLimit < $this->size;
}
00194 
/**
 * Forget all stored holders and reset the size counter to zero.
 * The parent reference is left untouched.
 */
function clear() {
    $this->size = 0;
    $this->interwikis = array();
    $this->internals = array();
}
00204 
/**
 * Store a link and return the placeholder text that stands in for it until
 * replace() is run.
 *
 * The part of $trail that Linker::splitTrail() assigns to the link is moved
 * into the link text; the remainder is appended after the placeholder.
 *
 * @param Title $nt Link target; anything that is not an object yields an
 *   "<!-- ERROR -->" marker followed by the plain prefix, text and trail
 * @param string $text Link text
 * @param array $query Optional query parameters, stored only when non-empty
 * @param string $trail Text following the link
 * @param string $prefix Text placed in front of the link text
 * @return string Placeholder ("<!--LINK ns:id-->" or "<!--IWLINK id-->")
 *   followed by the unused part of the trail
 */
function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
    wfProfileIn( __METHOD__ );

    if ( !is_object( $nt ) ) {
        # Fail gracefully
        wfProfileOut( __METHOD__ );
        return "<!-- ERROR -->{$prefix}{$text}{$trail}";
    }

    # Separate the link trail from the rest of the link
    list( $inside, $trail ) = Linker::splitTrail( $trail );

    $entry = array(
        'title' => $nt,
        'text' => $prefix . $text . $inside,
        'pdbk' => $nt->getPrefixedDBkey(),
    );
    if ( $query !== array() ) {
        $entry['query'] = $query;
    }

    $isInterwiki = $nt->isExternal();
    // Both kinds draw from the same ID sequence: a globally unique ID keeps
    // the objects mergable
    $key = $this->parent->nextLinkID();
    if ( $isInterwiki ) {
        $this->interwikis[$key] = $entry;
        $placeholder = "<!--IWLINK $key-->";
    } else {
        $ns = $nt->getNamespace();
        $this->internals[$ns][$key] = $entry;
        $placeholder = "<!--LINK $ns:$key-->";
    }
    $this->size++;

    wfProfileOut( __METHOD__ );
    return $placeholder . $trail;
}
00252 
/**
 * Replace all link placeholders in the text with real links: internal
 * links first, then interwiki links.
 *
 * @param string &$text Text to process; modified in place
 * @return mixed Whatever replaceInternal() returns. Callers should only
 *   rely on this being the link colour map if replaceInternal() is known
 *   to return one.
 */
function replace( &$text ) {
    wfProfileIn( __METHOD__ );

    $internalResult = $this->replaceInternal( $text );
    $this->replaceInterwiki( $text );

    wfProfileOut( __METHOD__ );
    return $internalResult;
}
00268 
/**
 * Replace internal link placeholders ("<!--LINK ns:id-->") in the text
 * with rendered links.
 *
 * Steps:
 *  1. If the link cache uses the database, classify every stored title:
 *     always-known, special page, cached as good, cached as bad, or unknown.
 *     Unknown titles are looked up in the page table in a single query.
 *  2. Run the GetLinkColours hook for the page IDs that were found.
 *  3. If the content language has variants, let doVariants() look for
 *     existing variant titles of the links still considered missing.
 *  4. Build the replacement HTML for each holder and substitute it.
 *
 * @param string &$text Text to process; modified in place
 * @return array Link colours (CSS class, '' or 'new') indexed by prefixed
 *   DB key; an empty array when there are no internal link holders
 */
protected function replaceInternal( &$text ) {
    if ( !$this->internals ) {
        # Nothing to do; still hand back an array so replace() has a
        # consistent return type
        return array();
    }

    wfProfileIn( __METHOD__ );
    global $wgContLang;

    $colours = array();
    $linkCache = LinkCache::singleton();
    $output = $this->parent->getOutput();

    if ( $linkCache->useDatabase() ) {
        wfProfileIn( __METHOD__ . '-check' );
        $dbr = wfGetDB( DB_SLAVE );
        $threshold = $this->parent->getOptions()->getStubThreshold();

        # Sort by namespace
        ksort( $this->internals );

        # Maps page ID => prefixed DB key, handed to the GetLinkColours hook
        $linkcolour_ids = array();

        # Generate query
        $queries = array();
        foreach ( $this->internals as $ns => $entries ) {
            foreach ( $entries as $entry ) {
                $title = $entry['title'];
                $pdbk = $entry['pdbk'];

                # Skip invalid entries.
                # Result will be ugly, but prevents crash.
                if ( is_null( $title ) ) {
                    continue;
                }

                # Check if it's a static known link, e.g. interwiki
                if ( $title->isAlwaysKnown() ) {
                    $colours[$pdbk] = '';
                } elseif ( $ns == NS_SPECIAL ) {
                    $colours[$pdbk] = 'new';
                } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
                    $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
                    $output->addLink( $title, $id );
                    $linkcolour_ids[$id] = $pdbk;
                } elseif ( $linkCache->isBadLink( $pdbk ) ) {
                    $colours[$pdbk] = 'new';
                } else {
                    # Not in the link cache, add it to the query
                    $queries[$ns][] = $title->getDBkey();
                }
            }
        }
        if ( $queries ) {
            # One "(namespace AND title list)" condition per namespace,
            # OR-ed together into a single query
            $where = array();
            foreach ( $queries as $ns => $pages ) {
                $where[] = $dbr->makeList(
                    array(
                        'page_namespace' => $ns,
                        'page_title' => $pages,
                    ),
                    LIST_AND
                );
            }

            $res = $dbr->select(
                'page',
                array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
                $dbr->makeList( $where, LIST_OR ),
                __METHOD__
            );

            # Fetch data and form into an associative array
            # non-existent = broken
            foreach ( $res as $s ) {
                $title = Title::makeTitle( $s->page_namespace, $s->page_title );
                $pdbk = $title->getPrefixedDBkey();
                $linkCache->addGoodLinkObjFromRow( $title, $s );
                $output->addLink( $title, $s->page_id );
                # @todo FIXME: Convoluted data flow
                # The redirect status and length is passed to getLinkColour via the LinkCache
                # Use formal parameters instead
                $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
                // add id to the extension todolist
                $linkcolour_ids[$s->page_id] = $pdbk;
            }
            unset( $res );
        }
        if ( count( $linkcolour_ids ) ) {
            // pass an array of page_ids to an extension
            wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
        }
        wfProfileOut( __METHOD__ . '-check' );
    }

    # Do a second query for different language variants of links and categories
    if ( $wgContLang->hasVariants() ) {
        $this->doVariants( $colours );
    }

    # Construct search and replace arrays
    wfProfileIn( __METHOD__ . '-construct' );
    $replacePairs = array();
    foreach ( $this->internals as $ns => $entries ) {
        foreach ( $entries as $index => $entry ) {
            $pdbk = $entry['pdbk'];
            $title = $entry['title'];
            $query = isset( $entry['query'] ) ? $entry['query'] : array();
            $key = "$ns:$index";
            $searchkey = "<!--LINK $key-->";
            $displayText = $entry['text'];
            # Entries flagged by doVariants() as pointing at the current page
            if ( isset( $entry['selflink'] ) ) {
                $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
                continue;
            }
            if ( $displayText === '' ) {
                $displayText = null;
            }
            # Anything without a colour by now is treated as a missing page
            if ( !isset( $colours[$pdbk] ) ) {
                $colours[$pdbk] = 'new';
            }
            $attribs = array();
            if ( $colours[$pdbk] == 'new' ) {
                $linkCache->addBadLinkObj( $title );
                $output->addLink( $title, 0 );
                $type = array( 'broken' );
            } else {
                if ( $colours[$pdbk] != '' ) {
                    $attribs['class'] = $colours[$pdbk];
                }
                $type = array( 'known', 'noclasses' );
            }
            $replacePairs[$searchkey] = Linker::link( $title, $displayText,
                    $attribs, $query, $type );
        }
    }
    $replacer = new HashtableReplacer( $replacePairs, 1 );
    wfProfileOut( __METHOD__ . '-construct' );

    # Do the thing
    wfProfileIn( __METHOD__ . '-replace' );
    $text = preg_replace_callback(
        '/(<!--LINK .*?-->)/',
        $replacer->cb(),
        $text
    );

    wfProfileOut( __METHOD__ . '-replace' );
    wfProfileOut( __METHOD__ );

    return $colours;
}
00421 
/**
 * Replace interwiki link placeholders ("<!--IWLINK id-->") in the text with
 * rendered links, and register each target on the parent's output object.
 *
 * @param string &$text Text to process; modified in place
 */
protected function replaceInterwiki( &$text ) {
    if ( empty( $this->interwikis ) ) {
        return;
    }

    wfProfileIn( __METHOD__ );

    # Make interwiki link HTML
    $parserOutput = $this->parent->getOutput();
    $htmlById = array();
    foreach ( $this->interwikis as $id => $entry ) {
        $target = $entry['title'];
        $htmlById[$id] = Linker::link( $target, $entry['text'] );
        $parserOutput->addInterwikiLink( $target );
    }

    # The replacer looks up capture group 1 (the link ID) in the table
    $replacer = new HashtableReplacer( $htmlById, 1 );
    $text = preg_replace_callback( '/<!--IWLINK (.*?)-->/', $replacer->cb(), $text );

    wfProfileOut( __METHOD__ );
}
00446 
/**
 * Look for existing pages under language-variant spellings of the links
 * that are still considered missing, and of the categories on the output.
 *
 * For each holder whose colour is unset or 'new', the title text is
 * converted to every variant. Variant titles that exist in the page table
 * replace the holder's 'title' and 'pdbk', and their colour is recorded
 * under the variant's prefixed DB key. A holder whose variant equals the
 * parent's title (and that has no fragment) is flagged 'selflink'.
 * Categories that do not exist but have an existing variant are swapped
 * for that variant on the output object, keeping the original order.
 *
 * @param array &$colours Link colours indexed by prefixed DB key; updated
 *   in place
 */
protected function doVariants( &$colours ) {
    global $wgContLang;
    $linkBatch = new LinkBatch();
    $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
    $output = $this->parent->getOutput();
    $linkCache = LinkCache::singleton();
    $threshold = $this->parent->getOptions()->getStubThreshold();
    $titlesToBeConverted = '';
    $titlesAttrs = array();

    // Concatenate titles to a single string, thus we only need auto convert the
    // single string to all variants. This would improve parser's performance
    // significantly.
    foreach ( $this->internals as $ns => $entries ) {
        if ( $ns == NS_SPECIAL ) {
            continue;
        }
        foreach ( $entries as $index => $entry ) {
            $pdbk = $entry['pdbk'];
            // we only deal with new links (in its first query)
            if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
                $titlesAttrs[] = array( $index, $entry['title'] );
                // separate titles with \0 because it would never appears
                // in a valid title
                $titlesToBeConverted .= $entry['title']->getText() . "\0";
            }
        }
    }

    // Now do the conversion and explode string to text of titles
    $titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
    $allVariantsName = array_keys( $titlesAllVariants );
    foreach ( $titlesAllVariants as &$titlesVariant ) {
        $titlesVariant = explode( "\0", $titlesVariant );
    }
    // Break the reference so later writes cannot touch the last element
    unset( $titlesVariant );

    // Then add variants of links to link batch
    $parentTitle = $this->parent->getTitle();
    foreach ( $titlesAttrs as $i => $attrs ) {
        list( $index, $title ) = $attrs;
        $ns = $title->getNamespace();
        $text = $title->getText();

        foreach ( $allVariantsName as $variantName ) {
            $textVariant = $titlesAllVariants[$variantName][$i];
            if ( $textVariant === $text ) {
                continue;
            }

            $variantTitle = Title::makeTitle( $ns, $textVariant );
            if ( is_null( $variantTitle ) ) {
                continue;
            }

            // Self-link checking for mixed/different variant titles. At this point, we
            // already know the exact title does not exist, so the link cannot be to a
            // variant of the current title that exists as a separate page.
            if ( $variantTitle->equals( $parentTitle ) && $title->getFragment() === '' ) {
                $this->internals[$ns][$index]['selflink'] = true;
                continue 2;
            }

            $linkBatch->addObj( $variantTitle );
            $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
        }
    }

    // process categories, check if a category exists in some variant
    $categoryMap = array(); // maps $category_variant => $category (dbkeys)
    $varCategories = array(); // category replacements oldDBkey => newDBkey
    foreach ( $output->getCategoryLinks() as $category ) {
        $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
        // Same null check as for the variant titles below: the stored
        // title is dereferenced later via ->exists()
        if ( is_null( $categoryTitle ) ) {
            continue;
        }
        $linkBatch->addObj( $categoryTitle );
        $variants = $wgContLang->autoConvertToAllVariants( $category );
        foreach ( $variants as $variant ) {
            if ( $variant !== $category ) {
                $variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
                if ( is_null( $variantTitle ) ) {
                    continue;
                }
                $linkBatch->addObj( $variantTitle );
                $categoryMap[$variant] = array( $category, $categoryTitle );
            }
        }
    }

    if ( !$linkBatch->isEmpty() ) {
        // construct query
        $dbr = wfGetDB( DB_SLAVE );
        $varRes = $dbr->select( 'page',
            array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
            $linkBatch->constructSet( 'page', $dbr ),
            __METHOD__
        );

        // Maps page ID => prefixed DB key, handed to the GetLinkColours hook
        $linkcolour_ids = array();

        // for each found variants, figure out link holders and replace
        foreach ( $varRes as $s ) {

            $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
            $varPdbk = $variantTitle->getPrefixedDBkey();
            $vardbk = $variantTitle->getDBkey();

            $holderKeys = array();
            if ( isset( $variantMap[$varPdbk] ) ) {
                $holderKeys = $variantMap[$varPdbk];
                $linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
                $output->addLink( $variantTitle, $s->page_id );
            }

            // loop over link holders
            foreach ( $holderKeys as $key ) {
                list( $ns, $index ) = explode( ':', $key, 2 );
                $entry =& $this->internals[$ns][$index];
                $pdbk = $entry['pdbk'];

                if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
                    // found link in some of the variants, replace the link holder data
                    $entry['title'] = $variantTitle;
                    $entry['pdbk'] = $varPdbk;

                    // set pdbk and colour
                    # @todo FIXME: Convoluted data flow
                    # The redirect status and length is passed to getLinkColour via the LinkCache
                    # Use formal parameters instead
                    $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold );
                    // The hook must see the key the colour is stored under,
                    // i.e. the variant's key, not the holder's previous one
                    $linkcolour_ids[$s->page_id] = $varPdbk;
                }
            }
            // Drop the reference so it cannot alias the last holder
            unset( $entry );

            // check if the object is a variant of a category
            if ( isset( $categoryMap[$vardbk] ) ) {
                list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
                if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
                    $varCategories[$oldkey] = $vardbk;
                }
            }
        }
        wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );

        // rebuild the categories in original order (if there are replacements)
        if ( count( $varCategories ) > 0 ) {
            $newCats = array();
            $originalCats = $output->getCategories();
            foreach ( $originalCats as $cat => $sortkey ) {
                // make the replacement
                if ( array_key_exists( $cat, $varCategories ) ) {
                    $newCats[$varCategories[$cat]] = $sortkey;
                } else {
                    $newCats[$cat] = $sortkey;
                }
            }
            $output->setCategoryLinks( $newCats );
        }
    }
}
00607 
/**
 * Replace link placeholders in the text with the plain link text of the
 * corresponding holder (no HTML link is generated). Placeholders without a
 * matching holder are left as they are.
 *
 * @param string $text Text containing "<!--LINK ...-->" and/or
 *   "<!--IWLINK ...-->" placeholders
 * @return string Text with the known placeholders replaced
 */
function replaceText( $text ) {
    wfProfileIn( __METHOD__ );

    // Objects are passed by handle, so no reference to $this is needed;
    // this is the same callback form mergeForeign() uses.
    $text = preg_replace_callback(
        '/<!--(LINK|IWLINK) (.*?)-->/',
        array( $this, 'replaceTextCallback' ),
        $text );

    wfProfileOut( __METHOD__ );
    return $text;
}
00626 
/**
 * preg_replace_callback() handler for replaceText().
 *
 * @param array $matches [0] the whole placeholder, [1] 'LINK' or 'IWLINK',
 *   [2] the key ("ns:id" for LINK, the ID for IWLINK)
 * @return string The stored link text, or the unchanged placeholder when no
 *   holder with text exists for the key
 */
function replaceTextCallback( $matches ) {
    $placeholder = $matches[0];
    $kind = $matches[1];
    $holderKey = $matches[2];

    if ( $kind == 'LINK' ) {
        // Internal keys have the form "namespace:id"
        list( $ns, $index ) = explode( ':', $holderKey, 2 );
        return isset( $this->internals[$ns][$index]['text'] )
            ? $this->internals[$ns][$index]['text']
            : $placeholder;
    }

    if ( $kind == 'IWLINK' && isset( $this->interwikis[$holderKey]['text'] ) ) {
        return $this->interwikis[$holderKey]['text'];
    }

    return $placeholder;
}
00649 }