[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/title/ -> MediaWikiTitleCodec.php (source)

   1  <?php
   2  /**
   3   * A codec for %MediaWiki page titles.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @license GPL 2+
  22   * @author Daniel Kinzler
  23   */
  24  
  25  /**
  26   * A codec for %MediaWiki page titles.
  27   *
  28   * @note Normalization and validation is applied while parsing, not when formatting.
  29   * It's possible to construct a TitleValue with an invalid title, and use MediaWikiTitleCodec
  30   * to generate an (invalid) title string from it. TitleValues should be constructed only
  31   * via parseTitle() or from a (semi)trusted source, such as the database.
  32   *
  33   * @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue
  34   */
  35  class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
  36      /**
  37       * @var Language
  38       */
  39      protected $language;
  40  
  41      /**
  42       * @var GenderCache
  43       */
  44      protected $genderCache;
  45  
  46      /**
  47       * @var string[]
  48       */
  49      protected $localInterwikis;
  50  
  51      /**
  52       * @param Language $language The language object to use for localizing namespace names.
  53       * @param GenderCache $genderCache The gender cache for generating gendered namespace names
  54       * @param string[]|string $localInterwikis
  55       */
  56  	public function __construct( Language $language, GenderCache $genderCache,
  57          $localInterwikis = array()
  58      ) {
  59          $this->language = $language;
  60          $this->genderCache = $genderCache;
  61          $this->localInterwikis = (array)$localInterwikis;
  62      }
  63  
  64      /**
  65       * @see TitleFormatter::getNamespaceName()
  66       *
  67       * @param int $namespace
  68       * @param string $text
  69       *
  70       * @throws InvalidArgumentException If the namespace is invalid
  71       * @return string
  72       */
  73  	public function getNamespaceName( $namespace, $text ) {
  74          if ( $this->language->needsGenderDistinction() &&
  75              MWNamespace::hasGenderDistinction( $namespace )
  76          ) {
  77  
  78              //NOTE: we are assuming here that the title text is a user name!
  79              $gender = $this->genderCache->getGenderOf( $text, __METHOD__ );
  80              $name = $this->language->getGenderNsText( $namespace, $gender );
  81          } else {
  82              $name = $this->language->getNsText( $namespace );
  83          }
  84  
  85          if ( $name === false ) {
  86              throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
  87          }
  88  
  89          return $name;
  90      }
  91  
  92      /**
  93       * @see TitleFormatter::formatTitle()
  94       *
  95       * @param int|bool $namespace The namespace ID (or false, if the namespace should be ignored)
  96       * @param string $text The page title. Should be valid. Only minimal normalization is applied.
  97       *        Underscores will be replaced.
  98       * @param string $fragment The fragment name (may be empty).
  99       *
 100       * @throws InvalidArgumentException If the namespace is invalid
 101       * @return string
 102       */
 103  	public function formatTitle( $namespace, $text, $fragment = '' ) {
 104          if ( $namespace !== false ) {
 105              $namespace = $this->getNamespaceName( $namespace, $text );
 106  
 107              if ( $namespace !== '' ) {
 108                  $text = $namespace . ':' . $text;
 109              }
 110          }
 111  
 112          if ( $fragment !== '' ) {
 113              $text = $text . '#' . $fragment;
 114          }
 115  
 116          $text = str_replace( '_', ' ', $text );
 117  
 118          return $text;
 119      }
 120  
 121      /**
 122       * Parses the given text and constructs a TitleValue. Normalization
 123       * is applied according to the rules appropriate for the form specified by $form.
 124       *
 125       * @param string $text The text to parse
 126       * @param int $defaultNamespace Namespace to assume per default (usually NS_MAIN)
 127       *
 128       * @throws MalformedTitleException
 129       * @return TitleValue
 130       */
 131  	public function parseTitle( $text, $defaultNamespace ) {
 132          // NOTE: this is an ugly cludge that allows this class to share the
 133          // code for parsing with the old Title class. The parser code should
 134          // be refactored to avoid this.
 135          $parts = $this->splitTitleString( $text, $defaultNamespace );
 136  
 137          // Interwiki links are not supported by TitleValue
 138          if ( $parts['interwiki'] !== '' ) {
 139              throw new MalformedTitleException( 'Title must not contain an interwiki prefix: ' . $text );
 140          }
 141  
 142          // Relative fragment links are not supported by TitleValue
 143          if ( $parts['dbkey'] === '' ) {
 144              throw new MalformedTitleException( 'Title must not be empty: ' . $text );
 145          }
 146  
 147          return new TitleValue( $parts['namespace'], $parts['dbkey'], $parts['fragment'] );
 148      }
 149  
 150      /**
 151       * @see TitleFormatter::getText()
 152       *
 153       * @param TitleValue $title
 154       *
 155       * @return string $title->getText()
 156       */
 157  	public function getText( TitleValue $title ) {
 158          return $this->formatTitle( false, $title->getText(), '' );
 159      }
 160  
 161      /**
 162       * @see TitleFormatter::getText()
 163       *
 164       * @param TitleValue $title
 165       *
 166       * @return string
 167       */
 168  	public function getPrefixedText( TitleValue $title ) {
 169          return $this->formatTitle( $title->getNamespace(), $title->getText(), '' );
 170      }
 171  
 172      /**
 173       * @see TitleFormatter::getText()
 174       *
 175       * @param TitleValue $title
 176       *
 177       * @return string
 178       */
 179  	public function getFullText( TitleValue $title ) {
 180          return $this->formatTitle( $title->getNamespace(), $title->getText(), $title->getFragment() );
 181      }
 182  
 183      /**
 184       * Normalizes and splits a title string.
 185       *
 186       * This function removes illegal characters, splits off the interwiki and
 187       * namespace prefixes, sets the other forms, and canonicalizes
 188       * everything.
 189       *
 190       * @todo this method is only exposed as a temporary measure to ease refactoring.
 191       * It was copied with minimal changes from Title::secureAndSplit().
 192       *
 193       * @todo This method should be split up and an appropriate interface
 194       * defined for use by the Title class.
 195       *
 196       * @param string $text
 197       * @param int $defaultNamespace
 198       *
 199       * @throws MalformedTitleException If $text is not a valid title string.
 200       * @return array A mapp with the fields 'interwiki', 'fragment', 'namespace',
 201       *         'user_case_dbkey', and 'dbkey'.
 202       */
 203  	public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
 204          $dbkey = str_replace( ' ', '_', $text );
 205  
 206          # Initialisation
 207          $parts = array(
 208              'interwiki' => '',
 209              'local_interwiki' => false,
 210              'fragment' => '',
 211              'namespace' => $defaultNamespace,
 212              'dbkey' => $dbkey,
 213              'user_case_dbkey' => $dbkey,
 214          );
 215  
 216          # Strip Unicode bidi override characters.
 217          # Sometimes they slip into cut-n-pasted page titles, where the
 218          # override chars get included in list displays.
 219          $dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );
 220  
 221          # Clean up whitespace
 222          # Note: use of the /u option on preg_replace here will cause
 223          # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
 224          # conveniently disabling them.
 225          $dbkey = preg_replace(
 226              '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
 227              '_',
 228              $dbkey
 229          );
 230          $dbkey = trim( $dbkey, '_' );
 231  
 232          if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) {
 233              # Contained illegal UTF-8 sequences or forbidden Unicode chars.
 234              throw new MalformedTitleException( 'Bad UTF-8 sequences found in title: ' . $text );
 235          }
 236  
 237          $parts['dbkey'] = $dbkey;
 238  
 239          # Initial colon indicates main namespace rather than specified default
 240          # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
 241          if ( $dbkey !== '' && ':' == $dbkey[0] ) {
 242              $parts['namespace'] = NS_MAIN;
 243              $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
 244              $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
 245          }
 246  
 247          if ( $dbkey == '' ) {
 248              throw new MalformedTitleException( 'Empty title: ' . $text );
 249          }
 250  
 251          # Namespace or interwiki prefix
 252          $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
 253          do {
 254              $m = array();
 255              if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
 256                  $p = $m[1];
 257                  if ( ( $ns = $this->language->getNsIndex( $p ) ) !== false ) {
 258                      # Ordinary namespace
 259                      $dbkey = $m[2];
 260                      $parts['namespace'] = $ns;
 261                      # For Talk:X pages, check if X has a "namespace" prefix
 262                      if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
 263                          if ( $this->language->getNsIndex( $x[1] ) ) {
 264                              # Disallow Talk:File:x type titles...
 265                              throw new MalformedTitleException( 'Bad namespace prefix: ' . $text );
 266                          } elseif ( Interwiki::isValidInterwiki( $x[1] ) ) {
 267                              //TODO: get rid of global state!
 268                              # Disallow Talk:Interwiki:x type titles...
 269                              throw new MalformedTitleException( 'Interwiki prefix found in title: ' . $text );
 270                          }
 271                      }
 272                  } elseif ( Interwiki::isValidInterwiki( $p ) ) {
 273                      # Interwiki link
 274                      $dbkey = $m[2];
 275                      $parts['interwiki'] = $this->language->lc( $p );
 276  
 277                      # Redundant interwiki prefix to the local wiki
 278                      foreach ( $this->localInterwikis as $localIW ) {
 279                          if ( 0 == strcasecmp( $parts['interwiki'], $localIW ) ) {
 280                              if ( $dbkey == '' ) {
 281                                  # Empty self-links should point to the Main Page, to ensure
 282                                  # compatibility with cross-wiki transclusions and the like.
 283                                  $mainPage = Title::newMainPage();
 284                                  return array(
 285                                      'interwiki' => $mainPage->getInterwiki(),
 286                                      'local_interwiki' => true,
 287                                      'fragment' => $mainPage->getFragment(),
 288                                      'namespace' => $mainPage->getNamespace(),
 289                                      'dbkey' => $mainPage->getDBkey(),
 290                                      'user_case_dbkey' => $mainPage->getUserCaseDBKey()
 291                                  );
 292                              }
 293                              $parts['interwiki'] = '';
 294                              # local interwikis should behave like initial-colon links
 295                              $parts['local_interwiki'] = true;
 296  
 297                              # Do another namespace split...
 298                              continue 2;
 299                          }
 300                      }
 301  
 302                      # If there's an initial colon after the interwiki, that also
 303                      # resets the default namespace
 304                      if ( $dbkey !== '' && $dbkey[0] == ':' ) {
 305                          $parts['namespace'] = NS_MAIN;
 306                          $dbkey = substr( $dbkey, 1 );
 307                      }
 308                  }
 309                  # If there's no recognized interwiki or namespace,
 310                  # then let the colon expression be part of the title.
 311              }
 312              break;
 313          } while ( true );
 314  
 315          $fragment = strstr( $dbkey, '#' );
 316          if ( false !== $fragment ) {
 317              $parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
 318              $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
 319              # remove whitespace again: prevents "Foo_bar_#"
 320              # becoming "Foo_bar_"
 321              $dbkey = preg_replace( '/_*$/', '', $dbkey );
 322          }
 323  
 324          # Reject illegal characters.
 325          $rxTc = Title::getTitleInvalidRegex();
 326          if ( preg_match( $rxTc, $dbkey ) ) {
 327              throw new MalformedTitleException( 'Illegal characters found in title: ' . $text );
 328          }
 329  
 330          # Pages with "/./" or "/../" appearing in the URLs will often be un-
 331          # reachable due to the way web browsers deal with 'relative' URLs.
 332          # Also, they conflict with subpage syntax.  Forbid them explicitly.
 333          if (
 334              strpos( $dbkey, '.' ) !== false &&
 335              (
 336                  $dbkey === '.' || $dbkey === '..' ||
 337                  strpos( $dbkey, './' ) === 0 ||
 338                  strpos( $dbkey, '../' ) === 0 ||
 339                  strpos( $dbkey, '/./' ) !== false ||
 340                  strpos( $dbkey, '/../' ) !== false ||
 341                  substr( $dbkey, -2 ) == '/.' ||
 342                  substr( $dbkey, -3 ) == '/..'
 343              )
 344          ) {
 345              throw new MalformedTitleException( 'Bad title: ' . $text );
 346          }
 347  
 348          # Magic tilde sequences? Nu-uh!
 349          if ( strpos( $dbkey, '~~~' ) !== false ) {
 350              throw new MalformedTitleException( 'Bad title: ' . $text );
 351          }
 352  
 353          # Limit the size of titles to 255 bytes. This is typically the size of the
 354          # underlying database field. We make an exception for special pages, which
 355          # don't need to be stored in the database, and may edge over 255 bytes due
 356          # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
 357          if (
 358              ( $parts['namespace'] != NS_SPECIAL && strlen( $dbkey ) > 255 )
 359              || strlen( $dbkey ) > 512
 360          ) {
 361              throw new MalformedTitleException( 'Title too long: ' . substr( $dbkey, 0, 255 ) . '...' );
 362          }
 363  
 364          # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
 365          # and [[Foo]] point to the same place.  Don't force it for interwikis, since the
 366          # other site might be case-sensitive.
 367          $parts['user_case_dbkey'] = $dbkey;
 368          if ( $parts['interwiki'] === '' ) {
 369              $dbkey = Title::capitalize( $dbkey, $parts['namespace'] );
 370          }
 371  
 372          # Can't make a link to a namespace alone... "empty" local links can only be
 373          # self-links with a fragment identifier.
 374          if ( $dbkey == '' && $parts['interwiki'] === '' ) {
 375              if ( $parts['namespace'] != NS_MAIN ) {
 376                  throw new MalformedTitleException( 'Empty title: ' . $text );
 377              }
 378          }
 379  
 380          // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
 381          // IP names are not allowed for accounts, and can only be referring to
 382          // edits from the IP. Given '::' abbreviations and caps/lowercaps,
 383          // there are numerous ways to present the same IP. Having sp:contribs scan
 384          // them all is silly and having some show the edits and others not is
 385          // inconsistent. Same for talk/userpages. Keep them normalized instead.
 386          if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
 387              $dbkey = IP::sanitizeIP( $dbkey );
 388          }
 389  
 390          // Any remaining initial :s are illegal.
 391          if ( $dbkey !== '' && ':' == $dbkey[0] ) {
 392              throw new MalformedTitleException( 'Title must not start with a colon: ' . $text );
 393          }
 394  
 395          # Fill fields
 396          $parts['dbkey'] = $dbkey;
 397  
 398          return $parts;
 399      }
 400  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1