[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> syncFileBackend.php (source)

   1  <?php
   2  /**
   3   * Sync one file backend to another based on the journal of the latter.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Maintenance
  22   */
  23  
  24  require_once  __DIR__ . '/Maintenance.php';
  25  
  26  /**
  27   * Maintenance script that syncs one file backend to another based on
  28   * the journal of later.
  29   *
  30   * @ingroup Maintenance
  31   */
  32  class SyncFileBackend extends Maintenance {
  33  	public function __construct() {
  34          parent::__construct();
  35          $this->mDescription = "Sync one file backend with another using the journal";
  36          $this->addOption( 'src', 'Name of backend to sync from', true, true );
  37          $this->addOption( 'dst', 'Name of destination backend to sync', false, true );
  38          $this->addOption( 'start', 'Starting journal ID', false, true );
  39          $this->addOption( 'end', 'Ending journal ID', false, true );
  40          $this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
  41          $this->addOption( 'posdump', 'Just dump current journal position into the position dir.' );
  42          $this->addOption( 'postime', 'For position dumps, get the ID at this time', false, true );
  43          $this->addOption( 'backoff', 'Stop at entries younger than this age (sec).', false, true );
  44          $this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
  45          $this->setBatchSize( 50 );
  46      }
  47  
  48  	public function execute() {
  49          $src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
  50  
  51          $posDir = $this->getOption( 'posdir' );
  52          $posFile = $posDir ? $posDir . '/' . wfWikiID() : false;
  53  
  54          if ( $this->hasOption( 'posdump' ) ) {
  55              // Just dump the current position into the specified position dir
  56              if ( !$this->hasOption( 'posdir' ) ) {
  57                  $this->error( "Param posdir required!", 1 );
  58              }
  59              if ( $this->hasOption( 'postime' ) ) {
  60                  $id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
  61                  $this->output( "Requested journal position is $id.\n" );
  62              } else {
  63                  $id = (int)$src->getJournal()->getCurrentPosition();
  64                  $this->output( "Current journal position is $id.\n" );
  65              }
  66              if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
  67                  $this->output( "Saved journal position file.\n" );
  68              } else {
  69                  $this->output( "Could not save journal position file.\n" );
  70              }
  71              if ( $this->isQuiet() ) {
  72                  print $id; // give a single machine-readable number
  73              }
  74  
  75              return;
  76          }
  77  
  78          if ( !$this->hasOption( 'dst' ) ) {
  79              $this->error( "Param dst required!", 1 );
  80          }
  81          $dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );
  82  
  83          $start = $this->getOption( 'start', 0 );
  84          if ( !$start && $posFile && is_dir( $posDir ) ) {
  85              $start = is_file( $posFile )
  86                  ? (int)trim( file_get_contents( $posFile ) )
  87                  : 0;
  88              ++$start; // we already did this ID, start with the next one
  89              $startFromPosFile = true;
  90          } else {
  91              $startFromPosFile = false;
  92          }
  93  
  94          if ( $this->hasOption( 'backoff' ) ) {
  95              $time = time() - $this->getOption( 'backoff', 0 );
  96              $end = (int)$src->getJournal()->getPositionAtTime( $time );
  97          } else {
  98              $end = $this->getOption( 'end', INF );
  99          }
 100  
 101          $this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
 102          $this->output( "Starting journal position is $start.\n" );
 103          if ( is_finite( $end ) ) {
 104              $this->output( "Ending journal position is $end.\n" );
 105          }
 106  
 107          // Periodically update the position file
 108          $callback = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
 109              if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
 110                  file_put_contents( $posFile, $pos, LOCK_EX );
 111              }
 112          };
 113  
 114          // Actually sync the dest backend with the reference backend
 115          $lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );
 116  
 117          // Update the sync position file
 118          if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
 119              if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
 120                  $this->output( "Updated journal position file.\n" );
 121              } else {
 122                  $this->output( "Could not update journal position file.\n" );
 123              }
 124          }
 125  
 126          if ( $lastOKPos === false ) {
 127              if ( !$start ) {
 128                  $this->output( "No journal entries found.\n" );
 129              } else {
 130                  $this->output( "No new journal entries found.\n" );
 131              }
 132          } else {
 133              $this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
 134          }
 135  
 136          if ( $this->isQuiet() ) {
 137              print $lastOKPos; // give a single machine-readable number
 138          }
 139      }
 140  
 141      /**
 142       * Sync $dst backend to $src backend based on the $src logs given after $start.
 143       * Returns the journal entry ID this advanced to and handled (inclusive).
 144       *
 145       * @param FileBackend $src
 146       * @param FileBackend $dst
 147       * @param int $start Starting journal position
 148       * @param int $end Starting journal position
 149       * @param Closure $callback Callback to update any position file
 150       * @return int|bool Journal entry ID or false if there are none
 151       */
 152  	protected function syncBackends(
 153          FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
 154      ) {
 155          $lastOKPos = 0; // failed
 156          $first = true; // first batch
 157  
 158          if ( $start > $end ) { // sanity
 159              $this->error( "Error: given starting ID greater than ending ID.", 1 );
 160          }
 161  
 162          do {
 163              $limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go pass ending ID
 164              $this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );
 165  
 166              $entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
 167              $start = $next; // start where we left off next time
 168              if ( $first && !count( $entries ) ) {
 169                  return false; // nothing to do
 170              }
 171              $first = false;
 172  
 173              $lastPosInBatch = 0;
 174              $pathsInBatch = array(); // changed paths
 175              foreach ( $entries as $entry ) {
 176                  if ( $entry['op'] !== 'null' ) { // null ops are just for reference
 177                      $pathsInBatch[$entry['path']] = 1; // remove duplicates
 178                  }
 179                  $lastPosInBatch = $entry['id'];
 180              }
 181  
 182              $status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
 183              if ( $status->isOK() ) {
 184                  $lastOKPos = max( $lastOKPos, $lastPosInBatch );
 185                  $callback( $lastOKPos ); // update position file
 186              } else {
 187                  $this->error( print_r( $status->getErrorsArray(), true ) );
 188                  break; // no gaps; everything up to $lastPos must be OK
 189              }
 190  
 191              if ( !$start ) {
 192                  $this->output( "End of journal entries.\n" );
 193              }
 194          } while ( $start && $start <= $end );
 195  
 196          return $lastOKPos;
 197      }
 198  
 199      /**
 200       * Sync particular files of backend $src to the corresponding $dst backend files
 201       *
 202       * @param array $paths
 203       * @param FileBackend $src
 204       * @param FileBackend $dst
 205       * @return Status
 206       */
 207  	protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
 208          $status = Status::newGood();
 209          if ( !count( $paths ) ) {
 210              return $status; // nothing to do
 211          }
 212  
 213          // Source: convert internal backend names (FileBackendMultiWrite) to the public one
 214          $sPaths = $this->replaceNamePaths( $paths, $src );
 215          // Destination: get corresponding path name
 216          $dPaths = $this->replaceNamePaths( $paths, $dst );
 217  
 218          // Lock the live backend paths from modification
 219          $sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
 220          $eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
 221          if ( !$status->isOK() ) {
 222              return $status;
 223          }
 224  
 225          $src->preloadFileStat( array( 'srcs' => $sPaths, 'latest' => 1 ) );
 226          $dst->preloadFileStat( array( 'srcs' => $dPaths, 'latest' => 1 ) );
 227  
 228          $ops = array();
 229          $fsFiles = array();
 230          foreach ( $sPaths as $i => $sPath ) {
 231              $dPath = $dPaths[$i]; // destination
 232              $sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
 233              if ( $sExists === true ) { // exists in source
 234                  if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
 235                      continue; // avoid local copies for non-FS backends
 236                  }
 237                  // Note: getLocalReference() is fast for FS backends
 238                  $fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
 239                  if ( !$fsFile ) {
 240                      $this->error( "Unable to sync '$dPath': could not get local copy." );
 241                      $status->fatal( 'backend-fail-internal', $src->getName() );
 242  
 243                      return $status;
 244                  }
 245                  $fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
 246                  // Note: prepare() is usually fast for key/value backends
 247                  $status->merge( $dst->prepare( array(
 248                      'dir' => dirname( $dPath ), 'bypassReadOnly' => 1 ) ) );
 249                  if ( !$status->isOK() ) {
 250                      return $status;
 251                  }
 252                  $ops[] = array( 'op' => 'store',
 253                      'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
 254              } elseif ( $sExists === false ) { // does not exist in source
 255                  $ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
 256              } else { // error
 257                  $this->error( "Unable to sync '$dPath': could not stat file." );
 258                  $status->fatal( 'backend-fail-internal', $src->getName() );
 259  
 260                  return $status;
 261              }
 262          }
 263  
 264          $t_start = microtime( true );
 265          $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
 266          if ( !$status->isOK() ) {
 267              sleep( 10 ); // wait and retry copy again
 268              $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
 269          }
 270          $ellapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 );
 271          if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
 272              $this->output( "Synchronized these file(s) [{$ellapsed_ms}ms]:\n" .
 273                  implode( "\n", $dPaths ) . "\n" );
 274          }
 275  
 276          return $status;
 277      }
 278  
 279      /**
 280       * Substitute the backend name of storage paths with that of a given one
 281       *
 282       * @param array|string $paths List of paths or single string path
 283       * @param FileBackend $backend
 284       * @return array|string
 285       */
 286  	protected function replaceNamePaths( $paths, FileBackend $backend ) {
 287          return preg_replace(
 288              '!^mwstore://([^/]+)!',
 289              StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
 290              $paths // string or array
 291          );
 292      }
 293  
 294  	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
 295          return (
 296              ( $src->getFileSize( array( 'src' => $sPath ) )
 297                  === $dst->getFileSize( array( 'src' => $dPath ) ) // short-circuit
 298              ) && ( $src->getFileSha1Base36( array( 'src' => $sPath ) )
 299                  === $dst->getFileSha1Base36( array( 'src' => $dPath ) )
 300              )
 301          );
 302      }
 303  }
 304  
 305  $maintClass = "SyncFileBackend"; // name of the class declared above; presumably read by the file required on the next line -- confirm
 306  require_once RUN_MAINTENANCE_IF_MAIN; // constant is not defined in this file (Maintenance.php is the only other file required here)


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1