[ Index ]

PHP Cross Reference of Drupal 6 (gatewave)

title

Body

[close]

/sites/all/modules/diff/ -> DiffEngine.php (source)

   1  <?php
   2  // $Id: DiffEngine.php,v 1.4.2.1.2.4 2010/08/12 16:34:08 yhahn Exp $
   3  
   4  /**
   5   * @file
   6   * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
   7   *
   8   * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
   9   * You may copy this code freely under the conditions of the GPL.
  10   */
  11  
  12  define('USE_ASSERTS', FALSE);
  13  
  14  /**
  15   * @todo document
  16   * @private
  17   * @subpackage DifferenceEngine
  18   */
  19  class _DiffOp {
  20    var $type;
  21    var $orig;
  22    var $closing;
  23  
  24    function reverse() {
  25      trigger_error('pure virtual', E_USER_ERROR);
  26    }
  27  
  28    function norig() {
  29      return $this->orig ? sizeof($this->orig) : 0;
  30    }
  31  
  32    function nclosing() {
  33      return $this->closing ? sizeof($this->closing) : 0;
  34    }
  35  }
  36  
  37  /**
  38   * @todo document
  39   * @private
  40   * @subpackage DifferenceEngine
  41   */
  42  class _DiffOp_Copy extends _DiffOp {
  43    var $type = 'copy';
  44  
  45    function _DiffOp_Copy($orig, $closing = FALSE) {
  46      if (!is_array($closing)) {
  47        $closing = $orig;
  48      }
  49      $this->orig = $orig;
  50      $this->closing = $closing;
  51    }
  52  
  53    function reverse() {
  54      return new _DiffOp_Copy($this->closing, $this->orig);
  55    }
  56  }
  57  
  58  /**
  59   * @todo document
  60   * @private
  61   * @subpackage DifferenceEngine
  62   */
  63  class _DiffOp_Delete extends _DiffOp {
  64    var $type = 'delete';
  65  
  66    function _DiffOp_Delete($lines) {
  67      $this->orig = $lines;
  68      $this->closing = FALSE;
  69    }
  70  
  71    function reverse() {
  72      return new _DiffOp_Add($this->orig);
  73    }
  74  }
  75  
  76  /**
  77   * @todo document
  78   * @private
  79   * @subpackage DifferenceEngine
  80   */
  81  class _DiffOp_Add extends _DiffOp {
  82    var $type = 'add';
  83  
  84    function _DiffOp_Add($lines) {
  85      $this->closing = $lines;
  86      $this->orig = FALSE;
  87    }
  88  
  89    function reverse() {
  90      return new _DiffOp_Delete($this->closing);
  91    }
  92  }
  93  
  94  /**
  95   * @todo document
  96   * @private
  97   * @subpackage DifferenceEngine
  98   */
  99  class _DiffOp_Change extends _DiffOp {
 100    var $type = 'change';
 101  
 102    function _DiffOp_Change($orig, $closing) {
 103      $this->orig = $orig;
 104      $this->closing = $closing;
 105    }
 106  
 107    function reverse() {
 108      return new _DiffOp_Change($this->closing, $this->orig);
 109    }
 110  }
 111  
 112  
 113  /**
 114   * Class used internally by Diff to actually compute the diffs.
 115   *
 116   * The algorithm used here is mostly lifted from the perl module
 117   * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 118   *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 119   *
 120   * More ideas are taken from:
 121   *   http://www.ics.uci.edu/~eppstein/161/960229.html
 122   *
 123   * Some ideas are (and a bit of code) are from from analyze.c, from GNU
 124   * diffutils-2.7, which can be found at:
 125   *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 126   *
 127   * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 128   * are my own.
 129   *
 130   * Line length limits for robustness added by Tim Starling, 2005-08-31
 131   *
 132   * @author Geoffrey T. Dairiki, Tim Starling
 133   * @private
 134   * @subpackage DifferenceEngine
 135   */
 136  class _DiffEngine {
 137    function MAX_XREF_LENGTH() {
 138      return 10000;
 139    }
 140  
 141    function diff($from_lines, $to_lines) {
 142  
 143      $n_from = sizeof($from_lines);
 144      $n_to = sizeof($to_lines);
 145  
 146      $this->xchanged = $this->ychanged = array();
 147      $this->xv = $this->yv = array();
 148      $this->xind = $this->yind = array();
 149      unset($this->seq);
 150      unset($this->in_seq);
 151      unset($this->lcs);
 152  
 153      // Skip leading common lines.
 154      for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
 155        if ($from_lines[$skip] !== $to_lines[$skip]) {
 156          break;
 157        }
 158        $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
 159      }
 160      // Skip trailing common lines.
 161      $xi = $n_from;
 162      $yi = $n_to;
 163      for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
 164        if ($from_lines[$xi] !== $to_lines[$yi]) {
 165          break;
 166        }
 167        $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
 168      }
 169  
 170      // Ignore lines which do not exist in both files.
 171      for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
 172        $xhash[$this->_line_hash($from_lines[$xi])] = 1;
 173      }
 174  
 175      for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
 176        $line = $to_lines[$yi];
 177        if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
 178          continue;
 179        }
 180        $yhash[$this->_line_hash($line)] = 1;
 181        $this->yv[] = $line;
 182        $this->yind[] = $yi;
 183      }
 184      for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
 185        $line = $from_lines[$xi];
 186        if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
 187          continue;
 188        }
 189        $this->xv[] = $line;
 190        $this->xind[] = $xi;
 191      }
 192  
 193      // Find the LCS.
 194      $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
 195  
 196      // Merge edits when possible
 197      $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
 198      $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
 199  
 200      // Compute the edit operations.
 201      $edits = array();
 202      $xi = $yi = 0;
 203      while ($xi < $n_from || $yi < $n_to) {
 204        USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
 205        USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
 206  
 207        // Skip matching "snake".
 208        $copy = array();
 209        while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
 210          $copy[] = $from_lines[$xi++];
 211          ++$yi;
 212        }
 213        if ($copy) {
 214          $edits[] = new _DiffOp_Copy($copy);
 215        }
 216        // Find deletes & adds.
 217        $delete = array();
 218        while ($xi < $n_from && $this->xchanged[$xi]) {
 219          $delete[] = $from_lines[$xi++];
 220        }
 221        $add = array();
 222        while ($yi < $n_to && $this->ychanged[$yi]) {
 223          $add[] = $to_lines[$yi++];
 224        }
 225        if ($delete && $add) {
 226          $edits[] = new _DiffOp_Change($delete, $add);
 227        }
 228        elseif ($delete) {
 229          $edits[] = new _DiffOp_Delete($delete);
 230        }
 231        elseif ($add) {
 232          $edits[] = new _DiffOp_Add($add);
 233        }
 234      }
 235      return $edits;
 236    }
 237  
 238    /**
 239     * Returns the whole line if it's small enough, or the MD5 hash otherwise.
 240     */
 241    function _line_hash($line) {
 242      if (strlen($line) > $this->MAX_XREF_LENGTH()) {
 243        return md5( line);
 244      }
 245      else {
 246        return $line;
 247      }
 248    }
 249  
 250  
 251    /**
 252     * Divide the Largest Common Subsequence (LCS) of the sequences
 253     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
 254     * sized segments.
 255     *
 256     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
 257     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
 258     * sub sequences.  The first sub-sequence is contained in [X0, X1),
 259     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
 260     * that (X0, Y0) == (XOFF, YOFF) and
 261     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 262     *
 263     * This function assumes that the first lines of the specified portions
 264     * of the two files do not match, and likewise that the last lines do not
 265     * match.  The caller must trim matching lines from the beginning and end
 266     * of the portions it is going to specify.
 267     */
 268    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
 269      $flip = FALSE;
 270  
 271      if ($xlim - $xoff > $ylim - $yoff) {
 272        // Things seems faster (I'm not sure I understand why)
 273        // when the shortest sequence in X.
 274        $flip = TRUE;
 275        list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
 276      }
 277  
 278      if ($flip) {
 279        for ($i = $ylim - 1; $i >= $yoff; $i--) {
 280          $ymatches[$this->xv[$i]][] = $i;
 281        }
 282      }
 283      else {
 284        for ($i = $ylim - 1; $i >= $yoff; $i--) {
 285          $ymatches[$this->yv[$i]][] = $i;
 286        }
 287      }
 288      $this->lcs = 0;
 289      $this->seq[0]= $yoff - 1;
 290      $this->in_seq = array();
 291      $ymids[0] = array();
 292  
 293      $numer = $xlim - $xoff + $nchunks - 1;
 294      $x = $xoff;
 295      for ($chunk = 0; $chunk < $nchunks; $chunk++) {
 296        if ($chunk > 0) {
 297          for ($i = 0; $i <= $this->lcs; $i++) {
 298            $ymids[$i][$chunk-1] = $this->seq[$i];
 299          }
 300        }
 301  
 302        $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
 303        for ( ; $x < $x1; $x++) {
 304          $line = $flip ? $this->yv[$x] : $this->xv[$x];
 305          if (empty($ymatches[$line])) {
 306            continue;
 307          }
 308          $matches = $ymatches[$line];
 309          reset($matches);
 310          while (list ($junk, $y) = each($matches)) {
 311            if (empty($this->in_seq[$y])) {
 312              $k = $this->_lcs_pos($y);
 313              USE_ASSERTS && assert($k > 0);
 314              $ymids[$k] = $ymids[$k-1];
 315              break;
 316            }
 317          }
 318          while (list ($junk, $y) = each($matches)) {
 319            if ($y > $this->seq[$k-1]) {
 320              USE_ASSERTS && assert($y < $this->seq[$k]);
 321              // Optimization: this is a common case:
 322              // next match is just replacing previous match.
 323              $this->in_seq[$this->seq[$k]] = FALSE;
 324              $this->seq[$k] = $y;
 325              $this->in_seq[$y] = 1;
 326            }
 327            elseif (empty($this->in_seq[$y])) {
 328              $k = $this->_lcs_pos($y);
 329              USE_ASSERTS && assert($k > 0);
 330              $ymids[$k] = $ymids[$k-1];
 331            }
 332          }
 333        }
 334      }
 335  
 336      $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
 337      $ymid = $ymids[$this->lcs];
 338      for ($n = 0; $n < $nchunks - 1; $n++) {
 339        $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
 340        $y1 = $ymid[$n] + 1;
 341        $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
 342      }
 343      $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
 344  
 345      return array($this->lcs, $seps);
 346    }
 347  
 348    function _lcs_pos($ypos) {
 349  
 350      $end = $this->lcs;
 351      if ($end == 0 || $ypos > $this->seq[$end]) {
 352        $this->seq[++$this->lcs] = $ypos;
 353        $this->in_seq[$ypos] = 1;
 354        return $this->lcs;
 355      }
 356  
 357      $beg = 1;
 358      while ($beg < $end) {
 359        $mid = (int)(($beg + $end) / 2);
 360        if ($ypos > $this->seq[$mid]) {
 361          $beg = $mid + 1;
 362        }
 363        else {
 364          $end = $mid;
 365        }
 366      }
 367  
 368      USE_ASSERTS && assert($ypos != $this->seq[$end]);
 369  
 370      $this->in_seq[$this->seq[$end]] = FALSE;
 371      $this->seq[$end] = $ypos;
 372      $this->in_seq[$ypos] = 1;
 373      return $end;
 374    }
 375  
 376    /**
 377     * Find LCS of two sequences.
 378     *
 379     * The results are recorded in the vectors $this->{x,y}changed[], by
 380     * storing a 1 in the element for each line that is an insertion
 381     * or deletion (ie. is not in the LCS).
 382     *
 383     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
 384     *
 385     * Note that XLIM, YLIM are exclusive bounds.
 386     * All line numbers are origin-0 and discarded lines are not counted.
 387     */
 388    function _compareseq($xoff, $xlim, $yoff, $ylim) {
 389  
 390      // Slide down the bottom initial diagonal.
 391      while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
 392        ++$xoff;
 393        ++$yoff;
 394      }
 395  
 396      // Slide up the top initial diagonal.
 397      while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
 398        --$xlim;
 399        --$ylim;
 400      }
 401  
 402      if ($xoff == $xlim || $yoff == $ylim) {
 403        $lcs = 0;
 404      }
 405      else {
 406        // This is ad hoc but seems to work well.
 407        //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
 408        //$nchunks = max(2, min(8, (int)$nchunks));
 409        $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
 410        list($lcs, $seps)
 411        = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
 412      }
 413  
 414      if ($lcs == 0) {
 415        // X and Y sequences have no common subsequence:
 416        // mark all changed.
 417        while ($yoff < $ylim) {
 418          $this->ychanged[$this->yind[$yoff++]] = 1;
 419        }
 420        while ($xoff < $xlim) {
 421          $this->xchanged[$this->xind[$xoff++]] = 1;
 422        }
 423      }
 424      else {
 425        // Use the partitions to split this problem into subproblems.
 426        reset($seps);
 427        $pt1 = $seps[0];
 428        while ($pt2 = next($seps)) {
 429          $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
 430          $pt1 = $pt2;
 431        }
 432      }
 433    }
 434  
 435    /**
 436     * Adjust inserts/deletes of identical lines to join changes
 437     * as much as possible.
 438     *
 439     * We do something when a run of changed lines include a
 440     * line at one end and has an excluded, identical line at the other.
 441     * We are free to choose which identical line is included.
 442     * `compareseq' usually chooses the one at the beginning,
 443     * but usually it is cleaner to consider the following identical line
 444     * to be the "change".
 445     *
 446     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
 447     */
 448    function _shift_boundaries($lines, &$changed, $other_changed) {
 449      $i = 0;
 450      $j = 0;
 451  
 452      USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
 453      $len = sizeof($lines);
 454      $other_len = sizeof($other_changed);
 455  
 456      while (1) {
 457        /*
 458         * Scan forwards to find beginning of another run of changes.
 459         * Also keep track of the corresponding point in the other file.
 460         *
 461         * Throughout this code, $i and $j are adjusted together so that
 462         * the first $i elements of $changed and the first $j elements
 463         * of $other_changed both contain the same number of zeros
 464         * (unchanged lines).
 465         * Furthermore, $j is always kept so that $j == $other_len or
 466         * $other_changed[$j] == FALSE.
 467         */
 468        while ($j < $other_len && $other_changed[$j]) {
 469          $j++;
 470        }
 471        while ($i < $len && ! $changed[$i]) {
 472          USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
 473          $i++;
 474          $j++;
 475          while ($j < $other_len && $other_changed[$j]) {
 476            $j++;
 477          }
 478        }
 479  
 480        if ($i == $len) {
 481          break;
 482        }
 483        $start = $i;
 484  
 485        // Find the end of this run of changes.
 486        while (++$i < $len && $changed[$i]) {
 487          continue;
 488        }
 489  
 490        do {
 491          /*
 492           * Record the length of this run of changes, so that
 493           * we can later determine whether the run has grown.
 494           */
 495          $runlength = $i - $start;
 496  
 497          /*
 498           * Move the changed region back, so long as the
 499           * previous unchanged line matches the last changed one.
 500           * This merges with previous changed regions.
 501           */
 502          while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
 503            $changed[--$start] = 1;
 504            $changed[--$i] = FALSE;
 505            while ($start > 0 && $changed[$start - 1]) {
 506              $start--;
 507            }
 508            USE_ASSERTS && assert('$j > 0');
 509            while ($other_changed[--$j]) {
 510              continue;
 511            }
 512            USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
 513          }
 514  
 515          /*
 516           * Set CORRESPONDING to the end of the changed run, at the last
 517           * point where it corresponds to a changed run in the other file.
 518           * CORRESPONDING == LEN means no such point has been found.
 519           */
 520          $corresponding = $j < $other_len ? $i : $len;
 521  
 522          /*
 523           * Move the changed region forward, so long as the
 524           * first changed line matches the following unchanged one.
 525           * This merges with following changed regions.
 526           * Do this second, so that if there are no merges,
 527           * the changed region is moved forward as far as possible.
 528           */
 529          while ($i < $len && $lines[$start] == $lines[$i]) {
 530            $changed[$start++] = FALSE;
 531            $changed[$i++] = 1;
 532            while ($i < $len && $changed[$i]) {
 533              $i++;
 534            }
 535            USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
 536            $j++;
 537            if ($j < $other_len && $other_changed[$j]) {
 538              $corresponding = $i;
 539              while ($j < $other_len && $other_changed[$j]) {
 540                $j++;
 541              }
 542            }
 543          }
 544        } while ($runlength != $i - $start);
 545  
 546        /*
 547         * If possible, move the fully-merged run of changes
 548         * back to a corresponding run in the other file.
 549         */
 550        while ($corresponding < $i) {
 551          $changed[--$start] = 1;
 552          $changed[--$i] = 0;
 553          USE_ASSERTS && assert('$j > 0');
 554          while ($other_changed[--$j]) {
 555            continue;
 556          }
 557          USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
 558        }
 559      }
 560    }
 561  }
 562  
 563  /**
 564   * Class representing a 'diff' between two sequences of strings.
 565   * @todo document
 566   * @private
 567   * @subpackage DifferenceEngine
 568   */
 569  class Diff {
 570    var $edits;
 571  
 572    /**
 573     * Constructor.
 574     * Computes diff between sequences of strings.
 575     *
 576     * @param $from_lines array An array of strings.
 577     *      (Typically these are lines from a file.)
 578     * @param $to_lines array An array of strings.
 579     */
 580    function Diff($from_lines, $to_lines) {
 581      $eng = new _DiffEngine;
 582      $this->edits = $eng->diff($from_lines, $to_lines);
 583      //$this->_check($from_lines, $to_lines);
 584    }
 585  
 586    /**
 587     * Compute reversed Diff.
 588     *
 589     * SYNOPSIS:
 590     *
 591     *  $diff = new Diff($lines1, $lines2);
 592     *  $rev = $diff->reverse();
 593     * @return object A Diff object representing the inverse of the
 594     *          original diff.
 595     */
 596    function reverse() {
 597      $rev = $this;
 598      $rev->edits = array();
 599      foreach ($this->edits as $edit) {
 600        $rev->edits[] = $edit->reverse();
 601      }
 602      return $rev;
 603    }
 604  
 605    /**
 606     * Check for empty diff.
 607     *
 608     * @return bool True iff two sequences were identical.
 609     */
 610    function isEmpty() {
 611      foreach ($this->edits as $edit) {
 612        if ($edit->type != 'copy') {
 613          return FALSE;
 614        }
 615      }
 616      return TRUE;
 617    }
 618  
 619    /**
 620     * Compute the length of the Longest Common Subsequence (LCS).
 621     *
 622     * This is mostly for diagnostic purposed.
 623     *
 624     * @return int The length of the LCS.
 625     */
 626    function lcs() {
 627      $lcs = 0;
 628      foreach ($this->edits as $edit) {
 629        if ($edit->type == 'copy') {
 630          $lcs += sizeof($edit->orig);
 631        }
 632      }
 633      return $lcs;
 634    }
 635  
 636    /**
 637     * Get the original set of lines.
 638     *
 639     * This reconstructs the $from_lines parameter passed to the
 640     * constructor.
 641     *
 642     * @return array The original sequence of strings.
 643     */
 644    function orig() {
 645      $lines = array();
 646  
 647      foreach ($this->edits as $edit) {
 648        if ($edit->orig) {
 649          array_splice($lines, sizeof($lines), 0, $edit->orig);
 650        }
 651      }
 652      return $lines;
 653    }
 654  
 655    /**
 656     * Get the closing set of lines.
 657     *
 658     * This reconstructs the $to_lines parameter passed to the
 659     * constructor.
 660     *
 661     * @return array The sequence of strings.
 662     */
 663    function closing() {
 664      $lines = array();
 665  
 666      foreach ($this->edits as $edit) {
 667        if ($edit->closing) {
 668          array_splice($lines, sizeof($lines), 0, $edit->closing);
 669        }
 670      }
 671      return $lines;
 672    }
 673  
 674    /**
 675     * Check a Diff for validity.
 676     *
 677     * This is here only for debugging purposes.
 678     */
 679    function _check($from_lines, $to_lines) {
 680      if (serialize($from_lines) != serialize($this->orig())) {
 681        trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
 682      }
 683      if (serialize($to_lines) != serialize($this->closing())) {
 684        trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
 685      }
 686  
 687      $rev = $this->reverse();
 688      if (serialize($to_lines) != serialize($rev->orig())) {
 689        trigger_error("Reversed original doesn't match", E_USER_ERROR);
 690      }
 691      if (serialize($from_lines) != serialize($rev->closing())) {
 692        trigger_error("Reversed closing doesn't match", E_USER_ERROR);
 693      }
 694  
 695  
 696      $prevtype = 'none';
 697      foreach ($this->edits as $edit) {
 698        if ( $prevtype == $edit->type ) {
 699          trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
 700        }
 701        $prevtype = $edit->type;
 702      }
 703  
 704      $lcs = $this->lcs();
 705      trigger_error('Diff okay: LCS = '. $lcs, E_USER_NOTICE);
 706    }
 707  }
 708  
 709  /**
 710   * FIXME: bad name.
 711   * @todo document
 712   * @private
 713   * @subpackage DifferenceEngine
 714   */
 715  class MappedDiff extends Diff {
 716    /**
 717     * Constructor.
 718     *
 719     * Computes diff between sequences of strings.
 720     *
 721     * This can be used to compute things like
 722     * case-insensitve diffs, or diffs which ignore
 723     * changes in white-space.
 724     *
 725     * @param $from_lines array An array of strings.
 726     *  (Typically these are lines from a file.)
 727     *
 728     * @param $to_lines array An array of strings.
 729     *
 730     * @param $mapped_from_lines array This array should
 731     *  have the same size number of elements as $from_lines.
 732     *  The elements in $mapped_from_lines and
 733     *  $mapped_to_lines are what is actually compared
 734     *  when computing the diff.
 735     *
 736     * @param $mapped_to_lines array This array should
 737     *  have the same number of elements as $to_lines.
 738     */
 739    function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
 740  
 741      assert(sizeof($from_lines) == sizeof($mapped_from_lines));
 742      assert(sizeof($to_lines) == sizeof($mapped_to_lines));
 743  
 744      $this->Diff($mapped_from_lines, $mapped_to_lines);
 745  
 746      $xi = $yi = 0;
 747      for ($i = 0; $i < sizeof($this->edits); $i++) {
 748        $orig = &$this->edits[$i]->orig;
 749        if (is_array($orig)) {
 750          $orig = array_slice($from_lines, $xi, sizeof($orig));
 751          $xi += sizeof($orig);
 752        }
 753  
 754        $closing = &$this->edits[$i]->closing;
 755        if (is_array($closing)) {
 756          $closing = array_slice($to_lines, $yi, sizeof($closing));
 757          $yi += sizeof($closing);
 758        }
 759      }
 760    }
 761  }
 762  
 763  /**
 764   * A class to format Diffs
 765   *
 766   * This class formats the diff in classic diff format.
 767   * It is intended that this class be customized via inheritance,
 768   * to obtain fancier outputs.
 769   * @todo document
 770   * @private
 771   * @subpackage DifferenceEngine
 772   */
 773  class DiffFormatter {
 774    /**
 775     * Should a block header be shown?
 776     */
 777    var $show_header = TRUE;
 778  
 779    /**
 780     * Number of leading context "lines" to preserve.
 781     *
 782     * This should be left at zero for this class, but subclasses
 783     * may want to set this to other values.
 784     */
 785    var $leading_context_lines = 0;
 786  
 787    /**
 788     * Number of trailing context "lines" to preserve.
 789     *
 790     * This should be left at zero for this class, but subclasses
 791     * may want to set this to other values.
 792     */
 793    var $trailing_context_lines = 0;
 794  
 795    /**
 796     * Format a diff.
 797     *
 798     * @param $diff object A Diff object.
 799     * @return string The formatted output.
 800     */
 801    function format($diff) {
 802  
 803      $xi = $yi = 1;
 804      $block = FALSE;
 805      $context = array();
 806  
 807      $nlead = $this->leading_context_lines;
 808      $ntrail = $this->trailing_context_lines;
 809  
 810      $this->_start_diff();
 811  
 812      foreach ($diff->edits as $edit) {
 813        if ($edit->type == 'copy') {
 814          if (is_array($block)) {
 815            if (sizeof($edit->orig) <= $nlead + $ntrail) {
 816              $block[] = $edit;
 817            }
 818            else {
 819              if ($ntrail) {
 820                $context = array_slice($edit->orig, 0, $ntrail);
 821                $block[] = new _DiffOp_Copy($context);
 822              }
 823              $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
 824              $block = FALSE;
 825            }
 826          }
 827          $context = $edit->orig;
 828        }
 829        else {
 830          if (! is_array($block)) {
 831            $context = array_slice($context, sizeof($context) - $nlead);
 832            $x0 = $xi - sizeof($context);
 833            $y0 = $yi - sizeof($context);
 834            $block = array();
 835            if ($context) {
 836              $block[] = new _DiffOp_Copy($context);
 837            }
 838          }
 839          $block[] = $edit;
 840        }
 841  
 842        if ($edit->orig) {
 843          $xi += sizeof($edit->orig);
 844        }
 845        if ($edit->closing) {
 846          $yi += sizeof($edit->closing);
 847        }
 848      }
 849  
 850      if (is_array($block)) {
 851        $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
 852      }
 853      $end = $this->_end_diff();
 854      return $end;
 855    }
 856  
 857    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
 858      $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
 859      foreach ($edits as $edit) {
 860        if ($edit->type == 'copy') {
 861          $this->_context($edit->orig);
 862        }
 863        elseif ($edit->type == 'add') {
 864          $this->_added($edit->closing);
 865        }
 866        elseif ($edit->type == 'delete') {
 867          $this->_deleted($edit->orig);
 868        }
 869        elseif ($edit->type == 'change') {
 870          $this->_changed($edit->orig, $edit->closing);
 871        }
 872        else {
 873          trigger_error('Unknown edit type', E_USER_ERROR);
 874        }
 875      }
 876      $this->_end_block();
 877    }
 878  
 879    function _start_diff() {
 880      ob_start();
 881    }
 882  
 883    function _end_diff() {
 884      $val = ob_get_contents();
 885      ob_end_clean();
 886      return $val;
 887    }
 888  
 889    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
 890      if ($xlen > 1) {
 891        $xbeg .= "," . ($xbeg + $xlen - 1);
 892      }
 893      if ($ylen > 1) {
 894        $ybeg .= "," . ($ybeg + $ylen - 1);
 895      }
 896  
 897      return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
 898    }
 899  
 900    function _start_block($header) {
 901      if ($this->show_header) {
 902        echo $header . "\n";
 903      }
 904    }
 905  
 906    function _end_block() {
 907    }
 908  
 909    function _lines($lines, $prefix = ' ') {
 910      foreach ($lines as $line) {
 911        echo "$prefix $line\n";
 912      }
 913    }
 914  
 915    function _context($lines) {
 916      $this->_lines($lines);
 917    }
 918  
 919    function _added($lines) {
 920      $this->_lines($lines, '>');
 921    }
 922    function _deleted($lines) {
 923      $this->_lines($lines, '<');
 924    }
 925  
 926    function _changed($orig, $closing) {
 927      $this->_deleted($orig);
 928      echo "---\n";
 929      $this->_added($closing);
 930    }
 931  }
 932  
 933  
 934  /**
 935   *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
 936   *
 937   */
 938  
 939  define('NBSP', '&#160;');      // iso-8859-x non-breaking space.
 940  
 941  /**
 942   * @todo document
 943   * @private
 944   * @subpackage DifferenceEngine
 945   */
 946  class _HWLDF_WordAccumulator {
 947    function _HWLDF_WordAccumulator() {
 948      $this->_lines = array();
 949      $this->_line = '';
 950      $this->_group = '';
 951      $this->_tag = '';
 952    }
 953  
 954    function _flushGroup($new_tag) {
 955      if ($this->_group !== '') {
 956        if ($this->_tag == 'mark') {
 957          $this->_line .= '<span class="diffchange">'. check_plain($this->_group) .'</span>';
 958        }
 959        else {
 960          $this->_line .= check_plain($this->_group);
 961        }
 962      }
 963      $this->_group = '';
 964      $this->_tag = $new_tag;
 965    }
 966  
 967    function _flushLine($new_tag) {
 968      $this->_flushGroup($new_tag);
 969      if ($this->_line != '') {
 970        array_push($this->_lines, $this->_line);
 971      }
 972      else {
 973        // make empty lines visible by inserting an NBSP
 974        array_push($this->_lines, NBSP);
 975      }
 976      $this->_line = '';
 977    }
 978  
 979    function addWords($words, $tag = '') {
 980      if ($tag != $this->_tag) {
 981        $this->_flushGroup($tag);
 982      }
 983      foreach ($words as $word) {
 984        // new-line should only come as first char of word.
 985        if ($word == '') {
 986          continue;
 987        }
 988        if ($word[0] == "\n") {
 989          $this->_flushLine($tag);
 990          $word = substr($word, 1);
 991        }
 992        assert(!strstr($word, "\n"));
 993        $this->_group .= $word;
 994      }
 995    }
 996  
 997    function getLines() {
 998      $this->_flushLine('~done');
 999      return $this->_lines;
1000    }
1001  }
1002  
1003  /**
1004   * @todo document
1005   * @private
1006   * @subpackage DifferenceEngine
1007   */
1008  class WordLevelDiff extends MappedDiff {
1009    function MAX_LINE_LENGTH() {
1010      return 10000;
1011    }
1012  
1013    function WordLevelDiff($orig_lines, $closing_lines) {
1014      list($orig_words, $orig_stripped) = $this->_split($orig_lines);
1015      list($closing_words, $closing_stripped) = $this->_split($closing_lines);
1016  
1017      $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
1018    }
1019  
1020    function _split($lines) {
1021      $words = array();
1022      $stripped = array();
1023      $first = TRUE;
1024      foreach ($lines as $line) {
1025        // If the line is too long, just pretend the entire line is one big word
1026        // This prevents resource exhaustion problems
1027        if ( $first ) {
1028          $first = FALSE;
1029        }
1030        else {
1031          $words[] = "\n";
1032          $stripped[] = "\n";
1033        }
1034        if ( strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
1035          $words[] = $line;
1036          $stripped[] = $line;
1037        }
1038        else {
1039          if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
1040            $words = array_merge($words, $m[0]);
1041            $stripped = array_merge($stripped, $m[1]);
1042          }
1043        }
1044      }
1045      return array($words, $stripped);
1046    }
1047  
1048    function orig() {
1049      $orig = new _HWLDF_WordAccumulator;
1050  
1051      foreach ($this->edits as $edit) {
1052        if ($edit->type == 'copy') {
1053          $orig->addWords($edit->orig);
1054        }
1055        elseif ($edit->orig) {
1056          $orig->addWords($edit->orig, 'mark');
1057        }
1058      }
1059      $lines = $orig->getLines();
1060      return $lines;
1061    }
1062  
1063    function closing() {
1064      $closing = new _HWLDF_WordAccumulator;
1065  
1066      foreach ($this->edits as $edit) {
1067        if ($edit->type == 'copy') {
1068          $closing->addWords($edit->closing);
1069        }
1070        elseif ($edit->closing) {
1071          $closing->addWords($edit->closing, 'mark');
1072        }
1073      }
1074      $lines = $closing->getLines();
1075      return $lines;
1076    }
1077  }
1078  
1079  /**
1080   * Diff formatter which uses Drupal theme functions.
1081   * @private
1082   * @subpackage DifferenceEngine
1083   */
1084  class DrupalDiffFormatter extends DiffFormatter {
1085  
1086    var $rows;
1087  
1088    function DrupalDiffFormatter() {
1089      $this->leading_context_lines = 2;
1090      $this->trailing_context_lines = 2;
1091    }
1092  
1093    function _start_diff() {
1094      $this->rows = array();
1095    }
1096  
1097    function _end_diff() {
1098      return $this->rows;
1099    }
1100  
1101    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1102      return array(
1103        array(
1104          'data' => theme('diff_header_line', $xbeg),
1105          'colspan' => 2,
1106        ),
1107        array(
1108          'data' => theme('diff_header_line', $ybeg),
1109          'colspan' => 2,
1110        )
1111      );
1112    }
1113  
1114    function _start_block($header) {
1115      if ($this->show_header) {
1116        $this->rows[] = $header;
1117      }
1118    }
1119  
1120    function _end_block() {
1121    }
1122  
1123    function _lines($lines, $prefix=' ', $color='white') {
1124    }
1125  
1126    /**
1127     * Note: you should HTML-escape parameter before calling this.
1128     */
1129    function addedLine($line) {
1130      return array(
1131        '+',
1132        array(
1133          'data' => theme('diff_content_line', $line),
1134          'class' => 'diff-addedline',
1135        )
1136      );
1137    }
1138  
1139    /**
1140     * Note: you should HTML-escape parameter before calling this.
1141     */
1142    function deletedLine($line) {
1143      return array(
1144        '-',
1145        array(
1146          'data' => theme('diff_content_line', $line),
1147          'class' => 'diff-deletedline',
1148        )
1149      );
1150    }
1151  
1152    /**
1153     * Note: you should HTML-escape parameter before calling this.
1154     */
1155    function contextLine($line) {
1156      return array(
1157        '&nbsp;',
1158        array(
1159          'data' => theme('diff_content_line', $line),
1160          'class' => 'diff-context',
1161        )
1162      );
1163    }
1164  
1165    function emptyLine() {
1166      return array(
1167        '&nbsp;',
1168        theme('diff_empty_line', '&nbsp;'),
1169      );
1170    }
1171  
1172    function _added($lines) {
1173      foreach ($lines as $line) {
1174        $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
1175      }
1176    }
1177  
1178    function _deleted($lines) {
1179      foreach ($lines as $line) {
1180        $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
1181      }
1182    }
1183  
1184    function _context($lines) {
1185      foreach ($lines as $line) {
1186        $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
1187      }
1188    }
1189  
1190    function _changed($orig, $closing) {
1191      $diff = new WordLevelDiff($orig, $closing);
1192      $del = $diff->orig();
1193      $add = $diff->closing();
1194  
1195      // Notice that WordLevelDiff returns HTML-escaped output.
1196      // Hence, we will be calling addedLine/deletedLine without HTML-escaping.
1197  
1198      while ($line = array_shift($del)) {
1199        $aline = array_shift( $add );
1200        $this->rows[] = array_merge($this->deletedLine($line), $this->addedLine($aline));
1201      }
1202      foreach ($add as $line) {  // If any leftovers
1203        $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
1204      }
1205    }
1206  }
1207  
1208  /**
1209   * Drupal inline Diff formatter.
1210   * @private
1211   * @subpackage DifferenceEngine
1212   */
1213  class DrupalDiffInline {
1214    var $a;
1215    var $b;
1216  
1217    /**
1218     * Constructor.
1219     */
1220    function __construct($a, $b) {
1221      $this->a = $a;
1222      $this->b = $b;
1223    }
1224  
1225    /**
1226     * Render differences inline using HTML markup.
1227     */
1228    function render() {
1229      $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
1230      $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
1231      $diff = new Diff($a, $b);
1232      $diff->edits = $this->process_edits($diff->edits);
1233  
1234      // Assemble highlighted output
1235      $output = '';
1236      foreach ($diff->edits as $chunk) {
1237        switch ($chunk->type) {
1238          case 'copy':
1239            $output .= implode('', $chunk->closing);
1240            break;
1241          case 'delete':
1242            foreach ($chunk->orig as $i => $piece) {
1243              if (strpos($piece, '<') === 0 && substr($piece, strlen($piece) - 1) === '>') {
1244                $output .= $piece;
1245              }
1246              else {
1247                $output .= theme('diff_inline_chunk', $piece, $chunk->type);
1248              }
1249            }
1250            break;
1251          default:
1252            $chunk->closing = $this->process_chunk($chunk->closing);
1253            foreach ($chunk->closing as $i => $piece) {
1254              if ($piece === ' ' || (strpos($piece, '<') === 0 && substr($piece, strlen($piece) - 1) === '>' && strtolower(substr($piece, 1, 3)) != 'img')) {
1255                $output .= $piece;
1256              }
1257              else {
1258                $output .= theme('diff_inline_chunk', $piece, $chunk->type);
1259              }
1260            }
1261            break;
1262        }
1263      }
1264      return $output;
1265    }
1266  
1267    /**
1268     * Merge chunk segments between tag delimiters.
1269     */
1270    function process_chunk($chunk) {
1271      $processed = array();
1272      $j = 0;
1273      foreach ($chunk as $i => $piece) {
1274        $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
1275        if (strpos($piece, '<') === 0 && substr($piece, strlen($piece) - 1) === '>') {
1276          $processed[$j] = $piece;
1277          $j++;
1278        }
1279        else if (isset($next) && strpos($next, '<') === 0 && substr($next, strlen($next) - 1) === '>') {
1280          $processed[$j] .= $piece;
1281          $j++;
1282        }
1283        else {
1284          $processed[$j] .= $piece;
1285        }
1286      }
1287      return $processed;
1288    }
1289  
1290    /**
1291     * Merge copy and equivalent edits into intelligible chunks.
1292     */
1293    function process_edits($edits) {
1294      $processed = array();
1295      $current = array_shift($edits);
1296  
1297      // Make two passes -- first merge space delimiter copies back into their originals.
1298      while ($chunk = array_shift($edits)) {
1299        if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
1300          $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
1301          $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
1302        }
1303        else {
1304          $processed[] = $current;
1305          $current = $chunk;
1306        }
1307      }
1308      $processed[] = $current;
1309  
1310      // Initial setup
1311      $edits = $processed;
1312      $processed = array();
1313      $current = array_shift($edits);
1314  
1315      // Second, merge equivalent chunks into each other.
1316      while ($chunk = array_shift($edits)) {
1317        if ($current->type == $chunk->type) {
1318          $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
1319          $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
1320        }
1321        else {
1322          $processed[] = $current;
1323          $current = $chunk;
1324        }
1325      }
1326      $processed[] = $current;
1327  
1328      return $processed;
1329    }
1330  }


Generated: Thu Mar 24 11:18:33 2011 Cross-referenced by PHPXref 0.7