| [ Index ] |
PHP Cross Reference of Wordpress 2.9.1 |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * $Horde: framework/Text_Diff/Diff/Engine/native.php,v 1.10 2008/01/04 10:27:53 jan Exp $ 4 * 5 * Class used internally by Text_Diff to actually compute the diffs. This 6 * class is implemented using native PHP code. 7 * 8 * The algorithm used here is mostly lifted from the perl module 9 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 10 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 11 * 12 * More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html 13 * 14 * Some ideas (and a bit of code) are taken from analyze.c, of GNU 15 * diffutils-2.7, which can be found at: 16 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 17 * 18 * Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from 19 * Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this 20 * code was written by him, and is used/adapted with his permission. 21 * 22 * Copyright 2004-2008 The Horde Project (http://www.horde.org/) 23 * 24 * See the enclosed file COPYING for license information (LGPL). If you did 25 * not receive this file, see http://opensource.org/licenses/lgpl-license.php. 26 * 27 * @author Geoffrey T. Dairiki <dairiki@dairiki.org> 28 * @package Text_Diff 29 */ 30 class Text_Diff_Engine_native { 31 32 function diff($from_lines, $to_lines) 33 { 34 array_walk($from_lines, array('Text_Diff', 'trimNewlines')); 35 array_walk($to_lines, array('Text_Diff', 'trimNewlines')); 36 37 $n_from = count($from_lines); 38 $n_to = count($to_lines); 39 40 $this->xchanged = $this->ychanged = array(); 41 $this->xv = $this->yv = array(); 42 $this->xind = $this->yind = array(); 43 unset($this->seq); 44 unset($this->in_seq); 45 unset($this->lcs); 46 47 // Skip leading common lines. 48 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { 49 if ($from_lines[$skip] !== $to_lines[$skip]) { 50 break; 51 } 52 $this->xchanged[$skip] = $this->ychanged[$skip] = false; 53 } 54 55 // Skip trailing common lines. 56 $xi = $n_from; $yi = $n_to; 57 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { 58 if ($from_lines[$xi] !== $to_lines[$yi]) { 59 break; 60 } 61 $this->xchanged[$xi] = $this->ychanged[$yi] = false; 62 } 63 64 // Ignore lines which do not exist in both files. 65 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 66 $xhash[$from_lines[$xi]] = 1; 67 } 68 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { 69 $line = $to_lines[$yi]; 70 if (($this->ychanged[$yi] = empty($xhash[$line]))) { 71 continue; 72 } 73 $yhash[$line] = 1; 74 $this->yv[] = $line; 75 $this->yind[] = $yi; 76 } 77 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 78 $line = $from_lines[$xi]; 79 if (($this->xchanged[$xi] = empty($yhash[$line]))) { 80 continue; 81 } 82 $this->xv[] = $line; 83 $this->xind[] = $xi; 84 } 85 86 // Find the LCS. 87 $this->_compareseq(0, count($this->xv), 0, count($this->yv)); 88 89 // Merge edits when possible. 90 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged); 91 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged); 92 93 // Compute the edit operations. 94 $edits = array(); 95 $xi = $yi = 0; 96 while ($xi < $n_from || $yi < $n_to) { 97 assert($yi < $n_to || $this->xchanged[$xi]); 98 assert($xi < $n_from || $this->ychanged[$yi]); 99 100 // Skip matching "snake". 101 $copy = array(); 102 while ($xi < $n_from && $yi < $n_to 103 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { 104 $copy[] = $from_lines[$xi++]; 105 ++$yi; 106 } 107 if ($copy) { 108 $edits[] = &new Text_Diff_Op_copy($copy); 109 } 110 111 // Find deletes & adds. 112 $delete = array(); 113 while ($xi < $n_from && $this->xchanged[$xi]) { 114 $delete[] = $from_lines[$xi++]; 115 } 116 117 $add = array(); 118 while ($yi < $n_to && $this->ychanged[$yi]) { 119 $add[] = $to_lines[$yi++]; 120 } 121 122 if ($delete && $add) { 123 $edits[] = &new Text_Diff_Op_change($delete, $add); 124 } elseif ($delete) { 125 $edits[] = &new Text_Diff_Op_delete($delete); 126 } elseif ($add) { 127 $edits[] = &new Text_Diff_Op_add($add); 128 } 129 } 130 131 return $edits; 132 } 133 134 /** 135 * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, 136 * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized 137 * segments. 138 * 139 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of 140 * NCHUNKS+1 (X, Y) indexes giving the diving points between sub 141 * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), 142 * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == 143 * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 144 * 145 * This function assumes that the first lines of the specified portions of 146 * the two files do not match, and likewise that the last lines do not 147 * match. The caller must trim matching lines from the beginning and end 148 * of the portions it is going to specify. 149 */ 150 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) 151 { 152 $flip = false; 153 154 if ($xlim - $xoff > $ylim - $yoff) { 155 /* Things seems faster (I'm not sure I understand why) when the 156 * shortest sequence is in X. */ 157 $flip = true; 158 list ($xoff, $xlim, $yoff, $ylim) 159 = array($yoff, $ylim, $xoff, $xlim); 160 } 161 162 if ($flip) { 163 for ($i = $ylim - 1; $i >= $yoff; $i--) { 164 $ymatches[$this->xv[$i]][] = $i; 165 } 166 } else { 167 for ($i = $ylim - 1; $i >= $yoff; $i--) { 168 $ymatches[$this->yv[$i]][] = $i; 169 } 170 } 171 172 $this->lcs = 0; 173 $this->seq[0]= $yoff - 1; 174 $this->in_seq = array(); 175 $ymids[0] = array(); 176 177 $numer = $xlim - $xoff + $nchunks - 1; 178 $x = $xoff; 179 for ($chunk = 0; $chunk < $nchunks; $chunk++) { 180 if ($chunk > 0) { 181 for ($i = 0; $i <= $this->lcs; $i++) { 182 $ymids[$i][$chunk - 1] = $this->seq[$i]; 183 } 184 } 185 186 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks); 187 for (; $x < $x1; $x++) { 188 $line = $flip ? $this->yv[$x] : $this->xv[$x]; 189 if (empty($ymatches[$line])) { 190 continue; 191 } 192 $matches = $ymatches[$line]; 193 reset($matches); 194 while (list(, $y) = each($matches)) { 195 if (empty($this->in_seq[$y])) { 196 $k = $this->_lcsPos($y); 197 assert($k > 0); 198 $ymids[$k] = $ymids[$k - 1]; 199 break; 200 } 201 } 202 while (list(, $y) = each($matches)) { 203 if ($y > $this->seq[$k - 1]) { 204 assert($y <= $this->seq[$k]); 205 /* Optimization: this is a common case: next match is 206 * just replacing previous match. */ 207 $this->in_seq[$this->seq[$k]] = false; 208 $this->seq[$k] = $y; 209 $this->in_seq[$y] = 1; 210 } elseif (empty($this->in_seq[$y])) { 211 $k = $this->_lcsPos($y); 212 assert($k > 0); 213 $ymids[$k] = $ymids[$k - 1]; 214 } 215 } 216 } 217 } 218 219 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 220 $ymid = $ymids[$this->lcs]; 221 for ($n = 0; $n < $nchunks - 1; $n++) { 222 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 223 $y1 = $ymid[$n] + 1; 224 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 225 } 226 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 227 228 return array($this->lcs, $seps); 229 } 230 231 function _lcsPos($ypos) 232 { 233 $end = $this->lcs; 234 if ($end == 0 || $ypos > $this->seq[$end]) { 235 $this->seq[++$this->lcs] = $ypos; 236 $this->in_seq[$ypos] = 1; 237 return $this->lcs; 238 } 239 240 $beg = 1; 241 while ($beg < $end) { 242 $mid = (int)(($beg + $end) / 2); 243 if ($ypos > $this->seq[$mid]) { 244 $beg = $mid + 1; 245 } else { 246 $end = $mid; 247 } 248 } 249 250 assert($ypos != $this->seq[$end]); 251 252 $this->in_seq[$this->seq[$end]] = false; 253 $this->seq[$end] = $ypos; 254 $this->in_seq[$ypos] = 1; 255 return $end; 256 } 257 258 /** 259 * Finds LCS of two sequences. 260 * 261 * The results are recorded in the vectors $this->{x,y}changed[], by 262 * storing a 1 in the element for each line that is an insertion or 263 * deletion (ie. is not in the LCS). 264 * 265 * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. 266 * 267 * Note that XLIM, YLIM are exclusive bounds. All line numbers are 268 * origin-0 and discarded lines are not counted. 269 */ 270 function _compareseq ($xoff, $xlim, $yoff, $ylim) 271 { 272 /* Slide down the bottom initial diagonal. */ 273 while ($xoff < $xlim && $yoff < $ylim 274 && $this->xv[$xoff] == $this->yv[$yoff]) { 275 ++$xoff; 276 ++$yoff; 277 } 278 279 /* Slide up the top initial diagonal. */ 280 while ($xlim > $xoff && $ylim > $yoff 281 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { 282 --$xlim; 283 --$ylim; 284 } 285 286 if ($xoff == $xlim || $yoff == $ylim) { 287 $lcs = 0; 288 } else { 289 /* This is ad hoc but seems to work well. $nchunks = 290 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks = 291 * max(2,min(8,(int)$nchunks)); */ 292 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 293 list($lcs, $seps) 294 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); 295 } 296 297 if ($lcs == 0) { 298 /* X and Y sequences have no common subsequence: mark all 299 * changed. */ 300 while ($yoff < $ylim) { 301 $this->ychanged[$this->yind[$yoff++]] = 1; 302 } 303 while ($xoff < $xlim) { 304 $this->xchanged[$this->xind[$xoff++]] = 1; 305 } 306 } else { 307 /* Use the partitions to split this problem into subproblems. */ 308 reset($seps); 309 $pt1 = $seps[0]; 310 while ($pt2 = next($seps)) { 311 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 312 $pt1 = $pt2; 313 } 314 } 315 } 316 317 /** 318 * Adjusts inserts/deletes of identical lines to join changes as much as 319 * possible. 320 * 321 * We do something when a run of changed lines include a line at one end 322 * and has an excluded, identical line at the other. We are free to 323 * choose which identical line is included. `compareseq' usually chooses 324 * the one at the beginning, but usually it is cleaner to consider the 325 * following identical line to be the "change". 326 * 327 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 328 */ 329 function _shiftBoundaries($lines, &$changed, $other_changed) 330 { 331 $i = 0; 332 $j = 0; 333 334 assert('count($lines) == count($changed)'); 335 $len = count($lines); 336 $other_len = count($other_changed); 337 338 while (1) { 339 /* Scan forward to find the beginning of another run of 340 * changes. Also keep track of the corresponding point in the 341 * other file. 342 * 343 * Throughout this code, $i and $j are adjusted together so that 344 * the first $i elements of $changed and the first $j elements of 345 * $other_changed both contain the same number of zeros (unchanged 346 * lines). 347 * 348 * Furthermore, $j is always kept so that $j == $other_len or 349 * $other_changed[$j] == false. */ 350 while ($j < $other_len && $other_changed[$j]) { 351 $j++; 352 } 353 354 while ($i < $len && ! $changed[$i]) { 355 assert('$j < $other_len && ! $other_changed[$j]'); 356 $i++; $j++; 357 while ($j < $other_len && $other_changed[$j]) { 358 $j++; 359 } 360 } 361 362 if ($i == $len) { 363 break; 364 } 365 366 $start = $i; 367 368 /* Find the end of this run of changes. */ 369 while (++$i < $len && $changed[$i]) { 370 continue; 371 } 372 373 do { 374 /* Record the length of this run of changes, so that we can 375 * later determine whether the run has grown. */ 376 $runlength = $i - $start; 377 378 /* Move the changed region back, so long as the previous 379 * unchanged line matches the last changed one. This merges 380 * with previous changed regions. */ 381 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { 382 $changed[--$start] = 1; 383 $changed[--$i] = false; 384 while ($start > 0 && $changed[$start - 1]) { 385 $start--; 386 } 387 assert('$j > 0'); 388 while ($other_changed[--$j]) { 389 continue; 390 } 391 assert('$j >= 0 && !$other_changed[$j]'); 392 } 393 394 /* Set CORRESPONDING to the end of the changed run, at the 395 * last point where it corresponds to a changed run in the 396 * other file. CORRESPONDING == LEN means no such point has 397 * been found. */ 398 $corresponding = $j < $other_len ? $i : $len; 399 400 /* Move the changed region forward, so long as the first 401 * changed line matches the following unchanged one. This 402 * merges with following changed regions. Do this second, so 403 * that if there are no merges, the changed region is moved 404 * forward as far as possible. */ 405 while ($i < $len && $lines[$start] == $lines[$i]) { 406 $changed[$start++] = false; 407 $changed[$i++] = 1; 408 while ($i < $len && $changed[$i]) { 409 $i++; 410 } 411 412 assert('$j < $other_len && ! $other_changed[$j]'); 413 $j++; 414 if ($j < $other_len && $other_changed[$j]) { 415 $corresponding = $i; 416 while ($j < $other_len && $other_changed[$j]) { 417 $j++; 418 } 419 } 420 } 421 } while ($runlength != $i - $start); 422 423 /* If possible, move the fully-merged run of changes back to a 424 * corresponding run in the other file. */ 425 while ($corresponding < $i) { 426 $changed[--$start] = 1; 427 $changed[--$i] = 0; 428 assert('$j > 0'); 429 while ($other_changed[--$j]) { 430 continue; 431 } 432 assert('$j >= 0 && !$other_changed[$j]'); 433 } 434 } 435 } 436 437 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Fri Jan 8 00:19:48 2010 | Cross-referenced by PHPXref 0.7 |