1 | 5 | simandl | <?php |
2 | | | /** |
3 | | | * General API for generating and formatting diffs - the differences between |
4 | | | * two sequences of strings. |
5 | | | * |
6 | | | * The original PHP version of this code was written by Geoffrey T. Dairiki |
7 | | | * <dairiki@dairiki.org>, and is used/adapted with his permission. |
8 | | | * |
9 | | | * $Horde: framework/Text_Diff/Diff.php,v 1.11.2.11 2008/02/24 10:57:46 jan Exp $ |
10 | | | * |
11 | | | * Copyright 2004 Geoffrey T. Dairiki <dairiki@dairiki.org> |
12 | | | * Copyright 2004-2008 The Horde Project (http://www.horde.org/) |
13 | | | * |
14 | | | * See the enclosed file COPYING for license information (LGPL). If you did |
15 | | | * not receive this file, see http://opensource.org/licenses/lgpl-license.php. |
16 | | | * |
17 | | | * @package Text_Diff |
18 | | | * @author Geoffrey T. Dairiki <dairiki@dairiki.org> |
19 | | | */ |
class Text_Diff {

    /**
     * Array of changes: the value returned by the diffing engine's diff()
     * method. The methods of this class treat each element as an edit
     * object exposing ->orig, ->final, norig(), nfinal() and reverse().
     *
     * @var array
     */
    public $_edits;

    /**
     * Computes diffs between sequences of strings.
     *
     * Two calling conventions are accepted:
     *  - new Text_Diff($engine, $params)
     *  - new Text_Diff($from_lines, $to_lines)   (legacy; engine 'auto')
     *
     * @param string $engine  Name of the diffing engine to use.  'auto'
     *                        will automatically select the best.
     * @param array $params   Parameters to pass to the diffing engine.
     *                        Normally an array of two arrays, each
     *                        containing the lines from a file.
     */
    public function __construct($engine, $params)
    {
        // Backward compatibility workaround: a non-string first argument
        // means the caller used the old ($from_lines, $to_lines) form.
        if (!is_string($engine)) {
            $params = array($engine, $params);
            $engine = 'auto';
        }

        if ($engine === 'auto') {
            $engine = extension_loaded('xdiff') ? 'xdiff' : 'native';
        } else {
            // Strip any directory components so the engine name cannot
            // steer the require_once below outside Text/Diff/Engine/.
            $engine = basename($engine);
        }

        require_once 'Text/Diff/Engine/' . $engine . '.php';
        $class = 'Text_Diff_Engine_' . $engine;
        $diff_engine = new $class();

        $this->_edits = call_user_func_array(array($diff_engine, 'diff'), $params);
    }

    /**
     * PHP 4 style constructor, kept so that code calling
     * parent::Text_Diff() or $obj->Text_Diff() keeps working.
     *
     * Delegates with self:: (not $this->) so that a subclass defining its
     * own __construct() with a different signature is not re-entered.
     *
     * @param string $engine  See __construct().
     * @param array $params   See __construct().
     */
    public function Text_Diff($engine, $params)
    {
        self::__construct($engine, $params);
    }

    /**
     * Returns the array of differences.
     *
     * @return array  The edit list stored in $_edits.
     */
    public function getDiff()
    {
        return $this->_edits;
    }

    /**
     * Returns the number of new (added) lines in a given diff.
     *
     * Sums nfinal() over every 'add' and 'change' edit.
     *
     * @since Text_Diff 1.1.0
     * @since Horde 3.2
     *
     * @return integer The number of new lines
     */
    public function countAddedLines()
    {
        $count = 0;
        foreach ($this->_edits as $edit) {
            if ($edit instanceof Text_Diff_Op_add ||
                $edit instanceof Text_Diff_Op_change) {
                $count += $edit->nfinal();
            }
        }
        return $count;
    }

    /**
     * Returns the number of deleted (removed) lines in a given diff.
     *
     * Sums norig() over every 'delete' and 'change' edit.
     *
     * @since Text_Diff 1.1.0
     * @since Horde 3.2
     *
     * @return integer The number of deleted lines
     */
    public function countDeletedLines()
    {
        $count = 0;
        foreach ($this->_edits as $edit) {
            if ($edit instanceof Text_Diff_Op_delete ||
                $edit instanceof Text_Diff_Op_change) {
                $count += $edit->norig();
            }
        }
        return $count;
    }

    /**
     * Computes a reversed diff.
     *
     * Example:
     * <code>
     * $diff = new Text_Diff($lines1, $lines2);
     * $rev = $diff->reverse();
     * </code>
     *
     * @return Text_Diff  A Diff object representing the inverse of the
     *                    original diff.  Note that we purposely don't return a
     *                    reference here, since this essentially is a clone()
     *                    method.
     */
    public function reverse()
    {
        // Work on a copy so that this object's own edit list is untouched.
        $rev = clone $this;
        $rev->_edits = array();
        foreach ($this->_edits as $edit) {
            $rev->_edits[] = $edit->reverse();
        }
        return $rev;
    }

    /**
     * Checks for an empty diff.
     *
     * @return boolean  True if every edit is a 'copy' operation (this
     *                  includes the case of no edits at all), i.e. the two
     *                  sequences were identical.
     */
    public function isEmpty()
    {
        foreach ($this->_edits as $edit) {
            if (!($edit instanceof Text_Diff_Op_copy)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes the length of the Longest Common Subsequence (LCS).
     *
     * This is mostly for diagnostic purposes.  The value is the total
     * number of lines held by 'copy' edits.
     *
     * @return integer The length of the LCS.
     */
    public function lcs()
    {
        $lcs = 0;
        foreach ($this->_edits as $edit) {
            if ($edit instanceof Text_Diff_Op_copy) {
                $lcs += count($edit->orig);
            }
        }
        return $lcs;
    }

    /**
     * Gets the original set of lines.
     *
     * Rebuilds the original ("from") sequence by concatenating the ->orig
     * part of every edit that has a non-empty one.
     *
     * @return array The original sequence of strings.
     */
    public function getOriginal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            if ($edit->orig) {
                // Append in place; the result is re-indexed from 0.
                array_splice($lines, count($lines), 0, $edit->orig);
            }
        }
        return $lines;
    }

    /**
     * Gets the final set of lines.
     *
     * Rebuilds the final ("to") sequence by concatenating the ->final part
     * of every edit that has a non-empty one.
     *
     * @return array The sequence of strings.
     */
    public function getFinal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            if ($edit->final) {
                // Append in place; the result is re-indexed from 0.
                array_splice($lines, count($lines), 0, $edit->final);
            }
        }
        return $lines;
    }

    /**
     * Removes every line-feed and carriage-return character from a line of
     * text (anywhere in the string, not only at its end).  This is meant to
     * be used with array_walk().
     *
     * Declared static because it uses no instance state, so it can be
     * passed as a callback either through an instance or through the class
     * name.
     *
     * @static
     *
     * @param string $line  The line to trim; modified in place.
     * @param integer $key  The index of the line in the array. Not used.
     */
    public static function trimNewlines(&$line, $key)
    {
        $line = str_replace(array("\n", "\r"), '', $line);
    }

    /**
     * Determines the location of the system temporary directory.
     *
     * Lookup order: the upload_tmp_dir ini directive, the TMPDIR
     * environment variable, then the first existing directory from a fixed
     * list of well-known locations.  Only the entries of the fixed list are
     * checked with is_dir(); a value taken from the ini directive or the
     * environment is returned as is.
     *
     * Intended for internal use; it stays publicly callable because the
     * original declaration carried no visibility restriction.
     *
     * @static
     *
     * @return string  A directory name which can be used for temp files.
     *                 Returns false if one could not be found.
     */
    public static function _getTempDir()
    {
        $tmp_locations = array('/tmp', '/var/tmp', 'c:\WUTemp', 'c:\temp',
                               'c:\windows\temp', 'c:\winnt\temp');

        /* Try PHP's upload_tmp_dir directive. The cast turns a false
         * ("not available") result into an empty string. */
        $tmp = (string) ini_get('upload_tmp_dir');

        /* Otherwise, try to determine the TMPDIR environment variable. */
        if ($tmp === '') {
            $tmp = (string) getenv('TMPDIR');
        }

        /* If we still cannot determine a value, then cycle through a list of
         * preset possibilities. */
        while ($tmp === '' && count($tmp_locations)) {
            $tmp_check = array_shift($tmp_locations);
            if (@is_dir($tmp_check)) {
                $tmp = $tmp_check;
            }
        }

        /* If it is still empty, we have failed, so return false; otherwise
         * return the directory determined. */
        return $tmp !== '' ? $tmp : false;
    }

    /**
     * Checks a diff for validity.
     *
     * This is here only for debugging purposes.  Raises an E_USER_ERROR if
     * the diff (or its reverse) does not reproduce the given sequences, or
     * if two consecutive edits are of the same class.
     *
     * @param array $from_lines  The expected original sequence.
     * @param array $to_lines    The expected final sequence.
     *
     * @return boolean  True when execution reaches the end of the checks.
     */
    public function _check($from_lines, $to_lines)
    {
        if (serialize($from_lines) != serialize($this->getOriginal())) {
            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
        }
        if (serialize($to_lines) != serialize($this->getFinal())) {
            trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
        }

        $rev = $this->reverse();
        if (serialize($to_lines) != serialize($rev->getOriginal())) {
            trigger_error("Reversed original doesn't match", E_USER_ERROR);
        }
        if (serialize($from_lines) != serialize($rev->getFinal())) {
            trigger_error("Reversed final doesn't match", E_USER_ERROR);
        }

        // Adjacent edits of the same class could have been merged into one.
        $prevtype = null;
        foreach ($this->_edits as $edit) {
            $type = get_class($edit);
            if ($prevtype === $type) {
                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
            }
            $prevtype = $type;
        }

        return true;
    }

}
287 | | | |
/**
 * A diff that is computed on one pair of sequences (the "mapped" lines) but
 * whose edits are then rewritten to carry the corresponding lines of another
 * pair (the real lines).
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 */
class Text_MappedDiff extends Text_Diff {

    /**
     * Computes a diff between sequences of strings.
     *
     * This can be used to compute things like case-insensitive diffs, or
     * diffs which ignore changes in white-space.
     *
     * @param array $from_lines         An array of strings.
     * @param array $to_lines           An array of strings.
     * @param array $mapped_from_lines  This array should have the same
     *                                  number of elements as $from_lines.
     *                                  The elements in $mapped_from_lines
     *                                  and $mapped_to_lines are what is
     *                                  actually compared when computing the
     *                                  diff.
     * @param array $mapped_to_lines    This array should have the same number
     *                                  of elements as $to_lines.
     */
    public function __construct($from_lines, $to_lines,
                                $mapped_from_lines, $mapped_to_lines)
    {
        assert(count($from_lines) == count($mapped_from_lines));
        assert(count($to_lines) == count($mapped_to_lines));

        // Run the parent's initialisation through its class-named method;
        // the two arrays select its legacy ($from, $to) calling convention.
        parent::Text_Diff($mapped_from_lines, $mapped_to_lines);

        // Walk the edits in order and swap each mapped slice for the slice
        // of real lines at the same position.  $xi / $yi track how far into
        // $from_lines / $to_lines we have consumed so far.  Edits are
        // objects, so assigning to their properties updates $this->_edits.
        $xi = $yi = 0;
        foreach ($this->_edits as $edit) {
            if (is_array($edit->orig)) {
                $edit->orig = array_slice($from_lines, $xi, count($edit->orig));
                $xi += count($edit->orig);
            }

            if (is_array($edit->final)) {
                $edit->final = array_slice($to_lines, $yi, count($edit->final));
                $yi += count($edit->final);
            }
        }
    }

    /**
     * PHP 4 style constructor, kept so that code invoking the class-named
     * method directly keeps working.  See __construct() for the parameters.
     */
    public function Text_MappedDiff($from_lines, $to_lines,
                                    $mapped_from_lines, $mapped_to_lines)
    {
        self::__construct($from_lines, $to_lines,
                          $mapped_from_lines, $mapped_to_lines);
    }

}
335 | | | |
/**
 * Base class for a single edit operation.  Holds the lines on the original
 * side ($orig) and on the final side ($final) of the edit.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op {

    /** Lines on the original side of this edit. */
    public $orig;

    /** Lines on the final side of this edit. */
    public $final;

    /**
     * Placeholder for the inverse operation: this base implementation only
     * raises an E_USER_ERROR.
     */
    public function &reverse()
    {
        trigger_error('Abstract method', E_USER_ERROR);
    }

    /**
     * Number of lines on the original side.
     *
     * @return integer  0 when $orig is empty or otherwise falsy.
     */
    public function norig()
    {
        if (!$this->orig) {
            return 0;
        }
        return count($this->orig);
    }

    /**
     * Number of lines on the final side.
     *
     * @return integer  0 when $final is empty or otherwise falsy.
     */
    public function nfinal()
    {
        if (!$this->final) {
            return 0;
        }
        return count($this->final);
    }

}
363 | | | |
/**
 * Edit operation for a run of lines present on both sides.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_copy extends Text_Diff_Op {

    /**
     * @param array $orig         Lines on the original side.
     * @param array|false $final  Lines on the final side; anything that is
     *                            not an array means "same as $orig".
     */
    public function __construct($orig, $final = false)
    {
        if (!is_array($final)) {
            $final = $orig;
        }
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for code that calls the class-named
     * method directly.  See __construct() for the parameters.
     */
    public function Text_Diff_Op_copy($orig, $final = false)
    {
        self::__construct($orig, $final);
    }

    /**
     * Returns the inverse operation: a copy with both sides swapped.
     *
     * @return Text_Diff_Op_copy
     */
    public function &reverse()
    {
        // Held in a local because the method returns by reference.
        $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
        return $reverse;
    }

}
388 | | | |
/**
 * Edit operation for lines that exist only on the original side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_delete extends Text_Diff_Op {

    /**
     * @param array $lines  The deleted lines; stored as the original side,
     *                      with the final side set to false.
     */
    public function __construct($lines)
    {
        $this->orig = $lines;
        $this->final = false;
    }

    /**
     * PHP 4 style constructor, kept for code that calls the class-named
     * method directly.  See __construct() for the parameter.
     */
    public function Text_Diff_Op_delete($lines)
    {
        self::__construct($lines);
    }

    /**
     * Returns the inverse operation: an add of the same lines.
     *
     * @return Text_Diff_Op_add
     */
    public function &reverse()
    {
        // Held in a local because the method returns by reference.
        $reverse = new Text_Diff_Op_add($this->orig);
        return $reverse;
    }

}
410 | | | |
/**
 * Edit operation for lines that exist only on the final side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_add extends Text_Diff_Op {

    /**
     * @param array $lines  The added lines; stored as the final side, with
     *                      the original side set to false.
     */
    public function __construct($lines)
    {
        $this->final = $lines;
        $this->orig = false;
    }

    /**
     * PHP 4 style constructor, kept for code that calls the class-named
     * method directly.  See __construct() for the parameter.
     */
    public function Text_Diff_Op_add($lines)
    {
        self::__construct($lines);
    }

    /**
     * Returns the inverse operation: a delete of the same lines.
     *
     * @return Text_Diff_Op_delete
     */
    public function &reverse()
    {
        // Held in a local because the method returns by reference.
        $reverse = new Text_Diff_Op_delete($this->final);
        return $reverse;
    }

}
432 | | | |
/**
 * Edit operation replacing one run of lines with another.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_change extends Text_Diff_Op {

    /**
     * @param array $orig   Lines on the original side.
     * @param array $final  Lines on the final side.
     */
    public function __construct($orig, $final)
    {
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for code that calls the class-named
     * method directly.  See __construct() for the parameters.
     */
    public function Text_Diff_Op_change($orig, $final)
    {
        self::__construct($orig, $final);
    }

    /**
     * Returns the inverse operation: a change with both sides swapped.
     *
     * @return Text_Diff_Op_change
     */
    public function &reverse()
    {
        // Held in a local because the method returns by reference.
        $reverse = new Text_Diff_Op_change($this->final, $this->orig);
        return $reverse;
    }

}