Differ.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. <?php
  2. /*
  3. * This file is part of sebastian/diff.
  4. *
  5. * (c) Sebastian Bergmann <sebastian@phpunit.de>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace SebastianBergmann\Diff;
  11. use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
  12. use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
  13. use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
  14. /**
  15. * Diff implementation.
  16. */
  17. class Differ
  18. {
  19. /**
  20. * @var string
  21. */
  22. private $header;
  23. /**
  24. * @var bool
  25. */
  26. private $showNonDiffLines;
  27. /**
  28. * @param string $header
  29. * @param bool $showNonDiffLines
  30. */
  31. public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
  32. {
  33. $this->header = $header;
  34. $this->showNonDiffLines = $showNonDiffLines;
  35. }
  36. /**
  37. * Returns the diff between two arrays or strings as string.
  38. *
  39. * @param array|string $from
  40. * @param array|string $to
  41. * @param LongestCommonSubsequence $lcs
  42. *
  43. * @return string
  44. */
  45. public function diff($from, $to, LongestCommonSubsequence $lcs = null)
  46. {
  47. $from = $this->validateDiffInput($from);
  48. $to = $this->validateDiffInput($to);
  49. $diff = $this->diffToArray($from, $to, $lcs);
  50. $old = $this->checkIfDiffInOld($diff);
  51. $start = isset($old[0]) ? $old[0] : 0;
  52. $end = \count($diff);
  53. if ($tmp = \array_search($end, $old)) {
  54. $end = $tmp;
  55. }
  56. return $this->getBuffer($diff, $old, $start, $end);
  57. }
  58. /**
  59. * Casts variable to string if it is not a string or array.
  60. *
  61. * @param mixed $input
  62. *
  63. * @return string
  64. */
  65. private function validateDiffInput($input)
  66. {
  67. if (!\is_array($input) && !\is_string($input)) {
  68. return (string) $input;
  69. }
  70. return $input;
  71. }
  72. /**
  73. * Takes input of the diff array and returns the old array.
  74. * Iterates through diff line by line,
  75. *
  76. * @param array $diff
  77. *
  78. * @return array
  79. */
  80. private function checkIfDiffInOld(array $diff)
  81. {
  82. $inOld = false;
  83. $i = 0;
  84. $old = array();
  85. foreach ($diff as $line) {
  86. if ($line[1] === 0 /* OLD */) {
  87. if ($inOld === false) {
  88. $inOld = $i;
  89. }
  90. } elseif ($inOld !== false) {
  91. if (($i - $inOld) > 5) {
  92. $old[$inOld] = $i - 1;
  93. }
  94. $inOld = false;
  95. }
  96. ++$i;
  97. }
  98. return $old;
  99. }
  100. /**
  101. * Generates buffer in string format, returning the patch.
  102. *
  103. * @param array $diff
  104. * @param array $old
  105. * @param int $start
  106. * @param int $end
  107. *
  108. * @return string
  109. */
  110. private function getBuffer(array $diff, array $old, $start, $end)
  111. {
  112. $buffer = $this->header;
  113. if (!isset($old[$start])) {
  114. $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
  115. ++$start;
  116. }
  117. for ($i = $start; $i < $end; $i++) {
  118. if (isset($old[$i])) {
  119. $i = $old[$i];
  120. $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
  121. } else {
  122. $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
  123. }
  124. }
  125. return $buffer;
  126. }
  127. /**
  128. * Gets individual buffer element.
  129. *
  130. * @param array $diff
  131. * @param string $buffer
  132. * @param int $diffIndex
  133. *
  134. * @return string
  135. */
  136. private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
  137. {
  138. if ($diff[$diffIndex][1] === 1 /* ADDED */) {
  139. $buffer .= '+' . $diff[$diffIndex][0] . "\n";
  140. } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) {
  141. $buffer .= '-' . $diff[$diffIndex][0] . "\n";
  142. } elseif ($this->showNonDiffLines === true) {
  143. $buffer .= ' ' . $diff[$diffIndex][0] . "\n";
  144. }
  145. return $buffer;
  146. }
  147. /**
  148. * Gets individual buffer element with opening.
  149. *
  150. * @param array $diff
  151. * @param string $buffer
  152. * @param int $diffIndex
  153. *
  154. * @return string
  155. */
  156. private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
  157. {
  158. if ($this->showNonDiffLines === true) {
  159. $buffer .= "@@ @@\n";
  160. }
  161. return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
  162. }
  163. /**
  164. * Returns the diff between two arrays or strings as array.
  165. *
  166. * Each array element contains two elements:
  167. * - [0] => mixed $token
  168. * - [1] => 2|1|0
  169. *
  170. * - 2: REMOVED: $token was removed from $from
  171. * - 1: ADDED: $token was added to $from
  172. * - 0: OLD: $token is not changed in $to
  173. *
  174. * @param array|string $from
  175. * @param array|string $to
  176. * @param LongestCommonSubsequence $lcs
  177. *
  178. * @return array
  179. */
  180. public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
  181. {
  182. if (\is_string($from)) {
  183. $fromMatches = $this->getNewLineMatches($from);
  184. $from = $this->splitStringByLines($from);
  185. } elseif (\is_array($from)) {
  186. $fromMatches = array();
  187. } else {
  188. throw new \InvalidArgumentException('"from" must be an array or string.');
  189. }
  190. if (\is_string($to)) {
  191. $toMatches = $this->getNewLineMatches($to);
  192. $to = $this->splitStringByLines($to);
  193. } elseif (\is_array($to)) {
  194. $toMatches = array();
  195. } else {
  196. throw new \InvalidArgumentException('"to" must be an array or string.');
  197. }
  198. list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);
  199. if ($lcs === null) {
  200. $lcs = $this->selectLcsImplementation($from, $to);
  201. }
  202. $common = $lcs->calculate(\array_values($from), \array_values($to));
  203. $diff = array();
  204. if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
  205. $diff[] = array(
  206. '#Warning: Strings contain different line endings!',
  207. 0
  208. );
  209. }
  210. foreach ($start as $token) {
  211. $diff[] = array($token, 0 /* OLD */);
  212. }
  213. \reset($from);
  214. \reset($to);
  215. foreach ($common as $token) {
  216. while (($fromToken = \reset($from)) !== $token) {
  217. $diff[] = array(\array_shift($from), 2 /* REMOVED */);
  218. }
  219. while (($toToken = \reset($to)) !== $token) {
  220. $diff[] = array(\array_shift($to), 1 /* ADDED */);
  221. }
  222. $diff[] = array($token, 0 /* OLD */);
  223. \array_shift($from);
  224. \array_shift($to);
  225. }
  226. while (($token = \array_shift($from)) !== null) {
  227. $diff[] = array($token, 2 /* REMOVED */);
  228. }
  229. while (($token = \array_shift($to)) !== null) {
  230. $diff[] = array($token, 1 /* ADDED */);
  231. }
  232. foreach ($end as $token) {
  233. $diff[] = array($token, 0 /* OLD */);
  234. }
  235. return $diff;
  236. }
  237. /**
  238. * Get new strings denoting new lines from a given string.
  239. *
  240. * @param string $string
  241. *
  242. * @return array
  243. */
  244. private function getNewLineMatches($string)
  245. {
  246. \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);
  247. return $stringMatches;
  248. }
  249. /**
  250. * Checks if input is string, if so it will split it line-by-line.
  251. *
  252. * @param string $input
  253. *
  254. * @return array
  255. */
  256. private function splitStringByLines($input)
  257. {
  258. return \preg_split('(\r\n|\r|\n)', $input);
  259. }
  260. /**
  261. * @param array $from
  262. * @param array $to
  263. *
  264. * @return LongestCommonSubsequence
  265. */
  266. private function selectLcsImplementation(array $from, array $to)
  267. {
  268. // We do not want to use the time-efficient implementation if its memory
  269. // footprint will probably exceed this value. Note that the footprint
  270. // calculation is only an estimation for the matrix and the LCS method
  271. // will typically allocate a bit more memory than this.
  272. $memoryLimit = 100 * 1024 * 1024;
  273. if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
  274. return new MemoryEfficientImplementation;
  275. }
  276. return new TimeEfficientImplementation;
  277. }
  278. /**
  279. * Calculates the estimated memory footprint for the DP-based method.
  280. *
  281. * @param array $from
  282. * @param array $to
  283. *
  284. * @return int|float
  285. */
  286. private function calculateEstimatedFootprint(array $from, array $to)
  287. {
  288. $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
  289. return $itemSize * \pow(\min(\count($from), \count($to)), 2);
  290. }
  291. /**
  292. * Returns true if line ends don't match on fromMatches and toMatches.
  293. *
  294. * @param array $fromMatches
  295. * @param array $toMatches
  296. *
  297. * @return bool
  298. */
  299. private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
  300. {
  301. return isset($fromMatches[0], $toMatches[0]) &&
  302. \count($fromMatches[0]) === \count($toMatches[0]) &&
  303. $fromMatches[0] !== $toMatches[0];
  304. }
  305. /**
  306. * @param array $from
  307. * @param array $to
  308. *
  309. * @return array
  310. */
  311. private static function getArrayDiffParted(array &$from, array &$to)
  312. {
  313. $start = array();
  314. $end = array();
  315. \reset($to);
  316. foreach ($from as $k => $v) {
  317. $toK = \key($to);
  318. if ($toK === $k && $v === $to[$k]) {
  319. $start[$k] = $v;
  320. unset($from[$k], $to[$k]);
  321. } else {
  322. break;
  323. }
  324. }
  325. \end($from);
  326. \end($to);
  327. do {
  328. $fromK = \key($from);
  329. $toK = \key($to);
  330. if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
  331. break;
  332. }
  333. \prev($from);
  334. \prev($to);
  335. $end = array($fromK => $from[$fromK]) + $end;
  336. unset($from[$fromK], $to[$toK]);
  337. } while (true);
  338. return array($from, $to, $start, $end);
  339. }
  340. }