123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- <?php
- /*
- * This file is part of sebastian/diff.
- *
- * (c) Sebastian Bergmann <sebastian@phpunit.de>
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
- namespace SebastianBergmann\Diff;
- use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
- use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
- use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
- /**
- * Diff implementation.
- */
- class Differ
- {
- /**
- * @var string
- */
- private $header;
- /**
- * @var bool
- */
- private $showNonDiffLines;
- /**
- * @param string $header
- * @param bool $showNonDiffLines
- */
- public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
- {
- $this->header = $header;
- $this->showNonDiffLines = $showNonDiffLines;
- }
- /**
- * Returns the diff between two arrays or strings as string.
- *
- * @param array|string $from
- * @param array|string $to
- * @param LongestCommonSubsequence $lcs
- *
- * @return string
- */
- public function diff($from, $to, LongestCommonSubsequence $lcs = null)
- {
- $from = $this->validateDiffInput($from);
- $to = $this->validateDiffInput($to);
- $diff = $this->diffToArray($from, $to, $lcs);
- $old = $this->checkIfDiffInOld($diff);
- $start = isset($old[0]) ? $old[0] : 0;
- $end = \count($diff);
- if ($tmp = \array_search($end, $old)) {
- $end = $tmp;
- }
- return $this->getBuffer($diff, $old, $start, $end);
- }
- /**
- * Casts variable to string if it is not a string or array.
- *
- * @param mixed $input
- *
- * @return string
- */
- private function validateDiffInput($input)
- {
- if (!\is_array($input) && !\is_string($input)) {
- return (string) $input;
- }
- return $input;
- }
- /**
- * Takes input of the diff array and returns the old array.
- * Iterates through diff line by line,
- *
- * @param array $diff
- *
- * @return array
- */
- private function checkIfDiffInOld(array $diff)
- {
- $inOld = false;
- $i = 0;
- $old = array();
- foreach ($diff as $line) {
- if ($line[1] === 0 /* OLD */) {
- if ($inOld === false) {
- $inOld = $i;
- }
- } elseif ($inOld !== false) {
- if (($i - $inOld) > 5) {
- $old[$inOld] = $i - 1;
- }
- $inOld = false;
- }
- ++$i;
- }
- return $old;
- }
- /**
- * Generates buffer in string format, returning the patch.
- *
- * @param array $diff
- * @param array $old
- * @param int $start
- * @param int $end
- *
- * @return string
- */
- private function getBuffer(array $diff, array $old, $start, $end)
- {
- $buffer = $this->header;
- if (!isset($old[$start])) {
- $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
- ++$start;
- }
- for ($i = $start; $i < $end; $i++) {
- if (isset($old[$i])) {
- $i = $old[$i];
- $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
- } else {
- $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
- }
- }
- return $buffer;
- }
- /**
- * Gets individual buffer element.
- *
- * @param array $diff
- * @param string $buffer
- * @param int $diffIndex
- *
- * @return string
- */
- private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
- {
- if ($diff[$diffIndex][1] === 1 /* ADDED */) {
- $buffer .= '+' . $diff[$diffIndex][0] . "\n";
- } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) {
- $buffer .= '-' . $diff[$diffIndex][0] . "\n";
- } elseif ($this->showNonDiffLines === true) {
- $buffer .= ' ' . $diff[$diffIndex][0] . "\n";
- }
- return $buffer;
- }
- /**
- * Gets individual buffer element with opening.
- *
- * @param array $diff
- * @param string $buffer
- * @param int $diffIndex
- *
- * @return string
- */
- private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
- {
- if ($this->showNonDiffLines === true) {
- $buffer .= "@@ @@\n";
- }
- return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
- }
- /**
- * Returns the diff between two arrays or strings as array.
- *
- * Each array element contains two elements:
- * - [0] => mixed $token
- * - [1] => 2|1|0
- *
- * - 2: REMOVED: $token was removed from $from
- * - 1: ADDED: $token was added to $from
- * - 0: OLD: $token is not changed in $to
- *
- * @param array|string $from
- * @param array|string $to
- * @param LongestCommonSubsequence $lcs
- *
- * @return array
- */
- public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
- {
- if (\is_string($from)) {
- $fromMatches = $this->getNewLineMatches($from);
- $from = $this->splitStringByLines($from);
- } elseif (\is_array($from)) {
- $fromMatches = array();
- } else {
- throw new \InvalidArgumentException('"from" must be an array or string.');
- }
- if (\is_string($to)) {
- $toMatches = $this->getNewLineMatches($to);
- $to = $this->splitStringByLines($to);
- } elseif (\is_array($to)) {
- $toMatches = array();
- } else {
- throw new \InvalidArgumentException('"to" must be an array or string.');
- }
- list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);
- if ($lcs === null) {
- $lcs = $this->selectLcsImplementation($from, $to);
- }
- $common = $lcs->calculate(\array_values($from), \array_values($to));
- $diff = array();
- if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
- $diff[] = array(
- '#Warning: Strings contain different line endings!',
- 0
- );
- }
- foreach ($start as $token) {
- $diff[] = array($token, 0 /* OLD */);
- }
- \reset($from);
- \reset($to);
- foreach ($common as $token) {
- while (($fromToken = \reset($from)) !== $token) {
- $diff[] = array(\array_shift($from), 2 /* REMOVED */);
- }
- while (($toToken = \reset($to)) !== $token) {
- $diff[] = array(\array_shift($to), 1 /* ADDED */);
- }
- $diff[] = array($token, 0 /* OLD */);
- \array_shift($from);
- \array_shift($to);
- }
- while (($token = \array_shift($from)) !== null) {
- $diff[] = array($token, 2 /* REMOVED */);
- }
- while (($token = \array_shift($to)) !== null) {
- $diff[] = array($token, 1 /* ADDED */);
- }
- foreach ($end as $token) {
- $diff[] = array($token, 0 /* OLD */);
- }
- return $diff;
- }
- /**
- * Get new strings denoting new lines from a given string.
- *
- * @param string $string
- *
- * @return array
- */
- private function getNewLineMatches($string)
- {
- \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);
- return $stringMatches;
- }
- /**
- * Checks if input is string, if so it will split it line-by-line.
- *
- * @param string $input
- *
- * @return array
- */
- private function splitStringByLines($input)
- {
- return \preg_split('(\r\n|\r|\n)', $input);
- }
- /**
- * @param array $from
- * @param array $to
- *
- * @return LongestCommonSubsequence
- */
- private function selectLcsImplementation(array $from, array $to)
- {
- // We do not want to use the time-efficient implementation if its memory
- // footprint will probably exceed this value. Note that the footprint
- // calculation is only an estimation for the matrix and the LCS method
- // will typically allocate a bit more memory than this.
- $memoryLimit = 100 * 1024 * 1024;
- if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
- return new MemoryEfficientImplementation;
- }
- return new TimeEfficientImplementation;
- }
- /**
- * Calculates the estimated memory footprint for the DP-based method.
- *
- * @param array $from
- * @param array $to
- *
- * @return int|float
- */
- private function calculateEstimatedFootprint(array $from, array $to)
- {
- $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
- return $itemSize * \pow(\min(\count($from), \count($to)), 2);
- }
- /**
- * Returns true if line ends don't match on fromMatches and toMatches.
- *
- * @param array $fromMatches
- * @param array $toMatches
- *
- * @return bool
- */
- private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
- {
- return isset($fromMatches[0], $toMatches[0]) &&
- \count($fromMatches[0]) === \count($toMatches[0]) &&
- $fromMatches[0] !== $toMatches[0];
- }
- /**
- * @param array $from
- * @param array $to
- *
- * @return array
- */
- private static function getArrayDiffParted(array &$from, array &$to)
- {
- $start = array();
- $end = array();
- \reset($to);
- foreach ($from as $k => $v) {
- $toK = \key($to);
- if ($toK === $k && $v === $to[$k]) {
- $start[$k] = $v;
- unset($from[$k], $to[$k]);
- } else {
- break;
- }
- }
- \end($from);
- \end($to);
- do {
- $fromK = \key($from);
- $toK = \key($to);
- if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
- break;
- }
- \prev($from);
- \prev($to);
- $end = array($fromK => $from[$fromK]) + $end;
- unset($from[$fromK], $to[$toK]);
- } while (true);
- return array($from, $to, $start, $end);
- }
- }
|