Parser.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. use Symfony\Component\Yaml\Exception\ParseException;
  12. use Symfony\Component\Yaml\Tag\TaggedValue;
  13. /**
  14. * Parser parses YAML strings to convert them to PHP arrays.
  15. *
  16. * @author Fabien Potencier <fabien@symfony.com>
  17. *
  18. * @final
  19. */
  20. class Parser
  21. {
  22. const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)';
  23. const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?';
  24. private $filename;
  25. private $offset = 0;
  26. private $totalNumberOfLines;
  27. private $lines = array();
  28. private $currentLineNb = -1;
  29. private $currentLine = '';
  30. private $refs = array();
  31. private $skippedLineNumbers = array();
  32. private $locallySkippedLineNumbers = array();
  /**
   * Parses a YAML file into a PHP value.
   *
   * The file name is stored on the parser while parsing so that exceptions raised by parse()
   * can carry it, and it is cleared again afterwards whether parsing succeeded or not.
   *
   * @param string $filename The path to the YAML file to be parsed
   * @param int    $flags    A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The YAML converted to a PHP value
   *
   * @throws ParseException If the file could not be read or the YAML is not valid
   */
  public function parseFile(string $filename, int $flags = 0)
  {
      if (!is_file($filename)) {
          throw new ParseException(sprintf('File "%s" does not exist.', $filename));
      }

      if (!is_readable($filename)) {
          throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
      }

      // The file can still disappear or become unreadable between the checks above and the read;
      // without this guard the "false" result would be coerced to an empty document.
      $content = file_get_contents($filename);
      if (false === $content) {
          throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
      }

      $this->filename = $filename;

      try {
          return $this->parse($content, $flags);
      } finally {
          $this->filename = null;
      }
  }
  /**
   * Parses a YAML string to a PHP value.
   *
   * The input must be valid UTF-8. When mbstring function overloading is active for string
   * functions, the internal encoding is switched to UTF-8 for the duration of the parse and
   * restored afterwards. All per-document parser state is reset once parsing ends, so the same
   * Parser instance can be reused for another document.
   *
   * @param string $value A YAML string
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed A PHP value
   *
   * @throws ParseException If the YAML is not valid
   */
  public function parse(string $value, int $flags = 0)
  {
      if (false === preg_match('//u', $value)) {
          throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename);
      }

      $this->refs = array();

      $mbEncoding = null;
      $data = null;

      if (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) {
          $mbEncoding = mb_internal_encoding();
          mb_internal_encoding('UTF-8');
      }

      try {
          $data = $this->doParse($value, $flags);
      } finally {
          if (null !== $mbEncoding) {
              mb_internal_encoding($mbEncoding);
          }

          $this->lines = array();
          $this->currentLine = '';
          $this->refs = array();
          $this->skippedLineNumbers = array();
          $this->locallySkippedLineNumbers = array();
          // doParse() only computes the total when it is still null, so it has to be cleared here;
          // otherwise a reused parser would keep the line count of the previous document.
          $this->totalNumberOfLines = null;
      }

      return $data;
  }
  /**
   * Exposes the line number the parser is currently positioned on.
   *
   * @internal
   *
   * @return int The value computed by getRealCurrentLineNb()
   */
  public function getLastLineNumberBeforeDeprecation(): int
  {
      $lineNumber = $this->getRealCurrentLineNb();

      return $lineNumber;
  }
  /**
   * Parses a YAML string line by line into a PHP value.
   *
   * The string is cleaned up, split into lines and walked from the top. Lines starting with "-"
   * become sequence items, "key: value" lines become mapping entries (including "<<" merge keys
   * and "&anchor" references), and anything else is handed to the inline parser as a one-liner
   * or, as a last resort, as a multi-line string. Nested blocks are delegated to parseBlock().
   *
   * @param string $value The YAML string to parse
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The parsed value, or null when nothing was parsed
   *
   * @throws ParseException If the YAML is not valid
   */
  private function doParse(string $value, int $flags)
  {
      $this->currentLineNb = -1;
      $this->currentLine = '';
      $value = $this->cleanup($value);
      $this->lines = explode("\n", $value);
      $this->locallySkippedLineNumbers = array();

      if (null === $this->totalNumberOfLines) {
          $this->totalNumberOfLines = count($this->lines);
      }

      if (!$this->moveToNextLine()) {
          return null;
      }

      $data = array();
      $context = null;
      $allowOverwrite = false;

      while ($this->isCurrentLineEmpty()) {
          if (!$this->moveToNextLine()) {
              return null;
          }
      }

      // Resolves the tag and returns if end of the document
      if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) {
          return new TaggedValue($tag, '');
      }

      do {
          if ($this->isCurrentLineEmpty()) {
              continue;
          }

          // tab?
          if ("\t" === $this->currentLine[0]) {
              throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename);

          $isRef = $mergeNode = false;
          if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
              if ($context && 'mapping' == $context) {
                  throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'sequence';

              if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                  $isRef = $matches['ref'];
                  $values['value'] = $matches['value'];
              }

              if (isset($values['value'][1]) && '?' === $values['value'][0] && ' ' === $values['value'][1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // array
              if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                  $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags);
              } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) {
                  // getNextEmbedBlock() may return null (no nested block); parseBlock() requires a string
                  $data[] = new TaggedValue(
                      $subTag,
                      $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags)
                  );
              } else {
                  if (isset($values['leadspaces'])
                      && self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches)
                  ) {
                      // this is a compact notation element, add to next block and parse
                      $block = $values['value'];
                      if ($this->isNextLineIndented()) {
                          $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + strlen($values['leadspaces']) + 1);
                      }

                      $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags);
                  } else {
                      $data[] = $this->parseValue($values['value'], $flags, $context);
                  }
              }
              if ($isRef) {
                  $this->refs[$isRef] = end($data);
              }
          } elseif (
              self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(\s++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
              && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
          ) {
              if ($context && 'sequence' == $context) {
                  // report the line relative to the whole document, like every other error of this method
                  throw new ParseException('You cannot define a mapping item when in a sequence', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'mapping';

              try {
                  $key = Inline::parseScalar($values['key']);
              } catch (ParseException $e) {
                  $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                  $e->setSnippet($this->currentLine);

                  throw $e;
              }

              // Only string and integer keys get past this point
              if (!is_string($key) && !is_int($key)) {
                  throw new ParseException(sprintf('%s keys are not supported. Quote your evaluable mapping keys instead.', is_numeric($key) ? 'Numeric' : 'Non-string'), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if ('<<' === $key && (!isset($values['value']) || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) {
                  $mergeNode = true;
                  $allowOverwrite = true;
                  if (isset($values['value'][0]) && '*' === $values['value'][0]) {
                      $refName = substr(rtrim($values['value']), 1);
                      if (!array_key_exists($refName, $this->refs)) {
                          throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $refValue = $this->refs[$refName];

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) {
                          $refValue = (array) $refValue;
                      }

                      if (!is_array($refValue)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $data += $refValue; // array union
                  } else {
                      if (isset($values['value']) && '' !== $values['value']) {
                          $value = $values['value'];
                      } else {
                          // a missing block is parsed as an empty document and rejected by the array check below
                          $value = $this->getNextEmbedBlock() ?? '';
                      }
                      $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value, $flags);

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) {
                          $parsed = (array) $parsed;
                      }

                      if (!is_array($parsed)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      if (isset($parsed[0])) {
                          // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
                          // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
                          // in the sequence override keys specified in later mapping nodes.
                          foreach ($parsed as $parsedItem) {
                              if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) {
                                  $parsedItem = (array) $parsedItem;
                              }

                              if (!is_array($parsedItem)) {
                                  throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename);
                              }

                              $data += $parsedItem; // array union
                          }
                      } else {
                          // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
                          // current mapping, unless the key already exists in it.
                          $data += $parsed; // array union
                      }
                  }
              } elseif ('<<' !== $key && isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u', $values['value'], $matches)) {
                  $isRef = $matches['ref'];
                  $values['value'] = $matches['value'];
              }

              $subTag = null;
              if ($mergeNode) {
                  // Merge keys
              } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
                  // hash
                  // if next line is less indented or equal, then it means that the current value is null
                  if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                      // Spec: Keys MUST be unique; first one wins.
                      // But overwriting is allowed when a merge node is used in current block.
                      if ($allowOverwrite || !isset($data[$key])) {
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, '');
                          } else {
                              $data[$key] = null;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }
                  } else {
                      // remember the parsed line number here in case we need it to provide some contexts in error messages below
                      $realCurrentLineNbKey = $this->getRealCurrentLineNb();
                      $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock() ?? '', $flags);
                      if ('<<' === $key) {
                          // "<<: &anchor" followed by a nested block: register the anchor, then merge the block
                          $this->refs[$refMatches['ref']] = $value;

                          if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) {
                              $value = (array) $value;
                          }

                          // the array union below is only defined for arrays
                          if (!is_array($value)) {
                              throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                          }

                          $data += $value;
                      } elseif ($allowOverwrite || !isset($data[$key])) {
                          // Spec: Keys MUST be unique; first one wins.
                          // But overwriting is allowed when a merge node is used in current block.
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, $value);
                          } else {
                              $data[$key] = $value;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                      }
                  }
              } else {
                  $value = $this->parseValue(rtrim($values['value']), $flags, $context);
                  // Spec: Keys MUST be unique; first one wins.
                  // But overwriting is allowed when a merge node is used in current block.
                  if ($allowOverwrite || !isset($data[$key])) {
                      $data[$key] = $value;
                  } else {
                      throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                  }
              }
              if ($isRef) {
                  $this->refs[$isRef] = $data[$key];
              }
          } else {
              // multiple documents are not supported
              if ('---' === $this->currentLine) {
                  throw new ParseException('Multiple documents are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // 1-liner optionally followed by newline(s)
              if (is_string($value) && $this->lines[0] === trim($value)) {
                  try {
                      $value = Inline::parse($this->lines[0], $flags, $this->refs);
                  } catch (ParseException $e) {
                      $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                      $e->setSnippet($this->currentLine);

                      throw $e;
                  }

                  return $value;
              }

              // try to parse the value as a multi-line string as a last resort
              if (0 === $this->currentLineNb) {
                  $previousLineWasNewline = false;
                  $previousLineWasTerminatedWithBackslash = false;
                  $value = '';

                  foreach ($this->lines as $line) {
                      // If the indentation is not consistent at offset 0, it is to be considered as a ParseError
                      if (0 === $this->offset && isset($line[0]) && ' ' === $line[0]) {
                          throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }
                      if ('' === trim($line)) {
                          $value .= "\n";
                      } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                          $value .= ' ';
                      }

                      if ('' !== trim($line) && '\\' === substr($line, -1)) {
                          $value .= ltrim(substr($line, 0, -1));
                      } elseif ('' !== trim($line)) {
                          $value .= trim($line);
                      }

                      if ('' === trim($line)) {
                          $previousLineWasNewline = true;
                          $previousLineWasTerminatedWithBackslash = false;
                      } elseif ('\\' === substr($line, -1)) {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = true;
                      } else {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = false;
                      }
                  }

                  try {
                      return Inline::parse(trim($value));
                  } catch (ParseException $e) {
                      // fall-through to the ParseException thrown below
                  }
              }

              throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } while ($this->moveToNextLine());

      if (null !== $tag) {
          $data = new TaggedValue($tag, $data);
      }

      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !is_object($data) && 'mapping' === $context) {
          $object = new \stdClass();

          foreach ($data as $key => $value) {
              $object->$key = $value;
          }

          $data = $object;
      }

      return empty($data) ? null : $data;
  }
  /**
   * Parses a nested YAML block with a dedicated child parser.
   *
   * The child starts at the given line offset, shares the reference table of this parser
   * (by reference) and inherits the skipped line numbers: all globally skipped ones plus the
   * locally skipped ones that are not located before the offset.
   *
   * @param int    $offset The line offset of the block
   * @param string $yaml   The YAML string of the block
   * @param int    $flags  A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The value returned by the child parser
   */
  private function parseBlock(int $offset, string $yaml, int $flags)
  {
      $childSkippedLines = $this->skippedLineNumbers;

      foreach ($this->locallySkippedLineNumbers as $skippedLine) {
          if ($skippedLine >= $offset) {
              $childSkippedLines[] = $skippedLine;
          }
      }

      $child = new self();
      $child->refs = &$this->refs;
      $child->skippedLineNumbers = $childSkippedLines;
      $child->totalNumberOfLines = $this->totalNumberOfLines;
      $child->offset = $offset;

      return $child->doParse($yaml, $flags);
  }
  /**
   * Returns the current line number (takes the offset into account).
   *
   * Starting from the local line number plus the offset, the result is shifted by one for each
   * skipped line number that is not greater than the running result; the scan stops at the
   * first skipped line number that is greater.
   *
   * @internal
   *
   * @return int The current line number
   */
  public function getRealCurrentLineNb(): int
  {
      $lineNumber = $this->currentLineNb + $this->offset;

      foreach ($this->skippedLineNumbers as $skippedLine) {
          if ($skippedLine <= $lineNumber) {
              ++$lineNumber;

              continue;
          }

          break;
      }

      return $lineNumber;
  }
  /**
   * Returns the current line indentation.
   *
   * @return int The number of space characters at the start of the current line
   */
  private function getCurrentLineIndentation(): int
  {
      // length of the initial run of spaces
      $leadingSpaces = strspn($this->currentLine, ' ');

      return $leadingSpaces;
  }
  /**
   * Returns the next embed block of YAML.
   *
   * Moves past the current line and collects the following lines that belong to the nested
   * block, with the block indentation stripped. When no indentation is given, it is taken from
   * the first following line that is neither empty nor a comment.
   *
   * @param int|null $indentation The indent level at which the block is to be read, or null for default
   * @param bool     $inSequence  True if the enclosing data structure is a sequence
   *
   * @return string|null A YAML string, or null when there is no nested block to read
   *
   * @throws ParseException When indentation problem are detected
   */
  private function getNextEmbedBlock(?int $indentation = null, bool $inSequence = false): ?string
  {
      $oldLineIndentation = $this->getCurrentLineIndentation();
      $blockScalarIndentations = array();

      if ($this->isBlockScalarHeader()) {
          $blockScalarIndentations[] = $oldLineIndentation;
      }

      if (!$this->moveToNextLine()) {
          return null;
      }

      if (null === $indentation) {
          $newIndent = null;
          $movements = 0;

          do {
              $EOF = false;

              // empty and comment-like lines do not influence the indentation depth
              if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
                  $EOF = !$this->moveToNextLine();

                  if (!$EOF) {
                      ++$movements;
                  }
              } else {
                  $newIndent = $this->getCurrentLineIndentation();
              }
          } while (!$EOF && null === $newIndent);

          for ($i = 0; $i < $movements; ++$i) {
              $this->moveToPreviousLine();
          }

          $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();

          if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } else {
          $newIndent = $indentation;
      }

      // $newIndent is still null here when only empty/comment lines were left before the end of
      // the input; substr() must then be given an explicit 0 instead of null.
      $data = array();
      if ($this->getCurrentLineIndentation() >= $newIndent) {
          $data[] = substr($this->currentLine, $newIndent ?? 0);
      } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
          $data[] = $this->currentLine;
      } else {
          $this->moveToPreviousLine();

          return null;
      }

      if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
          // the previous line contained a dash but no item content, this line is a sequence item with the same indentation
          // and therefore no nested list or mapping
          $this->moveToPreviousLine();

          return null;
      }

      $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

      if (empty($blockScalarIndentations) && $this->isBlockScalarHeader()) {
          $blockScalarIndentations[] = $this->getCurrentLineIndentation();
      }

      $previousLineIndentation = $this->getCurrentLineIndentation();

      while ($this->moveToNextLine()) {
          $indent = $this->getCurrentLineIndentation();

          // terminate all block scalars that are more indented than the current line
          if (!empty($blockScalarIndentations) && $indent < $previousLineIndentation && '' !== trim($this->currentLine)) {
              foreach ($blockScalarIndentations as $key => $blockScalarIndentation) {
                  if ($blockScalarIndentation >= $indent) {
                      unset($blockScalarIndentations[$key]);
                  }
              }
          }

          if (empty($blockScalarIndentations) && !$this->isCurrentLineComment() && $this->isBlockScalarHeader()) {
              $blockScalarIndentations[] = $indent;
          }

          $previousLineIndentation = $indent;

          if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
              $this->moveToPreviousLine();
              break;
          }

          if ($this->isCurrentLineBlank()) {
              $data[] = substr($this->currentLine, $newIndent ?? 0);
              continue;
          }

          if ($indent >= $newIndent) {
              $data[] = substr($this->currentLine, $newIndent ?? 0);
          } elseif ($this->isCurrentLineComment()) {
              $data[] = $this->currentLine;
          } elseif (0 == $indent) {
              $this->moveToPreviousLine();

              break;
          } else {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      }

      return implode("\n", $data);
  }
  /**
   * Moves the parser to the next line.
   *
   * @return bool False when the parser already sits on the last line (nothing is changed then), true otherwise
   */
  private function moveToNextLine(): bool
  {
      $nextLineNb = $this->currentLineNb + 1;

      if ($nextLineNb >= count($this->lines)) {
          return false;
      }

      $this->currentLineNb = $nextLineNb;
      $this->currentLine = $this->lines[$nextLineNb];

      return true;
  }
  /**
   * Moves the parser to the previous line.
   *
   * @return bool False when the current line number is lower than 1 (nothing is changed then), true otherwise
   */
  private function moveToPreviousLine(): bool
  {
      if ($this->currentLineNb >= 1) {
          --$this->currentLineNb;
          $this->currentLine = $this->lines[$this->currentLineNb];

          return true;
      }

      return false;
  }
  /**
   * Parses a YAML value.
   *
   * Handles, in this order: alias references ("*name"), block scalars ("|" and ">" headers,
   * optionally tagged), quoted single-line values, and finally values that continue on the
   * following lines, which are folded into one string before being handed to the inline parser.
   *
   * @param string $value   A YAML value
   * @param int    $flags   A bit field of PARSE_* constants to customize the YAML parser behavior
   * @param string $context The parser context (either sequence or mapping)
   *
   * @return mixed A PHP value
   *
   * @throws ParseException When reference does not exist
   */
  private function parseValue(string $value, int $flags, string $context)
  {
      if (0 === strpos($value, '*')) {
          if (false !== $pos = strpos($value, '#')) {
              // drop the trailing comment together with any whitespace separating it from the alias name
              $value = rtrim(substr($value, 1, $pos - 1));
          } else {
              $value = substr($value, 1);
          }

          if (!array_key_exists($value, $this->refs)) {
              throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          return $this->refs[$value];
      }

      if (self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
          $modifiers = $matches['modifiers'] ?? '';

          // cast before abs(): the modifiers may hold a chomping sign only ("+", "-") or a trailing
          // one ("2-"), which are not numeric strings
          $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));

          if ('' !== $matches['tag'] && '!' !== $matches['tag']) {
              if ('!!binary' === $matches['tag']) {
                  return Inline::evaluateBinaryScalar($data);
              }

              return new TaggedValue(substr($matches['tag'], 1), $data);
          }

          return $data;
      }

      try {
          $quotation = '' !== $value && ('"' === $value[0] || "'" === $value[0]) ? $value[0] : null;

          // do not take following lines into account when the current line is a quoted single line value
          if (null !== $quotation && self::preg_match('/^'.$quotation.'.*'.$quotation.'(\s*#.*)?$/', $value)) {
              return Inline::parse($value, $flags, $this->refs);
          }

          $lines = array();

          while ($this->moveToNextLine()) {
              // unquoted strings end before the first unindented line
              if (null === $quotation && 0 === $this->getCurrentLineIndentation()) {
                  $this->moveToPreviousLine();

                  break;
              }

              $lines[] = trim($this->currentLine);

              // quoted string values end with a line that is terminated with the quotation character
              if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
                  break;
              }
          }

          // fold the continuation lines: blank lines become newlines, other lines are joined with a space
          for ($i = 0, $linesCount = count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) {
              if ('' === $lines[$i]) {
                  $value .= "\n";
                  $previousLineBlank = true;
              } elseif ($previousLineBlank) {
                  $value .= $lines[$i];
                  $previousLineBlank = false;
              } else {
                  $value .= ' '.$lines[$i];
                  $previousLineBlank = false;
              }
          }

          Inline::$parsedLineNumber = $this->getRealCurrentLineNb();

          $parsedValue = Inline::parse($value, $flags, $this->refs);

          if ('mapping' === $context && is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) {
              throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename);
          }

          return $parsedValue;
      } catch (ParseException $e) {
          $e->setParsedLine($this->getRealCurrentLineNb() + 1);
          $e->setSnippet($this->currentLine);

          throw $e;
      }
  }
  /**
   * Parses a block scalar.
   *
   * Reads the lines following the block scalar header, strips the block indentation (detected
   * from the first non-blank line when not given), then joins the lines according to the style
   * and applies the chomping rule to the trailing newlines.
   *
   * @param string $style       The style indicator that was used to begin this block scalar (| or >)
   * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
   * @param int    $indentation The indentation indicator that was used to begin this block scalar
   *
   * @return string The text value
   */
  private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string
  {
      $hasLine = $this->moveToNextLine();

      if (!$hasLine) {
          return '';
      }

      $lineIsBlank = $this->isCurrentLineBlank();
      $collected = array();

      // leading blank lines are consumed before determining indentation
      while ($hasLine && $lineIsBlank) {
          $hasLine = $this->moveToNextLine();

          // newline only if not EOF
          if ($hasLine) {
              $collected[] = '';
              $lineIsBlank = $this->isCurrentLineBlank();
          }
      }

      // determine indentation if not specified
      if (0 === $indentation && self::preg_match('/^ +/', $this->currentLine, $matches)) {
          $indentation = strlen($matches[0]);
      }

      if ($indentation > 0) {
          $pattern = sprintf('/^ {%d}(.*)$/', $indentation);

          while ($hasLine && ($lineIsBlank || self::preg_match($pattern, $this->currentLine, $matches))) {
              if (!$lineIsBlank) {
                  $collected[] = $matches[1];
              } elseif (strlen($this->currentLine) > $indentation) {
                  $collected[] = substr($this->currentLine, $indentation);
              } else {
                  $collected[] = '';
              }

              $hasLine = $this->moveToNextLine();

              // newline only if not EOF
              if ($hasLine) {
                  $lineIsBlank = $this->isCurrentLineBlank();
              }
          }
      } elseif ($hasLine) {
          $collected[] = '';
      }

      if ($hasLine) {
          // the line that ended the block belongs to the caller: step back onto the block
          $collected[] = '';
          $this->moveToPreviousLine();
      } elseif (!$this->isCurrentLineLastLineInDocument()) {
          $collected[] = '';
      }

      if ('>' === $style) {
          // folded style
          $text = '';
          $previousLineIndented = false;
          $previousLineBlank = false;

          foreach ($collected as $index => $line) {
              $isBlank = '' === $line;
              $isIndented = !$isBlank && ' ' === $line[0];

              if ($isBlank) {
                  $text .= "\n";
              } elseif ($isIndented || $previousLineIndented) {
                  $text .= "\n".$line;
              } elseif ($previousLineBlank || 0 === $index) {
                  $text .= $line;
              } else {
                  $text .= ' '.$line;
              }

              $previousLineIndented = $isIndented;
              $previousLineBlank = $isBlank;
          }
      } else {
          $text = implode("\n", $collected);
      }

      // deal with trailing newlines
      switch ($chomping) {
          case '':
              $text = preg_replace('/\n+$/', "\n", $text);
              break;
          case '-':
              $text = preg_replace('/\n+$/', '', $text);
              break;
      }

      return $text;
  }
  /**
   * Tells whether the next significant line is indented deeper than the current one.
   *
   * Blank lines and comment lines are skipped while looking ahead. The parser
   * is always moved back to the line it started on before returning, including
   * when the look-ahead runs into the end of the input.
   *
   * @return bool true if a following non-empty, non-comment line exists and has a
   *              greater indentation than the current line, false otherwise
   */
  private function isNextLineIndented(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      // isCurrentLineEmpty() is true for blank lines as well as comment lines,
      // so a single check skips both kinds.
      do {
          $EOF = !$this->moveToNextLine();

          // only successful moves have to be undone afterwards
          if (!$EOF) {
              ++$movements;
          }
      } while (!$EOF && $this->isCurrentLineEmpty());

      $ret = !$EOF && $this->getCurrentLineIndentation() > $currentIndentation;

      // Rewind on every path: this is a pure look-ahead and must not leave the
      // parser on a trailing blank/comment line when the end of input is hit.
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line carries no content, i.e. it is either
   * blank or a comment line.
   *
   * @return bool true for a blank line or a comment line, false otherwise
   */
  private function isCurrentLineEmpty(): bool
  {
      if ($this->isCurrentLineBlank()) {
          return true;
      }

      return $this->isCurrentLineComment();
  }
  /**
   * Tells whether the current line is blank, i.e. empty or made of space
   * characters only.
   *
   * @return bool true if nothing is left once surrounding spaces are removed, false otherwise
   */
  private function isCurrentLineBlank(): bool
  {
      // only the space character is stripped; tabs and other whitespace count as content
      $withoutSpaces = trim($this->currentLine, ' ');

      return '' === $withoutSpaces;
  }
  /**
   * Tells whether the current line is a comment line, i.e. its first
   * character after any leading spaces is "#".
   *
   * @return bool true if the current line is a comment line, false otherwise
   */
  private function isCurrentLineComment(): bool
  {
      // offset of the first character that is not a leading space
      $firstContentOffset = strspn($this->currentLine, ' ');

      // a line made of spaces only (or an empty line) has no character at that offset
      return isset($this->currentLine[$firstContentOffset]) && '#' === $this->currentLine[$firstContentOffset];
  }
  /**
   * Tells whether the current line is the last line of the document.
   *
   * The current line number is shifted by the parser offset and compared
   * against the index of the last line (total number of lines minus one).
   *
   * @return bool true if the shifted line number is at or past the last line index
   */
  private function isCurrentLineLastLineInDocument(): bool
  {
      $shiftedLineNb = $this->offset + $this->currentLineNb;
      $lastLineIndex = $this->totalNumberOfLines - 1;

      return $shiftedLineNb >= $lastLineIndex;
  }
  /**
   * Prepares a raw YAML string for parsing.
   *
   * Line endings are normalized to "\n", then a leading %YAML directive,
   * leading comment lines and a leading document start marker (---) are
   * stripped. The parser offset is increased to account for what was removed
   * from the top of the input. The document end marker (...) is only removed
   * when a start marker was found.
   *
   * @param string $value The input YAML string
   *
   * @return string The cleaned up YAML string
   */
  private function cleanup(string $value): string
  {
      $yaml = str_replace(["\r\n", "\r"], "\n", $value);

      // strip the YAML header; the pattern is anchored at the start of the
      // input, so the replacement count is the number of removed lines
      $headerCount = 0;
      $yaml = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $yaml, -1, $headerCount);
      $this->offset += $headerCount;

      // strip leading comment lines and advance the offset by the number of
      // newlines that disappeared
      $commentCount = 0;
      $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $yaml, -1, $commentCount);
      if (1 === $commentCount) {
          $this->offset += substr_count($yaml, "\n") - substr_count($withoutComments, "\n");
          $yaml = $withoutComments;
      }

      // strip the document start marker (---)
      $markerCount = 0;
      $withoutMarker = preg_replace('#^\-\-\-.*?\n#s', '', $yaml, -1, $markerCount);
      if (1 !== $markerCount) {
          return $yaml;
      }

      $this->offset += substr_count($yaml, "\n") - substr_count($withoutMarker, "\n");

      // a start marker was present: also drop a trailing document end marker (...)
      return preg_replace('#\.\.\.\s*$#', '', $withoutMarker);
  }
  /**
   * Tells whether the next significant line starts an un-indented collection.
   *
   * Blank lines and comment lines are skipped while looking ahead. The next
   * significant line qualifies when it has exactly the indentation of the
   * current line and is an un-indented collection item. The parser is always
   * moved back to the line it started on before returning, including when the
   * look-ahead runs into the end of the input.
   *
   * @return bool true if the next significant line starts an un-indented collection,
   *              false otherwise or when no such line exists
   */
  private function isNextLineUnIndentedCollection(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      // isCurrentLineEmpty() is true for blank lines as well as comment lines,
      // so a single check skips both kinds.
      do {
          $EOF = !$this->moveToNextLine();

          // only successful moves have to be undone afterwards
          if (!$EOF) {
              ++$movements;
          }
      } while (!$EOF && $this->isCurrentLineEmpty());

      $ret = !$EOF
          && $this->getCurrentLineIndentation() === $currentIndentation
          && $this->isStringUnIndentedCollectionItem();

      // Rewind on every path: this is a pure look-ahead and must not leave the
      // parser on a trailing blank/comment line when the end of input is hit.
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line is an un-indented collection item.
   *
   * That is the case when the line is a lone dash (ignoring trailing
   * whitespace) or when it begins with a dash followed by a space.
   *
   * @return bool true if the current line is an un-indented collection item, false otherwise
   */
  private function isStringUnIndentedCollectionItem(): bool
  {
      // a bare "-" entry whose value is expected on the following lines
      if ('-' === rtrim($this->currentLine)) {
          return true;
      }

      // "- value": the dash has to be the very first character of the line
      return 0 === strncmp($this->currentLine, '- ', 2);
  }
  /**
   * Tells whether the current line ends with a block scalar header.
   *
   * @return bool true if the block scalar header pattern matches at the end of the current line
   */
  private function isBlockScalarHeader(): bool
  {
      // anchor the shared header pattern to the end of the line
      $pattern = '~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~';

      return 0 !== self::preg_match($pattern, $this->currentLine);
  }
  /**
   * Thin wrapper around the native `preg_match` that turns a PCRE failure
   * into a ParseException.
   *
   * Thanks to it, callers inside the YAML engine never have to compare the
   * result against "false" themselves.
   *
   * @return int The result of the native call (number of matches found)
   *
   * @throws ParseException when the native call reports a failure
   *
   * @see preg_last_error()
   *
   * @internal
   */
  public static function preg_match(string $pattern, string $subject, array &$matches = null, int $flags = 0, int $offset = 0): int
  {
      $result = preg_match($pattern, $subject, $matches, $flags, $offset);

      if (false !== $result) {
          return $result;
      }

      // human readable message for each PCRE error code handled explicitly
      $messages = [
          PREG_INTERNAL_ERROR => 'Internal PCRE error.',
          PREG_BACKTRACK_LIMIT_ERROR => 'pcre.backtrack_limit reached.',
          PREG_RECURSION_LIMIT_ERROR => 'pcre.recursion_limit reached.',
          PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data.',
          PREG_BAD_UTF8_OFFSET_ERROR => 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.',
      ];

      // any other error code falls back to a generic message
      throw new ParseException($messages[preg_last_error()] ?? 'Error.');
  }
  /**
   * Reduces a tagged value to its tag name.
   *
   * When the value starts with "!", the characters following the "!" up to
   * (but not including) the first space, carriage return or line feed are
   * returned; the rest of the value is dropped. Any other value, including
   * the empty string, is returned unchanged.
   *
   * Prevent values such as `!foo {quz: bar}` to be considered as
   * a mapping block.
   *
   * @param string $value The raw value, possibly starting with a tag
   *
   * @return string The tag name without its leading "!", or the value itself when it is not tagged
   */
  private function trimTag(string $value): string
  {
      // guard against the empty string: reading $value[0] on it raises an
      // "Uninitialized string offset" notice/warning
      if ('' === $value || '!' !== $value[0]) {
          return $value;
      }

      // length of the run after the "!" that holds no space or line break
      $tagLength = strcspn($value, " \r\n", 1);

      return ltrim(substr($value, 1, $tagLength), ' ');
  }
  /**
   * Extracts the custom tag a line consists of, if any.
   *
   * The value only qualifies when it starts with "!" and matches the tag
   * pattern, optionally followed by spaces and a trailing comment. With
   * $nextLineCheck enabled, the next line must additionally be indented.
   *
   * @param string $value         The value to inspect
   * @param int    $flags         A bit field of Yaml::PARSE_* flags
   * @param bool   $nextLineCheck Whether the next line has to be indented for the tag to count
   *
   * @return string|null The tag without its leading "!", or null when the value is not a tag line
   *
   * @throws ParseException when the tag itself starts with "!" (built-in tag) or
   *                        when Yaml::PARSE_CUSTOM_TAGS is not part of $flags
   */
  private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
  {
      // cheap rejection before running the regular expression
      if ('' === $value || '!' !== $value[0]) {
          return null;
      }

      if (1 !== self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches)) {
          return null;
      }

      if ($nextLineCheck && !$this->isNextLineIndented()) {
          return null;
      }

      $rawTag = $matches['tag'];
      $tag = substr($rawTag, 1);

      // Built-in tags
      if ($tag && '!' === $tag[0]) {
          throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      if (!(Yaml::PARSE_CUSTOM_TAGS & $flags)) {
          throw new ParseException(sprintf('Tags support is not enabled. You must use the flag `Yaml::PARSE_CUSTOM_TAGS` to use "%s".', $rawTag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      return $tag;
  }
  920. }