PK Na + lib/Doctrine/Common/Lexer/AbstractLexer.phpnu W+A input = $input;
$this->tokens = [];
$this->reset();
$this->scan($input);
}
/**
 * Puts the lexer cursor back into its initial state.
 *
 * Clears the current token and the lookahead and rewinds both the token
 * position and the peek offset to 0. The list of scanned tokens is not
 * touched here.
 *
 * @return void
 */
public function reset()
{
    $this->position  = 0;
    $this->peek      = 0;
    $this->token     = null;
    $this->lookahead = null;
}
/**
 * Rewinds the peek offset to 0, so the next peek() starts again from the
 * token at the current position.
 *
 * @return void
 */
public function resetPeek()
{
    $this->peek = 0;
}
/**
 * Moves the token cursor to the given index.
 *
 * Only the cursor changes: the current token, the lookahead and the peek
 * offset keep their values until moveNext() is called again.
 *
 * @param int $position Index into the scanned token list that the next
 *                      moveNext() call reads from.
 *
 * @return void
 */
public function resetPosition($position = 0)
{
    $this->position = $position;
}
/**
 * Returns the leading part of the original input, cut off at the given offset.
 *
 * @param int $position Length of the prefix, handed to substr() as is.
 *
 * @return string
 */
public function getInputUntilPosition($position)
{
    $prefix = substr($this->input, 0, $position);

    return $prefix;
}
/**
 * Tells whether the lookahead token has the given type.
 *
 * @param int|string $token Token type to compare against (strict comparison).
 *
 * @return bool False when there is no lookahead token.
 */
public function isNextToken($token)
{
    if ($this->lookahead === null) {
        return false;
    }

    return $this->lookahead['type'] === $token;
}
/**
 * Tells whether the type of the lookahead token is one of the given types.
 *
 * @param array $tokens Token types to look for (strict comparison).
 *
 * @return bool False when there is no lookahead token.
 */
public function isNextTokenAny(array $tokens)
{
    if ($this->lookahead === null) {
        return false;
    }

    return in_array($this->lookahead['type'], $tokens, true);
}
/**
 * Advances the lexer by one token.
 *
 * The previous lookahead becomes the current token, the token at the
 * current position (if any) becomes the new lookahead, and the peek offset
 * is rewound to 0.
 *
 * @return bool True when a new lookahead token is available, false at the end of the token list.
 */
public function moveNext()
{
    $this->peek  = 0;
    $this->token = $this->lookahead;

    if (isset($this->tokens[$this->position])) {
        $this->lookahead = $this->tokens[$this->position];
        // The cursor only advances when a token was actually consumed.
        $this->position++;
    } else {
        $this->lookahead = null;
    }

    return $this->lookahead !== null;
}
/**
 * Skips tokens until the lookahead has the given type or no token is left.
 *
 * @param string $type The token type to stop at.
 *
 * @return void
 */
public function skipUntil($type)
{
    while (! ($this->lookahead === null || $this->lookahead['type'] === $type)) {
        $this->moveNext();
    }
}
/**
 * Checks whether the type that getType() reports for a value equals the
 * given token type.
 *
 * @param mixed      $value Value to classify.
 * @param int|string $token Expected token type (strict comparison).
 *
 * @return bool
 */
public function isA($value, $token)
{
    $type = $this->getType($value);

    return $type === $token;
}
/**
 * Returns the next token ahead of the current position and advances the
 * peek offset, so repeated calls walk further ahead without moving the
 * lexer itself.
 *
 * @return array|null The next token or NULL if there are no more tokens ahead.
 */
public function peek()
{
    $index = $this->position + $this->peek;

    if (! isset($this->tokens[$index])) {
        // Nothing ahead: the peek offset stays where it is.
        return null;
    }

    $this->peek++;

    return $this->tokens[$index];
}
/**
 * Performs a single peek() and then rewinds the peek offset to 0.
 *
 * @return array|null The next token or NULL if there are no more tokens ahead.
 */
public function glimpse()
{
    $next = $this->peek();

    $this->peek = 0;

    return $next;
}
/**
 * Splits the input into tokens and appends them to the token list.
 *
 * The regex is built once from the catchable patterns (each in its own
 * capture group), the non-catchable patterns and the modifiers, and is
 * then cached on the instance. Every token is stored as an array with the
 * keys 'value', 'type' and 'position'.
 *
 * @param string $input A query string.
 *
 * @return void
 */
protected function scan($input)
{
    if (! isset($this->regex)) {
        $catchable    = implode(')|(', $this->getCatchablePatterns());
        $nonCatchable = implode('|', $this->getNonCatchablePatterns());
        $modifiers    = $this->getModifiers();

        $this->regex = sprintf('/(%s)|%s/%s', $catchable, $nonCatchable, $modifiers);
    }

    $parts = preg_split(
        $this->regex,
        $input,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE
    );

    if ($parts === false) {
        // Work around https://bugs.php.net/78122
        // Fall back to treating the whole input as one piece at offset 0.
        $parts = [[$input, 0]];
    }

    foreach ($parts as [$value, $position]) {
        // getType() takes the value by reference and may rewrite it, so it
        // has to run before the value is copied into the token.
        $type = $this->getType($value);

        $this->tokens[] = [
            'value' => $value,
            'type' => $type,
            'position' => $position,
        ];
    }
}
/**
 * Looks up the name of the class constant whose value is identical to the
 * given token.
 *
 * The constants of the late-static-bound class are searched with a strict
 * comparison; the first match wins.
 *
 * @param int|string $token
 *
 * @return int|string "ClassName::CONSTANT" for a match, otherwise the token unchanged.
 */
public function getLiteral($token)
{
    $owner = static::class;

    foreach ((new ReflectionClass($owner))->getConstants() as $name => $value) {
        if ($value !== $token) {
            continue;
        }

        return $owner . '::' . $name;
    }

    return $token;
}
/**
 * Returns the modifiers that are appended after the closing delimiter of
 * the regex built in scan(). The default is "i".
 *
 * @return string
 */
protected function getModifiers()
{
    return 'i';
}
/**
 * Lexical catchable patterns.
 *
 * scan() joins these patterns with ")|(" inside a capture group, so each
 * pattern ends up in its own group and the text it matches is returned by
 * preg_split() (PREG_SPLIT_DELIM_CAPTURE) and turned into a token.
 *
 * @return array Regex fragments without delimiters; scan() adds the "/" delimiters and the modifiers.
 */
abstract protected function getCatchablePatterns();
/**
 * Lexical non-catchable patterns.
 *
 * scan() joins these patterns with "|" and appends them after the capture
 * groups of the catchable patterns. Text they match splits the input; it is
 * only returned as a token if the pattern brings its own capture group.
 *
 * @return array Regex fragments without delimiters.
 */
abstract protected function getNonCatchablePatterns();
/**
 * Retrieve token type. Also processes the token value if necessary.
 *
 * Called by scan() for every piece of matched text and by isA(). The value
 * is passed by reference, so an implementation may rewrite it; scan()
 * stores the rewritten value in the token.
 *
 * @param string $value
 *
 * @return int|string|null
 */
abstract protected function getType(&$value);
}
PK Nz z .travis.ymlnu W+A dist: xenial
sudo: false
language: php
php:
- 7.2
- 7.3
- 7.4snapshot
cache:
directories:
- $HOME/.composer/cache
before_install:
- |
if [ "x$COVERAGE" != "xyes" ]; then
mv ~/.phpenv/versions/$(phpenv version-name)/etc/conf.d/xdebug.ini{,.disabled} || true
fi
- travis_retry composer self-update
install: travis_retry composer update --prefer-dist
before_script:
- |
if [ "x$COVERAGE" == "xyes" ] && [[ ! $(php -m | grep -si xdebug) ]]; then
echo "xdebug is required for coverage"
exit 1
fi
script:
- |
if [ "x$COVERAGE" == "xyes" ]; then
./vendor/bin/phpunit --coverage-clover clover.xml
else
./vendor/bin/phpunit
fi
after_script:
- |
if [ "x$COVERAGE" == "xyes" ]; then
wget https://github.com/scrutinizer-ci/ocular/releases/download/1.5.2/ocular.phar
php ocular.phar code-coverage:upload --format=php-clover clover.xml
fi
jobs:
allow_failures:
- php: 7.4snapshot
include:
- stage: Test
php: 7.2
env: COVERAGE=yes
- stage: Code Quality
env: PHPSTAN
php: 7.3
script: vendor/bin/phpstan analyse
- stage: Code Quality
env: PHPCS
php: 7.2
script: vendor/bin/phpcs
PK Nyc README.mdnu W+A # Doctrine Lexer
Base library for a lexer that can be used in Top-Down, Recursive Descent Parsers.
This lexer is used in Doctrine Annotations and in Doctrine ORM (DQL).
https://www.doctrine-project.org/projects/lexer.html
PK N*nT T
.gitignorenu W+A /vendor
/composer.lock
/phpunit.xml
/.phpunit.result.cache
/phpcs.xml
/.phpcs-cache
PK N>>. . phpcs.xml.distnu W+A
lib
tests
PK Nt t .github/FUNDING.ymlnu W+A patreon: phpdoctrine
tidelift: packagist/doctrine%2Flexer
custom: https://www.doctrine-project.org/sponsorship.html
PK N9tۃ .gitattributesnu W+A # Auto-detect text files, ensure they use LF.
* text=auto eol=lf
# Exclude non-essential files from dist
/.github export-ignore
/docs export-ignore
/tests export-ignore
/.doctrine-project.json export-ignore
/.gitattributes export-ignore
/.gitignore export-ignore
/.travis.yml export-ignore
/phpcs.xml.dist export-ignore
/phpstan.neon.dist export-ignore
/phpunit.xml.dist export-ignore
PK N;Ww\ \ phpstan.neon.distnu W+A parameters:
level: 7
paths:
- %rootDir%/../../../lib
- %rootDir%/../../../tests
PK N$ϡA phpunit.xml.distnu W+A
tests
lib/Doctrine
PK NUy .doctrine-project.jsonnu W+A {
"active": true,
"name": "Lexer",
"slug": "lexer",
"docsSlug": "doctrine-lexer",
"versions": [
{
"name": "1.0",
"branchName": "1.0",
"slug": "1.0",
"current": true,
"maintained": true,
"aliases": [
"current",
"stable"
]
},
{
"name": "master",
"branchName": "master",
"slug": "latest",
"upcoming": true
}
]
}
PK N}Г 1 tests/Doctrine/Common/Lexer/AbstractLexerTest.phpnu W+A concreteLexer = new ConcreteLexer();
}
/**
 * Supplies an input string together with the tokens the concrete lexer is
 * expected to produce for it.
 *
 * @return array
 */
public function dataProvider()
{
    $priceTokens = [
        ['value' => 'price', 'type' => 'string', 'position' => 0],
        ['value' => '=', 'type' => 'operator', 'position' => 5],
        ['value' => 10, 'type' => 'int', 'position' => 6],
    ];

    return [
        ['price=10', $priceTokens],
    ];
}
/**
 * After resetPeek() the next peek() must return the first token again.
 */
public function testResetPeek()
{
    $priceToken  = ['value' => 'price', 'type' => 'string', 'position' => 0];
    $equalsToken = ['value' => '=', 'type' => 'operator', 'position' => 5];

    $lexer = $this->concreteLexer;
    $lexer->setInput('price=10');

    self::assertEquals($priceToken, $lexer->peek());
    self::assertEquals($equalsToken, $lexer->peek());

    $lexer->resetPeek();

    self::assertEquals($priceToken, $lexer->peek());
}
/**
 * After resetPosition(0) the next moveNext() must deliver the first token again.
 */
public function testResetPosition()
{
    $priceToken  = ['value' => 'price', 'type' => 'string', 'position' => 0];
    $equalsToken = ['value' => '=', 'type' => 'operator', 'position' => 5];

    $lexer = $this->concreteLexer;
    $lexer->setInput('price=10');

    self::assertNull($lexer->lookahead);

    self::assertTrue($lexer->moveNext());
    self::assertEquals($priceToken, $lexer->lookahead);

    self::assertTrue($lexer->moveNext());
    self::assertEquals($equalsToken, $lexer->lookahead);

    $lexer->resetPosition(0);

    self::assertTrue($lexer->moveNext());
    self::assertEquals($priceToken, $lexer->lookahead);
}
/**
 * moveNext() must expose every expected token as lookahead, in order, and
 * report false with a NULL lookahead once the tokens are used up.
 *
 * @param string $input
 * @param array  $expectedTokens
 *
 * @dataProvider dataProvider
 */
public function testMoveNext($input, $expectedTokens)
{
    $lexer = $this->concreteLexer;
    $lexer->setInput($input);

    self::assertNull($lexer->lookahead);

    foreach ($expectedTokens as $expectedToken) {
        self::assertTrue($lexer->moveNext());
        self::assertEquals($expectedToken, $lexer->lookahead);
    }

    self::assertFalse($lexer->moveNext());
    self::assertNull($lexer->lookahead);
}
/**
 * skipUntil('operator') must stop with the "=" token as lookahead.
 */
public function testSkipUntil()
{
    $equalsToken = [
        'value' => '=',
        'type' => 'operator',
        'position' => 5,
    ];

    $lexer = $this->concreteLexer;
    $lexer->setInput('price=10');

    self::assertTrue($lexer->moveNext());

    $lexer->skipUntil('operator');

    self::assertEquals($equalsToken, $lexer->lookahead);
}
/**
 * An input starting with the lone byte 0xE9 must come back as one single
 * "string" token that covers the whole input.
 *
 * NOTE(review): presumably this exercises the preg_split() failure fallback
 * in scan(), since the concrete lexer adds the "u" modifier - confirm.
 */
public function testUtf8Mismatch()
{
    $wholeInputToken = [
        'value' => "\xE9=10",
        'type' => 'string',
        'position' => 0,
    ];

    $lexer = $this->concreteLexer;
    $lexer->setInput("\xE9=10");

    self::assertTrue($lexer->moveNext());
    self::assertEquals($wholeInputToken, $lexer->lookahead);
}
/**
 * Successive peek() calls must return the expected tokens in order and
 * NULL once nothing is left ahead.
 *
 * @param string $input
 * @param array  $expectedTokens
 *
 * @dataProvider dataProvider
 */
public function testPeek($input, $expectedTokens)
{
    $lexer = $this->concreteLexer;
    $lexer->setInput($input);

    foreach ($expectedTokens as $expectedToken) {
        self::assertEquals($expectedToken, $lexer->peek());
    }

    self::assertNull($lexer->peek());
}
/**
 * glimpse() must show the upcoming token without consuming it; moveNext()
 * then advances to it. At the end nothing is left to peek at.
 *
 * @param string $input
 * @param array  $expectedTokens
 *
 * @dataProvider dataProvider
 */
public function testGlimpse($input, $expectedTokens)
{
    $lexer = $this->concreteLexer;
    $lexer->setInput($input);

    foreach ($expectedTokens as $expectedToken) {
        self::assertEquals($expectedToken, $lexer->glimpse());
        $lexer->moveNext();
    }

    self::assertNull($lexer->peek());
}
/**
 * Supplies an input, a cut-off position and the expected input prefix.
 *
 * @return array
 */
public function inputUntilPositionDataProvider()
{
    $priceCase = ['price=10', 5, 'price'];

    return [$priceCase];
}
/**
 * getInputUntilPosition() must return exactly the expected prefix of the input.
 *
 * @param string $input
 * @param int    $position
 * @param string $expectedInput
 *
 * @dataProvider inputUntilPositionDataProvider
 */
public function testGetInputUntilPosition($input, $position, $expectedInput)
{
    $this->concreteLexer->setInput($input);

    $actualInput = $this->concreteLexer->getInputUntilPosition($position);

    self::assertSame($expectedInput, $actualInput);
}
/**
 * While walking the input, isNextToken() must confirm the type of every
 * expected token in turn.
 *
 * @param string $input
 * @param array  $expectedTokens
 *
 * @dataProvider dataProvider
 */
public function testIsNextToken($input, $expectedTokens)
{
    $lexer = $this->concreteLexer;
    $lexer->setInput($input);
    $lexer->moveNext();

    foreach ($expectedTokens as $expectedToken) {
        self::assertTrue($lexer->isNextToken($expectedToken['type']));
        $lexer->moveNext();
    }
}
/**
 * While walking the input, isNextTokenAny() must accept both the single
 * expected type and the list of all expected types.
 *
 * @param string $input
 * @param array  $expectedTokens
 *
 * @dataProvider dataProvider
 */
public function testIsNextTokenAny($input, $expectedTokens)
{
    $allTokenTypes = [];
    foreach ($expectedTokens as $expectedToken) {
        $allTokenTypes[] = $expectedToken['type'];
    }

    $lexer = $this->concreteLexer;
    $lexer->setInput($input);
    $lexer->moveNext();

    foreach ($expectedTokens as $expectedToken) {
        self::assertTrue($lexer->isNextTokenAny([$expectedToken['type']]));
        self::assertTrue($lexer->isNextTokenAny($allTokenTypes));
        $lexer->moveNext();
    }
}
/**
 * A known token type maps to its qualified constant name; an unknown one
 * is returned unchanged.
 */
public function testGetLiteral()
{
    $lexer = $this->concreteLexer;

    self::assertSame('Doctrine\Tests\Common\Lexer\ConcreteLexer::INT', $lexer->getLiteral('int'));
    self::assertSame('fake_token', $lexer->getLiteral('fake_token'));
}
/**
 * isA() must classify numbers as "int", the comparison signs as "operator"
 * and other text as "string".
 */
public function testIsA()
{
    $lexer = $this->concreteLexer;

    self::assertTrue($lexer->isA(11, 'int'));
    self::assertTrue($lexer->isA(1.1, 'int'));
    self::assertTrue($lexer->isA('=', 'operator'));
    self::assertTrue($lexer->isA('>', 'operator'));
    self::assertTrue($lexer->isA('<', 'operator'));
    self::assertTrue($lexer->isA('fake_text', 'string'));
}
/**
 * A pattern added to a fresh MutableLexer decides how much of the input
 * the first token covers.
 */
public function testAddCatchablePatternsToMutableLexer()
{
    // catchable pattern => expected value of the first token of "one"
    $cases = [
        '[a-z]' => 'o',
        '[a-z]+' => 'one',
    ];

    foreach ($cases as $pattern => $expectedValue) {
        $mutableLexer = new MutableLexer();
        $mutableLexer->addCatchablePattern($pattern);
        $mutableLexer->setInput('one');

        $token = $mutableLexer->glimpse();

        self::assertEquals($expectedValue, $token['value']);
    }
}
}
PK NP P , tests/Doctrine/Common/Lexer/MutableLexer.phpnu W+A catchablePatterns[] = $pattern;
}
/**
 * Returns the patterns collected so far on this lexer.
 *
 * @return array
 */
protected function getCatchablePatterns()
{
    $patterns = $this->catchablePatterns;

    return $patterns;
}
/**
 * Returns the single separator pattern: runs of whitespace and commas.
 *
 * @return array
 */
protected function getNonCatchablePatterns()
{
    $separators = '[\s,]+';

    return [$separators];
}
/**
 * Reports the same type, 1, for every value and leaves the value untouched.
 *
 * @param string $value
 *
 * @return int
 */
protected function getType(&$value)
{
    return 1;
}
}
PK N - tests/Doctrine/Common/Lexer/ConcreteLexer.phpnu W+A ',
'[a-z]+',
'\d+',
];
}
/**
 * Returns the non-catchable patterns of the test lexer: whitespace runs
 * and a captured single character.
 *
 * @return array
 */
protected function getNonCatchablePatterns()
{
    $whitespace   = '\s+';
    $anyCharacter = '(.)';

    return [$whitespace, $anyCharacter];
}
/**
 * Classifies a value for the test lexer.
 *
 * Numeric values are cast to int in place (the value is taken by
 * reference) and reported as 'int'. The strings "=", "<" and ">" are
 * reported as 'operator', any other string as 'string'.
 *
 * @param mixed $value Value to classify; rewritten to an int when numeric.
 *
 * @return string|null The type name, or NULL for a value that matches none of the rules.
 */
protected function getType(&$value)
{
    if (is_numeric($value)) {
        $value = (int) $value;

        return 'int';
    }

    // Strict comparison on purpose: with a loose in_array() the boolean
    // true compares equal to any non-empty string and would be reported
    // as an operator.
    if (in_array($value, ['=', '<', '>'], true)) {
        return 'operator';
    }

    if (is_string($value)) {
        return 'string';
    }

    return null;
}
/**
 * Appends the "u" modifier to the modifiers of the parent lexer.
 *
 * @return string
 */
protected function getModifiers()
{
    $inherited = parent::getModifiers();

    return $inherited . 'u';
}
}
PK N`XQ) ) LICENSEnu W+A Copyright (c) 2006-2018 Doctrine Project
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
PK N}[`
composer.jsonnu W+A {
"name": "doctrine/lexer",
"type": "library",
"description": "PHP Doctrine Lexer parser library that can be used in Top-Down, Recursive Descent Parsers.",
"keywords": [
"php",
"parser",
"lexer",
"annotations",
"docblock"
],
"homepage": "https://www.doctrine-project.org/projects/lexer.html",
"license": "MIT",
"authors": [
{"name": "Guilherme Blanco", "email": "guilhermeblanco@gmail.com"},
{"name": "Roman Borschel", "email": "roman@code-factory.org"},
{"name": "Johannes Schmitt", "email": "schmittjoh@gmail.com"}
],
"require": {
"php": "^7.2"
},
"require-dev": {
"doctrine/coding-standard": "^6.0",
"phpstan/phpstan": "^0.11.8",
"phpunit/phpunit": "^8.2"
},
"autoload": {
"psr-4": { "Doctrine\\Common\\Lexer\\": "lib/Doctrine/Common/Lexer" }
},
"autoload-dev": {
"psr-4": { "Doctrine\\Tests\\": "tests/Doctrine" }
},
"extra": {
"branch-alias": {
"dev-master": "1.2.x-dev"
}
},
"config": {
"sort-packages": true
}
}
PK N{>j j docs/en/index.rstnu W+A Introduction
============
Doctrine Lexer is a library that can be used in Top-Down, Recursive
Descent Parsers. This lexer is used in Doctrine Annotations and in
Doctrine ORM (DQL).
To write your own parser you just need to extend ``Doctrine\Common\Lexer\AbstractLexer``
and implement the following three abstract methods.
.. code-block:: php
/**
* Lexical catchable patterns.
*
* @return array
*/
abstract protected function getCatchablePatterns();
/**
* Lexical non-catchable patterns.
*
* @return array
*/
abstract protected function getNonCatchablePatterns();
/**
 * Retrieve token type. Also processes the token value if necessary.
 *
 * @param string $value
 * @return int|string|null
 */
abstract protected function getType(&$value);
These methods define the `lexical <https://en.wikipedia.org/wiki/Lexical_analysis>`_
catchable and non-catchable patterns and a method for returning the
type of a token and filtering the value if necessary.
The Lexer gives you an API to walk across a string one token at a
time and to inspect the type, value and position of each token in
the string. The low level API of the lexer is pretty simple:
- ``setInput($input)`` - Sets the input data to be tokenized. The Lexer is immediately reset and the new input tokenized.
- ``reset()`` - Resets the lexer.
- ``resetPeek()`` - Resets the peek pointer to 0.
- ``resetPosition($position = 0)`` - Resets the lexer position on the input to the given position.
- ``isNextToken($token)`` - Checks whether a given token matches the current lookahead.
- ``isNextTokenAny(array $tokens)`` - Checks whether any of the given tokens matches the current lookahead.
- ``moveNext()`` - Moves to the next token in the input string.
- ``skipUntil($type)`` - Tells the lexer to skip input tokens until it sees a token with the given type.
- ``isA($value, $token)`` - Checks if given value is identical to the given token.
- ``peek()`` - Moves the lookahead token forward.
- ``glimpse()`` - Peeks at the next token, returns it and immediately resets the peek.
PK NU]|2
2
! docs/en/simple-parser-example.rstnu W+A Simple Parser Example
=====================
Extend the ``Doctrine\Common\Lexer\AbstractLexer`` class and implement
the ``getCatchablePatterns``, ``getNonCatchablePatterns``, and ``getType``
methods. Here is a very simple example lexer implementation named ``CharacterTypeLexer``.
It tokenizes a string to ``T_UPPER``, ``T_LOWER`` and ``T_NUMBER`` tokens:
.. code-block:: php
lexer = $lexer;
}
/**
 * Collects the values of all T_UPPER tokens found in the given string.
 *
 * @param string $string
 *
 * @return array
 */
public function getUpperCaseCharacters($string)
{
    $this->lexer->setInput($string);
    $this->lexer->moveNext();

    $upperCaseChars = [];

    // Keep advancing while a lookahead token exists; after each step the
    // token that was just consumed is available as $this->lexer->token.
    while ($this->lexer->lookahead) {
        $this->lexer->moveNext();

        $current = $this->lexer->token;

        if ($current['type'] !== CharacterTypeLexer::T_UPPER) {
            continue;
        }

        $upperCaseChars[] = $current['value'];
    }

    return $upperCaseChars;
}
}
// Wire the extractor to a CharacterTypeLexer and run it on a mixed string.
$upperCaseCharacterExtractor = new UpperCaseCharacterExtracter(new CharacterTypeLexer());
$upperCaseCharacters = $upperCaseCharacterExtractor->getUpperCaseCharacters('1aBcdEfgHiJ12');
// Print the collected upper case characters.
print_r($upperCaseCharacters);
The variable ``$upperCaseCharacters`` contains all of the upper case
characters:
.. code-block:: php
Array
(
[0] => B
[1] => E
[2] => H
[3] => J
)
This is a simple example but it should demonstrate the low level API
that can be used to build more complex parsers.
PK Nl }&