How to create a tree of arrays from logical notation (DSL) using PHP

My input is pretty simple:

$input = '( ( "M" AND ( "(" OR "AND" ) ) OR "T" )';

where `(` starts a new node on the tree and `)` completes it. AND and OR are reserved for logical operations, so as long as they are not inside "" quotes, they have special meaning. In my DSL, AND and OR clauses alternate by node level, so that a given level can contain only AND clauses or only OR clauses. If AND appears after OR, it starts a new subnode. All characters inside "" should be treated as they are. Finally, `"` can be escaped with `\"`, as usual.

What is a good way to translate such a sentence into a structure that looks like this in PHP:

$output = array(array(array("M" , array("(", "AND")) , "T"), FALSE);

Note that FALSE is an indicator that the OR keyword was at the root level. If the input was:

( ( "M" AND ( "(" OR "AND" ) ) AND "T" )

then the output will be:

$output = array(array(array("M", array("(", "AND")), "T"), TRUE);

replace ('(', 'array (') eval-, .

NOT DSL.

. JavaScript .

Python:

Python , PHP Javascript. :

, , .

http://codepad.org/PdgQLviI

+4
1

. - - , - .

, , (lazy - greedy ). ( , , ). InvalidArgumentException RuntimeException, .

/**
 * Turns a raw DSL sentence into a flat string of tokens.
 *
 * Tokenization runs lazily, on the first call to string() or variable():
 *  - every quoted label becomes <VAR:n>; the label text is stored and can be
 *    read back with variable(n),
 *  - every AND / OR left outside the quotes becomes <KEYWORD:AND> / <KEYWORD:OR>,
 *  - every parenthesis becomes <BLOCK:level> / <ENDBLOCK:level>, where level is
 *    the nesting depth of that pair (the outermost pair has level 0).
 *
 * The class also builds the regular expression fragments needed to find those
 * tokens again in the tokenized string.
 */
class TokenizedInput
{
    const VAR_REGEXP         = '\"(?P<string>.*?)\"';
    const BLOCK_OPEN_REGEXP  = '\(';
    const BLOCK_CLOSE_REGEXP = '\)';
    const KEYWORD_REGEXP     = '(?<keyword>OR|AND)';

    // Token: <TOKEN_DELIM_LEFT><TYPE_TOKEN><ID_DELIM>$id<TOKEN_DELIM_RIGHT>
    const TOKEN_DELIM_LEFT  = '<';
    const TOKEN_DELIM_RIGHT = '>';

    const VAR_TOKEN         = 'VAR';
    const KEYWORD_TOKEN     = 'KEYWORD';
    const BLOCK_OPEN_TOKEN  = 'BLOCK';
    const BLOCK_CLOSE_TOKEN = 'ENDBLOCK';

    const ID_DELIM  = ':';
    const ID_REGEXP = '[0-9]+';

    private $original;
    private $tokenized;
    private $data = [];

    private $blockLevel = 0;
    private $varTokenId = 0;

    // Replacement passes, applied in this order: callback method => pattern.
    // Labels go first so that parentheses and keywords inside quotes are
    // already hidden behind <VAR:n> tokens when the later passes run.
    protected $procedure = [
        'varTokens'    => self::VAR_REGEXP,
        'keywordToken' => self::KEYWORD_REGEXP,
        'blockTokens'  => '(?P<open>' . self::BLOCK_OPEN_REGEXP . ')|(?P<close>' . self::BLOCK_CLOSE_REGEXP . ')'
    ];

    private $tokenMatch;

    /**
     * @param mixed $input Raw DSL sentence; it is cast to string.
     */
    public function __construct($input) {
        $this->original = (string) $input;
    }

    /**
     * Returns the tokenized sentence, tokenizing it on first use.
     *
     * @return string
     * @throws RuntimeException When parentheses are not balanced.
     */
    public function string() {
        isset($this->tokenized) or $this->tokenize();
        return $this->tokenized;
    }

    /**
     * Returns the label text stored behind a <VAR:n> token.
     *
     * @param int $key Numeric id of the VAR token.
     * @return string
     * @throws InvalidArgumentException When no label was stored under $key.
     * @throws RuntimeException When parentheses are not balanced.
     */
    public function variable($key) {
        isset($this->tokenized) or $this->tokenize();
        if (!isset($this->data[$key])) {
            throw new InvalidArgumentException("Variable id:($key) does not exist.");
        }
        return $this->data[$key];
    }

    /**
     * Returns a complete, delimited pattern matching either one VAR token or
     * one whole block (opening token, contents, matching closing token).
     * The pattern is built once and then reused.
     *
     * @return string
     */
    public function tokenSearchRegexp() {
        if (!isset($this->tokenMatch)) {
            $strings  = $this->stringSearchRegexp();
            $blocks   = $this->blockSearchRegexp();
            $this->tokenMatch = '#(?:' . $strings . '|' . $blocks . ')#';
        }
        return $this->tokenMatch;
    }

    /**
     * Returns an undelimited pattern fragment matching a VAR token.
     *
     * @param int|string|null $id Exact id (or id sub-pattern) to match;
     *                            null matches any numeric id.
     * @return string Fragment exposing the named group "id".
     */
    public function stringSearchRegexp($id = null) {
        // Explicit null check: 0 is a valid id and must not fall back to "any id".
        $id = ($id === null) ? self::ID_REGEXP : $id;
        return preg_quote(self::TOKEN_DELIM_LEFT . self::VAR_TOKEN . self::ID_DELIM)
            . '(?P<id>' . $id . ')'
            . preg_quote(self::TOKEN_DELIM_RIGHT);
    }

    /**
     * Returns an undelimited pattern fragment matching one whole block.
     *
     * @param int|string|null $level Exact level (or level sub-pattern) to
     *                               match; null matches any numeric level.
     * @return string Fragment exposing the named groups "level" and "contents".
     */
    public function blockSearchRegexp($level = null) {
        // Explicit null check: 0 is the outermost level and must not fall back
        // to "any level".
        $level = ($level === null) ? self::ID_REGEXP : $level;
        $block_open = preg_quote(self::TOKEN_DELIM_LEFT . self::BLOCK_OPEN_TOKEN . self::ID_DELIM)
            . '(?P<level>' . $level . ')'
            . preg_quote(self::TOKEN_DELIM_RIGHT);
        $block_close = preg_quote(self::TOKEN_DELIM_LEFT . self::BLOCK_CLOSE_TOKEN . self::ID_DELIM)
            . '\k<level>'
            . preg_quote(self::TOKEN_DELIM_RIGHT);
        // The contents must be captured lazily. A block can never contain
        // another block of its own level, so the first closing token with the
        // same level is the right one. A greedy capture would run on to the
        // last such token and merge sibling blocks into one.
        // (?s:...) lets the contents span line breaks.
        return $block_open . '(?P<contents>(?s:.*?))' . $block_close;
    }

    /**
     * Returns an undelimited pattern fragment matching a KEYWORD token.
     *
     * @param string|null $keyword Keyword (or sub-pattern) to match; when
     *                             empty, KEYWORD_REGEXP is used.
     * @return string Fragment exposing the named group "keyword".
     */
    public function keywordSearchRegexp($keyword = null) {
        $keyword = $keyword ? '(?P<keyword>' . $keyword . ')' : self::KEYWORD_REGEXP;
        return preg_quote(self::TOKEN_DELIM_LEFT . self::KEYWORD_TOKEN . self::ID_DELIM)
            . $keyword
            . preg_quote(self::TOKEN_DELIM_RIGHT);
    }

    /**
     * Runs every pass listed in $procedure over the original sentence and
     * stores the result.
     *
     * @throws RuntimeException When parentheses are not balanced.
     */
    private function tokenize() {
        // Start from a clean state so that a retry after a failed attempt is
        // not affected by counters left over from that attempt.
        $this->data       = [];
        $this->blockLevel = 0;
        $this->varTokenId = 0;

        $current = $this->original;
        foreach ($this->procedure as $method => $pattern) {
            $current = preg_replace_callback('#(?:' . $pattern . ')#', [$this, $method], $current);
        }

        // Anything left above zero is an opening parenthesis that was never closed.
        if ($this->blockLevel !== 0) {
            throw new RuntimeException(
                'Syntax error. Parenthesis mismatch. Unclosed blocks: ' . $this->blockLevel
            );
        }

        $this->tokenized = $current;
    }

    /**
     * Callback: replaces one parenthesis with a BLOCK / ENDBLOCK token that
     * carries its nesting level.
     *
     * @param array $match Match data from preg_replace_callback().
     * @return string
     * @throws RuntimeException When a closing parenthesis has no opening one.
     */
    protected function blockTokens($match) {
        // 'close' is present only when the closing alternative matched:
        // preg_replace_callback() drops trailing groups that did not take part.
        if (isset($match['close'])) {
            if ($this->blockLevel <= 0) {
                // Without this guard ") (" would take the level to -1 and back
                // to 0, and the final balance check would let it through.
                throw new RuntimeException(
                    'Syntax error. Parenthesis mismatch. Unexpected closing parenthesis.'
                );
            }
            $token = self::BLOCK_CLOSE_TOKEN . self::ID_DELIM . --$this->blockLevel;
        } else {
            $token = self::BLOCK_OPEN_TOKEN . self::ID_DELIM . $this->blockLevel++;
        }

        return $this->addDelimiters($token);
    }

    /**
     * Callback: stores the label text and replaces the quoted label with a
     * numbered VAR token.
     *
     * @param array $match Match data from preg_replace_callback().
     * @return string
     */
    protected function varTokens($match) {
        $this->data[$this->varTokenId] = $match[1];
        return $this->addDelimiters(self::VAR_TOKEN . self::ID_DELIM . $this->varTokenId++);
    }

    /**
     * Callback: replaces a bare keyword with a KEYWORD token.
     *
     * @param array $match Match data from preg_replace_callback().
     * @return string
     */
    protected function keywordToken($match) {
        return $this->addDelimiters(self::KEYWORD_TOKEN . self::ID_DELIM . $match[1]);
    }

    /**
     * Wraps a token body in the left and right token delimiters.
     *
     * @param string $token
     * @return string
     */
    private function addDelimiters($token) {
        return self::TOKEN_DELIM_LEFT . $token . self::TOKEN_DELIM_RIGHT;
    }
}

Parser - . , , satisfying Parsers.

/**
 * Builds a nested array out of a TokenizedInput.
 *
 * Every VAR token contributes its stored label, and every block contributes a
 * nested array built from its contents. After the walk, one boolean is
 * appended to the top-level result: true when an AND keyword token is found
 * in the tokenized string once all level-1 blocks are removed, false otherwise.
 */
class ParsedInput
{
    private $input;
    private $result;
    private $context;

    /**
     * @param TokenizedInput $input Source of the token string, the token
     *                              patterns and the stored labels.
     */
    public function __construct(TokenizedInput $input) {
        $this->input = $input;
    }

    /**
     * Returns the parsed structure, building it on the first call only.
     *
     * @return array
     */
    public function result() {
        if (!isset($this->result)) {
            $this->parse($this->input->string());
            $this->addOperator();
        }

        return $this->result;
    }

    /**
     * Walks over every token match in $string and collects the outcome in
     * this instance's result.
     *
     * @param string     $string  Tokenized text to scan.
     * @param string|int $context Recorded on the instance: 'root' by default,
     *                            the block level for nested calls.
     * @return array|null The collected result (null when nothing was added).
     */
    private function parse($string, $context = 'root') {
        $this->context = $context;

        // Only the callback's side effects matter; the replaced string is discarded.
        $pattern = $this->input->tokenSearchRegexp();
        preg_replace_callback($pattern, [$this, 'buildStructure'], $string);

        return $this->result;
    }

    /**
     * Callback: dispatches one token match to the block or the variable handler.
     *
     * @param array $match Match data from preg_replace_callback().
     */
    protected function buildStructure($match) {
        // Block matches are tested first: they carry an 'id' key as well.
        if (isset($match['contents'])) {
            $this->parseBlock($match['contents'], $match['level']);
            return;
        }

        if (isset($match['id'])) {
            $this->parseVar($match['id']);
        }
    }

    /**
     * Appends the label stored under the given VAR id.
     *
     * @param string $id Id captured from the token.
     */
    protected function parseVar($id) {
        $index = (int) $id;
        $this->result[] = $this->input->variable($index);
    }

    /**
     * Parses a block's contents with a fresh copy of this parser and appends
     * whatever that copy collected as one nested element.
     *
     * @param string $contents Tokenized text between the block's tokens.
     * @param string $level    Level captured from the opening token.
     */
    protected function parseBlock($contents, $level) {
        $child   = clone $this;
        $subtree = $child->parse($contents, (int) $level);
        $this->result[] = $subtree;
    }

    /**
     * Appends the operator flag to the result: true when an AND keyword token
     * remains after all level-1 blocks are removed from the tokenized string.
     *
     * @return bool The flag that was appended.
     */
    protected function addOperator() {
        $nestedBlocks = '#' . $this->input->blockSearchRegexp(1) . '#';
        $topLevelOnly = preg_replace($nestedBlocks, '', $this->input->string());
        $andKeyword   = '#' . $this->input->keywordSearchRegexp('AND') . '#';

        $isAnd = (preg_match($andKeyword, $topLevelOnly) === 1);
        $this->result[] = $isAnd;

        return $isAnd;
    }

    /**
     * A cloned parser starts with an empty result of its own.
     */
    public function __clone() {
        $this->result = [];
    }
}

:

// Example sentence: nested groups with quoted labels, AND at the outer level.
$input = '( ( "M" AND ( "(" OR "AND" ) ) AND "T" )';

// Wrap the sentence in a tokenizer (tokenization itself happens lazily, on
// first use) and hand the tokenizer to the parser.
$tokenized = new TokenizedInput($input);
$parsed = new ParsedInput($tokenized);

// Nested array of labels; its last element is the boolean appended by
// ParsedInput::addOperator().
$result = $parsed->result();

/import/interfaces, . (, ) , .

+1

Source: https://habr.com/ru/post/1667289/


All Articles