. - - , - .
, , (lazy - greedy ). ( , , ). InvalidArgumentException RuntimeException, .
class TokenizedInput
{
const VAR_REGEXP = '\"(?P<string>.*?)\"';
const BLOCK_OPEN_REGEXP = '\(';
const BLOCK_CLOSE_REGEXP = '\)';
const KEYWORD_REGEXP = '(?<keyword>OR|AND)';
const TOKEN_DELIM_LEFT = '<';
const TOKEN_DELIM_RIGHT = '>';
const VAR_TOKEN = 'VAR';
const KEYWORD_TOKEN = 'KEYWORD';
const BLOCK_OPEN_TOKEN = 'BLOCK';
const BLOCK_CLOSE_TOKEN = 'ENDBLOCK';
const ID_DELIM = ':';
const ID_REGEXP = '[0-9]+';
private $original;
private $tokenized;
private $data = [];
private $blockLevel = 0;
private $varTokenId = 0;
protected $procedure = [
'varTokens' => self::VAR_REGEXP,
'keywordToken' => self::KEYWORD_REGEXP,
'blockTokens' => '(?P<open>' . self::BLOCK_OPEN_REGEXP . ')|(?P<close>' . self::BLOCK_CLOSE_REGEXP . ')'
];
private $tokenMatch;
public function __construct($input) {
$this->original = (string) $input;
}
public function string() {
isset($this->tokenized) or $this->tokenize();
return $this->tokenized;
}
public function variable($key) {
isset($this->tokenized) or $this->tokenize();
if (!isset($this->data[$key])) {
throw new InvalidArgumentException("Variable id:($key) does not exist.");
}
return $this->data[$key];
}
public function tokenSearchRegexp() {
if (!isset($this->tokenMatch)) {
$strings = $this->stringSearchRegexp();
$blocks = $this->blockSearchRegexp();
$this->tokenMatch = '#(?:' . $strings . '|' . $blocks . ')#';
}
return $this->tokenMatch;
}
public function stringSearchRegexp($id = null) {
$id = $id ?: self::ID_REGEXP;
return preg_quote(self::TOKEN_DELIM_LEFT . self::VAR_TOKEN . self::ID_DELIM)
. '(?P<id>' . $id . ')'
. preg_quote(self::TOKEN_DELIM_RIGHT);
}
public function blockSearchRegexp($level = null) {
$level = $level ?: self::ID_REGEXP;
$block_open = preg_quote(self::TOKEN_DELIM_LEFT . self::BLOCK_OPEN_TOKEN . self::ID_DELIM)
. '(?P<level>' . $level . ')'
. preg_quote(self::TOKEN_DELIM_RIGHT);
$block_close = preg_quote(self::TOKEN_DELIM_LEFT . self::BLOCK_CLOSE_TOKEN . self::ID_DELIM)
. '\k<level>'
. preg_quote(self::TOKEN_DELIM_RIGHT);
return $block_open . '(?P<contents>.*)' . $block_close;
}
public function keywordSearchRegexp($keyword = null) {
$keyword = $keyword ? '(?P<keyword>' . $keyword . ')' : self::KEYWORD_REGEXP;
return preg_quote(self::TOKEN_DELIM_LEFT . self::KEYWORD_TOKEN . self::ID_DELIM)
. $keyword
. preg_quote(self::TOKEN_DELIM_RIGHT);
}
private function tokenize() {
$current = $this->original;
foreach ($this->procedure as $method => $pattern) {
$current = preg_replace_callback('#(?:' . $pattern . ')#', [$this, $method], $current);
}
if ($this->blockLevel) {
throw new RuntimeException("Syntax error. Parenthesis mismatch." . $this->blockLevel);
}
$this->tokenized = $current;
}
protected function blockTokens($match) {
if (isset($match['close'])) {
$token = self::BLOCK_CLOSE_TOKEN . self::ID_DELIM . --$this->blockLevel;
} else {
$token = self::BLOCK_OPEN_TOKEN . self::ID_DELIM . $this->blockLevel++;
}
return $this->addDelimiters($token);
}
protected function varTokens($match) {
$this->data[$this->varTokenId] = $match[1];
return $this->addDelimiters(self::VAR_TOKEN . self::ID_DELIM . $this->varTokenId++);
}
protected function keywordToken($match) {
return $this->addDelimiters(self::KEYWORD_TOKEN . self::ID_DELIM . $match[1]);
}
private function addDelimiters($token) {
return self::TOKEN_DELIM_LEFT . $token . self::TOKEN_DELIM_RIGHT;
}
}
Parser - .
, , satysfying Parsers.
class ParsedInput
{
private $input;
private $result;
private $context;
public function __construct(TokenizedInput $input) {
$this->input = $input;
}
public function result() {
if (isset($this->result)) { return $this->result; }
$this->parse($this->input->string());
$this->addOperator();
return $this->result;
}
private function parse($string, $context = 'root') {
$this->context = $context;
preg_replace_callback(
$this->input->tokenSearchRegexp(),
[$this, 'buildStructure'],
$string
);
return $this->result;
}
protected function buildStructure($match) {
if (isset($match['contents'])) { $this->parseBlock($match['contents'], $match['level']); }
elseif (isset($match['id'])) { $this->parseVar($match['id']); }
}
protected function parseVar($id) {
$this->result[] = $this->input->variable((int) $id);
}
protected function parseBlock($contents, $level) {
$nested = clone $this;
$this->result[] = $nested->parse($contents, (int) $level);
}
protected function addOperator() {
$subBlocks = '#' . $this->input->blockSearchRegexp(1) . '#';
$rootLevel = preg_replace($subBlocks, '', $this->input->string());
$rootKeyword = '#' . $this->input->keywordSearchRegexp('AND') . '#';
return $this->result[] = (preg_match($rootKeyword, $rootLevel) === 1);
}
public function __clone() {
$this->result = [];
}
}
:
$input = '( ( "M" AND ( "(" OR "AND" ) ) AND "T" )';
$tokenized = new TokenizedInput($input);
$parsed = new ParsedInput($tokenized);
$result = $parsed->result();
/import/intrefaces, . (, ) , .