Your IP : 3.145.85.123
<?php
/**
* @author Niels A.D.
* @author Todd Burry <todd@vanillaforums.com>
* @copyright 2010 Niels A.D., 2014 Todd Burry
* @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1
* @package pQuery
*/
namespace pagelayerQuery;
/**
* Tokenizes a css selector query
*/
class CSSQueryTokenizer extends TokenizerBase {

    /** Token emitted for "[" */
    const TOK_BRACKET_OPEN = 100;

    /** Token emitted for "]" */
    const TOK_BRACKET_CLOSE = 101;

    /** Token emitted for "(" */
    const TOK_BRACE_OPEN = 102;

    /** Token emitted for ")" */
    const TOK_BRACE_CLOSE = 103;

    /** Token emitted for a quoted string (either quote style) */
    const TOK_STRING = 104;

    /** Token emitted for ":" */
    const TOK_COLON = 105;

    /** Token emitted for "," */
    const TOK_COMMA = 106;

    /** Token emitted for a "!" that is not part of "!=" */
    const TOK_NOT = 107;

    /** Token emitted for a "*" that is not part of "*=" */
    const TOK_ALL = 108;

    /** Token emitted for a "|" that is not part of "|=" */
    const TOK_PIPE = 109;

    /** Token emitted for "+" */
    const TOK_PLUS = 110;

    /** Token emitted for a "~" that is not part of "~=" */
    const TOK_SIBLING = 111;

    /** Token emitted for "." */
    const TOK_CLASS = 112;

    /** Token emitted for "#" */
    const TOK_ID = 113;

    /** Token emitted for a ">" that is not part of ">=" */
    const TOK_CHILD = 114;

    /** Attribute comparison "|=" */
    const TOK_COMPARE_PREFIX = 115;

    /** Attribute comparison "*=" */
    const TOK_COMPARE_CONTAINS = 116;

    /** Attribute comparison "~=" */
    const TOK_COMPARE_CONTAINS_WORD = 117;

    /** Attribute comparison "$=" */
    const TOK_COMPARE_ENDS = 118;

    /** Attribute comparison "=" */
    const TOK_COMPARE_EQUALS = 119;

    /** Attribute comparison "!=" */
    const TOK_COMPARE_NOT_EQUAL = 120;

    /** Attribute comparison ">=" */
    const TOK_COMPARE_BIGGER_THAN = 121;

    /** Attribute comparison "<=" */
    const TOK_COMPARE_SMALLER_THAN = 122;

    /** Attribute comparison "%=" (regular expression) */
    const TOK_COMPARE_REGEX = 123;

    /** Attribute comparison "^=" */
    const TOK_COMPARE_STARTS = 124;

    /**
     * Characters treated as part of an identifier in a query
     * @see TokenizerBase::$identifiers
     * @access private
     */
    public $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-?';

    /**
     * Per-character dispatch table: a value is either the token constant for
     * that character, or the name of the method of this class that decides
     * which token the character stands for.
     * @see TokenizerBase::$custom_char_map
     * @access private
     */
    public $custom_char_map = array(
        '.' => self::TOK_CLASS,
        '#' => self::TOK_ID,
        ',' => self::TOK_COMMA,
        '>' => 'parse_gt',
        '+' => self::TOK_PLUS,
        '~' => 'parse_sibling',
        '|' => 'parse_pipe',
        '*' => 'parse_star',
        '$' => 'parse_compare',
        '=' => self::TOK_COMPARE_EQUALS,
        '!' => 'parse_not',
        '%' => 'parse_compare',
        '^' => 'parse_compare',
        '<' => 'parse_compare',
        '"' => 'parse_string',
        "'" => 'parse_string',
        '(' => self::TOK_BRACE_OPEN,
        ')' => self::TOK_BRACE_CLOSE,
        '[' => self::TOK_BRACKET_OPEN,
        ']' => self::TOK_BRACKET_CLOSE,
        ':' => self::TOK_COLON
    );

    /**
     * Pick between a single-character token and its "<char>=" variant.
     *
     * If the character right after the current position is "=", it is
     * consumed (the position moves onto it) and $with_equals becomes the
     * current token. Otherwise the position is left alone and
     * $without_equals becomes the current token.
     *
     * @param int $with_equals Token to use when "=" follows
     * @param int $without_equals Token to use when it does not
     * @return int The token that was stored in {@link $token}
     */
    private function pick_by_equals_suffix($with_equals, $without_equals) {
        $lookahead = $this->pos + 1;
        if (($lookahead < $this->size) && ($this->doc[$lookahead] === '=')) {
            $this->pos = $lookahead;
            return ($this->token = $with_equals);
        }
        return ($this->token = $without_equals);
    }

    /**
     * Handle ">"
     * @internal Yields {@link TOK_COMPARE_BIGGER_THAN} for ">=", otherwise {@link TOK_CHILD}
     * @return int
     */
    protected function parse_gt() {
        return $this->pick_by_equals_suffix(self::TOK_COMPARE_BIGGER_THAN, self::TOK_CHILD);
    }

    /**
     * Handle "~"
     * @internal Yields {@link TOK_COMPARE_CONTAINS_WORD} for "~=", otherwise {@link TOK_SIBLING}
     * @return int
     */
    protected function parse_sibling() {
        return $this->pick_by_equals_suffix(self::TOK_COMPARE_CONTAINS_WORD, self::TOK_SIBLING);
    }

    /**
     * Handle "|"
     * @internal Yields {@link TOK_COMPARE_PREFIX} for "|=", otherwise {@link TOK_PIPE}
     * @return int
     */
    protected function parse_pipe() {
        return $this->pick_by_equals_suffix(self::TOK_COMPARE_PREFIX, self::TOK_PIPE);
    }

    /**
     * Handle "*"
     * @internal Yields {@link TOK_COMPARE_CONTAINS} for "*=", otherwise {@link TOK_ALL}
     * @return int
     */
    protected function parse_star() {
        return $this->pick_by_equals_suffix(self::TOK_COMPARE_CONTAINS, self::TOK_ALL);
    }

    /**
     * Handle "!"
     * @internal Yields {@link TOK_COMPARE_NOT_EQUAL} for "!=", otherwise {@link TOK_NOT}
     * @return int
     */
    protected function parse_not() {
        return $this->pick_by_equals_suffix(self::TOK_COMPARE_NOT_EQUAL, self::TOK_NOT);
    }

    /**
     * Handle the characters that only form a token together with "=":
     * "$=", "%=", "^=" and "<=".
     * @return int|bool The compare token, or false when no "=" follows
     * (in which case neither the position nor the token is changed)
     */
    protected function parse_compare() {
        $lookahead = $this->pos + 1;
        if (($lookahead >= $this->size) || ($this->doc[$lookahead] !== '=')) {
            return false;
        }

        // Remember the leading character, then step onto the "="
        $lead = $this->doc[$this->pos];
        $this->pos = $lookahead;

        $tokens = array(
            '$' => self::TOK_COMPARE_ENDS,
            '%' => self::TOK_COMPARE_REGEX,
            '^' => self::TOK_COMPARE_STARTS,
            '<' => self::TOK_COMPARE_SMALLER_THAN
        );
        if (isset($tokens[$lead])) {
            return ($this->token = $tokens[$lead]);
        }
        return false;
    }

    /**
     * Handle a quoted string, opened by either " or '
     *
     * Searches forward for the matching quote character. A backslash makes
     * the scan skip the character that follows it. If the string is never
     * closed, the position is moved to the last character of the document.
     * @return int Always {@link TOK_STRING}
     */
    protected function parse_string() {
        $quote = $this->doc[$this->pos];
        $stop_chars = $quote.'\\';

        for (;;) {
            if ($this->next_search($stop_chars, false) === self::TOK_NULL) {
                // Unterminated string: consume everything that is left
                $this->pos = $this->size - 1;
                break;
            }
            if ($this->doc[$this->pos] === $quote) {
                break;
            }
            // Stopped on a backslash: step over it so the escaped character is skipped
            ++$this->pos;
        }

        return ($this->token = self::TOK_STRING);
    }
}
/**
* Performs a css select query on HTML nodes
*/
class HtmlSelector {

    /**
     * Parser object
     * @internal Holds a class name until the constructor replaces it with an instance
     * @var CSSQueryTokenizer
     */
    public $parser = 'pagelayerQuery\\CSSQueryTokenizer';

    /**
     * Target of queries
     * @var DomNode
     */
    public $root = null;

    /**
     * Last performed query, result in {@link $result}
     * @var string
     */
    public $query = '';

    /**
     * Array of matching nodes
     * @var array
     */
    public $result = array();

    /**
     * Include root in search, if false then only child nodes are evaluated
     * @var bool
     */
    public $search_root = false;

    /**
     * Search recursively
     * @var bool
     */
    public $search_recursive = true;

    /**
     * Extra function map for custom filters
     * @var array
     * @internal array('root' => 'filter_root') will cause the
     * selector to call $this->filter_root at :root
     * @see DomNode::$filter_map
     */
    public $custom_filter_map = array();

    /**
     * Class constructor, runs $query immediately
     * @param DomNode $root {@link $root}
     * @param string $query
     * @param bool $search_root {@link $search_root}
     * @param bool $search_recursive {@link $search_recursive}
     * @param CSSQueryTokenizer $parser If null, then the class named in {@link $parser} is instantiated
     */
    public function __construct($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {
        if ($parser === null) {
            $parser = new $this->parser();
        }
        $this->parser = $parser;
        $this->root = $root;
        $this->search_root = $search_root;
        $this->search_recursive = $search_recursive;
        $this->select($query);
    }

    /**
     * toString method, returns {@link $query}
     * @return string
     * @access private
     */
    public function __toString() {
        return $this->query;
    }

    /**
     * Class magic invoke method, performs {@link select()}
     * @param string $query
     * @return array|bool
     * @access private
     */
    public function __invoke($query = '*') {
        return $this->select($query);
    }

    /**
     * Perform query
     * @param string $query
     * @return array|bool Matching nodes, false on failure
     */
    public function select($query = '*') {
        $this->parser->setDoc($query);
        $this->query = $query;
        return (($this->parse()) ? $this->result : false);
    }

    /**
     * Trigger error
     * @param string $error
     * @internal %pos% and %tok% will be replaced in string with position and token(string)
     * @access private
     */
    protected function error($error) {
        $error = htmlentities(str_replace(
            array('%tok%', '%pos%'),
            array($this->parser->getTokenString(), (int) $this->parser->getPos()),
            $error
        ));
        trigger_error($error);
    }

    /**
     * Get identifier (parse identifier or string)
     *
     * Reads the parser's current token; it does not advance the parser.
     * For a string token the surrounding quotes are dropped and the escape
     * sequences \' \" and \\ are unescaped.
     * @param bool $do_error Error on failure
     * @return string|bool False on failure
     * @access private
     */
    protected function parse_getIdentifier($do_error = true) {
        $p = $this->parser;
        $tok = $p->token;

        if ($tok === CSSQueryTokenizer::TOK_IDENTIFIER) {
            return $p->getTokenString();
        } elseif ($tok === CSSQueryTokenizer::TOK_STRING) {
            return str_replace(array('\\\'', '\\"', '\\\\'), array('\'', '"', '\\'), $p->getTokenString(1, -1));
        } elseif ($do_error) {
            $this->error('Expected identifier at %pos%!');
        }
        return false;
    }

    /**
     * Get query conditions (tag, attribute and filter conditions)
     *
     * Starts at the parser's current token and consumes one compound
     * selector, plus any further ones separated by commas. Each compound
     * selector becomes one entry of the returned array, with the optional
     * keys 'tags', 'attributes' and 'filters'.
     * @return array|bool False on failure
     * @see DomNode::match()
     * @access private
     */
    protected function parse_conditions() {
        $p = $this->parser;
        $tok = $p->token;

        if ($tok === CSSQueryTokenizer::TOK_NULL) {
            $this->error('Invalid search pattern(1): Empty string!');
            return false;
        }
        $conditions_all = array();

        //Tags
        while ($tok !== CSSQueryTokenizer::TOK_NULL) {
            $conditions = array('tags' => array(), 'attributes' => array());

            if ($tok === CSSQueryTokenizer::TOK_ALL) {
                $tok = $p->next();
                if (($tok === CSSQueryTokenizer::TOK_PIPE) && ($tok = $p->next()) && ($tok !== CSSQueryTokenizer::TOK_ALL)) {
                    // "*|name": any namespace, specific name
                    if (($tag = $this->parse_getIdentifier()) === false) {
                        return false;
                    }
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'compare' => 'name'
                    );
                    $tok = $p->next_no_whitespace();
                } else {
                    // "*" or "*|*": expressed as "tag is not the empty string"
                    $conditions['tags'][''] = array(
                        'tag' => '',
                        'match' => false
                    );
                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next_no_whitespace();
                    }
                }
            } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                $tok = $p->next();
                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    $conditions['tags'][] = array(
                        'tag' => '',
                        'compare' => 'namespace',
                    );
                } elseif (($tag = $this->parse_getIdentifier()) !== false) {
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'compare' => 'total',
                    );
                } else {
                    return false;
                }
                $tok = $p->next_no_whitespace();
            } elseif ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                // "(a, b + !c)": a group of tag conditions joined by "," (or) and "+" (and)
                $tok = $p->next_no_whitespace();
                $last_mode = 'or';
                while (true) {
                    $match = true;
                    $compare = 'total';
                    // Reset per entry so a value from a previous entry can never leak in
                    $tag = '';

                    if ($tok === CSSQueryTokenizer::TOK_NOT) {
                        $match = false;
                        $tok = $p->next_no_whitespace();
                    }

                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            // "*|name": advance the tokenizer (this object has no next() of its own)
                            $tok = $p->next();
                            $compare = 'name';
                            if (($tag = $this->parse_getIdentifier()) === false) {
                                return false;
                            }
                            // Move past the identifier so the separator check below sees what follows it
                            $tok = $p->next();
                        } else {
                            // Bare "*": mirror the top-level encoding used by this method
                            // ('tag' => '' combined with an inverted match flag).
                            $match = !$match;
                        }
                    } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_ALL) {
                            $tag = '';
                            $compare = 'namespace';
                        } elseif (($tag = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next_no_whitespace();
                    } else {
                        if (($tag = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();
                            if ($tok === CSSQueryTokenizer::TOK_ALL) {
                                $compare = 'namespace';
                            } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                                $tag = $tag.':'.$tag_name;
                            } else {
                                return false;
                            }
                            $tok = $p->next_no_whitespace();
                        }
                    }

                    if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                        $tok = $p->next_no_whitespace();
                    }

                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'match' => $match,
                        'operator' => $last_mode,
                        'compare' => $compare
                    );

                    switch ($tok) {
                        case CSSQueryTokenizer::TOK_COMMA:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'or';
                            continue 2;
                        case CSSQueryTokenizer::TOK_PLUS:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'and';
                            continue 2;
                        case CSSQueryTokenizer::TOK_BRACE_CLOSE:
                            $tok = $p->next();
                            break 2;
                        default:
                            $this->error('Expected closing brace or comma at pos %pos%!');
                            return false;
                    }
                }
            } elseif (($tag = $this->parse_getIdentifier(false)) !== false) {
                $tok = $p->next();
                if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $conditions['tags'][] = array(
                            'tag' => $tag,
                            'compare' => 'namespace'
                        );
                    } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                        $tag = $tag.':'.$tag_name;
                        $conditions['tags'][] = array(
                            'tag' => $tag,
                            'match' => true
                        );
                    } else {
                        return false;
                    }
                    $tok = $p->next();
                } elseif ($tag === 'text' && $tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                    $pos = $p->getPos();
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                        // "text()" selects nodes tagged "~text~"
                        $conditions['tags'][] = array(
                            'tag' => '~text~',
                            'match' => true
                        );
                        // Keep $tok in step with the parser so whatever follows
                        // "text()" (class, id, attributes, filters, comma) is seen below
                        $tok = $p->next();
                    } else {
                        // Not "text()": rewind to the opening brace.
                        // NOTE(review): no tag condition is recorded for "text" here and
                        // $tok is not refreshed after the rewind - confirm this is intended.
                        $p->setPos($pos);
                    }
                } else {
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'match' => true
                    );
                }
            } else {
                unset($conditions['tags']);
            }

            //Class
            $last_mode = 'or';
            if ($tok === CSSQueryTokenizer::TOK_CLASS) {
                $p->next();
                if (($class = $this->parse_getIdentifier()) === false) {
                    return false;
                }
                $conditions['attributes'][] = array(
                    'attribute' => 'class',
                    'operator_value' => 'contains_word',
                    'value' => $class,
                    'operator_result' => $last_mode
                );
                $last_mode = 'and';
                $tok = $p->next();
            }

            //ID
            if ($tok === CSSQueryTokenizer::TOK_ID) {
                $p->next();
                if (($id = $this->parse_getIdentifier()) === false) {
                    return false;
                }
                $conditions['attributes'][] = array(
                    'attribute' => 'id',
                    'operator_value' => 'equals',
                    'value' => $id,
                    'operator_result' => $last_mode
                );
                $last_mode = 'and';
                $tok = $p->next();
            }

            //Attributes
            if ($tok === CSSQueryTokenizer::TOK_BRACKET_OPEN) {
                $tok = $p->next_no_whitespace();
                while (true) {
                    $match = true;
                    $compare = 'total';

                    if ($tok === CSSQueryTokenizer::TOK_NOT) {
                        $match = false;
                        $tok = $p->next_no_whitespace();
                    }

                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();
                            if (($attribute = $this->parse_getIdentifier()) === false) {
                                return false;
                            }
                            $compare = 'name';
                            $tok = $p->next();
                        } else {
                            $this->error('Expected pipe at pos %pos%!');
                            return false;
                        }
                    } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        // "[|attr]": the identifier is the attribute name, so it must be
                        // stored in $attribute (the variable the condition below reads)
                        $tok = $p->next();
                        if (($attribute = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next_no_whitespace();
                    } elseif (($attribute = $this->parse_getIdentifier()) !== false) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();
                            if (($attribute_name = $this->parse_getIdentifier()) !== false) {
                                $attribute = $attribute.':'.$attribute_name;
                            } else {
                                return false;
                            }
                            $tok = $p->next();
                        }
                    } else {
                        return false;
                    }

                    if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                        $tok = $p->next_no_whitespace();
                    }

                    $operator_value = '';
                    $val = '';
                    switch ($tok) {
                        case CSSQueryTokenizer::TOK_COMPARE_PREFIX:
                        case CSSQueryTokenizer::TOK_COMPARE_CONTAINS:
                        case CSSQueryTokenizer::TOK_COMPARE_CONTAINS_WORD:
                        case CSSQueryTokenizer::TOK_COMPARE_ENDS:
                        case CSSQueryTokenizer::TOK_COMPARE_EQUALS:
                        case CSSQueryTokenizer::TOK_COMPARE_NOT_EQUAL:
                        case CSSQueryTokenizer::TOK_COMPARE_REGEX:
                        case CSSQueryTokenizer::TOK_COMPARE_STARTS:
                        case CSSQueryTokenizer::TOK_COMPARE_BIGGER_THAN:
                        case CSSQueryTokenizer::TOK_COMPARE_SMALLER_THAN:
                            $operator_value = $p->getTokenString(($tok === CSSQueryTokenizer::TOK_COMPARE_EQUALS) ? 0 : -1);
                            $p->next_no_whitespace();
                            if (($val = $this->parse_getIdentifier()) === false) {
                                return false;
                            }
                            $tok = $p->next_no_whitespace();
                            break;
                    }

                    // Compare $val against '' explicitly: a plain truthiness test would
                    // treat the value "0" as missing and silently drop the comparison
                    if ($operator_value && ($val !== '')) {
                        $conditions['attributes'][] = array(
                            'attribute' => $attribute,
                            'operator_value' => $operator_value,
                            'value' => $val,
                            'match' => $match,
                            'operator_result' => $last_mode,
                            'compare' => $compare
                        );
                    } else {
                        // No comparison given: 'value' carries the match flag instead
                        $conditions['attributes'][] = array(
                            'attribute' => $attribute,
                            'value' => $match,
                            'operator_result' => $last_mode,
                            'compare' => $compare
                        );
                    }

                    switch ($tok) {
                        case CSSQueryTokenizer::TOK_COMMA:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'or';
                            continue 2;
                        case CSSQueryTokenizer::TOK_PLUS:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'and';
                            continue 2;
                        case CSSQueryTokenizer::TOK_BRACKET_CLOSE:
                            $tok = $p->next();
                            break 2;
                        default:
                            $this->error('Expected closing bracket or comma at pos %pos%!');
                            return false;
                    }
                }
            }

            if (count($conditions['attributes']) < 1) {
                unset($conditions['attributes']);
            }

            //Filters
            while ($tok === CSSQueryTokenizer::TOK_COLON) {
                if (count($conditions) < 1) {
                    // A filter with nothing in front of it applies to any tag
                    $conditions['tags'] = array(array(
                        'tag' => '',
                        'match' => false
                    ));
                }

                $tok = $p->next();
                if (($filter = $this->parse_getIdentifier()) === false) {
                    return false;
                }

                if (($tok = $p->next()) === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                    // Take everything up to the matching ")" verbatim as the
                    // filter parameters, keeping track of nested braces
                    $start = $p->pos;
                    $count = 1;
                    while ((($tok = $p->next()) !== CSSQueryTokenizer::TOK_NULL) && !(($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) && (--$count === 0))) {
                        if ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                            ++$count;
                        }
                    }

                    if ($tok !== CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                        $this->error('Expected closing brace at pos %pos%!');
                        return false;
                    }
                    $len = $p->pos - 1 - $start;
                    $params = (($len > 0) ? substr($p->doc, $start + 1, $len) : '');
                    $tok = $p->next();
                } else {
                    $params = '';
                }

                $conditions['filters'][] = array('filter' => $filter, 'params' => $params);
            }

            if (count($conditions) < 1) {
                $this->error('Invalid search pattern(2): No conditions found!');
                return false;
            }
            $conditions_all[] = $conditions;

            if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                $tok = $p->next_no_whitespace();
            }

            if ($tok === CSSQueryTokenizer::TOK_COMMA) {
                $tok = $p->next_no_whitespace();
                continue;
            } else {
                break;
            }
        }

        return $conditions_all;
    }

    /**
     * Evaluate root node using custom callback
     * @param array $conditions {@link parse_conditions()}
     * @param bool|int $recursive
     * @param bool $check_root
     * @return array The nodes found, also stored in {@link $result}
     * @access private
     */
    protected function parse_callback($conditions, $recursive = true, $check_root = false) {
        return ($this->result = $this->root->getChildrenByMatch(
            $conditions,
            $recursive,
            $check_root,
            $this->custom_filter_map
        ));
    }

    /**
     * Parse first bit of query, only root node has to be evaluated now
     * @param bool|int $recursive
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_single($recursive = true) {
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        $this->parse_callback($c, $recursive, $this->search_root);
        return true;
    }

    /**
     * Evaluate sibling nodes: keeps the next sibling of every current
     * result node that matches the upcoming conditions
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_adjacent() {
        $tmp = $this->result;
        $this->result = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        foreach ($tmp as $t) {
            if (($sibling = $t->getNextSibling()) !== false) {
                if ($sibling->match($c, true, $this->custom_filter_map)) {
                    $this->result[] = $sibling;
                }
            }
        }

        return true;
    }

    /**
     * Evaluate {@link $result}: runs the upcoming conditions against every
     * current result node (or its parent) and collects the unique matches
     * @param bool $parent Evaluate parent nodes
     * @param bool|int $recursive
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_result($parent = false, $recursive = true) {
        $tmp = $this->result;
        $tmp_res = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        // parse_callback() searches from $this->root, so the root is swapped
        // temporarily for each node. Remember the real one and put it back
        // afterwards, otherwise a later select() on this object would start
        // from whichever node happened to be processed last.
        $original_root = $this->root;

        foreach (array_keys($tmp) as $t) {
            $this->root = (($parent) ? $tmp[$t]->parent : $tmp[$t]);
            if (!is_object($this->root)) {
                // No node to search from (presumably a node without a parent)
                continue;
            }
            $this->parse_callback($c, $recursive);
            foreach (array_keys($this->result) as $r) {
                if (!in_array($this->result[$r], $tmp_res, true)) {
                    $tmp_res[] = $this->result[$r];
                }
            }
        }

        $this->root = $original_root;
        $this->result = $tmp_res;
        return true;
    }

    /**
     * Parse full query
     *
     * Evaluates the first compound selector against the root, then keeps
     * narrowing {@link $result} according to the combinator that follows,
     * until the query ends or no nodes are left.
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse() {
        $p = $this->parser;
        $p->setPos(0);
        $this->result = array();

        if (!$this->parse_single()) {
            return false;
        }

        while (count($this->result) > 0) {
            switch ($p->token) {
                case CSSQueryTokenizer::TOK_CHILD:
                    // "a > b": search one level below each result node
                    $p->next_no_whitespace();
                    if (!$this->parse_result(false, 1)) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_SIBLING:
                    // "a ~ b": search one level below each result node's parent
                    $p->next_no_whitespace();
                    if (!$this->parse_result(true, 1)) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_PLUS:
                    // "a + b": test the next sibling of each result node
                    $p->next_no_whitespace();
                    if (!$this->parse_adjacent()) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_ALL:
                case CSSQueryTokenizer::TOK_IDENTIFIER:
                case CSSQueryTokenizer::TOK_STRING:
                case CSSQueryTokenizer::TOK_BRACE_OPEN:
                case CSSQueryTokenizer::TOK_BRACKET_OPEN:
                case CSSQueryTokenizer::TOK_ID:
                case CSSQueryTokenizer::TOK_CLASS:
                case CSSQueryTokenizer::TOK_COLON:
                    // "a b": another compound selector follows directly
                    if (!$this->parse_result()) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_NULL:
                    break 2;

                default:
                    $this->error('Invalid search pattern(3): No result modifier found!');
                    return false;
            }
        }

        return true;
    }
}
?>