URL-, 5-10 , ( ) , ( ). , : - , ? , .
, :
- URL- - . , , "/" ( , ). "/" - , "/" - .
- , ".".
- , "/" .
For example, given the URL:
"www.stackoverflow.com/users/239289"
the tokenizer produces the following tokens:
"www", "stackoverflow", "com", "/" , "users", "239289"
, "/" , .
tokenize URL - PHP, ( ). , :
/**
 * Split a URL (without scheme or query string) into a flat list of tokens.
 *
 * The hostname part is split on "." and the path part on "/"; a single "/"
 * token is always placed between the two, even when the path is empty.
 * A URL that starts with "/" has no hostname, so the hostname is replaced
 * by the single token "*" (handled as a wildcard by do_search()).
 *
 * Example: "www.stackoverflow.com/users/239289" becomes
 *          array("www", "stackoverflow", "com", "/", "users", "239289").
 *
 * @param string $url URL to tokenize.
 * @return array List of string tokens; always contains the "/" separator.
 */
function tokenize_url($url) {
    $pos = strpos($url, '/');
    if ($pos === 0) {
        // Leading "/": no hostname was given, so any hostname is accepted.
        $hostname = '*';
        $path = substr($url, 1);
    } elseif ($pos !== false) {
        $hostname = substr($url, 0, $pos);
        $path = substr($url, $pos + 1);
    } else {
        // No "/" at all: the whole string is the hostname.
        $hostname = $url;
        $path = '';
    }

    // Before PHP 8, substr() returns false instead of '' when the start
    // offset is at the end of the string (e.g. for "/" or "host/").
    // Normalise for BOTH branches above so that an empty path never turns
    // into a bogus empty path token.
    if ($path === false) {
        $path = '';
    }

    $hostname_tokens = ($hostname !== '') ? explode('.', $hostname) : array();
    $path_tokens = ($path !== '') ? explode('/', $path) : array();

    return array_merge($hostname_tokens, array('/'), $path_tokens);
}
, , URL-, URL- ( ). , ( , ), O (1) . , , "%!%!%" node.
- , :
/**
 * Build a nested-array trie from a list of URL patterns.
 *
 * Every URL is tokenized with tokenize_url(); each token, suffixed with "%",
 * becomes a key one level deeper in the trie. The node reached by the last
 * token of a URL receives the terminator key "%!%!%" (value 1).
 *
 * @param array $site_list List of URL pattern strings.
 * @return array The compiled trie.
 */
function compile_site_list($site_list) {
    $trie = array();
    foreach ($site_list as $url) {
        $parts = tokenize_url($url);
        $last = count($parts) - 1;
        // Walk down from the root, creating missing nodes on the way.
        $cursor = &$trie;
        foreach ($parts as $idx => $part) {
            $key = $part . '%';
            if (!isset($cursor[$key])) {
                $cursor[$key] = array();
            }
            if ($idx === $last) {
                // Final token of this URL: flag the node as a complete entry.
                $cursor[$key]['%!%!%'] = 1;
            } else {
                $cursor = &$cursor[$key];
            }
        }
        // Drop the reference so the next rebinding starts clean.
        unset($cursor);
    }
    return $trie;
}
, URL- , compile_site_list() - .
URL. -, , , :
/**
 * Reduce a URL to the "host/path" form expected by tokenize_url().
 *
 * Removes, in this order:
 *   1. the query string and/or fragment (everything from the first "?" or "#"),
 *   2. a leading "scheme://" prefix.
 *
 * The scheme is only stripped when "://" really is a prefix, i.e. when no
 * "/" occurs before it. Searching for "://" anywhere in the string would
 * turn "example.com/redirect?to=http://evil.com" into "evil.com" and make
 * the lookup run against the wrong host.
 *
 * @param string $url Raw URL.
 * @return string URL without scheme, query string and fragment.
 */
function scrub_url($url) {
    // Cut at the first "?" or "#"; strcspn() yields the full length when
    // neither character is present, leaving the string unchanged.
    $url = (string) substr($url, 0, strcspn($url, '?#'));

    $scheme_end = strpos($url, '://');
    // The first "/" of the URL must be the one directly after the ":";
    // otherwise the "://" is part of the path, not a scheme separator.
    if ($scheme_end !== false && strpos($url, '/') === $scheme_end + 1) {
        // Cast guards against substr() returning false before PHP 8 when
        // nothing follows the scheme.
        $url = (string) substr($url, $scheme_end + 3);
    }

    return $url;
}
, , URL-, , . "%!%!%" , .
, , . , , ( "/" ), , .
- .
The search code:
/**
 * Check whether a URL matches any entry of a compiled site list.
 *
 * The URL is first cleaned with scrub_url(), then split with tokenize_url(),
 * and the resulting tokens are looked up in the trie via do_search().
 *
 * @param string $url                URL to check.
 * @param array  $compiled_site_list Trie produced by compile_site_list().
 * @return bool Result of do_search() for the URL's tokens.
 */
function search_compiled_list($url, $compiled_site_list) {
    return do_search(tokenize_url(scrub_url($url)), $compiled_site_list);
}
/**
 * Recursively match a token list against a (sub-)trie built by
 * compile_site_list().
 *
 * Matching rules, as implemented below:
 *   - A node carrying the terminator key "%!%!%" matches immediately,
 *     whatever tokens remain (listed URLs therefore act as prefixes).
 *   - Otherwise the first token (suffixed with "%") is followed literally
 *     if the node has such a child.
 *   - If the node has a wildcard child "*%" and the first token is not the
 *     "/" separator, the wildcard may consume one or more leading tokens,
 *     up to and including all remaining tokens.
 *
 * @param array $tokens             Remaining tokens of the URL being checked.
 * @param array $compiled_site_list Current trie node.
 * @return bool True when the tokens match an entry, false otherwise.
 */
function do_search($tokens, $compiled_site_list) {
    if (isset($compiled_site_list['%!%!%'])) {
        return true;
    }

    $remaining = count($tokens);
    if ($remaining === 0) {
        return false;
    }

    // Literal match on the first token.
    $token = $tokens[0] . '%';
    if (isset($compiled_site_list[$token])
        && do_search(array_slice($tokens, 1), $compiled_site_list[$token]) === true) {
        return true;
    }

    // Wildcard match: let "*" swallow the first $i tokens and continue
    // below the wildcard node with whatever is left.
    if (isset($compiled_site_list['*%']) && $tokens[0] !== '/') {
        // The bound is inclusive so the wildcard can also consume the LAST
        // token; with "<" a pattern ending in "*" matched "a.com/x/y" but
        // not "a.com/x".
        for ($i = 1; $i <= $remaining; $i++) {
            if (do_search(array_slice($tokens, $i), $compiled_site_list['*%']) === true) {
                return true;
            }
        }
    }

    return false;
}
Finally, assuming $site_list holds the list of URL patterns, a URL can be checked like this:
// Usage example. $site_list is not defined in this snippet; it must already
// hold the list of URL patterns to compile.
$url_to_check = "http://www.stackoverflow.com/users/120262?tab=accounts";
// Build the lookup trie from the pattern list.
$compiled_site_list = compile_site_list($site_list);
// Scrub, tokenize and match the URL against the compiled trie.
$result = search_compiled_list($url_to_check, $compiled_site_list);
var_dump($result);
URL-, , , , , . , , , , /. ( , , .)
, URL-, , URL- ( ), . , - , 2 , .