reasonableframework/helper/webpagetool.php

456 lines
13 KiB
PHP
Raw Normal View History

2018-02-26 06:05:25 +00:00
<?php
/**
* @file webpagetool.php
* @date 2018-06-01
* @author Go Namhyeon <gnh1201@gmail.com>
* @brief WebPageTool helper
*/
2018-05-14 11:36:45 +00:00
/****** START EXAMPLES *****/
/* // REQUEST GET: $response = get_web_page($url, "get", $data); */
/* // REQUEST POST: $response = get_web_page($url, "post", $data); */
/* // REQUEST GET with CACHE: $response = get_web_page($url, "get.cache", $data); */
/* // REQUEST POST with CACHE: $response = get_web_page($url, "post.cache", $data); */
/* // REQUEST GET by CMD with CACHE: $response = get_web_page($url, "get.cmd.cache"); */
/* // REQUEST GET by SOCK with CACHE: $response = get_web_page($url, "get.sock.cache"); */
/* // REQUEST GET by FGC: $response = get_web_page($url, "get.fgc"); */
/* // REQUEST GET by WGET: $response = get_web_page($url, "get.wget"); */
/* // PRINT CONTENT: echo $response['content']; */
/****** END EXAMPLES *****/
2018-05-12 13:54:38 +00:00
if(!function_exists("get_web_fgc")) {
    /**
     * Fetch a URL with file_get_contents().
     *
     * @param string $url location to read
     * @return string|false the body on success; false when the allow_url_fopen
     *                      ini setting is off or when file_get_contents() fails
     */
    function get_web_fgc($url) {
        // Refuse early when the allow_url_fopen ini setting is disabled.
        if(!ini_get("allow_url_fopen")) {
            return false;
        }

        return file_get_contents($url);
    }
}
2018-05-12 12:59:00 +00:00
if(!function_exists("get_web_build_qs")) {
    /**
     * Build a query string, optionally appended to a URL.
     *
     * - With an empty $url, only the encoded query string is returned.
     * - With a $url, the encoded parameters are appended using "?" or "&"
     *   depending on whether the URL already carries a query part.
     * - With no parameters, the URL is returned untouched (no dangling "?").
     *
     * @param string $url  base URL, or "" to get the bare query string
     * @param array  $data parameters to encode with http_build_query()
     * @return string
     */
    function get_web_build_qs($url="", $data=array()) {
        $query = http_build_query($data);

        if(empty($url)) {
            return $query;
        }

        // Nothing to append: keep the URL as it is.
        if($query === "") {
            return $url;
        }

        // A URL that already ends with a separator must not get a second one.
        $last = substr($url, -1);
        if($last === '?' || $last === '&') {
            return $url . $query;
        }

        $separator = (strpos($url, '?') === false) ? '?' : '&';
        return $url . $separator . $query;
    }
}
2018-04-10 11:26:54 +00:00
if(!function_exists("get_web_cmd")) {
    /**
     * Perform an HTTP request by shelling out to the `curl` command line tool.
     *
     * Supported $method values are "get", "post" and "jsonrpc"; any other
     * value builds no command and yields an empty string.
     *
     * $proxy, $ct_out and $t_out are accepted for signature parity with the
     * other transports but are not used by this one.
     *
     * @param string $url    target URL
     * @param string $method "get", "post" or "jsonrpc"
     * @param array  $data   query parameters (get), form fields (post; a value
     *                       starting with "@" is sent with -F as a file upload)
     *                       or the JSON payload (jsonrpc)
     * @param string $proxy  unused
     * @param string $ua     User-Agent passed to curl -A
     * @param int    $ct_out unused
     * @param int    $t_out  unused
     * @return mixed whatever exec_command() returns for the built command,
     *               or "" when no command was built
     */
    function get_web_cmd($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $output = "";
        $cmd_fin = "";

        if(!loadHelper("exectool")) {
            set_error("Helper exectool is required");
            show_errors();
        }

        if($method == "get") {
            $cmd_fin = sprintf(
                "curl -A '%s' -k '%s'",
                make_safe_argument($ua),
                make_safe_argument(get_web_build_qs($url, $data))
            );
        }

        if($method == "post") {
            $params_cmd = "";
            foreach($data as $k=>$v) {
                // A leading "@" marks a file upload (multipart -F), otherwise a plain field (-d).
                $flag = (substr($v, 0, 1) == "@") ? "-F" : "-d";
                $params_cmd .= sprintf("%s '%s=%s' ", $flag, make_safe_argument($k), make_safe_argument($v));
            }
            $cmd_fin = sprintf(
                "curl -X POST -A '%s' -k '%s' %s",
                make_safe_argument($ua),
                make_safe_argument($url),
                $params_cmd
            );
        }

        if($method == "jsonrpc") {
            // The JSON body and URL are quoted with escapeshellarg() so quotes or
            // shell metacharacters inside them cannot break out of the command.
            $cmd_fin = sprintf(
                "curl -A '%s' --header 'Content-Type: application/json' --request POST --data %s %s",
                make_safe_argument($ua),
                escapeshellarg(json_encode($data)),
                escapeshellarg($url)
            );
        }

        // Single execution point: every method, jsonrpc included, runs exactly once.
        if(!empty($cmd_fin)) {
            $output = exec_command($cmd_fin, "shell_exec");
        }

        return $output;
    }
}
// Reference: http://dev.epiloum.net/109
if(!function_exists("get_web_sock")) {
    /**
     * Perform an HTTP/HTTPS request over a raw socket (fsockopen).
     *
     * Only "get" and "post" are supported; any other method returns "" without
     * opening a connection. Response headers are skipped and only the body is
     * returned.
     *
     * Request building:
     * - get:  the URL's own query string and $data (if any) are sent in the
     *         request line.
     * - post: when $data is non-empty it is form-encoded as the body and the
     *         URL's query string stays in the request line; when $data is
     *         empty the URL's query string is sent as the body.
     *
     * NOTE(review): the request is sent as HTTP/1.1 and the body is returned
     * as received; chunked transfer encoding is not decoded here.
     *
     * $proxy, $ct_out and $t_out are accepted for signature parity with the
     * other transports but are not used by this one.
     *
     * @param string $url    absolute http:// or https:// URL
     * @param string $method "get" or "post"
     * @param array  $data   request parameters
     * @param string $proxy  unused
     * @param string $ua     User-Agent header value
     * @param int    $ct_out unused
     * @param int    $t_out  unused
     * @return string|false response body ("" when the socket cannot be opened);
     *                      false when the URL scheme is not http/https
     */
    function get_web_sock($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $output = "";
        $req = '';
        $linebreak = "\r\n";
        $headPassed = false;

        $info = parse_url($url);
        if(!is_array($info)) {
            $info = array();
        }

        // Resolve transport prefix and default port from the scheme.
        $transport = "";
        $scheme = array_key_empty("scheme", $info) ? "" : strtolower($info['scheme']);
        switch($scheme) {
            case "http":
                $default_port = 80;
                break;
            case "https":
                $transport = "ssl://";
                $default_port = 443;
                break;
            default:
                set_error("ambiguous protocol, HTTP or HTTPS");
                show_errors();
                return false;
        }

        // An explicit port in the URL wins over the scheme default.
        $port = array_key_empty("port", $info) ? $default_port : $info['port'];
        $host = isset($info['host']) ? $info['host'] : "";
        $host_header = ($port == $default_port) ? $host : ($host . ':' . $port);

        $path = array_key_empty("path", $info) ? "/" : $info['path'];
        $query = array_key_empty("query", $info) ? "" : $info['query'];
        $payload = (is_array($data) && count($data) > 0) ? http_build_query($data) : "";

        // Setting Request Header
        switch($method) {
            case 'get':
                $parts = array();
                if($query !== "") {
                    $parts[] = $query;
                }
                if($payload !== "") {
                    $parts[] = $payload;
                }
                if(count($parts) > 0) {
                    $path .= '?' . implode('&', $parts);
                }

                $req .= 'GET ' . $path . ' HTTP/1.1' . $linebreak;
                $req .= 'Host: ' . $host_header . $linebreak;
                $req .= 'User-Agent: ' . $ua . $linebreak;
                $req .= 'Referer: ' . $url . $linebreak;
                $req .= 'Connection: Close' . $linebreak . $linebreak;
                break;

            case 'post':
                if($payload !== "") {
                    // Body comes from $data; keep the URL's query in the request line.
                    $body = $payload;
                    if($query !== "") {
                        $path .= '?' . $query;
                    }
                } else {
                    // No $data: send the URL's query string as the body.
                    $body = $query;
                }

                $req .= 'POST ' . $path . ' HTTP/1.1' . $linebreak;
                $req .= 'Host: ' . $host_header . $linebreak;
                $req .= 'User-Agent: ' . $ua . $linebreak;
                $req .= 'Referer: ' . $url . $linebreak;
                $req .= 'Content-Type: application/x-www-form-urlencoded' . $linebreak;
                $req .= 'Content-Length: ' . strlen($body) . $linebreak;
                $req .= 'Connection: Close' . $linebreak . $linebreak;
                $req .= $body;
                break;
        }

        // Unsupported method: nothing to send, so do not open a connection
        // that would only wait for the peer to time out.
        if($req === '') {
            return $output;
        }

        // Socket Open
        $fsock = @fsockopen($transport . $host, $port);
        if(!$fsock) {
            return $output;
        }

        fwrite($fsock, $req);
        while(!feof($fsock)) {
            // Read whole lines: a fixed-size read could split a long header so
            // that its trailing CRLF looks like the blank separator line.
            $line = fgets($fsock);
            if($line === false) {
                break;
            }

            if(!$headPassed) {
                // The first empty line ends the header section.
                if(rtrim($line, "\r\n") === "") {
                    $headPassed = true;
                }
                continue;
            }

            $output .= $line;
        }
        fclose($fsock);

        return $output;
    }
}
if(!function_exists("get_web_wget")) {
    /**
     * Download a URL with the `wget` command line tool and return the body.
     *
     * wget writes into a randomly named storage file, which is then read back
     * with read_storage_file().
     *
     * $method, $data, $proxy, $ua, $ct_out and $t_out are accepted for
     * signature parity with the other transports but are not used here.
     *
     * NOTE(review): the downloaded storage file is not removed afterwards.
     *
     * @param string $url    URL to download
     * @param string $method unused
     * @param array  $data   unused
     * @param string $proxy  unused
     * @param string $ua     unused
     * @param int    $ct_out unused
     * @param int    $t_out  unused
     * @return mixed whatever read_storage_file() returns for the downloaded file
     */
    function get_web_wget($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $filename = make_random_id(32);

        // presumably "fake" mode only resolves the target path without writing — TODO confirm
        $filepath = write_storage_file("", array(
            "filename" => $filename,
            "mode" => "fake"
        ));

        // Both arguments are shell-escaped so a crafted URL or path cannot inject commands.
        $cmd = sprintf("wget %s -O %s", escapeshellarg($url), escapeshellarg($filepath));
        shell_exec($cmd);

        return read_storage_file($filename);
    }
}
2018-02-26 06:10:06 +00:00
if(!function_exists("get_web_page")) {
    /**
     * Fetch a web page and return the body together with metadata.
     *
     * $method is a dot-separated list of tokens, e.g. "get", "post.cache",
     * "get.cmd.cache". The first matching token, checked in this order, selects
     * the transport: cache, cmd, fgc, sock, wget, jsonrpc. With none of them
     * the PHP cURL extension is used; if that transfer does not yield a string
     * the request is retried through the "cmd" transport.
     *
     * @param string $url    target URL
     * @param string $method method/transport tokens (case-insensitive)
     * @param array  $data   request parameters
     * @param string $proxy  proxy server for the cURL transport
     * @param string $ua     User-Agent; a framework default is used when empty
     * @param int    $ct_out connect timeout in seconds (cURL transport)
     * @param int    $t_out  response timeout in seconds (cURL transport)
     * @return array|string response array with the keys content, size, status,
     *                      resno, errno, id, md5, sha1, gz_content, gz_size,
     *                      gz_md5, gz_sha1, gz_ratio, method, params; or the
     *                      bare error message string when the cURL extension
     *                      is required but not loaded
     */
    function get_web_page($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        // "-1" means "not available": only the cURL transport fills these in.
        $status = "-1";
        $resno = "-1";
        $errno = "-1";
        $req_method = $method;

        $method = strtolower($method);
        $res_methods = explode(".", $method);

        if(in_array("cache", $res_methods)) {
            $content = get_web_cache($url, $method, $data, $proxy, $ua, $ct_out, $t_out);
        } elseif(in_array("cmd", $res_methods)) {
            $content = get_web_cmd($url, $res_methods[0], $data, $proxy, $ua, $ct_out, $t_out);
        } elseif(in_array("fgc", $res_methods)) {
            $content = get_web_fgc($url);
        } elseif(in_array("sock", $res_methods)) {
            $content = get_web_sock($url, $res_methods[0], $data, $proxy, $ua, $ct_out, $t_out);
        } elseif(in_array("wget", $res_methods)) {
            $content = get_web_wget($url, $res_methods[0], $data, $proxy, $ua, $ct_out, $t_out);
        } elseif(in_array("jsonrpc", $res_methods)) {
            $content = get_web_cmd($url, "jsonrpc", $data, $proxy, $ua, $ct_out, $t_out);
        } else {
            if(!extension_loaded("curl")) {
                // Kept for backward compatibility: this path returns the message
                // itself, not a response array.
                $error_msg = "cURL extension needs to be installed.";
                set_error($error_msg);
                return $error_msg;
            }

            if(empty($ua)) {
                $ua = "Reasonable Framework is PHP framework for make solid and secure web development";
            }

            $options = array(
                CURLOPT_URL => $url, // set remote url
                CURLOPT_PROXY => $proxy, // set proxy server
                CURLOPT_RETURNTRANSFER => true, // return web page
                CURLOPT_HEADER => false, // don't return headers
                CURLOPT_FOLLOWLOCATION => true, // follow redirects
                CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
                CURLOPT_ENCODING => "", // handle compressed
                CURLOPT_USERAGENT => $ua, // name of client
                CURLOPT_AUTOREFERER => true, // set referrer on redirect
                CURLOPT_CONNECTTIMEOUT => $ct_out, // time-out on connect
                CURLOPT_TIMEOUT => $t_out, // time-out on response
                CURLOPT_FAILONERROR => true, // get error code
                // NOTE(review): TLS verification is disabled below, which allows
                // man-in-the-middle attacks; left unchanged to preserve behavior.
                CURLOPT_SSL_VERIFYHOST => false, // ignore ssl host verification
                CURLOPT_SSL_VERIFYPEER => false, // ignore ssl peer verification
            );

            if($method == "post" && count($data) > 0) {
                $options[CURLOPT_POST] = 1;
                $options[CURLOPT_POSTFIELDS] = get_web_build_qs("", $data);
            }

            if($method == "get" && count($data) > 0) {
                $options[CURLOPT_URL] = get_web_build_qs($url, $data);
            }

            $ch = curl_init();
            curl_setopt_array($ch, $options);
            $content = curl_exec($ch);

            // Record cURL's diagnostics whether or not the transfer succeeded,
            // so a failure is still visible after the fallback below.
            $errno = curl_errno($ch);
            $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $resno = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
            curl_close($ch);

            if(!is_string($content)) {
                // cURL failed: retry once through the command line transport.
                $res = get_web_page($url, $method . ".cmd", $data, $proxy, $ua, $ct_out, $t_out);
                $content = $res['content'];
                $req_method = $res['method'];
            }
        }

        // Transports may hand back false/null on failure; normalise so the
        // size, hash and compression steps below always work on a string.
        if(!is_string($content)) {
            $content = "";
        }

        $content_size = strlen($content);
        $gz_content = gzdeflate($content);
        $gz_content_size = strlen($gz_content);
        $gz_ratio = ($content_size > 0) ? (floatval($gz_content_size) / floatval($content_size)) : 1.0;

        $response = array(
            "content" => $content,
            "size" => $content_size,
            "status" => $status,
            "resno" => $resno,
            "errno" => $errno,
            "id" => get_web_identifier($url, $method, $data),
            "md5" => get_hashed_text($content, "md5"),
            "sha1" => get_hashed_text($content, "sha1"),
            "gz_content" => $gz_content,
            "gz_size" => $gz_content_size,
            "gz_md5" => get_hashed_text($gz_content, "md5"),
            "gz_sha1" => get_hashed_text($gz_content, "sha1"),
            "gz_ratio" => $gz_ratio,
            "method" => $req_method,
            "params" => $data,
        );

        return $response;
    }
}
2018-05-12 16:07:33 +00:00
if(!function_exists("get_web_identifier")) {
    /**
     * Derive an identifier for a request from its method, URL and parameters.
     *
     * The method and URL are hashed separately, the parameters are serialized
     * and hashed ("*" when there are none), and the three parts joined by "."
     * are hashed once more with get_hashed_text().
     *
     * @param string $url    request URL
     * @param string $method request method string
     * @param array  $data   request parameters
     * @return mixed the value returned by get_hashed_text()
     */
    function get_web_identifier($url, $method="get", $data=array()) {
        $hash_data = "*";
        if(count($data) > 0) {
            $hash_data = get_hashed_text(serialize($data));
        }

        $combined = sprintf("%s.%s.%s", get_hashed_text($method), get_hashed_text($url), $hash_data);
        return get_hashed_text($combined);
    }
}
2018-05-12 17:51:25 +00:00
if(!function_exists("get_web_cache")) {
    /**
     * Return a page body from the cache, fetching and storing it on a miss.
     *
     * Cache entries are stored deflate-compressed under the request identifier
     * (see get_web_identifier()). On a miss, or when the stored entry cannot be
     * inflated, the page is fetched through get_web_page() with the "cache"
     * token removed from $method. Only non-empty string bodies are written to
     * the cache, so a failed fetch is retried on the next call.
     *
     * NOTE(review): entries are never expired by this function.
     *
     * @param string $url    target URL
     * @param string $method method/transport tokens including "cache"
     * @param array  $data   request parameters
     * @param string $proxy  passed through to get_web_page()
     * @param string $ua     passed through to get_web_page()
     * @param int    $ct_out passed through to get_web_page()
     * @param int    $t_out  passed through to get_web_page()
     * @return string|false page body, or false when no body could be obtained
     */
    function get_web_cache($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $identifier = get_web_identifier($url, $method, $data);

        $gz_content = read_storage_file($identifier, array(
            "storage_type" => "cache"
        ));

        if($gz_content !== false) {
            $content = gzinflate($gz_content);
            if($content !== false) {
                return $content;
            }
            // Unreadable cache entry: fall through and fetch a fresh copy.
        }

        // Drop every "cache" token, wherever it appears; otherwise a method such
        // as "cache.get" would bounce between get_web_page() and this function.
        $tokens = array();
        foreach(explode(".", $method) as $token) {
            if(strtolower($token) !== "cache") {
                $tokens[] = $token;
            }
        }
        $no_cache_method = implode(".", $tokens);

        $response = get_web_page($url, $no_cache_method, $data, $proxy, $ua, $ct_out, $t_out);
        $content = (is_array($response) && isset($response['content'])) ? $response['content'] : false;

        // save web page cache (only real content, never an empty/failed fetch)
        if(is_string($content) && $content !== "") {
            write_storage_file(gzdeflate($content), array(
                "storage_type" => "cache",
                "filename" => $identifier
            ));
        }

        return $content;
    }
}
2018-02-26 06:10:06 +00:00
if(!function_exists("get_web_json")) {
    /**
     * Fetch a URL and decode the body as JSON (objects as stdClass).
     *
     * Parameters are passed unchanged to get_web_page().
     *
     * @return mixed decoded value from get_parsed_json(), or false when the
     *               fetch produced no usable response
     */
    function get_web_json($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $result = false;

        $response = get_web_page($url, $method, $data, $proxy, $ua, $ct_out, $t_out);

        // get_web_page() returns a plain error string when cURL is unavailable,
        // so make sure we really have a response array before indexing it.
        if(is_array($response) && isset($response['size']) && $response['size'] > 0) {
            $result = get_parsed_json($response['content'], array("stdClass" => true));
        }

        return $result;
    }
}
2018-03-10 16:59:53 +00:00
if(!function_exists("get_web_dom")) {
    /**
     * Fetch a URL and parse the body into a DOM via get_parsed_dom().
     *
     * Parameters are passed unchanged to get_web_page().
     *
     * @return mixed result of get_parsed_dom(), or an empty stdClass when the
     *               fetch produced no usable response
     */
    function get_web_dom($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $result = new stdClass();

        $response = get_web_page($url, $method, $data, $proxy, $ua, $ct_out, $t_out);

        // get_web_page() returns a plain error string when cURL is unavailable,
        // so make sure we really have a response array before indexing it.
        if(is_array($response) && isset($response['size']) && $response['size'] > 0) {
            // load simple_html_dom
            $result = get_parsed_dom($response['content']);
        }

        return $result;
    }
}
2018-03-21 02:05:48 +00:00
if(!function_exists("get_web_meta")) {
    /**
     * Fetch a URL and extract its metadata with MetaParser.
     *
     * Parameters are passed unchanged to get_web_page().
     *
     * @return mixed MetaParser::getDetails() output, or false when the fetch
     *               produced no usable response or the "metaparser.lnk" helper
     *               could not be loaded
     */
    function get_web_meta($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $result = false;

        $response = get_web_page($url, $method, $data, $proxy, $ua, $ct_out, $t_out);

        // get_web_page() returns a plain error string when cURL is unavailable,
        // so make sure we really have a response array before indexing it.
        if(!is_array($response) || !isset($response['size']) || $response['size'] <= 0) {
            return $result;
        }

        // load PHP-Metaparser
        if(loadHelper("metaparser.lnk")) {
            $parser = new MetaParser($response['content'], $url);
            $result = $parser->getDetails();
        }

        return $result;
    }
}
if(!function_exists("get_web_xml")) {
    /**
     * Fetch a URL and parse the body as XML via get_parsed_xml().
     *
     * Parameters are passed unchanged to get_web_page().
     *
     * @return mixed result of get_parsed_xml(), or false when the fetch
     *               produced no usable response
     */
    function get_web_xml($url, $method="get", $data=array(), $proxy="", $ua="", $ct_out=45, $t_out=45) {
        $result = false;

        $response = get_web_page($url, $method, $data, $proxy, $ua, $ct_out, $t_out);

        // get_web_page() returns a plain error string when cURL is unavailable,
        // so make sure we really have a response array before indexing it.
        if(is_array($response) && isset($response['size']) && $response['size'] > 0) {
            $result = get_parsed_xml($response['content']);
        }

        return $result;
    }
}
if(!function_exists("get_parsed_json")) {
    /**
     * Decode a JSON string.
     *
     * @param string $raw     JSON text
     * @param array  $options when the "stdClass" option is set (per
     *                        array_key_empty()), JSON objects decode to
     *                        stdClass; otherwise to associative arrays
     * @return mixed the json_decode() result
     */
    function get_parsed_json($raw, $options=array()) {
        // Associative arrays are the default; stdClass only on request.
        $want_objects = !array_key_empty("stdClass", $options);

        if($want_objects) {
            return json_decode($raw);
        }

        return json_decode($raw, true);
    }
}
if(!function_exists("get_parsed_xml")) {
    /**
     * Parse an XML string with SimpleXML.
     *
     * CDATA sections are merged into text nodes (LIBXML_NOCDATA).
     *
     * @param string $raw     XML text
     * @param array  $options currently unused
     * @return SimpleXMLElement|false parsed document; false when SimpleXML is
     *                                not available or the text cannot be parsed
     */
    function get_parsed_xml($raw, $options=array()) {
        if(!function_exists("simplexml_load_string")) {
            return false;
        }

        // Parse the text that was passed in ($raw).
        return simplexml_load_string($raw, null, LIBXML_NOCDATA);
    }
}
2018-06-01 12:34:59 +00:00
2018-10-20 18:59:14 +00:00
if(!function_exists("get_parsed_dom")) {
    /**
     * Parse an HTML string with the simple_html_dom helper.
     *
     * @param string $raw     HTML text
     * @param array  $options currently unused
     * @return mixed the str_get_html() result; the unparsed $raw when the
     *               helper loaded but str_get_html() is not defined; false
     *               when the helper could not be loaded
     */
    function get_parsed_dom($raw, $options=array()) {
        if(!loadHelper("simple_html_dom")) {
            return false;
        }

        // Parse the text that was passed in ($raw).
        if(function_exists("str_get_html")) {
            return str_get_html($raw);
        }

        return $raw;
    }
}
// 2018-06-01: Adaptive JSON always quotes its values and does not escape non-ASCII characters
if(!function_exists("get_adaptive_json")) {
    /**
     * Serialize an array as a JSON object in which every scalar value is
     * emitted as a quoted string and non-ASCII characters are left unescaped.
     *
     * Nested arrays are serialized recursively as JSON objects (list-style
     * arrays therefore get their numeric indexes as keys).
     *
     * Strings are escaped with json_encode() so that quotes, backslashes,
     * newlines and other control characters yield valid JSON.
     *
     * @param array $data values to serialize
     * @return string JSON object text
     */
    function get_adaptive_json($data) {
        // Quote one key/value as a JSON string literal.
        $quote = function($text) {
            $text = (string)$text;
            $encoded = json_encode($text, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
            if($encoded === false) {
                // e.g. invalid UTF-8: fall back to the legacy escaping
                $encoded = "\"" . addslashes($text) . "\"";
            }
            return $encoded;
        };

        $lines = array();
        foreach($data as $k=>$v) {
            $value = is_array($v) ? get_adaptive_json($v) : $quote($v);
            $lines[] = $quote($k) . ":" . $value;
        }

        return "{" . implode(",", $lines) . "}";
    }
}
2018-09-09 17:30:45 +00:00
// 2018-09-10: support webproxy
if(!function_exists("get_webproxy_url")) {
    /**
     * Build the link of the web proxy route for a given URL.
     *
     * @param string $url   URL to hand to the proxy route as its "url" parameter
     * @param string $route route name passed to get_route_link()
     * @return mixed the value returned by get_route_link()
     */
    function get_webproxy_url($url, $route="webproxy") {
        $params = array("url" => $url);
        $link = get_route_link($route, $params);
        return $link;
    }
}