diff --git a/data/test-misc.json b/data/test-misc.json index 3de79dd..30da10a 100644 --- a/data/test-misc.json +++ b/data/test-misc.json @@ -1,6 +1,6 @@ { "description": "WelsonJS test profile (test-misc.json)", - "released": "2024-09-25", + "released": "2024-09-27", "dependencies": { "welsonjs": "0.2.7" }, @@ -93,7 +93,12 @@ }, { "id": "proxy_custom_provider", - "description": "HTTP proxy with a custom provider", + "description": "HTTP proxy with a web proxy provider", + "tags": ["Network", "HTTP"] + }, + { + "id": "proxy_serp", + "description": "HTTP proxy with a SERP provider", "tags": ["Network", "HTTP"] } ] diff --git a/lib/http.js b/lib/http.js index b54f951..9847f0c 100644 --- a/lib/http.js +++ b/lib/http.js @@ -29,6 +29,12 @@ var AVAILABLE_PROXIES = [ "url": "http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181", "documentation": "https://scrapeops.io?fpr=namhyeon75" }, + { + "type": "serp", + "provider": "searchapi", + "url": "https://www.searchapi.io/api/v1/search?api_key={api_key}&engine={engine}&q={q}", + "documentation": "https://www.searchapi.io/?via=namhyeon" + }, { "type": "stateless-jsonrpc2", "provider": "gnh1201/caterpillar", @@ -99,7 +105,7 @@ var HTTPObject = function(engine) { "host": "127.0.0.1", "port": 80, "credential": null, // { username: "user", password: "pass" } - "url": "" // stateless only + "url": null // stateless only }; this.engine = (typeof(engine) !== "undefined" ? 
engine : "MSXML"); @@ -140,7 +146,7 @@ var HTTPObject = function(engine) { this.curlOptions = []; this.charset = FILE.CdoCharset.CdoUTF_8; - this.isUseCharsetDetector = false; + this.isUseDetectCharset = false; this.isVerifySSL = true; this.isCompressedResponse = false; @@ -258,17 +264,12 @@ var HTTPObject = function(engine) { }); if (typeof availableProxy !== "undefined") { - this.proxy.provider = proxy['provider']; - - if (proxyType == "stateless") { - this.proxy.url = availableProxy.url; - } else { - this.proxy.protocol = proxy['protocol'] || this.proxy.protocol; - this.proxy.host = proxy['host'] || this.proxy.host; - this.proxy.port = proxy['port'] || this.proxy.port; - this.proxy.credential = proxy['credential'] || this.proxy.credential; - this.proxy.url = proxy['url'] || this.proxy.url; - } + this.proxy.provider = availableProxy['provider']; + this.proxy.protocol = availableProxy['protocol'] || this.proxy.protocol; + this.proxy.host = availableProxy['host'] || this.proxy.host; + this.proxy.port = availableProxy['port'] || this.proxy.port; + this.proxy.credential = availableProxy['credential'] || this.proxy.credential; + this.proxy.url = availableProxy['url'] || this.proxy.url; console.info("Please check documentation:", availableProxy.documentation); } @@ -480,6 +481,14 @@ var HTTPObject = function(engine) { }; this.getProxiedURL = function(url) { + if (!this.proxy.enabled) return url; + + if (this.proxy.type == "serp") { + var serp = this.parseSerpUrl(url); + this.setVariable("engine", serp.engine); + this.setVariable("q", encodeURIComponent(serp.keyword)); + } + this.setVariable("url", encodeURIComponent(url)); url = this.evaluate(this.proxy.url); @@ -488,6 +497,32 @@ var HTTPObject = function(engine) { return url; }; + this.parseSerpUrl = function(url) { + var getEngine = function(url) { + var match = url.match(/(?:https?:\/\/)?(?:www\.)?(google|youtube|bing|baidu|amazon)\.\w+/), result; + if (match) { + result = match[1]; + } else { + result = "google"; + 
} + return result; + }; + + var getKeyword = function(url) { + var regex = /[?&](q|wd|query|k)=([^&]*)/g; + var match, keywords = []; + while ((match = regex.exec(url)) !== null) { + keywords.push(match[2]); + } + return keywords.join(' '); + }; + + return { + "engine": getEngine(url), + "keyword": getKeyword(url) + } + }; + this.open = function(method, url) { var url = this.serializeParameters(url); @@ -646,7 +681,7 @@ var HTTPObject = function(engine) { } // Add proxy: <[protocol://][user:password@]proxyhost[:port]> - if (this.proxy != null && this.proxy.enabled && this.proxy.type != "stateless") { + if (this.proxy != null && this.proxy.enabled && this.proxy.type == "stateful") { cmd.push("-x"); if (this.proxy.credential != null) { cmd.push([ @@ -703,7 +738,7 @@ var HTTPObject = function(engine) { } // If enabled the charset(text encoding) detector - if (this.isUseCharsetDetector) { + if (this.isUseDetectCharset) { var detectedCharset = this.detectCharset(responseText); console.log("Detected charset:", detectedCharset); @@ -1062,8 +1097,8 @@ var HTTPObject = function(engine) { return this; }; - this.setIsUseCharsetDetector = function(flag) { - this.isUseCharsetDetector = flag; + this.setIsUseDetectCharset = function(flag) { + this.isUseDetectCharset = flag; return this; }; @@ -1175,7 +1210,7 @@ exports.parseURL = parseURL; exports.DEFAULT_USER_AGENT = DEFAULT_USER_AGENT; exports.defaultUserAgent = DEFAULT_USER_AGENT; // compatible -exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.35"; +exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.36"; exports.AUTHOR = "abuse@catswords.net"; exports.global = global; exports.require = global.require; diff --git a/testloader.js b/testloader.js index e299fc1..b20ab1b 100644 --- a/testloader.js +++ b/testloader.js @@ -1023,6 +1023,25 @@ var test_implements = { .open("GET", "https://example.org") .send(); + console.log("responseBody:", response.responseBody); + }, + + "proxy_serp": function() { + var HTTP 
= require("lib/http"); + + var response = HTTP.create("CURL") + .setVariables({ + "api_key": "2DG3WQgeL2djLFvnQBw83J4y" + }) + .setProxy({ + "enabled": true, + "provider": "searchapi", + "type": "serp" + }) + .setUserAgent("Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko") + .open("GET", "https://www.google.com/search?q=test") + .send(); + console.log("responseBody:", response.responseBody); } };