mirror of
https://github.com/gnh1201/welsonjs.git
synced 2025-02-06 06:54:58 +00:00
fix #145
* Fix #145 * Change the methods and variables `CharsetDetector` to `DetectCharset` * Add searchapi.io SERP support
This commit is contained in:
parent
ad6cb7e430
commit
a60e7e5d9e
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"description": "WelsonJS test profile (test-misc.json)",
|
||||
"released": "2024-09-25",
|
||||
"released": "2024-09-27",
|
||||
"dependencies": {
|
||||
"welsonjs": "0.2.7"
|
||||
},
|
||||
|
@ -93,7 +93,12 @@
|
|||
},
|
||||
{
|
||||
"id": "proxy_custom_provider",
|
||||
"description": "HTTP proxy with a custom provider",
|
||||
"description": "HTTP proxy with a web proxy provider",
|
||||
"tags": ["Network", "HTTP"]
|
||||
},
|
||||
{
|
||||
"id": "proxy_serp",
|
||||
"description": "HTTP proxy with a SERP provider",
|
||||
"tags": ["Network", "HTTP"]
|
||||
}
|
||||
]
|
||||
|
|
71
lib/http.js
71
lib/http.js
|
@ -29,6 +29,12 @@ var AVAILABLE_PROXIES = [
|
|||
"url": "http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181",
|
||||
"documentation": "https://scrapeops.io?fpr=namhyeon75"
|
||||
},
|
||||
{
|
||||
"type": "serp",
|
||||
"provider": "searchapi",
|
||||
"url": "https://www.searchapi.io/api/v1/search?api_key={api_key}&engine={engine}&q={q}",
|
||||
"documentation": "https://www.searchapi.io/?via=namhyeon"
|
||||
},
|
||||
{
|
||||
"type": "stateless-jsonrpc2",
|
||||
"provider": "gnh1201/caterpillar",
|
||||
|
@ -99,7 +105,7 @@ var HTTPObject = function(engine) {
|
|||
"host": "127.0.0.1",
|
||||
"port": 80,
|
||||
"credential": null, // { username: "user", password: "pass" }
|
||||
"url": "" // stateless only
|
||||
"url": null // stateless only
|
||||
};
|
||||
this.engine = (typeof(engine) !== "undefined" ? engine : "MSXML");
|
||||
|
||||
|
@ -140,7 +146,7 @@ var HTTPObject = function(engine) {
|
|||
this.curlOptions = [];
|
||||
|
||||
this.charset = FILE.CdoCharset.CdoUTF_8;
|
||||
this.isUseCharsetDetector = false;
|
||||
this.isUseDetectCharset = false;
|
||||
this.isVerifySSL = true;
|
||||
this.isCompressedResponse = false;
|
||||
|
||||
|
@ -258,17 +264,12 @@ var HTTPObject = function(engine) {
|
|||
});
|
||||
|
||||
if (typeof availableProxy !== "undefined") {
|
||||
this.proxy.provider = proxy['provider'];
|
||||
|
||||
if (proxyType == "stateless") {
|
||||
this.proxy.url = availableProxy.url;
|
||||
} else {
|
||||
this.proxy.protocol = proxy['protocol'] || this.proxy.protocol;
|
||||
this.proxy.host = proxy['host'] || this.proxy.host;
|
||||
this.proxy.port = proxy['port'] || this.proxy.port;
|
||||
this.proxy.credential = proxy['credential'] || this.proxy.credential;
|
||||
this.proxy.url = proxy['url'] || this.proxy.url;
|
||||
}
|
||||
this.proxy.provider = availableProxy['provider'];
|
||||
this.proxy.protocol = availableProxy['protocol'] || this.proxy.protocol;
|
||||
this.proxy.host = availableProxy['host'] || this.proxy.host;
|
||||
this.proxy.port = availableProxy['port'] || this.proxy.port;
|
||||
this.proxy.credential = availableProxy['credential'] || this.proxy.credential;
|
||||
this.proxy.url = availableProxy['url'] || this.proxy.url;
|
||||
|
||||
console.info("Please check documentation:", availableProxy.documentation);
|
||||
}
|
||||
|
@ -480,6 +481,14 @@ var HTTPObject = function(engine) {
|
|||
};
|
||||
|
||||
this.getProxiedURL = function(url) {
|
||||
if (!this.proxy.enabled) return url;
|
||||
|
||||
if (this.proxy.type == "serp") {
|
||||
var serp = this.parseSerpUrl(url);
|
||||
this.setVariable("engine", serp.engine);
|
||||
this.setVariable("q", encodeURIComponent(serp.keyword));
|
||||
}
|
||||
|
||||
this.setVariable("url", encodeURIComponent(url));
|
||||
url = this.evaluate(this.proxy.url);
|
||||
|
||||
|
@ -488,6 +497,32 @@ var HTTPObject = function(engine) {
|
|||
return url;
|
||||
};
|
||||
|
||||
/**
 * Extract the search engine and the search keyword from a search URL.
 *
 * The keyword is returned DECODED (percent-escapes resolved, "+" turned into
 * a space). The caller applies encodeURIComponent() to it, so returning the
 * raw query value would double-encode it ("%20" -> "%2520").
 *
 * @param {string} url - e.g. "https://www.google.com/search?q=test"
 * @returns {{engine: string, keyword: string}} `engine` is one of
 *     google|youtube|bing|baidu|amazon (defaults to "google" when the host
 *     is not recognized); `keyword` is the space-joined list of the decoded
 *     keyword parameters, or "" when none is present.
 */
this.parseSerpUrl = function(url) {
    // Tolerate null/undefined/non-string input instead of throwing
    var target = (url == null ? "" : String(url));

    var getEngine = function(s) {
        // Look at the authority (host) part only, so that an engine name
        // inside the path or the query string (e.g. "?ref=youtube.com")
        // is not mistaken for the target engine.
        var authority = s.match(/^(?:[a-z][a-z0-9+.\-]*:\/\/)?([^\/?#]*)/i)[1];
        var host = authority.replace(/^.*@/, "");   // drop "user:password@"
        var match = host.match(/(?:^|\.)(google|youtube|bing|baidu|amazon)\.\w+/i);

        return (match ? match[1].toLowerCase() : "google");
    };

    var decodeValue = function(value) {
        var text = value.replace(/\+/g, " ");   // "+" means a space in a query string
        try {
            return decodeURIComponent(text);
        } catch (e) {
            // Malformed percent-escape: keep the text as it is rather than fail
            return text;
        }
    };

    var getKeyword = function(s) {
        // q: google/bing, wd: baidu, k: amazon, search_query: youtube
        // A value ends at the next parameter ("&") or at the fragment ("#").
        var regex = /[?&](?:q|wd|query|search_query|k)=([^&#]*)/g;
        var match, keywords = [];

        while ((match = regex.exec(s)) !== null) {
            if (match[1] !== "") {
                keywords.push(decodeValue(match[1]));
            }
        }

        return keywords.join(' ');
    };

    return {
        "engine": getEngine(target),
        "keyword": getKeyword(target)
    };
};
|
||||
|
||||
this.open = function(method, url) {
|
||||
var url = this.serializeParameters(url);
|
||||
|
||||
|
@ -646,7 +681,7 @@ var HTTPObject = function(engine) {
|
|||
}
|
||||
|
||||
// Add proxy: <[protocol://][user:password@]proxyhost[:port]>
|
||||
if (this.proxy != null && this.proxy.enabled && this.proxy.type != "stateless") {
|
||||
if (this.proxy != null && this.proxy.enabled && this.proxy.type == "stateful") {
|
||||
cmd.push("-x");
|
||||
if (this.proxy.credential != null) {
|
||||
cmd.push([
|
||||
|
@ -703,7 +738,7 @@ var HTTPObject = function(engine) {
|
|||
}
|
||||
|
||||
// If the charset (text encoding) detector is enabled
|
||||
if (this.isUseCharsetDetector) {
|
||||
if (this.isUseDetectCharset) {
|
||||
var detectedCharset = this.detectCharset(responseText);
|
||||
console.log("Detected charset:", detectedCharset);
|
||||
|
||||
|
@ -1062,8 +1097,8 @@ var HTTPObject = function(engine) {
|
|||
return this;
|
||||
};
|
||||
|
||||
this.setIsUseCharsetDetector = function(flag) {
|
||||
this.isUseCharsetDetector = flag;
|
||||
this.setIsUseDetectCharset = function(flag) {
|
||||
this.isUseDetectCharset = flag;
|
||||
return this;
|
||||
};
|
||||
|
||||
|
@ -1175,7 +1210,7 @@ exports.parseURL = parseURL;
|
|||
exports.DEFAULT_USER_AGENT = DEFAULT_USER_AGENT;
|
||||
exports.defaultUserAgent = DEFAULT_USER_AGENT; // compatible
|
||||
|
||||
exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.35";
|
||||
exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.36";
|
||||
exports.AUTHOR = "abuse@catswords.net";
|
||||
exports.global = global;
|
||||
exports.require = global.require;
|
||||
|
|
|
@ -1023,6 +1023,25 @@ var test_implements = {
|
|||
.open("GET", "https://example.org")
|
||||
.send();
|
||||
|
||||
console.log("responseBody:", response.responseBody);
|
||||
},
|
||||
|
||||
"proxy_serp": function() {
|
||||
var HTTP = require("lib/http");
|
||||
|
||||
var response = HTTP.create("CURL")
|
||||
.setVariables({
|
||||
"api_key": "2DG3WQgeL2djLFvnQBw83J4y"
|
||||
})
|
||||
.setProxy({
|
||||
"enabled": true,
|
||||
"provider": "searchapi",
|
||||
"type": "serp"
|
||||
})
|
||||
.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko")
|
||||
.open("GET", "https://www.google.com/search?q=test")
|
||||
.send();
|
||||
|
||||
console.log("responseBody:", response.responseBody);
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue
Block a user