mirror of
https://github.com/gnh1201/welsonjs.git
synced 2025-02-06 15:04:58 +00:00
fix #145
* Fix #145 * Rename the `CharsetDetector` methods and variables to `DetectCharset` * Add searchapi.io SERP support
This commit is contained in:
parent
ad6cb7e430
commit
a60e7e5d9e
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"description": "WelsonJS test profile (test-misc.json)",
|
"description": "WelsonJS test profile (test-misc.json)",
|
||||||
"released": "2024-09-25",
|
"released": "2024-09-27",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"welsonjs": "0.2.7"
|
"welsonjs": "0.2.7"
|
||||||
},
|
},
|
||||||
|
@ -93,7 +93,12 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "proxy_custom_provider",
|
"id": "proxy_custom_provider",
|
||||||
"description": "HTTP proxy with a custom provider",
|
"description": "HTTP proxy with an web proxy provider",
|
||||||
|
"tags": ["Network", "HTTP"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "proxy_serp",
|
||||||
|
"description": "HTTP proxy with a SERP provider",
|
||||||
"tags": ["Network", "HTTP"]
|
"tags": ["Network", "HTTP"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
71
lib/http.js
71
lib/http.js
|
@ -29,6 +29,12 @@ var AVAILABLE_PROXIES = [
|
||||||
"url": "http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181",
|
"url": "http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181",
|
||||||
"documentation": "https://scrapeops.io?fpr=namhyeon75"
|
"documentation": "https://scrapeops.io?fpr=namhyeon75"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "serp",
|
||||||
|
"provider": "searchapi",
|
||||||
|
"url": "https://www.searchapi.io/api/v1/search?api_key={api_key}&engine={engine}&q={q}",
|
||||||
|
"documentation": "https://www.searchapi.io/?via=namhyeon"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "stateless-jsonrpc2",
|
"type": "stateless-jsonrpc2",
|
||||||
"provider": "gnh1201/caterpillar",
|
"provider": "gnh1201/caterpillar",
|
||||||
|
@ -99,7 +105,7 @@ var HTTPObject = function(engine) {
|
||||||
"host": "127.0.0.1",
|
"host": "127.0.0.1",
|
||||||
"port": 80,
|
"port": 80,
|
||||||
"credential": null, // { username: "user", password: "pass" }
|
"credential": null, // { username: "user", password: "pass" }
|
||||||
"url": "" // stateless only
|
"url": null // stateless only
|
||||||
};
|
};
|
||||||
this.engine = (typeof(engine) !== "undefined" ? engine : "MSXML");
|
this.engine = (typeof(engine) !== "undefined" ? engine : "MSXML");
|
||||||
|
|
||||||
|
@ -140,7 +146,7 @@ var HTTPObject = function(engine) {
|
||||||
this.curlOptions = [];
|
this.curlOptions = [];
|
||||||
|
|
||||||
this.charset = FILE.CdoCharset.CdoUTF_8;
|
this.charset = FILE.CdoCharset.CdoUTF_8;
|
||||||
this.isUseCharsetDetector = false;
|
this.isUseDetectCharset = false;
|
||||||
this.isVerifySSL = true;
|
this.isVerifySSL = true;
|
||||||
this.isCompressedResponse = false;
|
this.isCompressedResponse = false;
|
||||||
|
|
||||||
|
@ -258,17 +264,12 @@ var HTTPObject = function(engine) {
|
||||||
});
|
});
|
||||||
|
|
||||||
if (typeof availableProxy !== "undefined") {
|
if (typeof availableProxy !== "undefined") {
|
||||||
this.proxy.provider = proxy['provider'];
|
this.proxy.provider = availableProxy['provider'];
|
||||||
|
this.proxy.protocol = availableProxy['protocol'] || this.proxy.protocol;
|
||||||
if (proxyType == "stateless") {
|
this.proxy.host = availableProxy['host'] || this.proxy.host;
|
||||||
this.proxy.url = availableProxy.url;
|
this.proxy.port = availableProxy['port'] || this.proxy.port;
|
||||||
} else {
|
this.proxy.credential = availableProxy['credential'] || this.proxy.credential;
|
||||||
this.proxy.protocol = proxy['protocol'] || this.proxy.protocol;
|
this.proxy.url = availableProxy['url'] || this.proxy.url;
|
||||||
this.proxy.host = proxy['host'] || this.proxy.host;
|
|
||||||
this.proxy.port = proxy['port'] || this.proxy.port;
|
|
||||||
this.proxy.credential = proxy['credential'] || this.proxy.credential;
|
|
||||||
this.proxy.url = proxy['url'] || this.proxy.url;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.info("Please check documentation:", availableProxy.documentation);
|
console.info("Please check documentation:", availableProxy.documentation);
|
||||||
}
|
}
|
||||||
|
@ -480,6 +481,14 @@ var HTTPObject = function(engine) {
|
||||||
};
|
};
|
||||||
|
|
||||||
this.getProxiedURL = function(url) {
|
this.getProxiedURL = function(url) {
|
||||||
|
if (!this.proxy.enabled) return url;
|
||||||
|
|
||||||
|
if (this.proxy.type == "serp") {
|
||||||
|
var serp = this.parseSerpUrl(url);
|
||||||
|
this.setVariable("engine", serp.engine);
|
||||||
|
this.setVariable("q", encodeURIComponent(serp.keyword));
|
||||||
|
}
|
||||||
|
|
||||||
this.setVariable("url", encodeURIComponent(url));
|
this.setVariable("url", encodeURIComponent(url));
|
||||||
url = this.evaluate(this.proxy.url);
|
url = this.evaluate(this.proxy.url);
|
||||||
|
|
||||||
|
@ -488,6 +497,32 @@ var HTTPObject = function(engine) {
|
||||||
return url;
|
return url;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Extracts the search engine and the search keyword from a search page URL.
 *
 * The engine is taken from the host name (google, youtube, bing, baidu or
 * amazon); any other host falls back to "google". The keyword is collected
 * from the query parameters q, wd, query, k and search_query; when several
 * of them are present their values are joined with a single space.
 *
 * The keyword is returned DECODED (plain text). getProxiedURL passes it
 * through encodeURIComponent, so returning the raw query-string value would
 * double-encode it ("hello%20world" would become "hello%2520world").
 *
 * @param {string} url - URL of the search result page
 * @returns {{engine: string, keyword: string}} the detected engine and keyword
 */
this.parseSerpUrl = function(url) {
    // Only the host part may decide the engine. The pattern is anchored at
    // the start of the URL so that an engine name appearing in the path or
    // the query string (e.g. "?ref=bing.com") is not picked up by mistake.
    var getEngine = function(url) {
        var match = url.match(/^(?:https?:\/\/)?(?:[^\/?#]*\.)?(google|youtube|bing|baidu|amazon)\.\w+/);
        return match ? match[1] : "google";
    };

    // Query-string values use "+" for spaces and percent-escapes for
    // everything else. A malformed escape sequence makes decodeURIComponent
    // throw; in that case keep the text as it is instead of failing.
    var decodeValue = function(value) {
        var text = value.replace(/\+/g, " ");
        try {
            return decodeURIComponent(text);
        } catch (e) {
            return text;
        }
    };

    var getKeyword = function(url) {
        // "search_query" is the parameter used by YouTube.
        // The value stops at "&" or at the "#" that starts the fragment.
        var regex = /[?&](q|wd|query|k|search_query)=([^&#]*)/g;
        var match, keywords = [];
        while ((match = regex.exec(url)) !== null) {
            keywords.push(decodeValue(match[2]));
        }
        return keywords.join(' ');
    };

    return {
        "engine": getEngine(url),
        "keyword": getKeyword(url)
    };
};
|
||||||
|
|
||||||
this.open = function(method, url) {
|
this.open = function(method, url) {
|
||||||
var url = this.serializeParameters(url);
|
var url = this.serializeParameters(url);
|
||||||
|
|
||||||
|
@ -646,7 +681,7 @@ var HTTPObject = function(engine) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add proxy: <[protocol://][user:password@]proxyhost[:port]>
|
// Add proxy: <[protocol://][user:password@]proxyhost[:port]>
|
||||||
if (this.proxy != null && this.proxy.enabled && this.proxy.type != "stateless") {
|
if (this.proxy != null && this.proxy.enabled && this.proxy.type == "stateful") {
|
||||||
cmd.push("-x");
|
cmd.push("-x");
|
||||||
if (this.proxy.credential != null) {
|
if (this.proxy.credential != null) {
|
||||||
cmd.push([
|
cmd.push([
|
||||||
|
@ -703,7 +738,7 @@ var HTTPObject = function(engine) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If enabled the charset(text encoding) detector
|
// If enabled the charset(text encoding) detector
|
||||||
if (this.isUseCharsetDetector) {
|
if (this.isUseDetectCharset) {
|
||||||
var detectedCharset = this.detectCharset(responseText);
|
var detectedCharset = this.detectCharset(responseText);
|
||||||
console.log("Detected charset:", detectedCharset);
|
console.log("Detected charset:", detectedCharset);
|
||||||
|
|
||||||
|
@ -1062,8 +1097,8 @@ var HTTPObject = function(engine) {
|
||||||
return this;
|
return this;
|
||||||
};
|
};
|
||||||
|
|
||||||
this.setIsUseCharsetDetector = function(flag) {
|
this.setIsUseDetectCharset = function(flag) {
|
||||||
this.isUseCharsetDetector = flag;
|
this.isUseDetectCharset = flag;
|
||||||
return this;
|
return this;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1175,7 +1210,7 @@ exports.parseURL = parseURL;
|
||||||
exports.DEFAULT_USER_AGENT = DEFAULT_USER_AGENT;
|
exports.DEFAULT_USER_AGENT = DEFAULT_USER_AGENT;
|
||||||
exports.defaultUserAgent = DEFAULT_USER_AGENT; // compatible
|
exports.defaultUserAgent = DEFAULT_USER_AGENT; // compatible
|
||||||
|
|
||||||
exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.35";
|
exports.VERSIONINFO = "HTTP REST Client (http.js) version 0.7.36";
|
||||||
exports.AUTHOR = "abuse@catswords.net";
|
exports.AUTHOR = "abuse@catswords.net";
|
||||||
exports.global = global;
|
exports.global = global;
|
||||||
exports.require = global.require;
|
exports.require = global.require;
|
||||||
|
|
|
@ -1023,6 +1023,25 @@ var test_implements = {
|
||||||
.open("GET", "https://example.org")
|
.open("GET", "https://example.org")
|
||||||
.send();
|
.send();
|
||||||
|
|
||||||
|
console.log("responseBody:", response.responseBody);
|
||||||
|
},
|
||||||
|
|
||||||
|
"proxy_serp": function() {
|
||||||
|
var HTTP = require("lib/http");
|
||||||
|
|
||||||
|
var response = HTTP.create("CURL")
|
||||||
|
.setVariables({
|
||||||
|
"api_key": "2DG3WQgeL2djLFvnQBw83J4y"
|
||||||
|
})
|
||||||
|
.setProxy({
|
||||||
|
"enabled": true,
|
||||||
|
"provider": "searchapi",
|
||||||
|
"type": "serp"
|
||||||
|
})
|
||||||
|
.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko")
|
||||||
|
.open("GET", "https://www.google.com/search?q=test")
|
||||||
|
.send();
|
||||||
|
|
||||||
console.log("responseBody:", response.responseBody);
|
console.log("responseBody:", response.responseBody);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue
Block a user