Add the proxy API services #143

This commit is contained in:
Namhyeon Go 2024-09-20 23:10:30 +09:00
parent e7f65d50b0
commit de6b70a5b1

View File

@ -16,6 +16,21 @@ var PROCESS_VERSION = SYS.getProcessVersion();
// Default User-Agent string: the "WelsonJS/0.2.7" product token followed by a
// parenthesised, semicolon-separated list built from OS_NAME, OS_ARCH,
// PROCESS_VERSION and DEVICE_UUID, ending with a contact e-mail address.
var DEFAULT_USER_AGENT = "WelsonJS/0.2.7 (" + OS_NAME + "; " + OS_ARCH + "; " + PROCESS_VERSION + "; " + DEVICE_UUID + "; abuse@catswords.net)";
// Catalogue of supported proxy API services ([lib/http] integration, issue #143).
// https://github.com/gnh1201/welsonjs/issues/143
//
// One entry per (provider, type) pair. "url" is a template whose {name}
// placeholders are stored verbatim here and filled in elsewhere.
var AVAILABLE_PROXIES = [
    // "stateless": the target address travels as the {url} query parameter
    // of the provider's own HTTPS endpoint.
    {
        type: "stateless",
        provider: "scrapeops",
        url: "https://proxy.scrapeops.io/v1/?api_key={api_key}&url={url}&render_js={render_js}&residential={residential}&country={country}"
    },
    // "stateful": a proxy endpoint address with the credentials
    // (user "scrapeops", password {api_key}) embedded in the URL.
    {
        type: "stateful",
        provider: "scrapeops",
        url: "http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181"
    }
];
var HTTPObject = function(engine) {
this._interface = null;
@ -30,10 +45,13 @@ var HTTPObject = function(engine) {
this.isAsynchronous = false;
this.proxy = {
"enabled": false,
"type": "stateful",
"provider": "",
"protocol": "http",
"host": "127.0.0.1",
"port": 80,
"credential": null // { username: "user", password: "pass" }
"credential": null, // { username: "user", password: "pass" }
"url": "" // stateless only
};
this.engine = (typeof(engine) !== "undefined" ? engine : "MSXML");
@ -173,8 +191,33 @@ var HTTPObject = function(engine) {
this.setProxy = function(proxy) {
for (var k in proxy) {
if (k in this.proxy) {
this.proxy[k] = proxy[k];
var v = proxy[k];
switch (k) {
case "provider":
var proxyType = proxy['type'] || this.proxy['type'];
var availableProxy = AVAILABLE_PROXIES.find(function(x) {
return x.provider == v && x.type == proxyType;
});
if (typeof availableProxy !== "undefined") {
this.proxy.provider = v;
if (proxyType == "stateless") {
this.proxy.url = this.evaluate(v);
} else {
var params = parseURL(this.evaluate(v));
this.proxy.protocol = params.protocol;
this.proxy.host = params.host;
this.proxy.port = params.port;
this.proxy.credential = params.credential;
}
}
break;
default:
this.proxy[k] = v;
break;
}
}
return this;
@ -371,6 +414,15 @@ var HTTPObject = function(engine) {
return url;
};
/**
 * Returns the URL that should actually be requested for `url`.
 *
 * When the proxy configuration is present, enabled and of type "stateless",
 * the URI-component-encoded `url` is stored as the "url" variable and the
 * result of evaluating the proxy's URL template is returned. In every other
 * case `url` is returned unchanged.
 *
 * @param {string} url - the target URL
 * @returns {string} the proxied URL, or `url` itself when no rewrite applies
 */
this.getProxiedURL = function(url) {
    var settings = this.proxy;

    // Guard clause: anything other than an enabled stateless proxy is a no-op.
    if (settings == null || !settings.enabled || settings.type != "stateless") {
        return url;
    }

    // The variable must be set before the template is evaluated.
    this.setVariable("url", encodeURIComponent(url));
    return this.evaluate(this.proxy.url);
};
this.open = function(method, url) {
var url = this.serializeParameters(url);
@ -462,7 +514,7 @@ var HTTPObject = function(engine) {
// Make CURL context
var state = this.states[this.states.length - 1];
var cmd = [];
var url = state.url;
var url = this.getProxiedURL(state.url);
if (this.isDebugging) {
cmd.push("-v");
@ -529,7 +581,7 @@ var HTTPObject = function(engine) {
}
// Add proxy: <[protocol://][user:password@]proxyhost[:port]>
if (this.proxy != null && this.proxy.enabled) {
if (this.proxy != null && this.proxy.enabled && this.proxy.type != "stateless") {
cmd.push("-x");
if (this.proxy.credential != null) {
cmd.push([
@ -575,7 +627,7 @@ var HTTPObject = function(engine) {
}
// set the URL
cmd.push(state.url);
cmd.push(url);
// Get response text
responseText = this._interface.setCharset(this.charset).exec(cmd);
@ -605,7 +657,7 @@ var HTTPObject = function(engine) {
var job_priority = "normal";
var state = this.states[this.states.length - 1];
var cmd = ["/transfer", job_name];
var url = state.url;
var url = this.getProxiedURL(state.url);
var out = PipeIPC.connect("volatile");
if (this.method == "GET") {
@ -622,7 +674,8 @@ var HTTPObject = function(engine) {
} else if (this.engine == "CERT") {
var state = this.states[this.states.length - 1];
var out = PipeIPC.connect("volatile");
var cmd = ["-urlcache", "-split", "-f", state.url, out.path];
var url = this.getProxiedURL(state.url);
var cmd = ["-urlcache", "-split", "-f", url, out.path];
this._interface.exec(cmd);
out.reload();
responseText = out.read();
@ -1058,15 +1111,15 @@ function _delete(url, params, headers) {
return create().setHeaders(headers).setParameters(params).setUseCache(false)._delete(url).responseBody;
}
function parseURL(input) {
function parseURL(url) {
var pattern = /^(?:(https?):\/\/)?(?:([^:@]+)(?::([^:@]*))?@)?([^:]+)(?::(\d{1,5}))?$/;
var matches = input.match(pattern);
var matches = url.match(pattern);
if (!matches) return null;
var protocol = matches[1] || 'http';
var username = matches[2] || '';
var password = matches[3] || '';
var host = matches[4];
var host = matches[4] || 'localhost';
var port = matches[5] || '';
var credential = null;
@ -1096,7 +1149,7 @@ exports.parseURL = parseURL;
// Module exports: the default User-Agent string (under two names), version
// and author metadata, and the `global` object with its `require`.
exports.DEFAULT_USER_AGENT = DEFAULT_USER_AGENT;
exports.defaultUserAgent = DEFAULT_USER_AGENT; // compatible with the specific case
// NOTE(review): VERSIONINFO is assigned twice below; executed as-is, the later
// value (0.7.26) is the one that remains. This looks like a removed/added
// line pair from a diff view - confirm only one line exists in the real file.
exports.VERSIONINFO = "HTTP client module (http.js) version 0.7.25";
exports.VERSIONINFO = "HTTP client module (http.js) version 0.7.26";
exports.AUTHOR = "abuse@catswords.net";
exports.global = global;
exports.require = global.require;