// Example: crawling the titles (and business registration numbers) of websites

var FILE = require("lib/file");
var HTTP = require("lib/http");
var Punycode = require("lib/punycode");

// User-Agent header sent with every request.
var USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36";

// Pattern NNN-NN-NNNNN; a full match is always BIZ_NO_LENGTH characters long.
var BIZ_NO_PATTERN = /[0-9]{3}-[0-9]{2}-[0-9]{5}/;
var BIZ_NO_LENGTH = 12;

var TITLE_OPEN = "<title>";
var TITLE_CLOSE = "</title>";

/**
 * Issues a GET request that follows redirects and returns the HTTP handler
 * produced by the request chain.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Object} The value returned by `send()`; the rest of this file reads
 *     `responseBody` and calls `getFrameURLs()` on it.
 */
function fetchPage(url) {
    return HTTP.create("CURL")
        .setUserAgent(USER_AGENT)
        .setIsFollowRedirect(true)
        .open("GET", url)
        .send();
}

/**
 * Appends the bodies of all frames referenced by a page (recursively) to the
 * accumulated response text.
 *
 * NOTE(review): the recursion has no depth limit, so pages whose frames refer
 * back to each other would recurse indefinitely — consider adding a cap.
 *
 * @param {Object} handler - Handler of the page whose frames are followed.
 * @param {string} domain - Domain the frame URLs are resolved against.
 * @param {string} response - Text accumulated so far.
 * @returns {string} `response` with every fetched frame body appended.
 */
function digFrame(handler, domain, response) {
    var frameURLs = handler.getFrameURLs();
    var i;
    var frameHandler;

    for (i = 0; i < frameURLs.length; i++) {
        // Frame URLs are treated as paths relative to the domain root.
        frameHandler = fetchPage("http://" + Punycode.encode(domain) + "/" + frameURLs[i]);
        response += frameHandler.responseBody;
        response = digFrame(frameHandler, domain, response);
    }

    return response;
}

/**
 * Returns the first substring matching NNN-NN-NNNNN, or '' when none exists.
 *
 * @param {string} text - Text to scan.
 * @returns {string} The matched number, or an empty string.
 */
function extractBizNo(text) {
    var pos = text.search(BIZ_NO_PATTERN);
    console.log("Position:", pos);
    return (pos > -1) ? text.substring(pos, pos + BIZ_NO_LENGTH) : '';
}

/**
 * Returns the text between the first `<title>` and the following `</title>`.
 *
 * The markers must be non-empty: searching for "" always succeeds at the start
 * index, which would make the extracted title always empty.
 *
 * @param {string} html - Page source.
 * @returns {string} The title text, or '' when either marker is missing.
 */
function extractTitle(html) {
    var start = html.indexOf(TITLE_OPEN);
    var end;

    if (start < 0) {
        return '';
    }

    start += TITLE_OPEN.length;
    end = html.indexOf(TITLE_CLOSE, start);

    return (end > -1) ? html.substring(start, end) : '';
}

/**
 * Finds the first district key for which the 2-character prefix of the key, or
 * of any name listed under it, occurs in one of the given texts.
 *
 * Plain loops are used so the search can stop at the first hit; returning
 * false from a `forEach` callback does not end the iteration.
 *
 * @param {Array} districtData - Array of objects mapping a district key to its
 *     related names (presumably arrays of strings — verify against the data file).
 * @param {Array} texts - Strings to search in.
 * @returns {string} The matching key, or '' when nothing matches.
 */
function findRegion(districtData, texts) {
    var i, j, t, entry, key, names, prefix;

    for (i = 0; i < districtData.length; i++) {
        entry = districtData[i];
        for (key in entry) {
            if (!Object.prototype.hasOwnProperty.call(entry, key)) {
                continue;
            }
            names = [key].concat(entry[key]);
            for (j = 0; j < names.length; j++) {
                prefix = names[j].substring(0, 2);
                if (prefix.length === 0) {
                    // An empty prefix would match every text.
                    continue;
                }
                for (t = 0; t < texts.length; t++) {
                    if (texts[t].indexOf(prefix) > -1) {
                        return key;
                    }
                }
            }
        }
    }

    return '';
}

/**
 * Crawls every domain listed in data\domains.txt, extracts the page title, a
 * business registration number and a matching administrative district, and
 * appends one colon-separated row per domain to data\matches.txt.
 */
function main() {
    var lines = [];
    var district = JSON.parse(FILE.readFile("data\\korea-administrative-district.json", FILE.CdoCharset.CdoUTF_8));
    var districtData = district.data;
    // splitLn is not defined in this file; presumably a global supplied by the
    // runtime — verify.
    var domains = splitLn(FILE.readFile("data\\domains.txt", FILE.CdoCharset.CdoUTF_8));

    domains.forEach(function(domain) {
        var handler = fetchPage("http://" + Punycode.encode(domain) + "/");
        var response = digFrame(handler, domain, handler.responseBody);

        var bizNo = extractBizNo(response);
        var title = extractTitle(response);
        var bizRegion = findRegion(districtData, [domain, title, response]);

        // Not detected automatically; left blank in the output row.
        var bizType = '';
        var bizForm = '';
        var bizNote = '';

        var row = [domain, title, bizNo, bizRegion, bizType, bizForm, bizNote];
        lines.push(row.join(":"));
    });

    FILE.appendFile("data\\matches.txt", lines.join("\r\n"), FILE.CdoCharset.CdoUTF_8);
}

exports.main = main;