mirror of
https://github.com/gnh1201/caterpillar.git
synced 2024-11-26 15:31:45 +00:00
495 lines
18 KiB
Python
495 lines
18 KiB
Python
# Caterpillar - The simple and parasitic web proxy with spam filter
|
|
# Namyheon Go (Catswords Research) <gnh1201@gmail.com>
|
|
# https://github.com/gnh1201/caterpillar
|
|
# Created at: 2022-10-06
|
|
# Updated at: 2024-12-20
|
|
|
|
import argparse
|
|
import socket
|
|
import sys
|
|
import os
|
|
from _thread import *
|
|
import base64
|
|
import json
|
|
import ssl
|
|
import time
|
|
import re
|
|
import hashlib
|
|
import resource
|
|
#import traceback
|
|
import io
|
|
from subprocess import Popen, PIPE
|
|
from datetime import datetime
|
|
from platform import python_version
|
|
from PIL import Image
|
|
|
|
import requests
|
|
from decouple import config
|
|
|
|
try:
|
|
listening_port = config('PORT', cast=int)
|
|
server_url = config('SERVER_URL')
|
|
cakey = config('CA_KEY')
|
|
cacert = config('CA_CERT')
|
|
certkey = config('CERT_KEY')
|
|
certdir = config('CERT_DIR')
|
|
openssl_binpath = config('OPENSSL_BINPATH')
|
|
client_encoding = config('CLIENT_ENCODING')
|
|
local_domain = config('LOCAL_DOMAIN')
|
|
proxy_pass = config('PROXY_PASS')
|
|
mastodon_server = config('MASTODON_SERVER') # catswords.social
|
|
mastodon_user_token = config('MASTODON_USER_TOKEN') # catswords.social
|
|
truecaptcha_userid = config('TRUECAPTCHA_USERID') # truecaptcha.org
|
|
truecaptcha_apikey = config('TRUECAPTCHA_APIKEY') # truecaptcha.org
|
|
except KeyboardInterrupt:
|
|
print("\n[*] User has requested an interrupt")
|
|
print("[*] Application Exiting.....")
|
|
sys.exit()
|
|
|
|
parser = argparse.ArgumentParser()
|
|
|
|
parser.add_argument('--max_conn', help="Maximum allowed connections", default=255, type=int)
|
|
parser.add_argument('--buffer_size', help="Number of samples to be used", default=8192, type=int)
|
|
|
|
args = parser.parse_args()
|
|
max_connection = args.max_conn
|
|
buffer_size = args.buffer_size
|
|
|
|
# https://stackoverflow.com/questions/25475906/set-ulimit-c-from-outside-shell
|
|
resource.setrlimit(
|
|
resource.RLIMIT_CORE,
|
|
(resource.RLIM_INFINITY, resource.RLIM_INFINITY))
|
|
|
|
def start(): #Main Program
|
|
try:
|
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
sock.bind(('', listening_port))
|
|
sock.listen(max_connection)
|
|
print("[*] Server started successfully [ %d ]" %(listening_port))
|
|
except Exception:
|
|
print("[*] Unable to Initialize Socket")
|
|
print(Exception)
|
|
sys.exit(2)
|
|
|
|
while True:
|
|
try:
|
|
conn, addr = sock.accept() #Accept connection from client browser
|
|
data = conn.recv(buffer_size) #Recieve client data
|
|
start_new_thread(conn_string, (conn, data, addr)) #Starting a thread
|
|
except KeyboardInterrupt:
|
|
sock.close()
|
|
print("\n[*] Graceful Shutdown")
|
|
sys.exit(1)
|
|
|
|
def conn_string(conn, data, addr):
|
|
try:
|
|
first_line = data.split(b'\n')[0]
|
|
|
|
method, url = first_line.split()[0:2]
|
|
|
|
http_pos = url.find(b'://') #Finding the position of ://
|
|
scheme = b'http' # check http/https or other protocol
|
|
if http_pos == -1:
|
|
temp = url
|
|
else:
|
|
temp = url[(http_pos+3):]
|
|
scheme = url[0:http_pos]
|
|
|
|
port_pos = temp.find(b':')
|
|
|
|
webserver_pos = temp.find(b'/')
|
|
if webserver_pos == -1:
|
|
webserver_pos = len(temp)
|
|
webserver = b''
|
|
port = -1
|
|
if port_pos == -1 or webserver_pos < port_pos:
|
|
port = 80
|
|
webserver = temp[:webserver_pos]
|
|
else:
|
|
port = int((temp[(port_pos+1):])[:webserver_pos-port_pos-1])
|
|
webserver = temp[:port_pos]
|
|
if port == 443:
|
|
scheme = b'https'
|
|
except Exception as e:
|
|
conn.close()
|
|
print("[*] Exception on parsing the header of %s. Cause: %s" % (str(addr[0]), str(e)))
|
|
return
|
|
|
|
# if it is reverse proxy
|
|
if local_domain != '':
|
|
localserver = local_domain.encode(client_encoding)
|
|
if webserver == localserver or data.find(b'\nHost: ' + localserver) > -1:
|
|
print ("[*] ** Detected the reverse proxy request: %s" % (local_domain))
|
|
scheme, _webserver, _port = proxy_pass.encode(client_encoding).split(b':')
|
|
webserver = _webserver[2:]
|
|
port = int(_port.decode(client_encoding))
|
|
|
|
proxy_server(webserver, port, scheme, method, url, conn, addr, data)
|
|
|
|
def proxy_connect(webserver, conn):
|
|
hostname = webserver.decode(client_encoding)
|
|
certpath = "%s/%s.crt" % (certdir.rstrip('/'), hostname)
|
|
|
|
# https://stackoverflow.com/questions/24055036/handle-https-request-in-proxy-server-by-c-sharp-connect-tunnel
|
|
conn.send(b'HTTP/1.1 200 Connection Established\r\n\r\n')
|
|
|
|
# https://github.com/inaz2/proxy2/blob/master/proxy2.py
|
|
try:
|
|
if not os.path.isfile(certpath):
|
|
epoch = "%d" % (time.time() * 1000)
|
|
p1 = Popen([openssl_binpath, "req", "-new", "-key", certkey, "-subj", "/CN=%s" % hostname], stdout=PIPE)
|
|
p2 = Popen([openssl_binpath, "x509", "-req", "-days", "3650", "-CA", cacert, "-CAkey", cakey, "-set_serial", epoch, "-out", certpath], stdin=p1.stdout, stderr=PIPE)
|
|
p2.communicate()
|
|
except Exception as e:
|
|
print("[*] Skipped generating the certificate. Cause: %s" % (str(e)))
|
|
|
|
# https://stackoverflow.com/questions/11255530/python-simple-ssl-socket-server
|
|
# https://docs.python.org/3/library/ssl.html
|
|
context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
|
|
context.load_cert_chain(certpath, certkey)
|
|
|
|
# https://stackoverflow.com/questions/11255530/python-simple-ssl-socket-server
|
|
conn = context.wrap_socket(conn, server_side=True)
|
|
data = conn.recv(buffer_size)
|
|
|
|
return (conn, data)
|
|
|
|
def proxy_check_filtered(data, webserver, port, scheme, method, url):
|
|
filtered = False
|
|
|
|
# prevent cache confusing
|
|
if data.find(b'<title>Welcome to nginx!</title>') > -1:
|
|
return True
|
|
|
|
# ctkpaarr
|
|
if data.find(b'ctkpaarr') > -1:
|
|
return True
|
|
|
|
# allowed conditions
|
|
if method == b'GET' or url.find(b'/api') > -1:
|
|
return False
|
|
|
|
# convert to text
|
|
data_length = len(data)
|
|
text = data.decode(client_encoding, errors='ignore')
|
|
error_rate = (data_length - len(text)) / data_length
|
|
if error_rate > 0.9: # it is a binary data
|
|
return False
|
|
|
|
# check ID with K-Anonymity strategy
|
|
pattern = r'\b(?:(?<=\/@)|(?<=acct:))([a-zA-Z0-9]{10})\b'
|
|
matches = list(set(re.findall(pattern, text)))
|
|
if len(matches) > 0:
|
|
print ("[*] Found ID: %s" % (', '.join(matches)))
|
|
try:
|
|
filtered = not all(map(pwnedpasswords_test, matches))
|
|
except Exception as e:
|
|
print ("[*] K-Anonymity strategy not working! %s" % (str(e)))
|
|
filtered = True
|
|
|
|
# check ID with VowelRatio10 strategy
|
|
if filtered and len(matches) > 0:
|
|
def vowel_ratio_test(s):
|
|
ratio = calculate_vowel_ratio(s)
|
|
return ratio > 0.2 and ratio < 0.7
|
|
filtered = not all(map(vowel_ratio_test, matches))
|
|
|
|
# check ID with Palindrome5 strategy
|
|
if filtered and len(matches) > 0:
|
|
filtered = not all(map(has_palindrome, matches))
|
|
|
|
# check an attached images (check images with Not-CAPTCHA strategy)
|
|
if not filtered and len(matches) > 0 and truecaptcha_userid != '':
|
|
def webp_to_png_base64(url):
|
|
try:
|
|
response = requests.get(url)
|
|
img = Image.open(io.BytesIO(response.content))
|
|
img_png = img.convert("RGBA")
|
|
buffered = io.BytesIO()
|
|
img_png.save(buffered, format="PNG")
|
|
encoded_image = base64.b64encode(buffered.getvalue()).decode(client_encoding)
|
|
return encoded_image
|
|
except:
|
|
return None
|
|
|
|
urls = re.findall(r'https://[^\s"]+\.webp', text)
|
|
if len(urls) > 0:
|
|
for url in urls:
|
|
if filtered:
|
|
break
|
|
|
|
print ("[*] downloading... %s" % (url))
|
|
encoded_image = webp_to_png_base64(url)
|
|
print ("[*] downloaded.")
|
|
if encoded_image:
|
|
print ("[*] solving...")
|
|
try:
|
|
solved = truecaptcha_solve(encoded_image)
|
|
if solved:
|
|
print ("[*] solved: %s" % (solved))
|
|
filtered = solved.lower() in ['ctkpaarr', 'spam']
|
|
else:
|
|
print ("[*] not solved")
|
|
except Exception as e:
|
|
print ("[*] Not CAPTCHA strategy not working! %s" % (str(e)))
|
|
|
|
# take action
|
|
if filtered:
|
|
print ("[*] Filtered from %s:%s" % (webserver.decode(client_encoding), str(port)))
|
|
|
|
try:
|
|
savedir = './savedfiles'
|
|
if not os.path.exists(savedir):
|
|
os.makedirs(savedir)
|
|
current_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
|
file_path = os.path.join(savedir, ("%s_%s.bin" % (current_time, webserver.decode(client_encoding))))
|
|
with open(file_path, 'wb') as file:
|
|
file.write(data)
|
|
print ("[*] Saved the file: %s" % (file_path))
|
|
except Exception as e:
|
|
print ("[*] Failed to save the file: %s" % (str(e)))
|
|
|
|
return filtered
|
|
|
|
def proxy_server(webserver, port, scheme, method, url, conn, addr, data):
|
|
try:
|
|
print("[*] Started the request. %s" % (str(addr[0])))
|
|
|
|
# SSL negotiation
|
|
if scheme in [b'https', b'tls', b'ssl'] and method == b'CONNECT':
|
|
while True:
|
|
try:
|
|
conn, data = proxy_connect(webserver, conn)
|
|
break # success
|
|
#except OSError as e:
|
|
# print ("[*] Retrying SSL negotiation... (%s:%s) %s" % (webserver.decode(client_encoding), str(port), str(e)))
|
|
except Exception as e:
|
|
raise Exception("SSL negotiation failed. (%s:%s) %s" % (webserver.decode(client_encoding), str(port), str(e)))
|
|
|
|
# Wait to see if there is more data to transmit
|
|
def sendall(sock, conn, data):
|
|
# send first chuck
|
|
if proxy_check_filtered(data, webserver, port, scheme, method, url):
|
|
sock.close()
|
|
raise Exception("Filtered request")
|
|
sock.send(data)
|
|
if len(data) < buffer_size:
|
|
return
|
|
|
|
# send following chunks
|
|
buffered = b''
|
|
conn.settimeout(1)
|
|
while True:
|
|
try:
|
|
chunk = conn.recv(buffer_size)
|
|
if not chunk:
|
|
break
|
|
buffered += chunk
|
|
if proxy_check_filtered(buffered, webserver, port, scheme, method, url):
|
|
sock.close()
|
|
raise Exception("Filtered request")
|
|
sock.send(chunk)
|
|
if len(buffered) > buffer_size*2:
|
|
buffered = buffered[-buffer_size*2:]
|
|
except:
|
|
break
|
|
|
|
# do response
|
|
if server_url == "localhost":
|
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
|
|
if scheme in [b'https', b'tls', b'ssl']:
|
|
context = ssl.create_default_context()
|
|
context.check_hostname = False
|
|
context.verify_mode = ssl.CERT_NONE
|
|
|
|
sock = context.wrap_socket(sock, server_hostname=webserver.decode(client_encoding))
|
|
sock.connect((webserver, port))
|
|
#sock.sendall(data)
|
|
sendall(sock, conn, data)
|
|
else:
|
|
sock.connect((webserver, port))
|
|
#sock.sendall(data)
|
|
sendall(sock, conn, data)
|
|
|
|
i = 0
|
|
buffered = b''
|
|
while True:
|
|
chunk = sock.recv(buffer_size)
|
|
if not chunk:
|
|
break
|
|
buffered += chunk
|
|
if proxy_check_filtered(buffered, webserver, port, scheme, method, url):
|
|
sock.close()
|
|
add_filtered_host(webserver.decode(client_encoding), '127.0.0.1')
|
|
raise Exception("Filtered response")
|
|
conn.send(chunk)
|
|
if len(buffered) > buffer_size*2:
|
|
buffered = buffered[-buffer_size*2:]
|
|
i += 1
|
|
|
|
print("[*] Received %s chunks. (%s bytes per chunk)" % (str(i), str(buffer_size)))
|
|
|
|
else:
|
|
|
|
proxy_data = {
|
|
'headers': {
|
|
"User-Agent": "php-httpproxy/0.1.4 (Client; Python " + python_version() + "; abuse@catswords.net)",
|
|
},
|
|
'data': {
|
|
"data": base64.b64encode(data).decode(client_encoding),
|
|
"client": str(addr[0]),
|
|
"server": webserver.decode(client_encoding),
|
|
"port": str(port),
|
|
"scheme": scheme.decode(client_encoding),
|
|
"url": url.decode(client_encoding),
|
|
"length": str(len(data)),
|
|
"chunksize": str(buffer_size),
|
|
"datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
|
|
}
|
|
}
|
|
raw_data = json.dumps(proxy_data['data'])
|
|
|
|
print("[*] Sending %s bytes..." % (str(len(raw_data))))
|
|
|
|
i = 0
|
|
relay = requests.post(server_url, headers=proxy_data['headers'], data=raw_data, stream=True)
|
|
buffered = b''
|
|
for chunk in relay.iter_content(chunk_size=buffer_size):
|
|
buffered += chunk
|
|
if proxy_check_filtered(buffered, webserver, port, scheme, method, url):
|
|
add_filtered_host(webserver.decode(client_encoding), '127.0.0.1')
|
|
raise Exception("Filtered response")
|
|
conn.send(chunk)
|
|
if len(buffered) > buffer_size*2:
|
|
buffered = buffered[-buffer_size*2:]
|
|
i += 1
|
|
|
|
print("[*] Received %s chunks. (%s bytes per chunk)" % (str(i), str(buffer_size)))
|
|
|
|
print("[*] Request and received. Done. %s" % (str(addr[0])))
|
|
conn.close()
|
|
except Exception as e:
|
|
#print(traceback.format_exc())
|
|
print("[*] Exception on requesting the data. Cause: %s" % (str(e)))
|
|
conn.sendall(b"HTTP/1.1 403 Forbidden\n\n{\"status\":403}")
|
|
conn.close()
|
|
|
|
# journaling a filtered hosts
|
|
def add_filtered_host(domain, ip_address):
|
|
hosts_path = './filtered.hosts'
|
|
with open(hosts_path, 'r') as file:
|
|
lines = file.readlines()
|
|
|
|
domain_exists = any(domain in line for line in lines)
|
|
if not domain_exists:
|
|
lines.append(f"{ip_address}\t{domain}\n")
|
|
with open(hosts_path, 'w') as file:
|
|
file.writelines(lines)
|
|
if mastodon_user_token != '': # notify to catswords.social
|
|
post_status_to_mastodon(f"[{mastodon_server} user]\r\n\r\n{domain} is a domain with suspicious spam activity.\r\n\r\n#catswords")
|
|
|
|
# notify to mastodon server
|
|
def post_status_to_mastodon(text, media_ids=None, poll_options=None, poll_expires_in=None, scheduled_at=None, idempotency_key=None):
|
|
url = f"https://{mastodon_server}/api/v1/statuses"
|
|
headers = {
|
|
"Authorization": f"Bearer {user_token}",
|
|
"Content-Type": "application/x-www-form-urlencoded",
|
|
}
|
|
form_data = {
|
|
"status": text,
|
|
"media_ids[]": media_ids,
|
|
"poll[options][]": poll_options,
|
|
"poll[expires_in]": poll_expires_in,
|
|
"scheduled_at": scheduled_at,
|
|
}
|
|
if idempotency_key:
|
|
headers["Idempotency-Key"] = idempotency_key
|
|
|
|
response = requests.post(url, headers=headers, data=form_data)
|
|
return response.json()
|
|
|
|
# Strategy: K-Anonymity test - use api.pwnedpasswords.com
|
|
def pwnedpasswords_test(s):
|
|
# SHA1 of the password
|
|
p_sha1 = hashlib.sha1(s.encode()).hexdigest()
|
|
|
|
# First 5 char of SHA1 for k-anonymity API use
|
|
f5_sha1 = p_sha1[:5]
|
|
|
|
# Last 5 char of SHA1 to match API output
|
|
l5_sha1 = p_sha1[-5:]
|
|
|
|
# Making GET request using Requests library
|
|
response = requests.get(f'https://api.pwnedpasswords.com/range/{f5_sha1}')
|
|
|
|
# Checking if request was successful
|
|
if response.status_code == 200:
|
|
# Parsing response text
|
|
hashes = response.text.split('\r\n')
|
|
|
|
# Using list comprehension to find matching hashes
|
|
matching_hashes = [line.split(':')[0] for line in hashes if line.endswith(l5_sha1)]
|
|
|
|
# If there are matching hashes, return True, else return False
|
|
return bool(matching_hashes)
|
|
else:
|
|
raise Exception("api.pwnedpasswords.com response status: %s" % (str(response.status_code)))
|
|
|
|
return False
|
|
|
|
# Strategy: Not-CAPTCHA - use truecaptcha.org
|
|
def truecaptcha_solve(encoded_image):
|
|
url = 'https://api.apitruecaptcha.org/one/gettext'
|
|
data = {
|
|
'userid': truecaptcha_userid,
|
|
'apikey': truecaptcha_apikey,
|
|
'data': encoded_image,
|
|
'mode': 'human'
|
|
}
|
|
response = requests.post(url = url, json = data)
|
|
|
|
if response.status_code == 200:
|
|
data = response.json()
|
|
|
|
if 'error_message' in data:
|
|
print ("[*] Error: %s" % (data['error_message']))
|
|
return None
|
|
if 'result' in data:
|
|
return data['result']
|
|
else:
|
|
raise Exception("api.apitruecaptcha.org response status: %s" % (str(response.status_code)))
|
|
|
|
return None
|
|
|
|
# Strategy: VowelRatio10
|
|
def calculate_vowel_ratio(s):
|
|
# Calculate the length of the string.
|
|
length = len(s)
|
|
if length == 0:
|
|
return 0.0
|
|
|
|
# Count the number of vowels ('a', 'e', 'i', 'o', 'u') in the string.
|
|
vowel_count = sum(1 for char in s if char.lower() in 'aeiou')
|
|
|
|
# Calculate the ratio of vowels to the total length of the string.
|
|
vowel_ratio = vowel_count / length
|
|
|
|
return vowel_ratio
|
|
|
|
# Strategy: Palindrome5
|
|
def has_palindrome(input_string):
|
|
def is_palindrome(s):
|
|
return s == s[::-1]
|
|
|
|
n = len(input_string)
|
|
for i in range(n):
|
|
for j in range(i + 5, n + 1): # Find substrings of at least 5 characters
|
|
substring = input_string[i:j]
|
|
if is_palindrome(substring):
|
|
return True
|
|
return False
|
|
|
|
if __name__== "__main__":
|
|
start()
|