diff --git a/server.py b/server.py
deleted file mode 100644
index cc35674..0000000
--- a/server.py
+++ /dev/null
@@ -1,206 +0,0 @@
-import time
-import asyncio
-import aiodns
-import aiohttp
-import validators
-
-from sanic import Sanic
-from sanic.log import logger
-from sanic.response import html, json as json_response
-from sanic.exceptions import InvalidUsage
-
-app = Sanic()
-
-# keep that in memory
-RATE_LIMIT_DB = {}
-
-# to prevent DDoS or bounce attack attempt or something like that
-RATE_LIMIT_SECONDS = 5
-
-
-def clear_rate_limit_db(now):
-    to_delete = []
-
-    "Remove too old rate limit values"
-    for key, value in RATE_LIMIT_DB.items():
-        if now - value > RATE_LIMIT_SECONDS:
-            # a dictionnary can't be modified during iteration so delegate this
-            # operation
-            to_delete.append(key)
-
-    for key in to_delete:
-        del RATE_LIMIT_DB[key]
-
-
-async def query_dns(host, dns_entry_type):
-    loop = asyncio.get_event_loop()
-    dns_resolver = aiodns.DNSResolver(loop=loop)
-
-    try:
-        return await dns_resolver.query(host, dns_entry_type)
-    except aiodns.error.DNSError:
-        return []
-    except Exception:
-        import traceback
-        traceback.print_exc()
-        logger.error("Unhandled error while resolving DNS entry")
-
-
-@app.route("/check/", methods=["POST"])
-async def check_http(request):
-    """
-    This function received an HTTP request from a YunoHost instance while this
-    server is hosted on our infrastructure. The expected request body is:
-    {"domain": "domain-to-check.tld"} and the method POST
-
-    The general workflow is the following:
-
-    - grab the ip from the request
-    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
-    - get json from body and domain from it
-    - check for domain based rate limit (see RATE_LIMIT_SECONDS value)
-    - check domain is in valid format
-    - check dns entry for domain match the ip of the request (advanced rule for ipv6)
-    - everything is checked, now try to do an http request on the domain
-    - answer saying if the domain can be reached
-    """
-
-    # this is supposed to be a fast operation if run enough
-    now = time.time()
-    clear_rate_limit_db(now)
-
-    ip = request.ip
-
-    if ip in RATE_LIMIT_DB:
-        since_last_attempt = now - RATE_LIMIT_DB[ip]
-        if since_last_attempt < RATE_LIMIT_SECONDS:
-            logger.info(f"Rate limite {ip}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds")
-            return json_response({
-                "status": "error",
-                "code": "error_rate_limit",
-                "content": f"Rate limit on ip, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds",
-            }, status=400)
-
-    RATE_LIMIT_DB[ip] = time.time()
-
-    try:
-        data = request.json
-    except InvalidUsage:
-        logger.info(f"Unvalid json in request, body is : {request.body}")
-        return json_response({
-            "status": "error",
-            "code": "error_bad_json",
-            "content": "InvalidUsage, body isn't proper json",
-        }, status=400)
-
-    if not data or "domain" not in data:
-        logger.info(f"Unvalid request didn't specified a domain (body is : {request.body}")
-        return json_response({
-            "status": "error",
-            "code": "error_no_domain",
-            "content": "request must specify a domain",
-        }, status=400)
-
-    domain = data["domain"]
-
-    if domain in RATE_LIMIT_DB:
-        since_last_attempt = now - RATE_LIMIT_DB[domain]
-        if since_last_attempt < RATE_LIMIT_SECONDS:
-            logger.info(f"Rate limite {domain}, can retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds")
-            return json_response({
-                "status": "error",
-                "code": "error_rate_limit",
-                "content": f"Rate limit on domain, retry in {int(RATE_LIMIT_SECONDS - since_last_attempt)} seconds",
-            }, status=400)
-
-    RATE_LIMIT_DB[domain] = time.time()
-
-    if not validators.domain(domain):
-        logger.info(f"Invalid request, is not in the right format (domain is : {domain})")
-        return json_response({
-            "status": "error",
-            "code": "error_domain_bad_format",
-            "content": "domain is not in the right format (do not include http:// or https://)",
-        }, status=400)
-
-    # TODO handle ipv6
-    # ipv6 situation
-    if ":" in ip:
-        dns_entry = await query_dns(domain, "AAAA")
-
-        if not dns_entry:
-            # check if entry in ip4 for custom error
-            dns_entry = await query_dns(domain, "A")
-
-            # there is an ipv4 entry but the request is made in ipv6, ask to uses ipv4 instead
-            if dns_entry:
-                logger.info(f"[ipv6] Invalid request, no AAAA DNS entry for domain {domain} BUT ipv4 entry, ask user to request in ipv4")
-                return json_response({
-                    "status": "error",
-                    "code": "error_no_ipv6_dns_entry_but_ipv4_dns_entry",
-                    "content": f"there is not AAAA (ipv6) DNS entry for domain {domain} BUT there is an entry in ipv4, please redo the request in ipv4",
-                }, status=400)
-
-            else:
-                logger.info(f"[ipv6] Invalid request, no DNS entry for domain {domain} (both in ipv6 and ip4)")
-                return json_response({
-                    "status": "error",
-                    "code": "error_no_ipv4_ipv6_dns_entry_for_domain",
-                    "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}",
-                }, status=400)
-    # ipv4 situation
-    else:
-        dns_entry = await query_dns(domain, "A")
-
-        if not dns_entry:
-            logger.info(f"[ipv4] Invalid request, no DNS entry for domain {domain}")
-            return json_response({
-                "status": "error",
-                "code": "error_no_ipv4_dns_entry_for_domain",
-                "content": f"there is not A (ipv4) and AAAA (ipv6) DNS entry for domain {domain}",
-            }, status=400)
-
-    dns_entry = dns_entry[0]
-
-    if dns_entry.host != ip:
-        logger.info(f"Invalid request, A DNS entry {dns_entry.host} for domain {domain} doesn't match request ip {ip}")
-        return json_response({
-            "status": "error",
-            "code": "error_dns_entry_doesnt_match_request_ip",
-            "content": f"error, the request is made from the ip {ip} but the dns entry said {domain} has the ip {dns_entry.host}, you can only check a domain configured for your ip",
-        }, status=400)
-
-    async with aiohttp.ClientSession() as session:
-        try:
-            async with session.get("http://" + domain, timeout=aiohttp.ClientTimeout(total=30)) as response:
-                # XXX in the futur try to do a double check with the server to
-                # see if the correct content is get
-                await response.text()
-                logger.info(f"Success when checking http access for {domain} asked by {ip}")
-        # TODO various kind of errors
-        except aiohttp.client_exceptions.ClientConnectorError:
-            return json_response({
-                "status": "error",
-                "code": "error_http_check_connection_error",
-                "content": "connection error, could not connect to the requested domain, it's very likely unreachable",
-            }, status=418)
-        except Exception:
-            import traceback
-            traceback.print_exc()
-
-            return json_response({
-                "status": "error",
-                "code": "error_http_check_unknown_error",
-                "content": "an error happen while trying to get your domain, it's very likely unreachable",
-            }, status=400)
-
-    return json_response({"status": "ok"})
-
-
-@app.route("/")
-async def main(request):
-    return html("You aren't really supposed to use this website using your browser.<br><br>It's a small server to check if a YunoHost instance can be reached by http before trying to instal a LE certificate.")
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7000)
diff --git a/yunodiagnoser.py b/yunodiagnoser.py
new file mode 100644
index 0000000..d50d1eb
--- /dev/null
+++ b/yunodiagnoser.py
@@ -0,0 +1,485 @@
+import re
+import time
+import asyncio
+import aiohttp
+import validators
+import socket
+
+from sanic import Sanic
+from sanic.log import logger
+from sanic.response import html, json as json_response
+from sanic.exceptions import InvalidUsage
+
+app = Sanic()
+
+# ########################################################################### #
+#   Rate limit                                                                #
+# ########################################################################### #
+
+# keep that in memory: for each key (an ip or a domain), the timestamps of its
+# recent attempts, oldest first
+RATE_LIMIT_DB = {}
+
+# to prevent DDoS or bounce attack attempt or something like that
+# Can't do more than 10 requests in a 300-seconds window
+RATE_LIMIT_SECONDS = 300
+RATE_LIMIT_NB_REQUESTS = 10
+
+
+def clear_rate_limit_db(now):
+    """
+    Remove too old rate limit values
+
+    Attempts older than RATE_LIMIT_SECONDS (relatively to `now`, a timestamp
+    as returned by time.time()) are forgotten, and the keys which have no
+    attempt left are removed from RATE_LIMIT_DB.
+    """
+    # iterate on a copy of the keys, because a dictionary can't change size
+    # while it is iterated on
+    for key in list(RATE_LIMIT_DB):
+        recent = [t for t in RATE_LIMIT_DB[key] if now - t < RATE_LIMIT_SECONDS]
+        if recent:
+            RATE_LIMIT_DB[key] = recent
+        else:
+            del RATE_LIMIT_DB[key]
+
+
+def check_rate_limit(key, now):
+    """
+    Register an attempt made at time `now` for `key` (an ip or a domain)
+
+    Return None if the attempt is allowed, in which case it is recorded in
+    RATE_LIMIT_DB. Return a ready-to-send json response (status 400) if
+    RATE_LIMIT_NB_REQUESTS attempts are already recorded for `key`, in which
+    case the refused attempt is not recorded.
+    """
+    attempts = RATE_LIMIT_DB.get(key, [])
+
+    # '>=' and not '>', otherwise one request more than RATE_LIMIT_NB_REQUESTS
+    # gets through
+    if len(attempts) >= RATE_LIMIT_NB_REQUESTS:
+        retry_in = int(RATE_LIMIT_SECONDS - now + attempts[0])
+        logger.info(f"Rate limit reached for {key}, can retry in {retry_in} seconds")
+        return json_response({
+            "error": {
+                "code": "error_rate_limit",
+                "content": f"Rate limit reached for this domain or ip, retry in {retry_in} seconds"
+            }
+        }, status=400)
+
+    RATE_LIMIT_DB.setdefault(key, []).append(now)
+    return None
+
+
+# ########################################################################### #
+#   Request helpers                                                           #
+# ########################################################################### #
+
+
+class InvalidRequest(Exception):
+    """Raised when the json body of a request is not in the expected format"""
+
+
+def _require(condition, message):
+    """
+    Raise InvalidRequest(message) unless `condition` is true
+
+    This is used instead of `assert` to validate the requests, because assert
+    statements are stripped when python runs with -O.
+    """
+    if not condition:
+        raise InvalidRequest(message)
+
+
+def _get_client_ip(request):
+    """
+    Return the ip (as a string) of the client which sent `request`
+
+    This is the first address of the X-Forwarded-For header if there is one,
+    and the address of the connection itself otherwise (a missing header
+    used to crash the request with a KeyError).
+
+    NOTE(review): the first address of X-Forwarded-For can only be trusted if
+    the reverse proxy in front of this server overwrites the header. If the
+    proxy appends to the value sent by the client, anybody can pretend to be
+    any ip (and make this server probe it): to be confirmed against the
+    proxy configuration.
+    """
+    forwarded_for = request.headers.get("x-forwarded-for")
+    if forwarded_for:
+        return forwarded_for.split(",")[0].strip()
+    return request.ip
+
+
+def _bad_json_response(request):
+    """Log and return the error response for a body which isn't proper json"""
+    logger.info(f"Invalid json in request, body is: {request.body}")
+    return json_response({
+        "error": {
+            "code": "error_bad_json",
+            "content": "Invalid usage, body isn't proper json"
+        }
+    }, status=400)
+
+
+def _invalid_request_response(request, error):
+    """Log and return the error response for a body which failed validation"""
+    logger.info(f"Invalid request: {error} ... Original request body was: {request.body}")
+    return json_response({
+        "error": {
+            "code": "error_bad_json_data",
+            "content": f"Invalid request: {error} ... Original request body was: {request.body}"
+        }
+    }, status=400)
+
+
+def _is_port_number(p):
+    """Tell if `p` is an integer usable as a TCP port number (1 to 65535)"""
+    # bool is a subclass of int, but json's true / false are no port numbers
+    return isinstance(p, int) and not isinstance(p, bool) and 0 < p <= 65535
+
+
+def _open_socket(ip):
+    """Return a TCP socket of the family matching `ip`, with a 2 seconds timeout"""
+    family = socket.AF_INET6 if ":" in ip else socket.AF_INET
+    sock = socket.socket(family, socket.SOCK_STREAM)
+    sock.settimeout(2)
+    return sock
+
+
+# ########################################################################### #
+#   HTTP check                                                                #
+# ########################################################################### #
+
+
+@app.route("/check-http", methods=["POST"])
+async def check_http(request):
+    """
+    This function received an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with a body like {"domains": ["domain1.tld", "domain2.tld"],
+    "nonce": "1234567890abcdef" }
+
+    The nonce value is a single-use ID, and we will try to reach
+    http://domain.tld/.well-known/ynh-diagnosis/{nonce} which should return
+    200 if we are indeed reaching the right server.
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - get json from body and domain from it
+    - check for domain-based rate limit (see RATE_LIMIT_SECONDS value)
+    - check domains are in valid format
+    - for each domain:
+        - try to do an http request on the ip (using the domain as target host) for the page /.well-known/ynh-diagnosis/{nonce}
+    - answer saying if the domain can be reached
+    """
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    ip = _get_client_ip(request)
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    try:
+        data = request.json
+    except InvalidUsage:
+        return _bad_json_response(request)
+
+    try:
+        _require(data, "Empty request body")
+        _require(isinstance(data, dict), "Request body ain't a proper dict")
+        _require("domains" in data, "No 'domains' provided")
+        _require("nonce" in data, "No 'nonce' provided")
+
+        domains = data["domains"]
+        nonce = data["nonce"]
+
+        # Check domain list format
+        _require(isinstance(domains, list), "'domains' ain't a list")
+        _require(len(domains) > 0, "'domains' list is empty")
+        _require(len(domains) < 30, "You cannot test that many domains")
+        for domain in domains:
+            _require(isinstance(domain, str), "domain names must be strings")
+            _require(len(domain) < 100, "Domain %s name seems pretty long, that's suspicious...?" % domain)
+        _require(len(domains) == len(set(domains)), "'domains' list should contain unique elements")
+
+        # Check domain rate limit
+        for domain in domains:
+            check_rate_limit_domain = check_rate_limit(domain, now)
+            if check_rate_limit_domain:
+                return check_rate_limit_domain
+
+        # Check domains are valid domain names
+        for domain in domains:
+            _require(validators.domain(domain), f"{domain} is not a valid domain")
+
+        # Check nonce format
+        _require(isinstance(nonce, str), "'nonce' ain't a string")
+        # fullmatch and not match + '$', which also accepts a trailing newline
+        _require(re.fullmatch(r"[a-f0-9]{16}", nonce), "'nonce' is not in the right forwat (it should be a 16-digit hexadecimal string)")
+    except InvalidRequest as e:
+        return _invalid_request_response(request, e)
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    return json_response({
+        "http": {domain: await check_http_domain(ip, domain, nonce) for domain in domains}
+    })
+
+
+async def check_http_domain(ip, domain, nonce):
+    """
+    Try to fetch http://{ip}/.well-known/ynh-diagnosis/{nonce}, using `domain`
+    as Host header
+
+    Return {"status": "ok"} if the answer has status code 200, and a dict with
+    an error code as "status" and an explanation as "content" otherwise.
+    """
+
+    # an ipv6 address has to be enclosed in brackets inside an url
+    if ":" in ip:
+        ip = "[%s]" % ip
+
+    url = "http://" + ip + "/.well-known/ynh-diagnosis/" + nonce
+
+    async with aiohttp.ClientSession() as session:
+        try:
+            async with session.get(url,
+                                   headers={"Host": domain},
+                                   allow_redirects=False,
+                                   timeout=aiohttp.ClientTimeout(total=5)) as response:
+                # XXX in the futur try to do a double check with the server to
+                # see if the correct content is get
+                await response.text()
+                status_code = response.status
+        # TODO various kind of errors
+        except (aiohttp.client_exceptions.ServerTimeoutError, asyncio.TimeoutError):
+            return {
+                "status": "error_http_check_timeout",
+                "content": "Timed-out while trying to contact your server from outside. It appears to be unreachable. You should check that you're correctly forwarding port 80, that nginx is running, and that a firewall is not interfering.",
+            }
+        except aiohttp.client_exceptions.ClientConnectorError as e:
+            return {
+                "status": "error_http_check_connection_error",
+                "content": "Connection error: could not connect to the requested domain, it's very likely unreachable. Raw error: " + str(e),
+            }
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+
+            return {
+                "status": "error_http_check_unknown_error",
+                "content": "An error happened while trying to reach your domain, it's very likely unreachable. Raw error: %s" % e,
+            }
+
+    if status_code != 200:
+        return {
+            "status": "error_http_check_bad_status_code",
+            "content": "Could not reach your server as expected, it returned code %s. It might be that another machine answered instead of your server. You should check that you're correctly forwarding port 80, that your nginx configuration is up to date, and that a reverse-proxy is not interfering." % status_code,
+        }
+
+    return {
+        "status": "ok"
+    }
+
+
+# ########################################################################### #
+#   Ports check                                                               #
+# ########################################################################### #
+
+
+@app.route("/check-ports/", methods=["POST"])
+async def check_ports(request):
+    """
+    This function received an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with a body like {"ports": [80,443,22,25]}
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - get json from body and ports list from it
+    - check ports are opened or closed
+    - answer the list of opened / closed ports
+    """
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    ip = _get_client_ip(request)
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    try:
+        data = request.json
+    except InvalidUsage:
+        return _bad_json_response(request)
+
+    try:
+        _require(data, "Empty request body")
+        _require(isinstance(data, dict), "Request body ain't a proper dict")
+        _require("ports" in data, "No 'ports' provided")
+
+        ports = data["ports"]
+
+        _require(isinstance(ports, list), "'ports' ain't a list")
+        _require(len(ports) > 0, "'ports' list is empty")
+        _require(len(ports) < 30, "That's too many ports to check")
+        # validate the elements before building a set out of them, because
+        # set() raises a TypeError on unhashable elements such as nested lists
+        _require(all(_is_port_number(p) for p in ports), "'ports' should a list of valid port numbers")
+        _require(len(ports) == len(set(ports)), "'ports' list should contain unique elements")
+    except InvalidRequest as e:
+        return _invalid_request_response(request, e)
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    result = {}
+    for port in ports:
+        result[port] = await check_port_is_open(ip, port)
+
+    return json_response({"ports": result})
+
+
+def _port_is_open(ip, port):
+    """Blocking part of check_port_is_open, meant to be run in a thread"""
+    with _open_socket(ip) as sock:
+        return sock.connect_ex((ip, port)) == 0
+
+
+async def check_port_is_open(ip, port):
+    """
+    Tell if a TCP connection can be opened on the port `port` of `ip`
+
+    The attempt blocks for up to 2 seconds, so it runs in the default executor
+    (a thread pool) to keep the event loop available for the other requests
+    in the meantime.
+    """
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, _port_is_open, ip, port)
+
+
+# ########################################################################### #
+#   SMTP check                                                                #
+# ########################################################################### #
+
+
+def _fetch_smtp_banner(ip):
+    """
+    Blocking part of check_smtp, meant to be run in a thread
+
+    Return None if no connection could be opened on port 25 of `ip`, and the
+    bytes the server sent first otherwise (b"" if it sent nothing in time).
+    """
+    # `with` closes the socket on every path, failed connections included
+    with _open_socket(ip) as sock:
+        if sock.connect_ex((ip, 25)) != 0:
+            return None
+        try:
+            return sock.recv(1024)
+        except OSError:
+            # this includes socket.timeout
+            return b""
+
+
+def _parse_helo_domain(banner):
+    """
+    Return the second field of `banner` (the bytes of a greeting such as
+    '220 domain.tld Service ready'), or None if it ain't such a greeting
+    """
+    try:
+        text = banner.decode('utf-8')
+    except UnicodeDecodeError:
+        return None
+
+    fields = text.split()
+    if text[:3] != "220" or len(fields) < 2:
+        return None
+    return fields[1]
+
+
+@app.route("/check-smtp/", methods=["POST"])
+async def check_smtp(request):
+    """
+    This function received an HTTP request from a YunoHost instance while this
+    server is hosted on our infrastructure. The request is expected to be a
+    POST request with an empty body
+
+    The general workflow is the following:
+
+    - grab the ip from the request
+    - check for ip based rate limit (see RATE_LIMIT_SECONDS value)
+    - open a socket on port 25
+    - the server is supposed to say '220 domain.tld Service ready'
+    - we return the domain.tld found
+    """
+
+    # this is supposed to be a fast operation if run often enough
+    now = time.time()
+    clear_rate_limit_db(now)
+
+    # ############################################# #
+    #  Validate request and extract the parameters  #
+    # ############################################# #
+
+    ip = _get_client_ip(request)
+
+    check_rate_limit_ip = check_rate_limit(ip, now)
+    if check_rate_limit_ip:
+        return check_rate_limit_ip
+
+    # ############################################# #
+    #  Run the actual check                         #
+    # ############################################# #
+
+    # the socket operations block for up to 2 seconds each, so they run in the
+    # default executor (a thread pool) instead of freezing the event loop
+    loop = asyncio.get_event_loop()
+    banner = await loop.run_in_executor(None, _fetch_smtp_banner, ip)
+
+    if banner is None:
+        return json_response({
+            'status': "error_smtp_unreachable",
+            'content': "Could not open a connection on port 25, probably because of a firewall or port forwarding issue"
+        })
+
+    helo_domain = _parse_helo_domain(banner)
+    if helo_domain is None:
+        return json_response({
+            'status': "error_smtp_bad_answer",
+            'content': "SMTP server did not reply with '220 domain.tld' after opening socket ... Maybe another machine answered."
+        })
+
+    return json_response({'status': 'ok', 'helo': helo_domain})
+
+
+@app.route("/")
+async def main(request):
+    """Landing page, for the humans who end up here with a browser"""
+    return html("You aren't really supposed to use this website using your browser.<br><br>It's a small server with an API to check if a services running on YunoHost instance can be reached from 'the global internet'.")
+
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7000)
diff --git a/yunodiagnoser.service b/yunodiagnoser.service
new file mode 100644
index 0000000..7500849
--- /dev/null
+++ b/yunodiagnoser.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=A server providing features for remote-diagnosis for Yunohost servers
+
+[Service]
+Type=simple
+WorkingDirectory={{ WORKING_DIR }}
+# systemd doesn't run commands through a shell: the redirection to server.log
+# needs an explicit sh (and `exec` keeps python as the main process)
+ExecStart=/bin/sh -c 'exec {{ WORKING_DIR }}/venv/bin/python3.6 yunodiagnoser.py > server.log 2>&1'
+# no ExecStop: systemd doesn't interpret backticks either, and by default it
+# already stops the service by sending SIGTERM to its processes
+
+[Install]
+WantedBy=multi-user.target