From 6385e11a76e224803b6a24b1f19072690a0fab93 Mon Sep 17 00:00:00 2001
From: tuxmain <tuxmain@zettascript.org>
Date: Sat, 9 May 2020 20:11:59 +0200
Subject: [PATCH] Add cbor, gzip, lzma

---
 README.md    | 15 +++++++--
 getblocks.py | 93 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 84 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 743e5b3..24732c3 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,10 @@ This code is dirty for the moment. Experimental purpose only.
 
 Requires MineTest `blocks` table to be on PostgreSQL.
 
-Requires `python3-psycopg2` (which requires `libpq-dev`):
+Requires `python3-psycopg2` (which requires `libpq-dev`) and `cbor`:
 
     sudo apt install libpq-dev
-    sudo pip3 install psycopg2
+    sudo pip3 install psycopg2 cbor
 
 ## Configure
 
@@ -34,6 +34,17 @@ Response is a JSON list of the blocks of which position verifies these condition
         ]
     }
 
+Options (appended to the URL path as `/name/value` pairs):
+
+* `fmt`: response format, either `json` (default) or `cbor`
+* `cpr`: response compression, one of `none` (default), `gzip` or `lzma`
+
+Example:
+
+    curl "http://127.0.0.1:8060/x>=0/x<100/y>=0/y<100/z>=0/z<100/fmt/cbor/cpr/gzip" > test_gzip.cbor
+
+Prefer `cbor` combined with `gzip` for big imports: CBOR responses are much smaller than JSON ones, and LZMA is much slower than gzip while producing only slightly smaller files.
+
 ## License
 
 GNU AGPL 3.0
diff --git a/getblocks.py b/getblocks.py
index dfdfdb0..6a9e38a 100644
--- a/getblocks.py
+++ b/getblocks.py
@@ -1,22 +1,37 @@
 #!/usr/bin/env python3
 
-import json, psycopg2, re, socket, time
+import cbor, gzip, json, lzma, psycopg2, re, socket, time
 
 CONFIG_DB_CONNECT = "host=127.0.0.1 port=5432 user=minetest password=PASSWORD dbname=minetest-world"
 CONFIG_LISTEN = ("0.0.0.0", 8060)
 
 RECBUF = 1024
 
+AVAILABLE_FORMATS = {"json": "text/json", "cbor": "application/cbor"}
+AVAILABLE_COMPRESSIONS = {"none": None, "gzip": "gzip", "lzma": "lzma"}
+
 p_clen = re.compile("\r?\ncontent-length: *(\d+)\r?\n?", re.IGNORECASE)
 p_stmt = re.compile("^(x|y|z)(<|>|=|<=|>=)-?\d{1,5}$")
+encode = {
+	"json": lambda x: json.dumps(x).encode(),
+	"cbor": cbor.dumps
+}
+compress = {
+	"gzip": gzip.compress,
+	"lzma": lzma.compress
+}
 
-def send_response(client, code, resp):
-	try:
-		content_raw = json.dumps(resp).encode()
-	except TypeError:
-		content_raw = json.dumps({"error": "non_ascii_resp"}).encode()
-	mime = "text/json"
-	client.sendall(("HTTP/1.1 "+code+"\r\nContent-type: "+mime+"; charset=UTF-8\r\nAccess-Control-Allow-Origin: *\r\nContent-length: "+str(len(content_raw))+"\r\n\r\n").encode()+content_raw)
+def send_response(client, code, resp, resp_format, resp_compression):
+	content_raw = encode[resp_format](resp)
+	
+	compression_str = ""
+	if AVAILABLE_COMPRESSIONS[resp_compression]:
+		compression_str = "Content-Encoding: " + AVAILABLE_COMPRESSIONS[resp_compression] + "\r\n"
+		content_raw = compress[resp_compression](content_raw)
+	
+	mime = AVAILABLE_FORMATS[resp_format]
+	
+	client.sendall(("HTTP/1.1 "+code+"\r\nContent-type: "+mime+"; charset=UTF-8\r\n"+compression_str+"Access-Control-Allow-Origin: *\r\nContent-length: "+str(len(content_raw))+"\r\n\r\n").encode()+content_raw)
 	client.close()
 
 if __name__ == "__main__":
@@ -45,6 +60,8 @@ if __name__ == "__main__":
 		content = b""
 		content_len = 0
 		resp = {}
+		resp_format = "json"
+		resp_compression = "none"
 		lf = 0
 		while True:
 			raw = client.recv(RECBUF)
@@ -79,47 +96,79 @@ if __name__ == "__main__":
 		try:
 			url = httpreq[0].split(b" ")[1].decode().split("/")
 		except IndexError:
-			send_response(client, "400 Bad Request", {"error": "bad_http"})
+			send_response(client, "400 Bad Request", {"error": "bad_http"}, resp_format, resp_compression)
 			continue
 		while "" in url:
 			url.remove("")
 		urll = len(url)
 		
-		if len(url) > 6:
-			send_response(client, "400 Bad Request", {"error": "too_many_statements"})
-			continue
-		
-		if len(url) == 0:
-			send_response(client, "400 Bad Request", {"error": "no_statement"})
+		if urll > 32:
+			send_response(client, "400 Bad Request", {"error": "too_many_args"}, resp_format, resp_compression)
 			continue
 		
 		stmts = []
 		
 		bad = False
-		for stmt in url:
-			if not p_stmt.match(stmt):
+		token = None
+		for val in url:
+			if token:
+				if token == 1:
+					if not val in AVAILABLE_FORMATS:
+						bad = True
+						break
+					resp_format = val
+					token = None
+					continue
+				elif token == 2:
+					if not val in AVAILABLE_COMPRESSIONS:
+						bad = True
+						break
+					resp_compression = val
+					token = None
+					continue
 				bad = True
 				break
-			stmts.append("pos"+stmt)
+			
+			if val == "fmt":
+				token = 1
+				continue
+			elif val == "cpr":
+				token = 2
+				continue
+			
+			if not p_stmt.match(val):
+				bad = True
+				break
+			stmts.append("pos"+val)
 		
 		if bad:
-			send_response(client, "400 Bad Request", {"error": "bad_statement"})
+			send_response(client, "400 Bad Request", {"error": "bad_request"}, resp_format, resp_compression)
+			continue
+		
+		if len(stmts) > 6:
+			send_response(client, "400 Bad Request", {"error": "too_many_statements"}, resp_format, resp_compression)
+			continue
+		
+		if len(stmts) == 0:
+			send_response(client, "400 Bad Request", {"error": "no_statement"}, resp_format, resp_compression)
 			continue
 		
 		req = " AND ".join(stmts)
 		
 		cur = conn.cursor()
-		cur.execute("SELECT * FROM blocks WHERE "+req+" LIMIT 1000;")
+		cur.execute("SELECT * FROM blocks WHERE "+req+" LIMIT 1000000;")
 		resp["blocks"] = []
 		while True:
 			block = cur.fetchone()
 			if not block:
 				break
-			resp["blocks"].append([block[0], block[1], block[2], block[3].hex()])
+			resp["blocks"].append([block[0], block[1], block[2],
+				block[3].hex() if resp_format == "json" else block[3].tobytes()
+			])
 		
 		print(req)
 		
 		# Send response
-		send_response(client, "200 OK", resp)
+		send_response(client, "200 OK", resp, resp_format, resp_compression)
 		
 		time.sleep(.2)