Update main.rs and process.py
process.py:
- Add hint for file download when input files are missing

main.rs:
- Add multi-hop routing
- Add searching for source and destination by name
parent 662a0be0e3
commit 8e8587a335
3 changed files with 123 additions and 51 deletions
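Note: the main.rs changes (multi-hop routing, name search) are not part of the visible diff below, which only shows the new dumps/process.py. As a rough illustration of the multi-hop idea only, chaining single-leg routes through an ordered list of waypoints could look like the following Python sketch, where plot_route(source, dest) stands in for whatever single-leg planner main.rs actually uses:

def plot_multi_hop(waypoints, plot_route):
    # Illustration only: plot_route is a stand-in for the real single-leg planner.
    full_route = []
    for src, dst in zip(waypoints, waypoints[1:]):
        leg = plot_route(src, dst)
        # Skip the first system of every leg after the first so shared waypoints
        # are not duplicated in the combined route.
        full_route.extend(leg if not full_route else leg[1:])
    return full_route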
dumps/process.py (new file, 180 lines)
@@ -0,0 +1,180 @@
import ujson as json
from tqdm import tqdm
from pprint import pprint
import itertools as ITT
import os
import sys
import csv
import sqlite3
import pandas as pd
from urllib.parse import urljoin

def is_scoopable(entry):
    # Returns True for neutron stars, white dwarfs, and K/G/B/F/O/A/M class stars.
    first = entry.type.split()[0]
    return first == "Neutron" or first == "White" or first in "KGBFOAM"

def get_mult(name):
    # Jump-range multiplier by star type: neutron stars boost 4x, white dwarfs 1.5x.
    try:
        first = name.split()[0]
    except (AttributeError, IndexError):
        # name is None or empty
        return 1
    if first == "Neutron":
        return 4
    if first == "White":
        return 1.5
    return 1

def dict_factory(cursor, row):
    # sqlite3 row factory: return each row as a dict keyed by column name.
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

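Side note: since the rows are only ever indexed by column name further down, the standard library's built-in sqlite3.Row factory would serve the same purpose as dict_factory; the custom factory is kept as written. For comparison, the built-in alternative would be set up like this:

import sqlite3

conn = sqlite3.connect("systems.db")
conn.row_factory = sqlite3.Row  # rows then support row["name"], row["x"], ... like the dicts above
cur = conn.cursor()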
def blocks(files, size=65536):
    while True:
        b = files.read(size)
        if not b:
            break
        yield b

def getlines(f, fn, show_progbar=False):
    # Stream a huge EDSM dump without loading it into memory: read the file in
    # blocks, reassemble complete lines, and yield each line parsed as JSON.
    # Trailing commas are stripped so each entry of the line-per-record JSON
    # array parses on its own; non-JSON lines (e.g. "[" / "]") are skipped.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0)
    progbar = tqdm(
        desc="Processing " + fn,
        total=size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        ascii=True,
        leave=True,
        disable=(not show_progbar),
    )
    buffer = []
    for block in blocks(f):
        progbar.n = f.tell()
        progbar.update(0)
        if buffer:
            # buffer holds at most one leftover partial line from the previous
            # block; glue the new block onto it before splitting again.
            buffer += (buffer.pop(0) + block).splitlines(keepends=True)
        else:
            buffer += block.splitlines(keepends=True)
        while buffer and buffer[0].endswith("\n"):
            try:
                yield json.loads(buffer.pop(0).strip().rstrip(","))
            except ValueError:
                pass
    # Flush whatever remains after the final block (typically one line without "\n").
    while buffer:
        try:
            yield json.loads(buffer.pop(0).strip().rstrip(","))
        except ValueError:
            pass

def process_file(fn, show_progbar=False):
    with open(fn, "r") as f:
        for line in tqdm(
            getlines(f, fn, show_progbar),
            desc=fn,
            unit=" lines",
            unit_scale=True,
            ascii=True,
            leave=True,
            disable=(not show_progbar),
        ):
            yield line

if not (
    os.path.isfile("bodies.json") and os.path.isfile("systemsWithCoordinates.json")
):
    exit(
        "Please download bodies.json and systemsWithCoordinates.json from https://www.edsm.net/en/nightly-dumps/"
    )

if not os.path.isfile("stars.jl"):
    print("Filtering for Stars")
    with open("stars.jl", "w") as neut:
        for body in process_file("bodies.json", True):
            T = body.get("type") or ""
            if "Star" in T:
                neut.write(json.dumps(body) + "\n")

def load_systems(load=False):
    # Cache id64 -> (name, x, y, z) for every system in an SQLite database.
    # Rebuild the cache if explicitly requested or if systems.db does not exist yet.
    load = load or not os.path.isfile("systems.db")
    cache = sqlite3.connect("systems.db")
    cache.row_factory = dict_factory
    c = cache.cursor()
    if load:
        print("Caching Systems")
        c.execute("DROP TABLE IF EXISTS systems")
        c.execute(
            "CREATE TABLE systems (id64 int primary key, name text, x real, y real, z real)"
        )
        cache.commit()
        recs = []
        for system in process_file("systemsWithCoordinates.json", True):
            rec = [
                system["id64"],
                system["name"],
                system["coords"]["x"],
                system["coords"]["y"],
                system["coords"]["z"],
            ]
            recs.append(rec)
            if len(recs) % 1024 == 0:
                # Insert in batches to keep memory bounded.
                c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
                recs.clear()
        c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
        cache.commit()
    return cache, c

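The commit message also adds searching for source and destination by name on the main.rs side; that code is not part of this diff. Against the systems table created above, such a lookup could look roughly like the sketch below (find_system_by_name is a hypothetical helper, not the actual implementation):

def find_system_by_name(cur, name):
    # Hypothetical helper: case-insensitive exact match against the cached systems table.
    cur.execute("SELECT * FROM systems WHERE name LIKE ?", (name,))
    return cur.fetchone()

# Usage sketch:
# cache, cur = load_systems()
# source = find_system_by_name(cur, "Sol")
# destination = find_system_by_name(cur, "Colonia")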
if not os.path.isfile("stars.csv"):
    cache, cur = load_systems()
    rows = []
    with open("stars.csv", "w", newline="") as sys_csv:
        csv_writer = csv.writer(sys_csv, dialect="excel")
        for neut in process_file("stars.jl", True):
            cur.execute(
                "SELECT * FROM systems WHERE id64==?", (neut.get("systemId64"),)
            )
            system = cur.fetchone()
            if not system:
                continue
            row = [
                neut["systemId64"],
                neut["subType"],
                neut["name"],
                get_mult(neut["subType"]),
                system["x"],
                system["y"],
                system["z"],
            ]
            rows.append(row)
            if len(rows) > 1024:
                csv_writer.writerows(rows)
                rows.clear()
        csv_writer.writerows(rows)
        print()
    cache.close()

if not os.path.isfile("stars.kdt"):
    tqdm.pandas(ascii=True, leave=True)
    print("Loading data...")
    data = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    print("Cleaning data...")
    data.type.fillna("Unknown", inplace=True)
    data.drop_duplicates("id", inplace=True)
    print("Writing CSV...")
    data.to_csv("stars.csv", header=False, index=False)
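The stars.kdt guard above suggests a KD-tree over the cleaned star coordinates is built in a later step that is not part of this file (presumably on the main.rs side or elsewhere). As a rough sketch only of what that step could look like in Python, assuming scipy is available and that stars.kdt is simply a pickled tree (both are assumptions, not confirmed by this commit):

import pickle
import pandas as pd
from scipy.spatial import cKDTree

# Assumption: stars.kdt is a pickled KD-tree over the x/y/z columns of stars.csv.
stars = pd.read_csv("stars.csv", names=["id", "type", "name", "mult", "x", "y", "z"])
tree = cKDTree(stars[["x", "y", "z"]].values)  # nearest-neighbour queries over 3-D star coordinates
with open("stars.kdt", "wb") as fh:
    pickle.dump(tree, fh)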