Update main.rs and process.py
process.py:
- Add hint for file download when input files are missing

main.rs:
- Add multi-hop routing
- Add searching for source and destination by name
parent 662a0be0e3
commit 8e8587a335
3 changed files with 123 additions and 51 deletions
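Note: the main.rs changes (multi-hop routing, name search) are not part of the visible diff below, which only shows the new dumps/process.py. As a rough illustration of the multi-hop idea only, chaining single-leg routes through an ordered list of waypoints could look like the following Python sketch, where plot_route(source, dest) stands in for whatever single-leg planner main.rs actually uses:

def plot_multi_hop(waypoints, plot_route):
    # Illustration only: plot_route is a stand-in for the real single-leg planner.
    full_route = []
    for src, dst in zip(waypoints, waypoints[1:]):
        leg = plot_route(src, dst)
        # Skip the first system of every leg after the first so shared waypoints
        # are not duplicated in the combined route.
        full_route.extend(leg if not full_route else leg[1:])
    return full_route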
dumps/process.py (new file, 180 lines)
@@ -0,0 +1,180 @@
import ujson as json
from tqdm import tqdm
from pprint import pprint
import itertools as ITT
import os
import sys
import csv
import sqlite3
import pandas as pd
from urllib.parse import urljoin

def is_scoopable(entry):
    # Returns True for neutron stars, white dwarfs, and K/G/B/F/O/A/M class stars.
    first = entry.type.split()[0]
    return first == "Neutron" or first == "White" or first in "KGBFOAM"

def get_mult(name):
    # Jump-range multiplier by star type: neutron stars boost 4x, white dwarfs 1.5x.
    try:
        first = name.split()[0]
    except (AttributeError, IndexError):
        # name is None or empty
        return 1
    if first == "Neutron":
        return 4
    if first == "White":
        return 1.5
    return 1

def dict_factory(cursor, row):
    # sqlite3 row factory: return each row as a dict keyed by column name.
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

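Side note: since the rows are only ever indexed by column name further down, the standard library's built-in sqlite3.Row factory would serve the same purpose as dict_factory; the custom factory is kept as written. For comparison, the built-in alternative would be set up like this:

import sqlite3

conn = sqlite3.connect("systems.db")
conn.row_factory = sqlite3.Row  # rows then support row["name"], row["x"], ... like the dicts above
cur = conn.cursor()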
def blocks(files, size=65536):
    while True:
        b = files.read(size)
        if not b:
            break
        yield b

def getlines(f, fn, show_progbar=False):
    # Stream a huge EDSM dump without loading it into memory: read the file in
    # blocks, reassemble complete lines, and yield each line parsed as JSON.
    # Trailing commas are stripped so each entry of the line-per-record JSON
    # array parses on its own; non-JSON lines (e.g. "[" / "]") are skipped.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0)
    progbar = tqdm(
        desc="Processing " + fn,
        total=size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        ascii=True,
        leave=True,
        disable=(not show_progbar),
    )
    buffer = []
    for block in blocks(f):
        progbar.n = f.tell()
        progbar.update(0)
        if buffer:
            # buffer holds at most one leftover partial line from the previous
            # block; glue the new block onto it before splitting again.
            buffer += (buffer.pop(0) + block).splitlines(keepends=True)
        else:
            buffer += block.splitlines(keepends=True)
        while buffer and buffer[0].endswith("\n"):
            try:
                yield json.loads(buffer.pop(0).strip().rstrip(","))
            except ValueError:
                pass
    # Flush whatever remains after the final block (typically one line without "\n").
    while buffer:
        try:
            yield json.loads(buffer.pop(0).strip().rstrip(","))
        except ValueError:
            pass

def process_file(fn, show_progbar=False):
    with open(fn, "r") as f:
        for line in tqdm(
            getlines(f, fn, show_progbar),
            desc=fn,
            unit=" lines",
            unit_scale=True,
            ascii=True,
            leave=True,
            disable=(not show_progbar),
        ):
            yield line

if not (
    os.path.isfile("bodies.json") and os.path.isfile("systemsWithCoordinates.json")
):
    exit(
        "Please download bodies.json and systemsWithCoordinates.json from https://www.edsm.net/en/nightly-dumps/"
    )

if not os.path.isfile("stars.jl"):
    print("Filtering for Stars")
    with open("stars.jl", "w") as neut:
        for body in process_file("bodies.json", True):
            T = body.get("type") or ""
            if "Star" in T:
                neut.write(json.dumps(body) + "\n")

def load_systems(load=False):
    # Cache id64 -> (name, x, y, z) for every system in an SQLite database.
    # Rebuild the cache if explicitly requested or if systems.db does not exist yet.
    load = load or not os.path.isfile("systems.db")
    cache = sqlite3.connect("systems.db")
    cache.row_factory = dict_factory
    c = cache.cursor()
    if load:
        print("Caching Systems")
        c.execute("DROP TABLE IF EXISTS systems")
        c.execute(
            "CREATE TABLE systems (id64 int primary key, name text, x real, y real, z real)"
        )
        cache.commit()
        recs = []
        for system in process_file("systemsWithCoordinates.json", True):
            rec = [
                system["id64"],
                system["name"],
                system["coords"]["x"],
                system["coords"]["y"],
                system["coords"]["z"],
            ]
            recs.append(rec)
            if len(recs) % 1024 == 0:
                # Insert in batches to keep memory bounded.
                c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
                recs.clear()
        c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
        cache.commit()
    return cache, c

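The commit message also adds searching for source and destination by name on the main.rs side; that code is not part of this diff. Against the systems table created above, such a lookup could look roughly like the sketch below (find_system_by_name is a hypothetical helper, not the actual implementation):

def find_system_by_name(cur, name):
    # Hypothetical helper: case-insensitive exact match against the cached systems table.
    cur.execute("SELECT * FROM systems WHERE name LIKE ?", (name,))
    return cur.fetchone()

# Usage sketch:
# cache, cur = load_systems()
# source = find_system_by_name(cur, "Sol")
# destination = find_system_by_name(cur, "Colonia")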
if not os.path.isfile("stars.csv"):
    cache, cur = load_systems()
    rows = []
    with open("stars.csv", "w", newline="") as sys_csv:
        csv_writer = csv.writer(sys_csv, dialect="excel")
        for neut in process_file("stars.jl", True):
            cur.execute(
                "SELECT * FROM systems WHERE id64==?", (neut.get("systemId64"),)
            )
            system = cur.fetchone()
            if not system:
                continue
            row = [
                neut["systemId64"],
                neut["subType"],
                neut["name"],
                get_mult(neut["subType"]),
                system["x"],
                system["y"],
                system["z"],
            ]
            rows.append(row)
            if len(rows) > 1024:
                csv_writer.writerows(rows)
                rows.clear()
        csv_writer.writerows(rows)
        print()
    cache.close()

if not os.path.isfile("stars.kdt"):
    tqdm.pandas(ascii=True, leave=True)
    print("Loading data...")
    data = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    print("Cleaning data...")
    data.type.fillna("Unknown", inplace=True)
    data.drop_duplicates("id", inplace=True)
    print("Writing CSV...")
    data.to_csv("stars.csv", header=False, index=False)
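The stars.kdt guard above suggests a KD-tree over the cleaned star coordinates is built in a later step that is not part of this file (presumably on the main.rs side or elsewhere). As a rough sketch only of what that step could look like in Python, assuming scipy is available and that stars.kdt is simply a pickled tree (both are assumptions, not confirmed by this commit):

import pickle
import pandas as pd
from scipy.spatial import cKDTree

# Assumption: stars.kdt is a pickled KD-tree over the x/y/z columns of stars.csv.
stars = pd.read_csv("stars.csv", names=["id", "type", "name", "mult", "x", "y", "z"])
tree = cKDTree(stars[["x", "y", "z"]].values)  # nearest-neighbour queries over 3-D star coordinates
with open("stars.kdt", "wb") as fh:
    pickle.dump(tree, fh)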