try:
    import ujson as json  # ujson parses the large dumps faster; fall back to the stdlib if missing
except ImportError:
    import json

import csv
import os
import sqlite3
import sys

import pandas as pd
from tqdm import tqdm


def is_scoopable(entry):
    # True for stars a route can refuel or boost at: the fuel-scoopable
    # K/G/B/F/O/A/M classes plus Neutron stars and White Dwarfs. `entry` is
    # expected to expose a `type` attribute (e.g. a pandas itertuples row).
    first = entry.type.split()[0]
    # Single-letter class codes are matched as substrings of "KGBFOAM".
    return first == "Neutron" or first == "White" or first in "KGBFOAM"


def get_mult(name):
    # Jump-range multiplier for supercharging off a star's sub-type:
    # Neutron stars boost 4x, White Dwarfs 1.5x, everything else 1x.
    try:
        first = name.split()[0]
    except (AttributeError, IndexError):
        # Missing or empty sub-type: no boost.
        return 1
    if first == "Neutron":
        return 4
    if first == "White":
        return 1.5
    return 1


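# A quick sanity check; the sub-type strings below are hypothetical examples
# of what the EDSM "subType" field may look like, not values from a dump:
#
#   get_mult("Neutron Star")           # -> 4
#   get_mult("White Dwarf (DA) Star")  # -> 1.5
#   get_mult("M (Red dwarf) Star")     # -> 1
#   get_mult(None)                     # -> 1 (AttributeError is caught)

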
def dict_factory(cursor, row):
    # sqlite3 row factory that returns each row as a {column: value} dict.
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d


def blocks(files, size=65536):
    # Yield the file in fixed-size chunks until EOF.
    while True:
        b = files.read(size)
        if not b:
            break
        yield b


def getlines(f, fn, show_progbar=False):
    # Stream records out of an EDSM dump: the file is one giant JSON array
    # with one object per line, so each complete line (minus its trailing
    # comma) is parsed on its own. Lines that are not valid JSON on their own
    # ("[", "]") are skipped.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0)
    progbar = tqdm(
        desc="Processing " + fn,
        total=size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        ascii=True,
        leave=True,
        disable=(not show_progbar),
    )
    buffer = []
    for block in blocks(f):
        progbar.n = f.tell()
        progbar.update(0)
        if buffer:
            # Re-join the trailing partial line with the new block.
            buffer += (buffer.pop(0) + block).splitlines(keepends=True)
        else:
            buffer += block.splitlines(keepends=True)
        # Only complete lines (ending in a newline) are ready to parse.
        while buffer and buffer[0].endswith("\n"):
            try:
                yield json.loads(buffer.pop(0).strip().rstrip(","))
            except ValueError:
                pass
    # Drain whatever is left after the final block.
    while buffer:
        try:
            yield json.loads(buffer.pop(0).strip().rstrip(","))
        except ValueError:
            pass
    progbar.close()


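# For reference, the nightly dumps this parser expects look roughly like the
# sketch below (illustrative, not copied from a real dump): a JSON array with
# one object per line, every line but the last ending in a comma.
#
#   [
#       {"id64": 1, "name": "Example A", "coords": {"x": 0.0, "y": 0.0, "z": 0.0}},
#       {"id64": 2, "name": "Example B", "coords": {"x": 1.0, "y": 2.0, "z": 3.0}}
#   ]

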
def process_file(fn, show_progbar=False):
    # Wrap getlines() in a second progress bar that counts parsed records.
    with open(fn, "r") as f:
        for line in tqdm(
            getlines(f, fn, show_progbar),
            desc=fn,
            unit=" lines",
            unit_scale=True,
            ascii=True,
            leave=True,
            disable=(not show_progbar),
        ):
            yield line


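# Typical use, assuming "bodies.json" is an EDSM nightly dump in the layout
# sketched above:
#
#   for body in process_file("bodies.json", show_progbar=True):
#       print(body["id64"])

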
if not (
    os.path.isfile("bodies.json") and os.path.isfile("systemsWithCoordinates.json")
):
    sys.exit(
        "Please download bodies.json and systemsWithCoordinates.json from https://www.edsm.net/en/nightly-dumps/"
    )


if not os.path.isfile("stars.jl"):
    print("Filtering for Stars")
    # Keep only star bodies, written one JSON object per line ("JSON lines").
    with open("stars.jl", "w") as stars_out:
        for body in process_file("bodies.json", True):
            T = body.get("type") or ""
            if "Star" in T:
                stars_out.write(json.dumps(body) + "\n")


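# Records kept by this filter look roughly like the hypothetical body below
# (field names match the code's usage; the values are invented):
#
#   {"id64": 123, "systemId64": 456, "name": "Example A",
#    "type": "Star", "subType": "Neutron Star"}

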
def load_systems():
    # Cache systemsWithCoordinates.json in a local SQLite database so star
    # records can be joined to their coordinates by id64. The import only
    # runs when systems.db does not exist yet; afterwards it is reused.
    load = not os.path.isfile("systems.db")
    cache = sqlite3.connect("systems.db")
    cache.row_factory = dict_factory
    c = cache.cursor()
    if load:
        print("Caching Systems")
        c.execute("DROP TABLE IF EXISTS systems")
        c.execute(
            "CREATE TABLE systems (id64 int primary key, name text, x real, y real, z real)"
        )
        cache.commit()
        recs = []
        for system in process_file("systemsWithCoordinates.json", True):
            rec = [
                system["id64"],
                system["name"],
                system["coords"]["x"],
                system["coords"]["y"],
                system["coords"]["z"],
            ]
            recs.append(rec)
            # Flush to the database in large batches to bound memory use.
            if len(recs) >= 1024 * 1024:
                c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
                recs.clear()
        c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
        cache.commit()
    return cache, c


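# Example point lookup against the cache (id64 is the table's primary key,
# so this is an indexed query); `some_id64` is a placeholder, not a value
# from a real dump:
#
#   cache, cur = load_systems()
#   cur.execute("SELECT * FROM systems WHERE id64==?", (some_id64,))
#   print(cur.fetchone())  # -> {"id64": ..., "name": ..., "x": ..., "y": ..., "z": ...}

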
if not os.path.isfile("stars.csv"):
    cache, cur = load_systems()
    rows = []
    with open("stars.csv", "w", newline="") as sys_csv:
        csv_writer = csv.writer(sys_csv, dialect="excel")
        for neut in process_file("stars.jl", True):
            # Join each star to its system's coordinates; skip stars whose
            # system is not in the cache.
            cur.execute(
                "SELECT * FROM systems WHERE id64==?", (neut.get("systemId64"),)
            )
            system = cur.fetchone()
            if not system:
                continue
            row = [
                neut["systemId64"],
                neut["subType"],
                neut["name"],
                get_mult(neut["subType"]),
                system["x"],
                system["y"],
                system["z"],
            ]
            rows.append(row)
            # Write in batches rather than row by row.
            if len(rows) > 1024:
                csv_writer.writerows(rows)
                rows.clear()
        csv_writer.writerows(rows)
    print()
    cache.close()


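# The columns written above line up with the names used when the CSV is
# reloaded below: id, type, name, mult, x, y, z. An invented sample row:
#
#   123456789,Neutron Star,Example A,4,-1052.4,88.2,3991.1

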
if os.path.isfile("stars.csv"):
    # Clean the generated CSV in place: fill missing types, drop duplicate ids.
    tqdm.pandas(ascii=True, leave=True)
    print("Loading data...")
    data = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    print("Cleaning data...")
    data["type"] = data["type"].fillna("Unknown")
    data = data.drop_duplicates("id")
    print("Writing CSV...")
    data.to_csv("stars.csv", header=False, index=False)
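# Downstream consumers can reload the cleaned table with the same column
# names (a sketch, assuming the file produced above):
#
#   stars = pd.read_csv("stars.csv", names=["id", "type", "name", "mult", "x", "y", "z"])
#   neutrons = stars[stars["mult"] == 4]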