import csv
import os
import sqlite3
import sys

import pandas as pd
import ujson as json
from tqdm import tqdm


def is_scoopable(entry):
    # KGBFOAM classes are fuel-scoopable; neutron stars and white dwarfs
    # are included as jump-boosting stars.
    first = entry["type"].split()[0]
    return first == "Neutron" or first == "White" or first in "KGBFOAM"


def get_mult(name):
    # FSD supercharge multiplier by star class: neutron stars give 4x,
    # white dwarfs 1.5x, everything else 1x.
    try:
        first = name.split()[0]
    except (AttributeError, IndexError):
        return 1
    if first == "Neutron":
        return 4
    if first == "White":
        return 1.5
    return 1


def dict_factory(cursor, row):
    # sqlite3 row factory that returns each row as a column-name -> value dict.
    return {col[0]: value for col, value in zip(cursor.description, row)}


def blocks(f, size=65536):
    # Yield the file in fixed-size chunks until EOF.
    while True:
        b = f.read(size)
        if not b:
            break
        yield b


def getlines(f, fn, show_progbar=False):
    # Stream a huge EDSM JSON dump, yielding one parsed object per line.
    # Dump lines look like "    { ... },", so the trailing comma is
    # stripped; the bare "[" / "]" bracket lines fail to parse and are skipped.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0)
    progbar = tqdm(
        desc="Processing " + fn,
        total=size,
        unit="b",
        unit_scale=True,
        unit_divisor=1024,
        ascii=True,
        leave=True,
        disable=not show_progbar,
    )
    buffer = []
    for block in blocks(f):
        progbar.n = f.tell()
        progbar.refresh()
        if buffer:
            # Prepend the incomplete tail line left over from the previous block.
            buffer = (buffer.pop(0) + block).splitlines(keepends=True)
        else:
            buffer = block.splitlines(keepends=True)
        while buffer and buffer[0].endswith("\n"):
            try:
                yield json.loads(buffer.pop(0).strip().rstrip(","))
            except ValueError:
                pass
    # Flush whatever remains after the final block.
    while buffer:
        try:
            yield json.loads(buffer.pop(0).strip().rstrip(","))
        except ValueError:
            pass
    progbar.close()


def process_file(fn, show_progbar=False):
    # Iterate over the parsed objects of a dump file with a line counter.
    with open(fn, "r", encoding="utf-8") as f:
        yield from tqdm(
            getlines(f, fn, show_progbar),
            desc=fn,
            unit=" lines",
            unit_scale=True,
            ascii=True,
            leave=True,
            disable=not show_progbar,
        )


if not (
    os.path.isfile("bodies.json") and os.path.isfile("systemsWithCoordinates.json")
):
    sys.exit(
        "Please download bodies.json and systemsWithCoordinates.json"
        " from https://www.edsm.net/en/nightly-dumps/"
    )

if not os.path.isfile("stars.jl"):
    # Pass 1: extract every star body from bodies.json into a JSON-lines file.
    print("Filtering for Stars")
    with open("stars.jl", "w", encoding="utf-8") as stars:
        for body in process_file("bodies.json", True):
            if "Star" in (body.get("type") or ""):
                stars.write(json.dumps(body) + "\n")


def load_systems():
    # Build (or reopen) a SQLite cache mapping system id64 -> name and coordinates.
    load = not os.path.isfile("systems.db")
    cache = sqlite3.connect("systems.db")
    cache.row_factory = dict_factory
    c = cache.cursor()
    if load:
        print("Caching Systems")
        c.execute("DROP TABLE IF EXISTS systems")
        c.execute(
            "CREATE TABLE systems (id64 int primary key, name text, x real, y real, z real)"
        )
        cache.commit()
        recs = []
        for system in process_file("systemsWithCoordinates.json", True):
            recs.append(
                [
                    system["id64"],
                    system["name"],
                    system["coords"]["x"],
                    system["coords"]["y"],
                    system["coords"]["z"],
                ]
            )
            # Flush in batches of 1M rows to bound memory use.
            if len(recs) >= 1024 * 1024:
                c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
                recs.clear()
        c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
        cache.commit()
    return cache, c


if not os.path.isfile("stars.csv"):
    # Pass 2: join each star with its system's coordinates and write stars.csv.
    cache, cur = load_systems()
    rows = []
    with open("stars.csv", "w", newline="", encoding="utf-8") as sys_csv:
        csv_writer = csv.writer(sys_csv, dialect="excel")
        for neut in process_file("stars.jl", True):
            cur.execute(
                "SELECT * FROM systems WHERE id64==?", (neut.get("systemId64"),)
            )
            system = cur.fetchone()
            if not system:
                continue
            rows.append(
                [
                    neut["systemId64"],
                    neut["subType"],
                    neut["name"],
                    get_mult(neut["subType"]),
                    system["x"],
                    system["y"],
                    system["z"],
                ]
            )
            if len(rows) > 1024:
                csv_writer.writerows(rows)
                rows.clear()
        csv_writer.writerows(rows)
    print()
    cache.close()

# Pass 3: clean stars.csv in place, filling missing types and dropping
# duplicate bodies.
tqdm.pandas(ascii=True, leave=True)
print("Loading data...")
data = pd.read_csv(
    "stars.csv",
    encoding="utf-8",
    names=["id", "type", "name", "mult", "x", "y", "z"],
)
print("Cleaning data...")
data["type"] = data["type"].fillna("Unknown")
data.drop_duplicates("id", inplace=True)
print("Writing CSV...")
data.to_csv("stars.csv", header=False, index=False)
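

# Illustrative sketch (not called above): one way to consume the cleaned
# stars.csv, finding the jump-boosting stars nearest to a reference point.
# The column names match the CSV written above; `ref` and `limit` are
# hypothetical parameters introduced only for this example.
def nearest_boosted(ref, limit=10):
    stars = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    # Keep only stars with a supercharge multiplier (neutron or white dwarf).
    boosted = stars[stars["mult"] > 1].copy()
    # Euclidean distance from the reference coordinates.
    boosted["dist"] = (
        (boosted["x"] - ref[0]) ** 2
        + (boosted["y"] - ref[1]) ** 2
        + (boosted["z"] - ref[2]) ** 2
    ) ** 0.5
    return boosted.nsmallest(limit, "dist")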