"""Preprocess EDSM-style dump files (bodies.json, systemsWithCoordinates.json)
into stars.csv: one row per star with its system's coordinates and the
jump-range multiplier it grants when supercharging (neutron 4x, white dwarf 1.5x)."""

import ujson as json
from tqdm import tqdm
from pprint import pprint
import itertools as ITT
import os
import sys
import csv
import sqlite3
import pandas as pd


def is_scoopable(entry):
    """True if the body is a fuel-scoopable star (KGBFOAM) or a boost star."""
    first = entry["type"].split()[0]
    return first == "Neutron" or first == "White" or first in set("KGBFOAM")


def get_mult(name):
    """Jump-range multiplier granted by supercharging at this star type."""
    try:
        first = name.split()[0]
    except (AttributeError, IndexError):
        return 1
    if first == "Neutron":
        return 4
    if first == "White":
        return 1.5
    return 1


def dict_factory(cursor, row):
    """sqlite3 row factory that maps column names to values."""
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}


def blocks(f, size=65536):
    """Yield successive chunks of at most `size` characters from `f`."""
    while True:
        b = f.read(size)
        if not b:
            break
        yield b


def getlines(f, fn, show_progbar=False):
    """Stream one JSON object per line from `f`, tracking progress by bytes read."""
    f.seek(0, 2)  # measure the file size, then rewind
    size = f.tell()
    f.seek(0)
    progbar = tqdm(
        desc="Processing " + fn,
        total=size,
        unit="b",
        unit_scale=True,
        unit_divisor=1024,
        ascii=True,
        leave=True,
        disable=not show_progbar,
    )
    buffer = []
    for block in blocks(f):
        progbar.n = f.tell()
        progbar.update(0)  # redraw the bar without advancing it
        if buffer:
            # Rejoin the leftover partial line with the new block before splitting.
            buffer += (buffer.pop(0) + block).splitlines(keepends=True)
        else:
            buffer += block.splitlines(keepends=True)
        # Lines still ending in "\n" are complete; at most one partial line
        # stays in the buffer for the next block.
        while buffer and buffer[0].endswith("\n"):
            try:
                # The dumps are JSON arrays with one object per line; strip the
                # trailing comma so each line parses as a standalone object.
                yield json.loads(buffer.pop(0).strip().rstrip(","))
            except ValueError:
                pass  # the "[" / "]" lines and blank lines do not parse
    while buffer:  # drain whatever remains after the final block
        try:
            yield json.loads(buffer.pop(0).strip().rstrip(","))
        except ValueError:
            pass


def process_file(fn, show_progbar=False):
    """Iterate over the JSON objects in `fn` with a line-count progress bar."""
    with open(fn, "r") as f:
        for line in tqdm(
            getlines(f, fn, show_progbar),
            desc=fn,
            unit=" lines",
            unit_scale=True,
            ascii=True,
            leave=True,
            disable=not show_progbar,
        ):
            yield line


# Step 1: reduce the full bodies dump to a JSON-lines file of stars only.
if not os.path.isfile("stars.jl"):
    print("Filtering bodies.json for stars")
    with open("stars.jl", "w") as stars_file:
        for body in process_file("bodies.json", True):
            body_type = body.get("type") or ""
            if "Star" in body_type:
                stars_file.write(json.dumps(body) + "\n")


def load_systems():
    """Open the sqlite cache of system coordinates, building it on first run."""
    load = not os.path.isfile("systems.db")
    cache = sqlite3.connect("systems.db")
    cache.row_factory = dict_factory
    c = cache.cursor()
    if load:
        print("Caching Systems")
        c.execute("DROP TABLE IF EXISTS systems")
        c.execute(
            "CREATE TABLE systems (id64 int primary key, name text, x real, y real, z real)"
        )
        cache.commit()
        recs = []
        for system in process_file("systemsWithCoordinates.json", True):
            recs.append(
                [
                    system["id64"],
                    system["name"],
                    system["coords"]["x"],
                    system["coords"]["y"],
                    system["coords"]["z"],
                ]
            )
            if len(recs) >= 1024 * 1024:  # flush in ~1M-row batches to bound memory
                c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
                recs.clear()
        c.executemany("INSERT INTO systems VALUES (?,?,?,?,?)", recs)
        cache.commit()
    return cache, c


# Step 2: join every star with its system's coordinates and write stars.csv.
if not os.path.isfile("stars.csv"):
    cache, cur = load_systems()
    rows = []
    with open("stars.csv", "w", newline="") as sys_csv:
        csv_writer = csv.writer(sys_csv, dialect="excel")
        for star in process_file("stars.jl", True):
            cur.execute(
                "SELECT * FROM systems WHERE id64 == ?", (star.get("systemId64"),)
            )
            system = cur.fetchone()
            if not system:
                continue  # no coordinates known for this star's system
            rows.append(
                [
                    star["systemId64"],
                    star["subType"],
                    star["name"],
                    get_mult(star["subType"]),
                    system["x"],
                    system["y"],
                    system["z"],
                ]
            )
            if len(rows) > 1024:
                csv_writer.writerows(rows)
                rows.clear()
        csv_writer.writerows(rows)
    print()
    cache.close()

# Step 3: clean the CSV with pandas before the kd-tree is built from it.
if not os.path.isfile("stars.kdt"):
    tqdm.pandas(ascii=True, leave=True)
    print("Loading data...")
    data = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    print("Cleaning data...")
    data["type"] = data["type"].fillna("Unknown")
    data.drop_duplicates("id", inplace=True)
    print("Writing CSV...")
    data.to_csv("stars.csv", header=False, index=False)
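
# --- Hypothetical kd-tree build step (not present in the original source) ---
# Step 3 guards on "stars.kdt" but nothing above ever creates that file, so
# the cleanup would otherwise re-run on every invocation. Below is a minimal
# sketch of how the file could be produced, assuming the intended index is a
# pickled scipy.spatial.cKDTree over the star coordinates; the use of scipy
# and pickle here is an assumption, not confirmed by the original script.
import pickle
from scipy.spatial import cKDTree

if not os.path.isfile("stars.kdt"):
    stars = pd.read_csv(
        "stars.csv",
        encoding="utf-8",
        names=["id", "type", "name", "mult", "x", "y", "z"],
    )
    # cKDTree instances are picklable, so the built tree can be cached on disk
    # and reloaded for nearest-neighbour queries along a route.
    tree = cKDTree(stars[["x", "y", "z"]].to_numpy())
    with open("stars.kdt", "wb") as kdt_file:
        pickle.dump(tree, kdt_file)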