"""
This file reads all the important data files and creates matrices that can be used easily

@author: Stephan Bogs, Chair of Operations Management, RWTH Aachen
"""
import pandas as pd
import geopandas as gpd
import json
import logging

from distances import compute_distance_hav
from visualisation import visualise_demand, visualise_biomass_availability

# Module-level store filled by refresh(): "params" holds the parsed parameter file,
# "nuts_0" holds the filtered NUTS-0 regions read from the shapefile.
data = {}

def refresh():
    """
    (Re)loads the optimisation parameters and the NUTS-0 regions into the module-level ``data`` dict.

    The parameters are parsed from 'data/opt_params/params.json' and stored under data["params"].
    The regions are read from 'data/geodata/Europe_reduced.shp'; only rows with LEVL_CODE == 0 are kept
    and, if the parameters contain a "countries" entry, only rows whose CNTR_CODE is listed there.
    The result gets a fresh 0..n-1 index and a "NUTS0" column that copies NUTS_ID, and is stored
    under data["nuts_0"].
    """
    with open('data/opt_params/params.json') as params_file:
        params = json.load(params_file)

    data["params"] = params

    # Geographic data
    shp = "data/geodata/Europe_reduced.shp"
    all_nuts = gpd.read_file(shp)

    # Build one row mask on the frame that is actually filtered, so the selection does not
    # depend on pandas aligning a mask taken from a differently indexed frame.
    keep_rows = all_nuts["LEVL_CODE"] == 0
    if "countries" in params:
        keep_rows = keep_rows & all_nuts["CNTR_CODE"].isin(params["countries"])

    # Copy only the selected rows so that adding a column does not touch all_nuts
    nuts_0 = all_nuts.loc[keep_rows].copy()
    nuts_0.reset_index(drop=True, inplace=True)
    nuts_0["NUTS0"] = nuts_0["NUTS_ID"]

    # The shapefiles consider oversea territories and far away islands. We do not want
    # to visualise this, so we filter by the file given
    # (currently disabled):
    # keep_polygons = gpd.read_file("data/geodata/europe_core.json")
    # exploded = nuts_0.explode()
    # filtered_polygons = exploded[exploded.within(keep_polygons.geometry.unary_union)]
    # nuts_0 = keep_polygons.dissolve(by='NUTS_ID')
    # nuts_0.to_file('reduced_nuts.shp')
    data["nuts_0"] = nuts_0
    

# Fill `data` as soon as the module is imported; the functions below read from it.
refresh()


def write_list(a_list, filename):
    """
    Serialises the given object as JSON and writes it to a file (the file is overwritten)
    @param a_list: JSON-serialisable object, typically a (nested) list
    @param filename: path of the file to write
    """
    with open(filename, "w") as target:
        target.write(json.dumps(a_list))


def find_closest_years(years_with_data, given_year):
    """
    Finds the closest year before and after the given year in a list of years that have data
    @param years_with_data: non-empty list of years (does not need to be sorted)
    @param given_year: the year you want to interpolate data for
    @return: closest_before, closest_after, delta, offset
             closest_before is the latest data year <= given_year, closest_after the earliest data
             year > given_year, delta the distance between both and offset the distance from
             closest_before to given_year. If given_year lies outside the covered range, the nearest
             data year is returned for both references together with delta=1 and offset=0, so that a
             linear interpolation simply yields the value of that year.
    @raise ValueError: if years_with_data is empty
    """
    if not years_with_data:
        raise ValueError("years_with_data must contain at least one year")

    closest_before = max((year for year in years_with_data if year <= given_year), default=None)
    # Strictly greater, so that delta can never become 0 (the callers divide by it)
    closest_after = min((year for year in years_with_data if year > given_year), default=None)

    if closest_after is None:
        # There is no reasonable assumption to make, we just reference the last possible year
        return closest_before, closest_before, 1, 0
    if closest_before is None:
        # Same situation at the other end: the given year is before the first year with data
        return closest_after, closest_after, 1, 0
    return closest_before, closest_after, closest_after - closest_before, given_year - closest_before


# Geographic data 
def get_regions():
    """
    Lists the NUTS0 ids of all loaded regions and stores them in
    ./data/pre_processed/regions.json
    @return: list with the values of the NUTS0 column of data["nuts_0"]
    """
    region_ids = data["nuts_0"].NUTS0.tolist()
    write_list(region_ids, "./data/pre_processed/regions.json")
    return region_ids


def get_regions_geometry():
    """
    Gives access to the region table loaded by refresh()
    @return: the object stored under data["nuts_0"] (not a copy)
    """
    regions = data["nuts_0"]
    return regions


def get_distances_creator():
    """
    Computes the pairwise distances between the centroids of all regions and stores the
    matrix in ./data/pre_processed/distance-matrix.json
    @return: list of rows; entry [i][j] is the result of compute_distance_hav for the centroids
             of region i and region j (in the row order of data["nuts_0"])
    """
    logging.info("Calculate distance matrix")
    # Centroids are taken in the '+proj=cea' projection and then converted back to the CRS of the
    # region table (presumably so that they are not computed on raw lat/lng values - TODO confirm)
    centroids = data["nuts_0"].to_crs('+proj=cea').centroid.to_crs(data["nuts_0"].crs)

    # (x, y) pairs; x is passed on as "lng" and y as "lat"
    coordinates = list(zip(centroids.x, centroids.y))
    matrix = [
        [
            compute_distance_hav({"lat": lat_from, "lng": lng_from}, {"lat": lat_to, "lng": lng_to})
            for lng_to, lat_to in coordinates
        ]
        for lng_from, lat_from in coordinates
    ]

    write_list(matrix, "./data/pre_processed/distance-matrix.json")
    return matrix


def get_biomass_cost_creator():
    """
    Reads the biomass costs from the sheet "COST - NUTS0 EnergyCom" of the ENSPRESO workbook for the
    configured scenario and stores them in ./data/pre_processed/biomass-costs.json

    The sheet is only used for the years 2030, 2040 and 2050; all other years are interpolated
    linearly between the two reference years given by find_closest_years. The interpolation combines
    the rows of both reference years by position.

    NOTE(review): the year loop includes params["year_end"], whereas the availability and demand
    creators in this file stop one year earlier - confirm which one is intended.
    NOTE(review): the values keep the row order of the sheet and are not merged with data["nuts_0"] -
    confirm that this matches the order of get_regions().

    @return: nested list indexed as [biomass][year][row of the cost sheet]
    """
    logging.info("Reading biomass cost from enspreso db")
    cost_column = "NUTS0 Energy Commodity Cost "
    reference_years = [2030, 2040, 2050]
    params = data["params"]

    all_costs = pd.read_excel("data/case_study/ENSPRESO_BIOMASS.xls", sheet_name="COST - NUTS0 EnergyCom")
    scenario_costs = all_costs.loc[all_costs["Scenario"] == params["scenario"]]

    costs_per_biomass = []
    for biomass_name in params["biomasses"]:
        commodity_costs = scenario_costs.loc[scenario_costs["Energy Commodity"] == biomass_name]
        costs_per_year = []
        for year in range(params["year_start"], params["year_end"] + 1):
            if year not in reference_years:
                earlier, later, delta, offset = find_closest_years(reference_years, year)
                # Fresh 0..n-1 indices so that both reference years are combined row by row
                year_costs = commodity_costs.loc[commodity_costs["Year"] == earlier].reset_index()
                later_costs = commodity_costs.loc[commodity_costs["Year"] == later].reset_index()
                change_per_year = (later_costs[cost_column] - year_costs[cost_column]) / delta
                year_costs['interpol'] = year_costs[cost_column] + change_per_year * offset
            else:
                # Reference year: take the values of the sheet as they are
                year_costs = commodity_costs.loc[commodity_costs["Year"] == year].copy(deep=True)
                year_costs['interpol'] = year_costs[cost_column]

            if "countries" in params:
                year_costs = year_costs.loc[year_costs["NUTS0"].isin(params["countries"])]
            costs_per_year.append(year_costs["interpol"].tolist())
        costs_per_biomass.append(costs_per_year)

    write_list(costs_per_biomass, "./data/pre_processed/biomass-costs.json")
    return costs_per_biomass


def get_heating_values_creator():
    """
    Looks up the heating value of every configured biomass in "Sheet1" of the heating value workbook
    and stores the list in ./data/pre_processed/biomass-heating-values.json
    @return: list with one entry per biomass in data["params"]["biomasses"]: the "Heating Value" of
             the first matching row divided by 1000000 (presumably a unit conversion - TODO confirm)
    """
    logging.info("Reading heating values")
    table = pd.read_excel("data/case_study/Heating-Values.xls", sheet_name="Sheet1")

    scaled_values = [
        table.loc[table["Biomass"] == biomass_name, "Heating Value"].iat[0] / 1000000
        for biomass_name in data["params"]["biomasses"]
    ]
    write_list(scaled_values, "./data/pre_processed/biomass-heating-values.json")
    return scaled_values


def get_biomass_availability_creator():
    """
    Reads the biomass availability from the sheet "ENER - NUTS0 EnergyCom" of the ENSPRESO workbook
    for the configured scenario and stores it in ./data/pre_processed/availability.json

    The sheet is only used for the years 2030, 2040 and 2050; all other years are interpolated
    linearly between the two reference years given by find_closest_years. The interpolation combines
    the rows of both reference years by position. Regions of data["nuts_0"] without an entry get an
    availability of 0.0. For the reference years the result is also passed to
    visualise_biomass_availability.

    NOTE(review): the year loop stops before params["year_end"], whereas get_biomass_cost_creator
    includes that year - confirm which one is intended.

    @return: one-element list that wraps the nested list indexed as [biomass][year][region]; the
             regions follow the row order of data["nuts_0"]. (The file written to disk contains the
             nested list without the extra wrapping.)
    """
    logging.info("Reading biomass availability from enspreso db")
    # Biomass data
    enspreso_db = "data/case_study/ENSPRESO_BIOMASS.xls"
    potential_years = [2030, 2040, 2050]
    biomass_nuts_0 = pd.read_excel(enspreso_db, sheet_name="ENER - NUTS0 EnergyCom")
    logging.info("Read data successfully")

    params = data["params"]

    # The country and scenario filters do not depend on the feedstock, so apply them once
    if "countries" in params:
        biomass_nuts_0 = biomass_nuts_0.loc[biomass_nuts_0["NUTS0"].isin(params["countries"])]
    biomass_nuts_0 = biomass_nuts_0.loc[biomass_nuts_0["Scenario"] == params["scenario"]]

    biomass_to_year_to_region = []
    for biomass in params["biomasses"]:
        year_to_region = []

        # Filter by used feedstock
        biomass_scenario = biomass_nuts_0.loc[biomass_nuts_0["Energy Commodity"] == biomass]

        for year in range(params["year_start"], params["year_end"]):

            if year in potential_years:
                biomass_scenario_last = biomass_scenario.loc[biomass_scenario["Year"] == year].copy(deep=True)
                biomass_scenario_last['interpol'] = biomass_scenario_last['Value']
            else:
                last_reference, next_reference, delta, offset = find_closest_years(potential_years, year)

                # Fresh 0..n-1 indices so that both reference years are combined row by row
                biomass_scenario_last = biomass_scenario.loc[biomass_scenario["Year"] == last_reference].reset_index()
                biomass_scenario_next = biomass_scenario.loc[biomass_scenario["Year"] == next_reference].reset_index()
                biomass_scenario_last['interpol'] = biomass_scenario_last["Value"] + \
                                                ((biomass_scenario_next["Value"] - biomass_scenario_last["Value"])
                                                 / delta) * offset

            # At this point we have filtered too much and might have lost regions without biomass, we have to
            # get them back and fill na values with 0.0.
            # A left join keeps exactly the regions of data["nuts_0"] in their original order. An outer join
            # would sort the rows by NUTS0 and add rows for regions that are not part of data["nuts_0"], so
            # the result could differ from get_regions() in order and length.
            biomass_year_scenario_complete = pd.merge(data["nuts_0"], biomass_scenario_last, on="NUTS0", how="left")
            biomass_year_scenario_complete.fillna(0.0, inplace=True)
            year_to_region.append(biomass_year_scenario_complete['interpol'].tolist())
            if year in potential_years:
                visualise_biomass_availability(biomass_year_scenario_complete, year, biomass)

        biomass_to_year_to_region.append(year_to_region)

    write_list(biomass_to_year_to_region, "./data/pre_processed/availability.json")
    return [biomass_to_year_to_region]


def get_demand_creator(factor=1):
    """
    Derives the biofuel demand per year and region from the EU reference scenario workbook and
    stores it in ./data/pre_processed/demands.json

    For every country in data["params"]["countries"] the sheet "<country>-B" is read. The demand is
    the transport energy demand (converted from ktoe to PJ) multiplied by the biofuel share. The
    workbook is used directly for 2030, 2035, 2040, 2045 and 2050; for all other years both
    quantities are interpolated linearly between the reference years given by find_closest_years.
    Every year is also passed to visualise_demand.

    NOTE(review): the year loop stops before params["year_end"].

    @param factor: currently not used by this function
    @return: nested list indexed as [year][region], regions in the row order of data["nuts_0"]
    """
    logging.info("Reading biofuel demand from reference scenario")
    # 1 Tonne Of Oil Equivalent (toe) = 4.1868E-5 Petajoules (PJ)
    # 1 ktoe = 4.1868E-2 PJ
    ktoe_to_pj = 4.1868E-2
    demand_row = "Energy demand in transport (ktoe) (4)"
    share_row = "Biofuels in total fuels (excl.hydrogen and electricity) (%)"
    workbook = "data/case_study/EU Reference Scenario 2016 country results.xls"
    reference_years = [2030, 2035, 2040, 2045, 2050]

    sheet_by_country = {
        country: pd.read_excel(workbook, sheet_name=("%s-B" % country),
                               skiprows=1, index_col=0, na_values=['NA'])
        for country in data["params"]["countries"]
    }

    demand_per_year = []
    for year in range(data["params"]["year_start"], data["params"]["year_end"]):
        demands = []

        for country in data["params"]["countries"]:
            sheet = sheet_by_country[country]
            if year in reference_years:
                # Columns are years, rows are the indicators of the sheet
                energy = sheet[year][demand_row] * ktoe_to_pj
                share = sheet[year][share_row] * 0.01
            else:
                earlier, later, delta, offset = find_closest_years(reference_years, year)
                energy_earlier = sheet[earlier][demand_row] * ktoe_to_pj
                energy_later = sheet[later][demand_row] * ktoe_to_pj
                energy = (energy_earlier + ((energy_later - energy_earlier) / delta) * offset)
                share_earlier = sheet[earlier][share_row] * 0.01
                share_later = sheet[later][share_row] * 0.01
                share = (share_earlier + ((share_later - share_earlier) / delta) * offset)
            demands.append(energy * share)

        # Attach the demands to the regions so that the values follow the region order
        country_demands = pd.DataFrame({"NUTS0": data["params"]["countries"], "demand": demands})
        regional_demands = data["nuts_0"].merge(country_demands, on="NUTS0")

        demand_per_year.append(regional_demands['demand'].tolist())

        visualise_demand(regional_demands, year)

    write_list(demand_per_year, "./data/pre_processed/demands.json")
    return demand_per_year


# Read list to memory
def read_list(file_name):
    """
    Reads a JSON file and returns its parsed content
    @param file_name: path of the JSON file
    @return: the decoded JSON document (a list for the files written by write_list)
    """
    with open(file_name, 'rb') as source:
        raw_content = source.read()
    return json.loads(raw_content)


def get_distances():
    """
    Loads the pre-processed distance matrix
    @return: content of ./data/pre_processed/distance-matrix.json
    """
    print("Read distance matrix")
    matrix_file = "./data/pre_processed/distance-matrix.json"
    return read_list(matrix_file)


def get_biomass_cost():
    """
    Loads the pre-processed biomass costs of six scenarios
    @return: list with the contents of ./data/pre_processed/biomass-costs-0.json to
             biomass-costs-5.json, in that order
    """
    print("Reading biomass cost from enspreso db")
    return [
        read_list(f"./data/pre_processed/biomass-costs-{scenario_index}.json")
        for scenario_index in range(6)
    ]


def get_heating_values():
    """
    Loads the pre-processed heating values
    @return: content of ./data/pre_processed/biomass-heating-values.json
    """
    print("Reading heating values")
    heating_values_file = "./data/pre_processed/biomass-heating-values.json"
    return read_list(heating_values_file)


def get_biomass_availability():
    """
    Loads the pre-processed biomass availability of six scenarios
    @return: list with the contents of ./data/pre_processed/availability-0.json to
             availability-5.json, in that order
    """
    print("Reading biomass availability from enspreso db")
    return [
        read_list(f"./data/pre_processed/availability-{scenario_index}.json")
        for scenario_index in range(6)
    ]


def get_demand():
    """
    Loads the pre-processed biofuel demand of two scenarios
    @return: list with the contents of ./data/pre_processed/demands-0.json and demands-1.json,
             in that order
    """
    print("Reading biofuel demand from reference scenario")
    return [
        read_list(f"./data/pre_processed/demands-{scenario_index}.json")
        for scenario_index in range(2)
    ]
