import numpy as np
from pathlib import Path
from typing import List
import pandas as pd

def list_subdirectories(root: str, *, include_root: bool = False) -> List[str]:
    """
    Collect the directories found anywhere below *root*.

    Parameters
    ----------
    root : str
        Directory whose tree is searched. It is resolved to an absolute
        path before the search starts.
    include_root : bool, optional (default=False)
        When True, the resolved *root* is placed first in the result.

    Returns
    -------
    List[str]
        Directory paths as strings, in the order ``Path.rglob`` yields
        them. Regular files are never part of the result.

    Raises
    ------
    NotADirectoryError
        If *root* does not point to a directory.
    """
    base = Path(root).resolve()

    if not base.is_dir():
        raise NotADirectoryError(f"'{root}' is not a valid directory")

    # Seed the result with the root (when requested) so it ends up first.
    found = [str(base)] if include_root else []

    # Recursive walk of the tree; anything that is not a directory is skipped.
    for entry in base.rglob('*'):
        if entry.is_dir():
            found.append(str(entry))

    return found

def normalize_point(complexity: np.int64,
                    c_min: np.float64 = np.float64(1.0),
                    c_max: np.float64 = np.float64(150.0)) -> np.float64:
    """
    Map a complexity value onto a score where lower complexity is better.

    The value is min-max scaled with respect to ``[c_min, c_max]`` and then
    inverted, so ``complexity == c_min`` gives 1.0 and ``complexity == c_max``
    gives 0.0. Values outside ``[c_min, c_max]`` are not clipped and therefore
    produce scores outside ``[0, 1]``.

    Parameters
    ----------
    complexity : np.int64
        Complexity to convert (the objective that is to be minimised).
    c_min : np.float64, optional (default=1.0)
        Complexity that maps to a score of 1.0.
    c_max : np.float64, optional (default=150.0)
        Complexity that maps to a score of 0.0.

    Returns
    -------
    np.float64
        ``1 - (complexity - c_min) / (c_max - c_min)``.

    Raises
    ------
    ValueError
        If ``c_min`` equals ``c_max``; the scaling is undefined in that case
        and would otherwise silently yield ``nan`` or ``inf``.
    """
    lower = np.float64(c_min)
    span = np.float64(c_max) - lower

    if span == 0:
        raise ValueError(
            f"c_min and c_max must differ (got c_min={c_min}, c_max={c_max})"
        )

    # Invert the min-max scaling so that low complexity becomes a high score.
    return np.float64(1.0) - np.float64((np.float64(complexity) - lower) / span)

def hypervolume_2d(points) -> np.float64:
    """
    Compute the hypervolume (area) dominated by a set of 2-D points that
    *maximise* both axes, using the reference point (0, 0).

    The result is the area of the union of the rectangles spanned by (0, 0)
    and each point. The input may be given in any order and may contain
    dominated or duplicated points; they do not change the result. Points
    with a non-positive coordinate span no area above the reference point
    and are ignored.

    Parameters
    ----------
    points : iterable of (x, y) pairs
        Objective values. They are expected to be on a [0,1] x [0,1] scale.

    Returns
    -------
    np.float64
        The dominated area; 0.0 for an empty input.
    """
    hv = 0.0
    best_y = 0.0

    # Sweep from the largest x to the smallest. Whenever a point raises the
    # best y seen so far, it adds a horizontal slab of width x and height
    # (y - best_y). A point that does not raise best_y is dominated by a
    # point already visited (which has x at least as large) and adds nothing.
    for x, y in sorted(points, key=lambda point: point[0], reverse=True):
        if x <= 0:
            # A rectangle with non-positive width must not subtract area.
            continue
        if y > best_y:
            hv += x * (y - best_y)
            best_y = y

    return np.float64(hv)

def go_through_dir(dir: str, exp_lab: str) -> pd.DataFrame:
    """
    Compute one hypervolume per sub-directory of *dir*.

    Every directory returned by ``list_subdirectories(dir)`` is treated as a
    single run. Its result CSV is loaded, each row is turned into a point
    ``(r2, normalize_point(feature_count))`` and the hypervolume of those
    points is computed with ``hypervolume_2d``.

    Parameters
    ----------
    dir : str
        Directory whose sub-directories hold the per-run results.
        (The name shadows the builtin ``dir``; it is kept so that existing
        keyword callers continue to work.)
    exp_lab : str
        Experiment label. ``'starbase'`` reads ``final_pareto_front.csv``
        with the columns ``R2`` / ``Feature Count``; any other label reads
        ``pipeline_summary.csv`` with the columns ``Pipeline_R2`` /
        ``Pipeline_Feature_Count``.

    Returns
    -------
    pd.DataFrame
        One row per sub-directory with the columns ``Hypervolume``,
        ``Count`` (number of rows in the CSV), ``Experiment`` (*exp_lab*)
        and ``Seed`` (the integer after the last underscore in the
        sub-directory name).
    """
    subdirs = list_subdirectories(dir, include_root=False)
    print(f"Found {len(subdirs)} subdirectories")

    # The file and column names depend only on the experiment label, so they
    # are resolved once instead of on every loop iteration.
    if exp_lab == 'starbase':
        csv_name = 'final_pareto_front.csv'
        r2_col = 'R2'
        complexity_col = 'Feature Count'
    else:
        csv_name = 'pipeline_summary.csv'
        r2_col = 'Pipeline_R2'
        complexity_col = 'Pipeline_Feature_Count'

    hv_list = []
    count_list = []
    seed_list = []

    for subdir in subdirs:
        run_dir = Path(subdir)
        df = pd.read_csv(run_dir / csv_name)

        # Build (r2, normalised complexity) points. The loop variables use
        # their own names so they do not overwrite the column names above.
        results = [
            (score, normalize_point(feature_count))
            for score, feature_count in zip(df[r2_col].values,
                                            df[complexity_col].values)
        ]

        # Order the points by their normalised complexity score.
        results.sort(key=lambda point: point[1])

        # Save everything in the lists.
        hv_list.append(hypervolume_2d(results))
        count_list.append(len(results))
        # The seed is the trailing "_<int>" part of the directory name.
        seed_list.append(int(run_dir.name.split('_')[-1]))

    return pd.DataFrame({
        'Hypervolume': hv_list,
        'Count': count_list,
        'Experiment': [exp_lab] * len(hv_list),
        'Seed': seed_list,
    })

if __name__ == "__main__":

    # Each results directory paired with the experiment label used for it.
    experiments = [
        ('/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/StarBASE_Pareto_Print/', 'starbase'),
        ('/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/Naive/', 'naive'),
        ('/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/Random/', 'random'),
    ]

    # Process the experiments one after another, reporting progress for each.
    frames = []
    for results_dir, label in experiments:
        frames.append(go_through_dir(results_dir, label))
        print(f"Finished processing '{results_dir}'")

    # Stack the per-experiment tables into one table with a fresh index
    # and write it to a CSV file without the index column.
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv('pareto_front_data.csv', index=False)