from pathlib import Path
from typing import List
import pandas as pd

# Number of 'Bin_Count' rows each input CSV is expected to contain;
# go_through_dir checks the loaded column against this value and emits
# this many labelled rows per experiment.
REPLICATES = 40

def list_subdirectories(root: str, *, include_root: bool = False) -> List[str]:
    """
    Collect the resolved paths of all directories nested under *root*.

    Parameters
    ----------
    root : str
        Directory whose tree is searched recursively.
    include_root : bool, optional (default=False)
        When True, the resolved *root* itself is placed first in the result.

    Returns
    -------
    List[str]
        Directory paths as strings; regular files are never included.

    Raises
    ------
    NotADirectoryError
        If *root* does not resolve to an existing directory.
    """
    base = Path(root).resolve()

    # Guard clause: refuse anything that is not an existing directory.
    if not base.is_dir():
        raise NotADirectoryError(f"'{root}' is not a valid directory")

    # Seed the result with the root when requested so it always comes first.
    found: List[str] = [str(base)] if include_root else []

    # Recursive glob over the whole tree, keeping directory entries only.
    for entry in base.rglob('*'):
        if entry.is_dir():
            found.append(str(entry))

    return found

def go_through_dir(dir: str, exp_lab: str) -> pd.DataFrame:
    """
    Load the bin counts of one experiment and tag each row with its label.

    Despite the name, no directory is traversed: *dir* is handed directly
    to ``pandas.read_csv``.

    Parameters
    ----------
    dir : str
        Path of the CSV file to read. It must contain a 'Bin_Count' column
        holding exactly ``REPLICATES`` values.
    exp_lab : str
        Experiment label repeated on every row of the result.

    Returns
    -------
    pd.DataFrame
        A frame with two columns: 'Count' (the 'Bin_Count' values) and
        'Experiment' (*exp_lab* repeated ``REPLICATES`` times).

    Raises
    ------
    ValueError
        If the number of 'Bin_Count' values differs from ``REPLICATES``.
    """
    print(f"Looking at: {dir}")

    # Read the per-replicate results for this experiment
    df = pd.read_csv(dir)
    counts = df['Bin_Count'].values

    # Explicit check instead of ``assert`` so it still runs under ``python -O``
    if len(counts) != REPLICATES:
        raise ValueError(
            f"'{dir}' has {len(counts)} 'Bin_Count' rows, "
            f"expected {REPLICATES}"
        )

    # Create a DataFrame with the bin counts and their experiment label
    return pd.DataFrame({
        'Count': counts,
        'Experiment': [exp_lab] * REPLICATES,
    })

if __name__ == "__main__":

    # Each entry pairs an experiment label with the CSV holding its bin counts.
    sources = [
        ('starbase', '/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/StarBASE_Pareto_Print/bin_counts.csv'),
        ('naive', '/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/Naive/bin_counts.csv'),
        ('random', '/Anonymous/StarBASE-GP/Results/For_Manuscript/PCA_Adjusted/Random/bin_counts.csv'),
    ]

    # Load the experiments one at a time, reporting progress after each file.
    frames = []
    for label, csv_path in sources:
        frames.append(go_through_dir(csv_path, label))
        print(f"Finished processing '{csv_path}'")

    # Stack the per-experiment frames with a fresh running index
    combined = pd.concat(frames, ignore_index=True)

    # Write the combined table to the working directory, without the index
    combined.to_csv('bin_diversity_data.csv', index=False)