import os
import requests
import json
import csv
from datetime import datetime
import subprocess
import time
import re
import hashlib
import chardet

# VirusTotal API keys, tried in order: query_virustotal_and_parse starts with
# the first entry and moves on to the next one whenever a request comes back
# with HTTP 429.
# NOTE(review): the keys are hard-coded in the source; consider loading them
# from the environment or an untracked config file instead.
api_key_limit = ['***',
]

def process_info_to_csv(input_file, output_csv):
    """Convert a text report made of 8-line records into a CSV summary.

    The input file is consumed in consecutive chunks of eight lines. Inside a
    chunk every line containing a colon is read as ``key: value`` (split on
    the first colon, both sides stripped). One CSV row is written per chunk,
    including a trailing chunk shorter than eight lines.

    Output columns:
        FileName  -- last ``/``-separated component of the ``FileName`` field
        Category  -- the ``DetectType`` field
        Class     -- first dotted component of ``DetectName`` with any
                     ``prefix:`` removed
        MalFamily -- third dotted component of ``DetectName`` (empty if absent)
        FileSize  -- the ``FileSize`` field

    Args:
        input_file: path of the text report to read.
        output_csv: path of the CSV file to create or overwrite.
    """
    record_size = 8
    columns = ['FileName', 'Category', 'Class', 'MalFamily', 'FileSize']

    with open(input_file, 'r') as source:
        raw_lines = source.readlines()

    with open(output_csv, 'w', newline='') as target:
        out = csv.writer(target)
        out.writerow(columns)

        for start in range(0, len(raw_lines), record_size):
            fields = {}
            for raw in raw_lines[start:start + record_size]:
                if ':' not in raw:
                    continue
                name, _, content = raw.strip().partition(':')
                fields[name.strip()] = content.strip()

            detect_parts = fields.get('DetectName', '').split('.')
            # str.split always yields at least one element, so index 0 is safe.
            class_name = detect_parts[0].rsplit(':', 1)[-1]
            mal_family = detect_parts[2] if len(detect_parts) >= 3 else ''
            base_name = fields.get('FileName', '').rsplit('/', 1)[-1]

            out.writerow([
                base_name,
                fields.get('DetectType', ''),
                class_name,
                mal_family,
                fields.get('FileSize', ''),
            ])

def calculate_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is read in 8 KiB chunks so large files are not loaded into
    memory at once. If the file cannot be opened or read, a message is
    printed and ``None`` is returned instead of raising.
    """
    chunk_size = 8192
    digest = hashlib.sha256()
    try:
        with open(file_path, 'rb') as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: stream.read(chunk_size), b''):
                digest.update(chunk)
    except IOError:
        print(f"Cannot open {file_path}")
        return None
    return digest.hexdigest()

def query_virustotal_and_parse(input_folder, output_folder, api_key, csv_file_path):
    """Fetch VirusTotal reports for every ``.elf`` file and summarize them as CSV.

    For each file in ``input_folder`` whose name ends with ``.elf`` the
    SHA-256 of its content is computed and the report is requested from the
    VirusTotal v3 ``files`` endpoint, unless ``<sha256>_VT.json`` already
    exists in ``output_folder`` (reports are cached there). For every file
    whose report can be parsed, one row ``FileName, FirstSeen, MalFamily`` is
    written: ``FirstSeen`` is ``first_submission_date`` formatted as
    ``YYYY-MM`` in UTC, ``MalFamily`` is the part after the dot of
    ``suggested_threat_label`` (empty when the label has no dot).

    Args:
        input_folder: directory scanned (non-recursively) for ``.elf`` files.
        output_folder: directory holding the cached JSON reports; created if
            missing.
        api_key: sequence of API keys. The next key is used after the current
            one receives HTTP 429, and the same file is retried with it.
        csv_file_path: CSV file to create or overwrite.

    Returns:
        None. The function returns early, leaving a partial CSV, when
        VirusTotal answers with a status other than 200 or 429.

    Raises:
        SystemExit: with status 0 when every key has been rate-limited or the
            request / report download fails, so that later pipeline steps do
            not run on incomplete data.
    """
    # Local import: the file only imports the ``datetime`` class at top level.
    from datetime import timezone

    api_index = 0

    print(os.getcwd())
    print(f"cvs_file_path: {csv_file_path}")
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['FileName', 'FirstSeen', 'MalFamily'])
        ensure_folder_exists(output_folder)
        for filename in os.listdir(input_folder):
            if not filename.endswith('.elf'):
                continue

            file_hash = calculate_sha256(os.path.join(input_folder, filename))
            if file_hash is None:
                # Unreadable sample (already reported by calculate_sha256);
                # there is no hash to look up, so do not query "None".
                continue
            hash_value = filename.split('.')[0]
            output_json_path = os.path.join(output_folder, f"{file_hash}_VT.json")

            if not os.path.exists(output_json_path):
                vt_url = f"https://www.virustotal.com/api/v3/files/{file_hash}"
                # Retry the SAME file with the next key after a 429 instead of
                # silently dropping it from the results.
                while True:
                    if api_index >= len(api_key):
                        print("\nWe need to wait a little longer")
                        raise SystemExit(0)
                    try:
                        print(f"\rQuerying {hash_value}", end='')
                        headers = {'x-apikey': api_key[api_index]}
                        # A timeout keeps a stalled connection from hanging the run.
                        response = requests.get(vt_url, headers=headers, timeout=60)
                        time.sleep(3)  # stay below the API request rate limit
                        if response.status_code == 200:
                            data = response.json()
                            with open(output_json_path, 'w', encoding='utf-8') as jsonfile:
                                json.dump(data, jsonfile, ensure_ascii=False, indent=4)
                            break
                    except (requests.RequestException, OSError, ValueError) as exc:
                        print(f"\nWe need to wait a little longer ({exc})")
                        raise SystemExit(0)

                    print(f"\nFailed to query VirusTotal for {hash_value}: HTTP {response.status_code}")
                    if response.status_code == 429:
                        api_index += 1
                        print(api_index)
                        continue
                    # NOTE(review): any other status (e.g. 404 for an unknown
                    # sample) aborts the whole run, as in the original code.
                    return

            try:
                with open(output_json_path, 'r', encoding='utf-8') as jsonfile:
                    data = json.load(jsonfile)

                attributes = data.get('data', {}).get('attributes', {})
                first_seen = datetime.fromtimestamp(
                    attributes.get('first_submission_date'), tz=timezone.utc
                ).strftime('%Y-%m')
                pop_threat_classification = attributes.get('popular_threat_classification', {})
                suggested_threat_label = pop_threat_classification.get('suggested_threat_label', '')
                if '.' in suggested_threat_label:
                    # Labels with more than one dot raise ValueError and are skipped.
                    _category, mal_family = suggested_threat_label.split('.')
                else:
                    mal_family = ''
            except (OSError, ValueError, TypeError, AttributeError, OverflowError) as exc:
                # Missing/invalid fields or a corrupt cache file: skip this
                # sample but say so instead of hiding the reason.
                print(f"\nSkipping {filename}: unusable VirusTotal report ({exc})")
                continue

            writer.writerow([filename, first_seen, mal_family])

def ensure_folder_exists(folder_path):
    """Create ``folder_path`` (with any missing parents) unless it already exists.

    Prints whether the folder was created or was already present.
    """
    if os.path.exists(folder_path):
        print(f"Folder '{folder_path}' existed.")
        return
    # exist_ok tolerates the folder appearing between the check and the call.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Folder '{folder_path}' created.")


def _capture_tool_output(command):
    """Run ``command`` and return its combined stdout/stderr as text."""
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors='ignore',  # tool output may echo undecodable bytes
    )
    return completed.stdout


def _clean_readelf_flags(raw_flags):
    """Turn the value of readelf's ``Flags:`` line into one dash-joined token."""
    joined = raw_flags.strip().replace(', ', '-').replace(' ', '')
    # Remove only whole "<unknown>" tokens and stray separators at either end.
    # str.strip("-<unknown>") would treat its argument as a set of characters
    # and also eat legitimate trailing letters of a real flag name.
    return re.sub(r'^(?:-|<unknown>)+|(?:-|<unknown>)+$', '', joined)


def _parse_readelf_header(header_text):
    """Extract ``(file_type, machine, flags)`` from ``readelf -h`` output."""
    file_type = machine = flags = ''
    for line in header_text.splitlines():
        pieces = line.split(':')
        if len(pieces) < 2:
            # No "key: value" shape (e.g. a stray error line); nothing to read.
            continue
        value = pieces[1]
        if 'Type' in line:
            file_type = value.split("(")[0].strip().replace(" ", '-')
        elif 'Machine' in line:
            machine = value.strip().replace(' ', '-').replace("/", "or")
        elif 'Flags' in line:
            flags = _clean_readelf_flags(value)
    return file_type, machine, flags


def get_elf_info(filepath):
    """Collect basic properties of an ELF file using ``file`` and ``readelf``.

    Runs ``file <path>``, ``readelf -p .comment <path>`` and
    ``readelf -h <path>`` and derives the values from substrings of their
    output. Fields that cannot be determined are empty strings, except
    ``stripped`` and ``linking`` which default to ``'stripped'`` and
    ``'dynamic'``.

    Args:
        filepath: path of the file to inspect.

    Returns:
        A list ``[file name, architecture, bit, endianness, stripped, linking,
        compile_time, libraries, file_type, machine, flags]``.
        ``compile_time`` is the token following ``GCC: (GNU)`` in the
        ``.comment`` section; ``libraries`` is the base name (up to the first
        dot) of the first ``/lib...`` or ``/ld...`` path mentioned by ``file``.
    """
    print(f"\rHandling {filepath}", end='')

    file_output = _capture_tool_output(['file', filepath])
    comment_output = _capture_tool_output(['readelf', '-p', '.comment', filepath])
    header_output = _capture_tool_output(['readelf', '-h', filepath])

    # Order matters: 'x86-64' must be tested before its substring 'x86'.
    architecture = next(
        (name for name in ('x86-64', 'x86', 'ARM', 'MIPS', 'PowerPC') if name in file_output),
        '',
    )
    bit = next((width for width in ('64-bit', '32-bit') if width in file_output), '')
    stripped = 'unstripped' if 'not stripped' in file_output else 'stripped'

    endianness = ''
    for marker, label in (('little-endian', 'little'), ('big-endian', 'big'),
                          ('LSB', 'little'), ('MSB', 'big')):
        if marker in file_output:
            endianness = label
            break

    linking = 'static' if 'statically linked' in file_output else 'dynamic'

    compile_time = ''
    gcc_marker = 'GCC: (GNU)'
    if gcc_marker in comment_output:
        after_marker = comment_output[comment_output.index(gcc_marker) + len(gcc_marker):]
        tokens = after_marker.split()
        # Guard against a marker that is the very last thing in the output.
        compile_time = tokens[0] if tokens else ''

    library_paths = re.findall(r'(/lib[^ ]+|/ld[^ ]+)', file_output)
    libraries = library_paths[0].split("/")[-1].split(".")[0] if library_paths else ''

    file_type, machine, flags = _parse_readelf_header(header_output)

    return [os.path.basename(filepath), architecture, bit, endianness, stripped,
            linking, compile_time, libraries, file_type, machine, flags]

def _has_elf_magic(path):
    """Return True when the file at ``path`` starts with the ELF magic bytes."""
    try:
        with open(path, 'rb') as stream:
            return stream.read(4) == b'\x7fELF'
    except OSError:
        # Unreadable files cannot be inspected; treat them as non-ELF.
        return False


def scan_folder(folder_path):
    """Recursively collect ``get_elf_info`` rows for every ELF file in a folder.

    A file counts as ELF when its first four bytes are the ELF magic number.
    Checking the content directly (instead of searching the output of
    ``file <path>`` for "ELF") avoids misclassifying files whose path merely
    contains the letters "ELF", because ``file`` echoes the path.

    Args:
        folder_path: root directory to walk. A missing directory yields an
            empty result.

    Returns:
        A list with one ``get_elf_info`` result per ELF file found.
    """
    elf_files_info = []
    for root, _dirs, names in os.walk(folder_path):
        for name in names:
            filepath = os.path.join(root, name)
            if os.path.isfile(filepath) and _has_elf_magic(filepath):
                elf_files_info.append(get_elf_info(filepath))
    return elf_files_info

def write_csv(data, output_file):
    """Write ELF property rows to ``output_file`` as CSV.

    A fixed header row is written first, followed by every row in ``data``
    in iteration order. An existing file is overwritten.
    """
    column_names = [
        'File Name', 'Architecture', 'Bit', 'Endianness', 'Stripped', 'Linking',
        'Compile Time', 'Libraries', 'FileType', 'Machine', 'Flags',
    ]
    with open(output_file, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(column_names)
        for record in data:
            out.writerow(record)

def _read_keyed_rows(csv_path):
    """Yield ``(key, fields)`` per data row; ``fields`` maps header -> value."""
    with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader, None)
        if headers is None:
            return  # empty file: no header, no rows
        for row in reader:
            if not row:
                continue  # blank line in the CSV
            # The first column is the key; the remaining cells are paired
            # with their header names.
            yield row[0], dict(zip(headers[1:], row[1:]))


def merge_csv_files(file1, file2, file3, output_file, elf_folder):
    """Merge three CSV files keyed by their first column and prune samples.

    Rows are matched on the value of their first column. ``file1`` seeds the
    result; for ``file2`` and ``file3`` new keys are added, missing fields are
    filled in, and when both sides have a field the longer string wins.

    The merged table is written to ``output_file`` with a leading ``Key``
    column followed by all field names in order of first appearance, so the
    column order is the same on every run.

    WARNING: this function is destructive. For every key that names an
    existing file inside ``elf_folder``, the file is deleted when any of
    ``FirstSeen``, ``Category``, ``MalFamily`` or ``FileSize`` is missing or
    empty. The corresponding row is still written to ``output_file``.

    Args:
        file1, file2, file3: input CSV paths (UTF-8, with a header row).
        output_file: path of the merged CSV; replaced if it exists.
        elf_folder: directory containing the sample files named by the keys.
    """
    required_fields = ['FirstSeen', 'Category', 'MalFamily', 'FileSize']
    merged_data = {}

    for key, fields in _read_keyed_rows(file1):
        merged_data[key] = fields

    for path in (file2, file3):
        for key, fields in _read_keyed_rows(path):
            existing = merged_data.setdefault(key, {})
            for name, value in fields.items():
                # Keep the longer (presumably more informative) value.
                if name not in existing or len(value) > len(existing[name]):
                    existing[name] = value

    if os.path.exists(output_file):
        os.remove(output_file)

    # dict.fromkeys de-duplicates while preserving first-seen order, unlike a
    # set whose iteration order changes between interpreter runs.
    all_keys = list(dict.fromkeys(
        name for fields in merged_data.values() for name in fields
    ))

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Key'] + all_keys)
        for key, fields in merged_data.items():
            writer.writerow([key] + [fields.get(name, '') for name in all_keys])

            original_file_path = os.path.join(elf_folder, key)
            # isfile (not exists) so an empty key, which resolves to the
            # folder itself, never reaches os.remove.
            if os.path.isfile(original_file_path):
                if not all(fields.get(field) for field in required_fields):
                    os.remove(original_file_path)

def get_current_script_path():
    """Return the directory containing this source file, with symlinks resolved."""
    resolved_file = os.path.realpath(__file__)
    containing_dir, _file_name = os.path.split(resolved_file)
    return containing_dir

# NOTE: the statements below run whenever this module is loaded, including
# on import, not only when it is executed as a script.
# Work from the sample directory next to this script so that the relative
# paths passed below resolve against it.
os.chdir(os.path.join(get_current_script_path(),"./mal/un_cpu"))

# Query VirusTotal for the .elf files in the current directory; JSON reports
# are stored under ../../result/VT_result and the summary goes to
# ../../result/result_VT.csv.
print("Querying VirusTotal and Parsing Result...")
query_virustotal_and_parse(input_folder='./',output_folder='../../result/VT_result', api_key=api_key_limit, csv_file_path='../../result/result_VT.csv')
print("\nVirusTotal and Parsing Result saving into result_VT.csv")