Conversation

░▒▓█ 𝕘Rαᐯ𝕀т𝐀ѕ █▓▒░

1
0
2

░▒▓█ 𝕘Rαᐯ𝕀т𝐀ѕ █▓▒░

Attachments such as images are stored as base64 inside the .eml files and have to be extracted; you can use this Python script to do that:

import os  
import email  
from email import policy  
import mimetypes  
import sys  


def _is_inside(path, parent_abs):
    """Return True if *path* is *parent_abs* itself or lies beneath it."""
    path_abs = os.path.normcase(os.path.abspath(path))
    parent_abs = os.path.normcase(parent_abs)
    # Compare against "parent/" (with the trailing separator) so a sibling
    # such as "parent_old" is not mistaken for a child of "parent".
    return path_abs == parent_abs or path_abs.startswith(
        os.path.join(parent_abs, "")
    )


def _safe_filename(raw_name):
    """Reduce an attachment name taken from a message to a bare file name.

    The name comes from the e-mail itself and is therefore untrusted: any
    directory components ("../", "C:\\...") and control characters are
    dropped so the result can never point outside the target folder.
    Returns "" when nothing usable remains.
    """
    if not raw_name:
        return ""
    # Treat backslashes as separators too, so Windows-style paths are
    # stripped even when running on POSIX.
    name = os.path.basename(str(raw_name).replace("\\", "/"))
    name = "".join(ch for ch in name if ch >= " ").strip()
    return "" if name in ("", ".", "..") else name


def _unique_path(dest_path):
    """Return *dest_path*, or a "_<n>"-suffixed variant that does not exist."""
    base, extension = os.path.splitext(dest_path)
    counter = 1
    while os.path.exists(dest_path):
        dest_path = f"{base}_{counter}{extension}"
        counter += 1
    return dest_path


def _save_part(part, output_root, fallback_stem):
    """Write one message part to disk if it qualifies as an attachment.

    A part qualifies when it is base64-encoded or its main type is image or
    video. Returns True when a file was written, False when the part was
    skipped (multipart container, not a candidate, or empty payload).
    """
    maintype = part.get_content_maintype()
    if maintype == "multipart":
        return False

    encoding = str(part.get("Content-Transfer-Encoding", "")).strip().lower()
    if encoding != "base64" and maintype not in ("image", "video"):
        return False

    payload = part.get_payload(decode=True)
    if not payload:
        return False

    filename = _safe_filename(part.get_filename())
    if not filename:
        ext = mimetypes.guess_extension(part.get_content_type()) or ".bin"
        filename = f"{fallback_stem}{ext}"

    target_dir = {"image": "images", "video": "videos"}.get(maintype, "documents")
    dest_path = _unique_path(os.path.join(output_root, target_dir, filename))
    with open(dest_path, "wb") as out_f:
        out_f.write(payload)
    return True


def extract_attachments(folder_path, output_root):
    """Extract attachments from every .eml file found under *folder_path*.

    The folder is searched recursively (the output tree itself is skipped).
    Each base64-encoded, image or video part is decoded and written to
    ``images/``, ``videos/`` or ``documents/`` below *output_root*, chosen by
    the part's main content type. Existing files are never overwritten; a
    numeric suffix is appended instead. Parts without a usable file name get
    a generated ``extracted_<message#>_<part#>`` name.

    Failures on a single message or part are reported on stderr and do not
    stop the run.

    Args:
        folder_path: Directory to search for .eml files.
        output_root: Directory that receives the extracted files; created
            if missing.

    Returns:
        The total number of files written.
    """
    output_abs = os.path.abspath(output_root)
    for sd in ("images", "videos", "documents"):
        os.makedirs(os.path.join(output_root, sd), exist_ok=True)

    eml_files = []
    for root, dirs, files in os.walk(folder_path):
        if _is_inside(root, output_abs):
            # Nothing below the output tree is input; do not descend.
            dirs[:] = []
            continue
        eml_files.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(".eml")
        )

    total_files = len(eml_files)
    print(f"Found {total_files} .eml files to process.")

    total_extracted = 0
    for i, file_path in enumerate(eml_files, 1):
        rel_path = os.path.relpath(file_path, folder_path)
        print(f"[{i}/{total_files}] Processing: {rel_path}", end="\r", flush=True)

        extracted_from_file = 0
        try:
            with open(file_path, "rb") as f:
                msg = email.message_from_binary_file(f, policy=policy.default)

            for part_no, part in enumerate(msg.walk(), 1):
                try:
                    saved = _save_part(
                        part, output_root, f"extracted_{i}_{part_no}"
                    )
                except Exception as exc:
                    # Best effort: one bad part must not lose the others.
                    print(
                        f"Warning: could not extract part {part_no} "
                        f"of {rel_path}: {exc}",
                        file=sys.stderr,
                    )
                    continue
                if saved:
                    extracted_from_file += 1
                    total_extracted += 1
        except Exception as exc:
            # Best effort: an unreadable or malformed message is reported
            # and skipped so the remaining files are still processed.
            print(f"Warning: could not process {rel_path}: {exc}", file=sys.stderr)

        if extracted_from_file > 0:
            print(
                f"[{i}/{total_files}] Extracted {extracted_from_file} from: {rel_path}"
            )

    print("\n" + "=" * 50)
    return total_extracted


if __name__ == "__main__":
    # Script entry point: scan the current working directory and collect
    # everything that was extracted in a sub-folder next to the messages.
    SOURCE_DIR, OUTPUT_DIR = ".", "./extracted_attachments"

    extracted_count = extract_attachments(
        folder_path=SOURCE_DIR,
        output_root=OUTPUT_DIR,
    )

    # Summarise the run; the absolute path makes the result easy to locate.
    print(f"Extraction complete. Total files extracted: {extracted_count}")
    print(f"Files saved to: {os.path.abspath(OUTPUT_DIR)}")

0
0
0