import { useEffect } from "react";

export const useFileReader = () => {
    // Lowercase extensions, without the leading dot, that isSupportedFileType accepts.
    const supportedFileTypes = ['pdf', 'docx', 'txt'];

    // Injects the pdf.js library and worker module scripts from the CDN on mount.
    // A URL that already has a <script> element in the document is skipped, so
    // repeated mounts (several components using this hook, or an effect that runs
    // more than once) do not pile up duplicate elements.
    // No cleanup is registered: the injected elements stay in the document.
    useEffect(() => {
        const scriptUrls = [
            'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.0.269/pdf.min.mjs',
            'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.0.269/pdf.worker.min.mjs',
        ];

        for (const src of scriptUrls) {
            // The selector matches the src attribute exactly as it is assigned below.
            if (document.querySelector(`script[src="${src}"]`) !== null) {
                continue;
            }

            const script = document.createElement('script');
            script.src = src;
            script.async = true;
            script.type = 'module';
            document.body.appendChild(script);
        }
    }, []);

    // Reads a single file and resolves with its extracted text.
    // Rejects with Error('Unsupported file type') when the file's extension is
    // not accepted by isSupportedFileType. Any failure while reading the bytes or
    // extracting the content is rethrown as an Error whose message names the file
    // and carries the underlying reason.
    const readFile = async (file: File): Promise<string> => {
        const extension = getFileExtension(file);

        if (!isSupportedFileType(extension)) {
            throw new Error('Unsupported file type');
        }

        try {
            const buffer = await file.arrayBuffer();
            return await extractContent(buffer, extension);
        } catch (error: unknown) {
            // Thrown values are not guaranteed to be Error instances; fall back to
            // their string form so the message never ends in "undefined".
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Error processing file ${file.name}: ${reason}`);
        }
    };

    // Returns the lowercased text after the last '.' in the file's name.
    // Returns '' when the name contains no dot (so a file literally named "txt"
    // is not mistaken for a .txt file) or when the name ends with a dot.
    const getFileExtension = (file: File): string => {
        const dotIndex = file.name.lastIndexOf('.');
        if (dotIndex === -1) {
            return '';
        }
        return file.name.slice(dotIndex + 1).toLowerCase();
    };

    // Tells whether the given extension is one of the entries in supportedFileTypes.
    // The comparison is exact, so callers are expected to pass a lowercased value.
    const isSupportedFileType = (extension: string): boolean => {
        const isKnown = supportedFileTypes.some((candidate) => candidate === extension);
        return isKnown;
    };

    // Routes the raw bytes to the extractor that matches the extension:
    // 'pdf' -> extractPdfContent, 'docx' -> extractDocxContent, 'txt' -> extractTxtContent.
    // Any other extension rejects with Error('Unsupported file type').
    const extractContent = async (buffer: ArrayBuffer, extension: string): Promise<string> => {
        if (extension === 'pdf') {
            return await extractPdfContent(buffer);
        }

        if (extension === 'docx') {
            return await extractDocxContent(buffer);
        }

        if (extension === 'txt') {
            return extractTxtContent(buffer);
        }

        throw new Error('Unsupported file type');
    };

    // Pulls the raw text out of a DOCX document.
    // The mammoth module is imported on demand, at call time, and the text it
    // returns is passed through cleanText before being handed back.
    const extractDocxContent = async (arrayBuffer: ArrayBuffer): Promise<string> => {
        const docxParser = await import('mammoth');
        const { value: rawText } = await docxParser.extractRawText({ arrayBuffer });

        return cleanText(rawText);
    };

    // Extracts the text of every page of a PDF, in page order, using the pdf.js
    // build exposed on window.pdfjsLib.
    // Text items within a page are joined with a single space, and pages are
    // joined with a single space as well, so the last word of one page is not
    // glued to the first word of the next. The result is passed through cleanText.
    // Throws if window.pdfjsLib is not available when this is called.
    const extractPdfContent = async (arrayBuffer: ArrayBuffer): Promise<string> => {
        const pdfjs = (window as any).pdfjsLib;
        if (!pdfjs) {
            // Fail with an explicit reason instead of a TypeError on the property
            // access below.
            throw new Error('PDF library is not loaded yet');
        }

        // NOTE(review): workerSrc is assigned whatever window.pdfjsWorker holds;
        // pdf.js documents workerSrc as a URL string — confirm this is intended.
        pdfjs.GlobalWorkerOptions.workerSrc = (window as any).pdfjsWorker;

        const pdf = await pdfjs.getDocument(arrayBuffer).promise;
        const pageTexts: string[] = [];

        // pdf.js page numbers start at 1.
        for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            const page = await pdf.getPage(pageNumber);
            const textContent = await page.getTextContent();
            pageTexts.push(textContent.items.map((item: { str: string }) => item.str).join(' '));
        }

        return cleanText(pageTexts.join(' '));
    };

    // Turns the bytes of a plain-text file into a string.
    // Decoding uses a TextDecoder created with no arguments (its default
    // encoding), and the decoded text is passed through cleanText.
    const extractTxtContent = (arrayBuffer: ArrayBuffer): string => {
        const decoder = new TextDecoder();
        const decoded = decoder.decode(arrayBuffer);

        return cleanText(decoded);
    };

    // Normalizes extracted text onto a single line: every line break
    // (CRLF, LF or CR) becomes one space, then leading and trailing
    // whitespace is trimmed. Consecutive line breaks yield consecutive spaces.
    const cleanText = (text: string): string => {
        const lines = text.split(/\r\n|\n|\r/);
        const singleLine = lines.join(' ');

        return singleLine.trim();
    };

    return { readFile };
};
