import * as pdfjsLib from 'pdfjs-dist';
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry';

// Module-level setup, executed once when this module is first imported:
// register the worker entry imported above as pdf.js's global worker source.
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;

/**
 * Extracts the text of a PDF file and splits it into sentence-aligned chunks.
 *
 * Pages are read sequentially, their text items are joined with spaces, and
 * the combined text is whitespace-normalised before being handed to
 * `chunkTextInSentences`.
 *
 * @param {{ arrayBuffer: function(): Promise<ArrayBuffer> }} file - Object
 *   exposing `arrayBuffer()` that resolves to the PDF bytes (e.g. a `File`).
 * @param {number} charSize - Maximum chunk size forwarded to
 *   `chunkTextInSentences`.
 * @returns {Promise<{ success: boolean, file: *, chunks?: string[] }>}
 *   `{ success: true, file, chunks }` on success, `{ success: false, file }`
 *   when anything fails (the error is logged, never rethrown).
 */
export const processPdfFile = async (file, charSize) => {
  let loadingTask;
  try {
    const dataBuffer = await file.arrayBuffer();
    loadingTask = pdfjsLib.getDocument({ data: dataBuffer });
    const pdf = await loadingTask.promise;
    const pageTexts = [];
    /* eslint-disable no-await-in-loop */
    for (let i = 1; i <= pdf.numPages; i += 1) {
      const page = await pdf.getPage(i);
      const content = await page.getTextContent();
      const text = content.items.map((item) => item.str).join(' ');
      if (text) {
        pageTexts.push(text);
      }
    }
    /* eslint-enable no-await-in-loop */
    // Join pages with a space so the last word of one page does not fuse
    // with the first word of the next, then collapse whitespace runs.
    const rawText = pageTexts.join(' ').replace(/\s{2,}|\t|\n/g, ' ');
    const chunks = chunkTextInSentences(rawText, charSize);
    return { success: true, file, chunks };
  } catch (error) {
    console.error('Error processing the PDF file:', error);
    return { success: false, file };
  } finally {
    // Release the parsed document and its worker-side resources; only plain
    // strings are returned, so nothing handed to the caller depends on them.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        // Cleanup is best-effort and must not change the reported result.
        console.warn('Failed to release PDF resources:', cleanupError);
      }
    }
  }
};

/**
 * Packs the sentences of `text` into chunks of at most `maxChunkSize`
 * characters, never breaking inside a sentence.
 *
 * A "sentence" is a run of text up to and including a `.`; any text after
 * the last period is treated as a final, unterminated sentence. The chunks
 * concatenate back to exactly `text` — no periods are added or removed.
 * A single sentence longer than `maxChunkSize` is emitted as its own
 * (oversize) chunk rather than being cut.
 *
 * @param {string} text - Text to split; an empty or missing value yields `[]`.
 * @param {number} maxChunkSize - Maximum number of characters per chunk.
 * @returns {string[]} The chunks, in original order.
 */
function chunkTextInSentences(text, maxChunkSize) {
  if (!text) {
    return [];
  }

  const parts = text.split('.');
  const lastIndex = parts.length - 1;
  const chunks = [];
  let currentChunk = '';

  for (let i = 0; i < parts.length; i += 1) {
    // Every part except the last was followed by a '.' in the source text;
    // restoring it only there avoids inventing a trailing period.
    const sentence = i < lastIndex ? `${parts[i]}.` : parts[i];

    if (sentence.length > 0) {
      if (currentChunk.length + sentence.length <= maxChunkSize) {
        currentChunk += sentence;
      } else {
        if (currentChunk.length > 0) {
          chunks.push(currentChunk);
        }
        currentChunk = sentence;
      }
    }
  }

  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }

  return chunks;
}
