package stirling.software.SPDF.controller.api.misc;

import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.batik.apps.rasterizer.DestinationType;
import org.apache.batik.svggen.font.SVGFont;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.PDLayoutAttributeObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.WebResponseUtils;

/**
 * REST controller exposing the OCR endpoint under {@code /api/v1/misc}.
 *
 * <p>OCR is performed by running the external {@code ocrmypdf} command, and optional image
 * removal by running {@code gs}; both are launched through {@link ProcessExecutor}. All
 * intermediate data is exchanged with those tools via temporary files, which are removed
 * before the request handler returns, whether it succeeds or fails.
 */
@RequestMapping({"/api/v1/misc"})
@RestController
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class OCRController {
    private static final Logger logger = LoggerFactory.getLogger(OCRController.class);

    /** Directory scanned for installed Tesseract language models. */
    private static final String TESSDATA_DIR = "/usr/share/tessdata";

    /** File-name suffix of a Tesseract language model. */
    private static final String TRAINED_DATA_SUFFIX = ".traineddata";

    /**
     * Lists the Tesseract languages found in {@value #TESSDATA_DIR}.
     *
     * <p>A language is any file whose name ends with {@code .traineddata}; the {@code osd}
     * model is excluded (case-insensitively).
     *
     * @return the language codes, or an empty list when the directory cannot be listed
     */
    public List<String> getAvailableTesseractLanguages() {
        File[] modelFiles = new File(TESSDATA_DIR).listFiles();
        if (modelFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            return Collections.emptyList();
        }
        return Arrays.stream(modelFiles)
                .map(File::getName)
                .filter(name -> name.endsWith(TRAINED_DATA_SUFFIX))
                .map(name -> name.replace(TRAINED_DATA_SUFFIX, ""))
                .filter(language -> !language.equalsIgnoreCase("osd"))
                .collect(Collectors.toList());
    }

    /**
     * Runs OCR over the uploaded PDF and returns the result.
     *
     * <p>When the sidecar option is off the response is the OCR'd PDF named
     * {@code <base>_OCR.pdf}. When it is on, the response is a zip named
     * {@code <base>_OCR.zip} containing that PDF plus the sidecar text as
     * {@code <base>_OCR.txt}.
     *
     * @param processPdfWithOcrRequest the uploaded file and the OCR options
     * @return the OCR'd PDF, or a zip of the PDF and its sidecar text
     * @throws IOException if no language is selected, the render type is neither
     *     {@code hocr} nor {@code sandwich}, none of the selected languages is installed,
     *     or a file operation fails
     * @throws InterruptedException declared for the external command invocations
     */
    @PostMapping(consumes = {"multipart/form-data"}, value = {"/ocr-pdf"})
    @Operation(summary = "Process a PDF file with OCR", description = "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
    public ResponseEntity<byte[]> processPdfWithOCR(@ModelAttribute ProcessPdfWithOcrRequest processPdfWithOcrRequest) throws IOException, InterruptedException {
        MultipartFile fileInput = processPdfWithOcrRequest.getFileInput();
        List<String> languages = processPdfWithOcrRequest.getLanguages();
        boolean sidecar = processPdfWithOcrRequest.isSidecar();
        boolean removeImagesAfter = processPdfWithOcrRequest.isRemoveImagesAfter();
        String ocrRenderType = processPdfWithOcrRequest.getOcrRenderType();

        if (languages == null || languages.isEmpty()) {
            throw new IOException("Please select at least one language.");
        }
        if (!"hocr".equals(ocrRenderType) && !"sandwich".equals(ocrRenderType)) {
            throw new IOException("ocrRenderType wrong");
        }

        // Only languages that are actually installed are forwarded to the OCR tool.
        List<String> installedLanguages = getAvailableTesseractLanguages();
        List<String> validLanguages = languages.stream().filter(installedLanguages::contains).toList();
        if (validLanguages.isEmpty()) {
            throw new IOException("None of the selected languages are valid.");
        }

        // Every temp file is tracked here so the finally block can remove whatever was created,
        // regardless of where processing stopped.
        Path inputFile = null;
        Path ocrOutputFile = null;
        Path sidecarFile = null;
        Path noImagesFile = null;
        Path zipFile = null;
        try {
            inputFile = Files.createTempFile("input_", ".pdf");
            try (var upload = fileInput.getInputStream()) {
                Files.copy(upload, inputFile, StandardCopyOption.REPLACE_EXISTING);
            }
            ocrOutputFile = Files.createTempFile("output_", ".pdf");
            if (sidecar) {
                sidecarFile = Files.createTempFile("sidecar", ".txt");
            }

            List<String> command = buildOcrCommand(processPdfWithOcrRequest, ocrRenderType, sidecarFile);
            command.addAll(Arrays.asList("--language", String.join("+", validLanguages), inputFile.toString(), ocrOutputFile.toString()));

            ProcessExecutor.ProcessExecutorResult ocrResult = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
            if (ocrResult.getRc() != 0 && ocrResult.getMessages().contains("multiprocessing/synchronize.py") && ocrResult.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
                // The failure output points at Python multiprocessing being unavailable;
                // retry once restricted to a single job.
                command.add("--jobs");
                command.add("1");
                ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
            }

            Path resultPdf = ocrOutputFile;
            if (removeImagesAfter) {
                noImagesFile = Files.createTempFile("output_", "_no_images.pdf");
                ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", noImagesFile.toString(), ocrOutputFile.toString()));
                resultPdf = noImagesFile;
            }

            // Strip the last extension from the sanitized upload name.
            String baseName = Filenames.toSimpleFileName(fileInput.getOriginalFilename()).replaceFirst("[.][^.]+$", "");
            String pdfName = baseName + "_OCR.pdf";
            if (!sidecar) {
                return WebResponseUtils.bytesToWebResponse(Files.readAllBytes(resultPdf), pdfName);
            }

            zipFile = Files.createTempFile("output_", ".zip");
            try (ZipOutputStream zip = new ZipOutputStream(Files.newOutputStream(zipFile))) {
                zip.putNextEntry(new ZipEntry(pdfName));
                Files.copy(resultPdf, zip);
                zip.closeEntry();
                // Built from the base name so that a ".pdf" inside the base name is left alone.
                zip.putNextEntry(new ZipEntry(baseName + "_OCR.txt"));
                Files.copy(sidecarFile, zip);
                zip.closeEntry();
            }
            return WebResponseUtils.bytesToWebResponse(Files.readAllBytes(zipFile), baseName + "_OCR.zip", MediaType.APPLICATION_OCTET_STREAM);
        } finally {
            deleteTempFiles(inputFile, ocrOutputFile, sidecarFile, noImagesFile, zipFile);
        }
    }

    /**
     * Builds the {@code ocrmypdf} argument list up to, but not including, the language and
     * file arguments.
     *
     * @param request the OCR options
     * @param ocrRenderType the already validated PDF renderer name
     * @param sidecarFile destination of the sidecar text, or {@code null} when not requested
     * @return a mutable argument list the caller may extend
     */
    private static List<String> buildOcrCommand(ProcessPdfWithOcrRequest request, String ocrRenderType, Path sidecarFile) {
        List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf", "--pdf-renderer", ocrRenderType));
        if (sidecarFile != null) {
            command.add("--sidecar");
            command.add(sidecarFile.toString());
        }
        if (request.isDeskew()) {
            command.add("--deskew");
        }
        if (request.isClean()) {
            command.add("--clean");
        }
        if (request.isCleanFinal()) {
            command.add("--clean-final");
        }
        // Any other ocrType value (including "Normal", null and "") adds no flag.
        String ocrType = request.getOcrType();
        if ("skip-text".equals(ocrType)) {
            command.add("--skip-text");
        } else if ("force-ocr".equals(ocrType)) {
            command.add("--force-ocr");
        }
        return command;
    }

    /**
     * Deletes the given temporary files, skipping {@code null} entries. A failed deletion is
     * logged rather than thrown so it cannot mask the outcome of the request itself.
     */
    private static void deleteTempFiles(Path... tempFiles) {
        for (Path tempFile : tempFiles) {
            if (tempFile == null) {
                continue;
            }
            try {
                Files.deleteIfExists(tempFile);
            } catch (IOException e) {
                logger.warn("Could not delete temporary file {}", tempFile, e);
            }
        }
    }
}
