Source code for ox.apps.files.processors.pdf_processor

from __future__ import annotations
from pathlib import Path


from .processor import Processor


__all__ = ("PDFProcessor",)


class PDFProcessor(Processor):
    """Processor that renders a thumbnail preview of a document using PyMuPDF.

    The preview is taken from the first page that has any content (text,
    images or vector drawings); if every page is blank the first page is
    used.
    """

    # MIME types declared for this processor. How this set is consumed is
    # defined outside this class (presumably by the ``Processor`` base).
    mime_types = {
        "application/pdf",
        "application/epub+zip",
        "application/vnd.ms-xpsdocument",
        "application/vnd.comicbook+zip",
        "application/vnd.comicbook-rar",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/msword",
        "application/vnd.ms-excel",
        "application/vnd.ms-powerpoint",
        "application/x-fictionbook+xml",
        "application/xhtml+xml",
        "text/html",
        "text/plain",
    }

    def _create_preview(self, path: Path, out: Path, size: tuple[int, int]) -> bool:
        """Create a thumbnail for the input document.

        The first non-empty page (see :meth:`is_empty`) is rendered at
        150 dpi; when all pages are empty the first page is rendered
        instead. The rendering is then shrunk in place to fit within
        ``size`` and written to ``out``.

        The image format is chosen by Pillow from the suffix of ``out``
        (the original documentation states thumbnails are saved as JPEG,
        which holds when callers pass a ``.jpg``/``.jpeg`` path).

        :param path: document to render.
        :param out: destination path of the thumbnail image.
        :param size: maximum ``(width, height)`` of the thumbnail.
        :return: ``True`` once the thumbnail has been written, ``False`` \
            when the document has no page to render.
        """
        # Imported lazily so the module can be loaded without these
        # optional dependencies installed.
        import pymupdf
        from PIL import Image

        # The context manager closes the document even when rendering fails.
        with pymupdf.open(path) as doc:
            pix = None
            for page in doc:
                if not self.is_empty(page):
                    pix = page.get_pixmap(dpi=150)
                    break

            if pix is None:
                # NOTE(review): a zero-page document previously raised
                # IndexError on ``doc[0]``; it is now reported as a failed
                # preview. Confirm callers treat ``False`` as "no preview".
                if doc.page_count == 0:
                    return False
                # Every page is blank: default to the first page.
                pix = doc[0].get_pixmap(dpi=150)

            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

        image.thumbnail(size)
        image.save(out)
        return True

    def is_empty(self, page) -> bool:
        """Return whether ``page`` has no visible content.

        A page is empty only when it has no non-whitespace text, no images
        and no vector drawings. The checks short-circuit, so the later
        lookups are skipped as soon as some content is found.

        :param page: a PyMuPDF page object.
        """
        has_content = (
            page.get_text().strip()
            or page.get_images(full=True)
            or page.get_drawings()
        )
        return not has_content