From 15ea5923cb2fb7d363a36547da183f876d4ac196 Mon Sep 17 00:00:00 2001
From: Sam Judd
Date: Fri, 13 Dec 2024 15:08:49 -0800
Subject: [PATCH] BUG: Truncate mediabox and cropbox values with > 4 points.

Closes #2991
---
NOTE(review): this patch was recovered from a copy whose line breaks were
collapsed and whose angle-bracket text appears to have been stripped (the
author e-mail is missing). Line breaks and indentation of context lines were
reconstructed from the hunk line counts - verify them against the target tree
before applying. The blob ids on the "index" lines predate the review changes
(tuple isinstance check, expanded docstring, test asserting the box that was
actually overwritten, test annotations dropped).

 pypdf/_page.py     | 31 ++++++++++++++++++++++++++-----
 tests/test_page.py | 18 ++++++++++++++++++
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index cbae35817..55c7c9258 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -106,7 +106,7 @@
 MERGE_CROP_BOX = "cropbox"  # pypdf<=3.4.0 used 'trimbox'
 
 
-def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
+def _get_rectangle(self: Any, name: str, defaults: Iterable[str], allow_truncate: bool) -> RectangleObject:
     retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
     if isinstance(retval, RectangleObject):
         return retval
@@ -117,6 +117,14 @@ def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleOb
                 break
     if isinstance(retval, IndirectObject):
         retval = self.pdf.get_object(retval)
+    if allow_truncate and isinstance(retval, (list, tuple)):
+        # Box arrays longer than 4 entries are cut to the first 4 (issue #2991).
+        if len(retval) != 4:
+            logger_warning(
+                f"Expected {name} to be a rectangle with 4 points, but found: {retval}",
+                __name__
+            )
+        retval = retval[:4]
     retval = RectangleObject(retval)  # type: ignore
     _set_rectangle(self, name, retval)
     return retval
@@ -131,9 +139,22 @@ def _delete_rectangle(self: Any, name: str) -> None:
     del self[name]
 
 
-def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
+def _create_rectangle_accessor(name: str, fallback: Iterable[str], allow_truncate: bool = False) -> property:
+    """
+    Create a property that gets, sets and deletes the rectangle stored at name.
+
+    Args:
+        name: Key of the rectangle entry on the owning object.
+        fallback: Keys forwarded to _get_rectangle as its defaults.
+        allow_truncate: True to permissively truncate the value at name down
+            to the 4 values expected by RectangleObject if the value is a
+            list or tuple with a greater length.
+
+    Returns:
+        The property wrapping the getter, setter and deleter for name.
+    """
     return property(
-        lambda self: _get_rectangle(self, name, fallback),
+        lambda self: _get_rectangle(self, name, fallback, allow_truncate=allow_truncate),
         lambda self, value: _set_rectangle(self, name, value),
         lambda self: _delete_rectangle(self, name),
     )
@@ -2452,12 +2473,12 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
         unembedded = fonts - embedded
         return embedded, unembedded
 
-    mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
+    mediabox = _create_rectangle_accessor(PG.MEDIABOX, (), allow_truncate=True)
     """A :class:`RectangleObject`, expressed in
     default user space units, defining the boundaries of the physical medium on
     which the page is intended to be displayed or printed."""
 
-    cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
+    cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,), allow_truncate=True)
     """
     A :class:`RectangleObject`, expressed in
     default user space units, defining the visible region of default user
diff --git a/tests/test_page.py b/tests/test_page.py
index 7ab59aaeb..bdf4b7b78 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -326,6 +326,24 @@ def test_page_properties():
     assert page.bleedbox == RectangleObject((0, 1, 100, 101))
 
 
+@pytest.mark.parametrize(
+    ("key", "attr"), [(PG.MEDIABOX, "mediabox"), (PG.CROPBOX, "cropbox")]
+)
+@pytest.mark.parametrize("values", [
+    [0, 0, 612, 792, 0, 0, 612, 792],
+    (0, 0, 612, 792, 0, 0, 612, 792),
+    [0, 0, 612, 792, 0, 0, 612, 792, 0, 0],
+    (0, 0, 612, 792, 0, 0, 612, 792, 0, 0),
+])
+def test_page_handles_long_media_and_crop_box_iss_2991(key, attr, values):
+    """Over-long /MediaBox and /CropBox arrays are truncated to 4 values."""
+    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
+    page = reader.pages[0]
+    page[NameObject(key)] = ArrayObject(values)
+    # Read back the box that was overwritten, not always the mediabox.
+    assert getattr(page, attr) == RectangleObject((0, 0, 612, 792))
+
+
 def test_page_rotation():
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
     page = reader.pages[0]