Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Truncate mediabox and cropbox values with > 4 points. #3001

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
MERGE_CROP_BOX = "cropbox" # pypdf<=3.4.0 used 'trimbox'


def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
def _get_rectangle(self: Any, name: str, defaults: Iterable[str], allow_truncate: bool) -> RectangleObject:
retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
if isinstance(retval, RectangleObject):
return retval
Expand All @@ -117,6 +117,13 @@ def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleOb
break
if isinstance(retval, IndirectObject):
retval = self.pdf.get_object(retval)
if allow_truncate and (isinstance(retval, list) or isinstance(retval, tuple)):
if len(retval) != 4:
logger_warning(
f"Expected {name} to be a rectangle with 4 points, but found: {retval}",
__name__
)
retval = retval[:4]
retval = RectangleObject(retval) # type: ignore
_set_rectangle(self, name, retval)
return retval
Expand All @@ -131,9 +138,14 @@ def _delete_rectangle(self: Any, name: str) -> None:
del self[name]


def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
def _create_rectangle_accessor(name: str, fallback: Iterable[str], allow_truncate: bool = False) -> property:
"""
Params:
allow_truncate: True to permissively truncate the value at name down to the 4 points
expected by RectangleObject if the value is a Tuple or List with a greater length.
"""
return property(
lambda self: _get_rectangle(self, name, fallback),
lambda self: _get_rectangle(self, name, fallback, allow_truncate=allow_truncate),
lambda self, value: _set_rectangle(self, name, value),
lambda self: _delete_rectangle(self, name),
)
Expand Down Expand Up @@ -2452,12 +2464,12 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
unembedded = fonts - embedded
return embedded, unembedded

mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
mediabox = _create_rectangle_accessor(PG.MEDIABOX, (), allow_truncate=True)
"""A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
default user space units, defining the boundaries of the physical medium on
which the page is intended to be displayed or printed."""

cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,), allow_truncate=True)
"""
A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
default user space units, defining the visible region of default user
Expand Down
14 changes: 14 additions & 0 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,20 @@ def test_page_properties():
assert page.bleedbox == RectangleObject((0, 1, 100, 101))


@pytest.mark.parametrize("key", [PG.MEDIABOX, PG.CROPBOX])
@pytest.mark.parametrize("values", [
    [0, 0, 612, 792, 0, 0, 612, 792],
    (0, 0, 612, 792, 0, 0, 612, 792),
    [0, 0, 612, 792, 0, 0, 612, 792, 0, 0],
    (0, 0, 612, 792, 0, 0, 612, 792, 0, 0),
])
def test_page_handles_long_media_and_crop_box_iss_2991(
    # Quoted so the PEP 604 union is not evaluated at definition time; `|`
    # between typing generics raises TypeError on Python < 3.10 unless the
    # module uses `from __future__ import annotations`.
    key: str, values: "List[float] | Tuple[float, ...]"
):
    """
    Over-long box arrays are truncated to the first four values.

    The page entry named by ``key`` is overwritten with an array of 8 or 10
    numbers, then the accessor that belongs to that same entry is read back
    and must yield the rectangle built from the first four numbers.

    The accessor is selected from ``key`` on purpose: always asserting on
    ``page.mediabox`` would leave the ``/CropBox`` parametrization without
    any check of the ``cropbox`` accessor.
    """
    # Map each page entry to the page property that reads it.
    accessor = {PG.MEDIABOX: "mediabox", PG.CROPBOX: "cropbox"}[key]

    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    page = reader.pages[0]
    page[NameObject(key)] = ArrayObject(values)

    assert getattr(page, accessor) == RectangleObject((0, 0, 612, 792))


def test_page_rotation():
reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
page = reader.pages[0]
Expand Down
Loading