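Description: CHM Input: skip entries that resolve outside the output directory
 CHMReader.ExtractFiles() now converts output_dir to an absolute path and
 resolves every CHM entry against it. An entry whose resolved path does not
 lie under the output directory is logged as "outside container, skipping"
 and is not written. The ";<junk>" suffix is stripped from the entry name and
 "/" is replaced with os.sep before the target path is built. Files are opened
 via make_long_path_useable() instead of skipping over-long paths on Windows.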
 .
 calibre (5.12.0+dfsg-1+deb11u3) bullseye-security; urgency=medium
 .
   * Non-maintainer upload by the LTS Team.
   * CVE-2021-44686: Regular Expression Denial of Service
   * CVE-2023-46303: HTML Input: Don't add resources that exist outside
     the document root by default
Author: Adrian Bunk <bunk@debian.org>

---
The information above should follow the Patch Tagging Guidelines; please
check out https://dep.debian.net/deps/dep3/ to learn about the format. Here
are templates for supplementary fields that you might want to add:

Origin: (upstream|backport|vendor|other), (<patch-url>|commit:<commit-id>)
Bug: <upstream-bugtracker-url>
Bug-Debian: https://bugs.debian.org/<bugnumber>
Bug-Ubuntu: https://launchpad.net/bugs/<bugnumber>
Forwarded: (no|not-needed|<patch-forwarded-url>)
Applied-Upstream: <version>, (<commit-url>|commit:<commit-id>)
Reviewed-By: <name and email of someone who approved/reviewed the patch>
Last-Update: 2026-02-26

--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@@ -12,6 +12,7 @@ from calibre.constants import filesystem
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.toc import TOC
+from calibre.utils.filenames import make_long_path_useable
 from chm.chm import CHMFile, chmlib
 from polyglot.builtins import as_unicode, getcwd, unicode_type
 
@@ -129,39 +130,38 @@ class CHMReader(CHMFile):
     def ExtractFiles(self, output_dir=getcwd(), debug_dump=False):
         html_files = set()
         enc = self.chm_encoding
+        base = output_dir = os.path.abspath(output_dir)
+        if not base.endswith(os.sep):
+            base += os.sep
         for path in self.Contents():
-            fpath = path
             if not isinstance(path, unicode_type):
-                fpath = path.decode(enc)
-            lpath = os.path.join(output_dir, fpath)
+                path = path.decode(enc)
+
+            fpath = path.partition(';')[0]
+            fpath = fpath.replace('/', os.sep)
+            lpath = os.path.abspath(os.path.join(output_dir, fpath))
+            if os.path.commonprefix((lpath, base)) != base:
+                self.log.warn(f'{path!r} outside container, skipping')
+                continue
             self._ensure_dir(lpath)
             try:
                 data = self.GetFile(path)
             except:
                 self.log.exception('Failed to extract %s from CHM, ignoring'%path)
                 continue
-            if lpath.find(';') != -1:
-                # fix file names with ";<junk>" at the end, see _reformat()
-                lpath = lpath.split(';')[0]
+            with open(make_long_path_useable(lpath), 'wb') as f:
+                f.write(data)
             try:
-                with open(lpath, 'wb') as f:
-                    f.write(data)
-                try:
-                    if 'html' in guess_mimetype(path)[0]:
-                        html_files.add(lpath)
-                except:
-                    pass
+                if 'html' in guess_mimetype(os.path.basename(lpath))[0]:
+                    html_files.add(lpath)
             except:
-                if iswindows and len(lpath) > 250:
-                    self.log.warn('%r filename too long, skipping'%path)
-                    continue
-                raise
+                pass
 
         if debug_dump:
             import shutil
             shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
         for lpath in html_files:
-            with lopen(lpath, 'r+b') as f:
+            with lopen(make_long_path_useable(lpath), 'r+b') as f:
                 data = f.read()
                 data = self._reformat(data, lpath)
                 if isinstance(data, unicode_type):
