# backend/app/epub.py

import aiofiles as aiof
from base64 import b64encode

import ebooklib
from ebooklib import epub

from tempfile import SpooledTemporaryFile


async def epub2html(file: SpooledTemporaryFile):

    """
    Turns an epub file into a single html string.

    The epub is first split into tokens via ``epub_to_tokens``. Joining those
    tokens into html is not implemented yet, so an empty string is returned.
    """

    # Tokens are parsed for the upcoming join step; the result is unused for now.
    parsed_tokens = await epub_to_tokens(file)

    # TODO: join tokens to HTML
    return ""


async def epub_to_tokens(file: SpooledTemporaryFile) -> dict[str, str]:

    """
    Passes file content to ebooklib library and parses epub tokens into dict of the following format:

    "<file_name>": "<file_content>"

    Where file content is either plain text for xhtml documents or a base64
    encoded ``data:`` URI for covers, images, styles and videos, prepared for
    embedding to html. Items of any other type are skipped.

    If the epub cannot be read or parsed, no exception is raised: a string
    starting with 'Error! Wrong epub file format: ' is returned in place of
    the dict, so callers should check the type of the result.
    """

    embedded_types = (
        ebooklib.ITEM_COVER,
        ebooklib.ITEM_IMAGE,
        ebooklib.ITEM_STYLE,
        ebooklib.ITEM_VIDEO,
    )

    async with aiof.tempfile.NamedTemporaryFile() as tmp:

        # read_epub is given a file name below, so the upload is copied into
        # a named temporary file first.
        await tmp.write(file.read())
        # Flush buffered bytes so they are on disk when the file is reopened
        # through tmp.name (no need to read the whole file back into memory).
        await tmp.flush()

        try:
            book = epub.read_epub(tmp.name)
            tokens = {}
            for item in book.get_items():
                item_type = item.get_type()
                if item_type in embedded_types:
                    b64_content = b64encode(item.get_content()).decode()
                    tokens[item.get_name()] = f'data:{item.media_type};base64,{b64_content}'
                elif item_type == ebooklib.ITEM_DOCUMENT:
                    document = item.get_content()
                    # Keep the documented "plain text" contract: decode when
                    # the library hands the document back as bytes.
                    if isinstance(document, bytes):
                        document = document.decode('utf-8')
                    tokens[item.get_name()] = document
            return tokens
        except Exception as e:
            # Errors are reported as a string rather than raised; kept as-is
            # so existing callers that rely on this keep working.
            return 'Error! Wrong epub file format: ' + str(e)