60 lines
1.7 KiB
Python
60 lines
1.7 KiB
Python
import aiofiles as aiof
|
|
from base64 import b64encode
|
|
|
|
import ebooklib
|
|
from ebooklib import epub
|
|
|
|
from tempfile import SpooledTemporaryFile
|
|
|
|
|
|
async def epub2html(file: SpooledTemporaryFile):
    """
    Convert an epub file into a single html document.

    The epub is first broken into tokens by ``epub_to_tokens``. Merging
    those tokens into html is not implemented yet, so the value returned
    is currently always an empty string.
    """
    # The tokens are fetched already even though nothing consumes them yet.
    parsed_tokens = await epub_to_tokens(file)

    # TODO: join tokens to HTML
    return ""
|
|
|
|
|
|
async def epub_to_tokens(file: SpooledTemporaryFile) -> dict[str, str]:
    """
    Pass the file content to ebooklib and collect the epub items into a dict
    of the following format:

    "<file_name>": "<file_content>"

    Cover, image, style and video items are stored as
    ``data:<media_type>;base64,<data>`` URIs, prepared for embedding into
    html. Document (xhtml) items are stored exactly as ``get_content()``
    returns them. Items of any other type are skipped.

    If reading or walking the epub raises, the exception is not propagated:
    a string starting with ``'Error! Wrong epub file format: '`` is returned
    instead of the dict, so callers have to check the type of the result.
    """
    # Item types that get inlined as base64 data URIs.
    embedded_types = (
        ebooklib.ITEM_COVER,
        ebooklib.ITEM_IMAGE,
        ebooklib.ITEM_STYLE,
        ebooklib.ITEM_VIDEO,
    )

    async with aiof.tempfile.NamedTemporaryFile() as tmp:
        await tmp.write(file.read())
        # ebooklib reopens the file by name, so the buffered bytes must be
        # on disk before parsing; an explicit flush is enough for that, no
        # need to read the whole file back into memory.
        await tmp.flush()

        try:
            book = epub.read_epub(tmp.name)
            tokens = {}
            for item in book.get_items():
                item_type = item.get_type()
                if item_type in embedded_types:
                    name = item.get_name()
                    media_type = item.media_type
                    b64_content = b64encode(item.get_content()).decode()

                    tokens[name] = f'data:{media_type};base64,{b64_content}'
                elif item_type == ebooklib.ITEM_DOCUMENT:
                    # NOTE(review): get_content() presumably returns bytes
                    # here, which would not match the ``str`` value in the
                    # return annotation - confirm before relying on it.
                    tokens[item.get_name()] = item.get_content()
            return tokens
        except Exception as e:
            # Kept as a returned message (not a raise) for existing callers.
            return 'Error! Wrong epub file format: ' + str(e)
|