"""Convert FictionBook 2 (FB2) documents into tokens that can be joined to HTML."""

import xml.etree.ElementTree as ET
from tempfile import SpooledTemporaryFile
from typing import Any, Optional
from xml.etree.ElementTree import Element

# Namespace map handed to every ElementTree lookup. The empty prefix is the
# default namespace, so plain tag names resolve to FictionBook elements.
namespaces = {
    "": "http://www.gribuser.ru/xml/fictionbook/2.0",
    "xlink": "http://www.w3.org/1999/xlink",
}
# Fully-qualified attribute name of `xlink:href`, as ElementTree stores it.
HREF = f"{{{namespaces['xlink']}}}href"


async def fb22html(file: SpooledTemporaryFile) -> str:
    """
    Splits fb2 to tokens and joins them to one html file.

    Parsing is done synchronously inside this coroutine. Any failure is
    reported as an error string instead of being raised.

    :param file: binary file object holding the FB2 (XML) document
    :return: resulting HTML, or a message starting with
        "Error! Wrong FB2 file format: " when the document cannot be processed
    """

    try:
        tokens = fb22tokens(file)  # noqa: F841 - consumed once joining exists
        # TODO: join tokens to HTML
        html_content = ""
        return html_content

    except Exception as e:
        # Top-level boundary: the caller expects a string in every case.
        return "Error! Wrong FB2 file format: " + str(e)


def fb22tokens(file: SpooledTemporaryFile) -> dict[str, Any]:
    """
    Parses fb2 file as xml document. It puts book metadata, its content and
    media to `tokens` dictionary and returns it.

    `tokens` format:

    { "metadata": { "title": ..., "author": ..., "cover": ... },

    "content": "<serialized first <body> element>",

    "<binary id>": "data:<content-type>;base64,<payload>" }

    Metadata keys are present only when the corresponding element exists.
    Elements are located by tag name rather than by position, so a leading
    <stylesheet> or additional <body> elements (e.g. notes) do not shift the
    result; only the first <body> is serialized.

    :param file: binary file object holding the FB2 (XML) document
    :return: the `tokens` dictionary described above
    :raises xml.etree.ElementTree.ParseError: if the input is not valid XML
    :raises ValueError: if the document has no <body> element
    """

    # NOTE(review): xml.etree is not hardened against maliciously crafted
    # XML; consider a hardened parser if uploads are untrusted.
    root = ET.parse(file).getroot()

    body = root.find("body", namespaces)
    if body is None:
        raise ValueError("no <body> element found")

    tokens: dict[str, Any] = {
        "metadata": _read_metadata(root.find("description", namespaces)),
        # ElementTree serializes the first namespace it meets (the FictionBook
        # one, because <body> comes first) with the generated prefix "ns0";
        # dropping the prefix leaves plain tag names. encoding="unicode"
        # yields `str` rather than `bytes`.
        "content": ET.tostring(body, encoding="unicode").replace("ns0:", ""),
    }

    # Reading assets
    for asset in root.findall("binary", namespaces):
        key = asset.get("id")
        if not key:
            # Nothing can reference an asset without an id.
            continue
        media_type = asset.get("content-type", "application/octet-stream")
        # Base64 payloads are usually wrapped over many lines; remove all
        # whitespace so the data URI is contiguous.
        b64_content = "".join((asset.text or "").split())
        tokens[key] = f"data:{media_type};base64,{b64_content}"

    return tokens


def _read_metadata(description: Optional[Element]) -> dict[str, str]:
    """Collects title, author and cover id from <description>/<title-info>."""

    metadata: dict[str, str] = {}
    if description is None:
        return metadata

    book_info = description.find("title-info", namespaces)
    if book_info is None:
        return metadata

    title = book_info.find("book-title", namespaces)
    if title is not None and title.text and title.text.strip():
        metadata["title"] = title.text.strip()

    author = book_info.find("author", namespaces)
    if author is not None:
        author_name = get_author(author)
        if author_name:
            metadata["author"] = author_name

    cover = get_cover(book_info.find("coverpage", namespaces))
    if cover is not None:
        metadata["cover"] = cover

    return metadata


def get_author(author: Element) -> str:
    """
    Converts author xml structure to string.

    Joins the non-empty first, middle and last names with spaces. When none
    of them is present the nickname is used instead.

    :param author: <author> element
    :return: author name, or an empty string if no name information exists
    """

    parts = []
    for tag_name in ("first-name", "middle-name", "last-name"):
        el = author.find(tag_name, namespaces)
        # An empty element such as <middle-name/> has text None.
        if el is not None and el.text and el.text.strip():
            parts.append(el.text.strip())
    if parts:
        return " ".join(parts)

    nickname = author.find("nickname", namespaces)
    if nickname is not None and nickname.text:
        return nickname.text.strip()
    return ""


def get_cover(coverpage: Optional[Element]) -> Optional[str]:
    """
    Extracts cover image id if exists.

    The image points at its binary through an `xlink:href` fragment such as
    "#cover.jpg"; the leading "#" is removed so the result equals the id of
    the referenced <binary> element.

    :param coverpage: <coverpage> element, or None when the book has none
    :return: id of the cover image, or None if it cannot be determined
    """

    # Compare with None explicitly: Element truthiness reflects the number
    # of children and is deprecated.
    if coverpage is None:
        return None
    image = coverpage.find("image", namespaces)
    if image is None:
        return None
    href = image.get(HREF)
    if not href:
        return None
    return href.removeprefix("#") or None
index ca6eef4..98a6c1d 100644 --- a/app/main.py +++ b/app/main.py @@ -2,6 +2,7 @@ from fastapi import FastAPI, File, UploadFile from fastapi.responses import HTMLResponse from .epub import epub2html +from .fb2 import fb22html app = FastAPI()