Removed html escaping of book content

This commit is contained in:
Dmitriy Shishkov 2021-07-16 15:34:26 +05:00
parent d2adf23936
commit dcab64c78d
No known key found for this signature in database
GPG Key ID: 14358F96FCDD8060
2 changed files with 4 additions and 22 deletions

View File

@ -108,7 +108,6 @@ async def epub_to_tokens(
def read_metadata(book: epub.EpubBook) -> dict[str, str]: def read_metadata(book: epub.EpubBook) -> dict[str, str]:
""" """
Reads metadata from xml to dict Reads metadata from xml to dict
""" """
@ -121,7 +120,6 @@ def read_metadata(book: epub.EpubBook) -> dict[str, str]:
def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str: def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
""" """
Joins titles list to one string Joins titles list to one string
""" """
@ -134,7 +132,6 @@ def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
def set_cover(tokens: DocumentTokens) -> None: def set_cover(tokens: DocumentTokens) -> None:
""" """
Converts cover file name to base64 image stored in `tokens` Converts cover file name to base64 image stored in `tokens`
""" """
@ -145,7 +142,6 @@ def set_cover(tokens: DocumentTokens) -> None:
def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes: def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
""" """
Joins chapters in `spine` to one html string Joins chapters in `spine` to one html string
""" """
@ -157,11 +153,10 @@ def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> by
if file_path: if file_path:
res += process_xhtml(file_path, tokens) res += process_xhtml(file_path, tokens)
return html.escape(html.unescape(res)) return html.unescape(res)
def process_xhtml(path: str, tokens: DocumentTokens) -> bytes: def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
""" """
Processes content of one xml body Processes content of one xml body
""" """
@ -179,7 +174,6 @@ def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None: def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
""" """
Recursive function for xml element conversion to valid html Recursive function for xml element conversion to valid html
""" """
@ -219,7 +213,6 @@ def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> N
def process_a_element(node: etree.Element, path: str): def process_a_element(node: etree.Element, path: str):
r""" r"""
Converts `filed` links to ids in \<a\> element Converts `filed` links to ids in \<a\> element
""" """
@ -237,7 +230,6 @@ def process_a_element(node: etree.Element, path: str):
def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens): def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
""" """
Replaces file paths to base64 encoded media in `src` and `srcset` tags Replaces file paths to base64 encoded media in `src` and `srcset` tags
""" """
@ -256,7 +248,6 @@ def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens
def rel_to_abs_path(parent: str, rel: str): def rel_to_abs_path(parent: str, rel: str):
""" """
Helper for converting a relative media path to an absolute one Helper for converting a relative media path to an absolute one
""" """
@ -266,7 +257,6 @@ def rel_to_abs_path(parent: str, rel: str):
@cache @cache
def path_to_name(path: str) -> str: def path_to_name(path: str) -> str:
""" """
Helper function for getting file name Helper function for getting file name
""" """
@ -275,7 +265,6 @@ def path_to_name(path: str) -> str:
def children_to_html(root: etree.Element) -> bytes: def children_to_html(root: etree.Element) -> bytes:
""" """
Converts all xml children of element to string and joins them Converts all xml children of element to string and joins them
""" """

View File

@ -32,7 +32,7 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
return { return {
**(tokens["metadata"]), **(tokens["metadata"]),
"content": html.escape(html.unescape(html_content.decode())), "content": html.unescape(html_content.decode()),
} }
except Exception as err: except Exception as err:
@ -42,7 +42,6 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens: def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
r""" r"""
Parses fb2 file as xml document. Parses fb2 file as xml document.
It puts book metadata, its content and media to `tokens` dictionary and returns it. It puts book metadata, its content and media to `tokens` dictionary and returns it.
@ -78,7 +77,8 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
metadata = {} metadata = {}
metadata["title"] = book_info.find("./book-title", namespaces).text metadata["title"] = book_info.find("./book-title", namespaces).text
metadata["author"] = get_author(book_info.find("./author", namespaces)) metadata["author"] = get_author(book_info.find("./author", namespaces))
metadata["cover"] = get_cover(book_info.find("./coverpage", namespaces)) metadata["cover"] = get_cover(
book_info.find("./coverpage", namespaces))
if "cover" not in metadata.keys(): if "cover" not in metadata.keys():
metadata.pop("cover") metadata.pop("cover")
@ -104,7 +104,6 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
def get_author(author: Element) -> str: def get_author(author: Element) -> str:
""" """
Converts author xml structure to string Converts author xml structure to string
""" """
@ -127,7 +126,6 @@ def get_author(author: Element) -> str:
def get_cover(coverpage: Optional[Element]) -> Optional[str]: def get_cover(coverpage: Optional[Element]) -> Optional[str]:
""" """
Extracts cover image id if exists Extracts cover image id if exists
""" """
@ -148,7 +146,6 @@ def set_cover(tokens: DocumentTokens) -> None:
def fb2body2html(tokens: DocumentTokens) -> str: def fb2body2html(tokens: DocumentTokens) -> str:
""" """
Convert fb2 xml to html, joins bodies into one string Convert fb2 xml to html, joins bodies into one string
""" """
@ -163,7 +160,6 @@ def fb2body2html(tokens: DocumentTokens) -> str:
def process_section(body: Element, tokens: DocumentTokens) -> str: def process_section(body: Element, tokens: DocumentTokens) -> str:
""" """
Processes individual sections, recursively goes through the sections tree Processes individual sections, recursively goes through the sections tree
""" """
@ -191,7 +187,6 @@ def process_section(body: Element, tokens: DocumentTokens) -> str:
def children_to_html(root: Element) -> str: def children_to_html(root: Element) -> str:
""" """
Converts xml tag children to string Converts xml tag children to string
""" """
@ -205,7 +200,6 @@ def children_to_html(root: Element) -> str:
def process_image(element: Element, tokens: DocumentTokens) -> None: def process_image(element: Element, tokens: DocumentTokens) -> None:
r""" r"""
Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>" Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
""" """
@ -236,7 +230,6 @@ tag_with_class = {
def process_content(root: Element, tokens: DocumentTokens) -> None: def process_content(root: Element, tokens: DocumentTokens) -> None:
""" """
Converts fb2 xml tag names to html equivalents and my own styled elements. Converts fb2 xml tag names to html equivalents and my own styled elements.
Resolves binary data dependencies Resolves binary data dependencies