Removed html escaping of book content
This commit is contained in:
parent
d2adf23936
commit
dcab64c78d
13
app/epub.py
13
app/epub.py
@ -108,7 +108,6 @@ async def epub_to_tokens(
|
|||||||
|
|
||||||
|
|
||||||
def read_metadata(book: epub.EpubBook) -> dict[str, str]:
|
def read_metadata(book: epub.EpubBook) -> dict[str, str]:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Reads metadata from xml to dict
|
Reads metadata from xml to dict
|
||||||
"""
|
"""
|
||||||
@ -121,7 +120,6 @@ def read_metadata(book: epub.EpubBook) -> dict[str, str]:
|
|||||||
|
|
||||||
|
|
||||||
def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
|
def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Joins titles list to one string
|
Joins titles list to one string
|
||||||
"""
|
"""
|
||||||
@ -134,7 +132,6 @@ def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def set_cover(tokens: DocumentTokens) -> None:
|
def set_cover(tokens: DocumentTokens) -> None:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Converts cover file name to base64 image stored in `tokens`
|
Converts cover file name to base64 image stored in `tokens`
|
||||||
"""
|
"""
|
||||||
@ -145,7 +142,6 @@ def set_cover(tokens: DocumentTokens) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
|
def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Joins chapters in `spine` to one html string
|
Joins chapters in `spine` to one html string
|
||||||
"""
|
"""
|
||||||
@ -157,11 +153,10 @@ def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> by
|
|||||||
if file_path:
|
if file_path:
|
||||||
res += process_xhtml(file_path, tokens)
|
res += process_xhtml(file_path, tokens)
|
||||||
|
|
||||||
return html.escape(html.unescape(res))
|
return html.unescape(res)
|
||||||
|
|
||||||
|
|
||||||
def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
|
def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Processes content of one xml body
|
Processes content of one xml body
|
||||||
"""
|
"""
|
||||||
@ -179,7 +174,6 @@ def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
|
|||||||
|
|
||||||
|
|
||||||
def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
|
def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Recursive function for xml element conversion to valid html
|
Recursive function for xml element conversion to valid html
|
||||||
"""
|
"""
|
||||||
@ -219,7 +213,6 @@ def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> N
|
|||||||
|
|
||||||
|
|
||||||
def process_a_element(node: etree.Element, path: str):
|
def process_a_element(node: etree.Element, path: str):
|
||||||
|
|
||||||
r"""
|
r"""
|
||||||
Converts `filed` links to ids in \<a\> element
|
Converts `filed` links to ids in \<a\> element
|
||||||
"""
|
"""
|
||||||
@ -237,7 +230,6 @@ def process_a_element(node: etree.Element, path: str):
|
|||||||
|
|
||||||
|
|
||||||
def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
|
def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Replaces file paths to base64 encoded media in `src` and `srcset` tags
|
Replaces file paths to base64 encoded media in `src` and `srcset` tags
|
||||||
"""
|
"""
|
||||||
@ -256,7 +248,6 @@ def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens
|
|||||||
|
|
||||||
|
|
||||||
def rel_to_abs_path(parent: str, rel: str):
|
def rel_to_abs_path(parent: str, rel: str):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Helper for converting a relative media path to an absolute one
|
Helper for converting a relative media path to an absolute one
|
||||||
"""
|
"""
|
||||||
@ -266,7 +257,6 @@ def rel_to_abs_path(parent: str, rel: str):
|
|||||||
|
|
||||||
@cache
|
@cache
|
||||||
def path_to_name(path: str) -> str:
|
def path_to_name(path: str) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Helper function for getting file name
|
Helper function for getting file name
|
||||||
"""
|
"""
|
||||||
@ -275,7 +265,6 @@ def path_to_name(path: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def children_to_html(root: etree.Element) -> bytes:
|
def children_to_html(root: etree.Element) -> bytes:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Converts all xml children of element to string and joins them
|
Converts all xml children of element to string and joins them
|
||||||
"""
|
"""
|
||||||
|
13
app/fb2.py
13
app/fb2.py
@ -32,7 +32,7 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
**(tokens["metadata"]),
|
**(tokens["metadata"]),
|
||||||
"content": html.escape(html.unescape(html_content.decode())),
|
"content": html.unescape(html_content.decode()),
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
@ -42,7 +42,6 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
|
|||||||
|
|
||||||
|
|
||||||
def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
|
def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
|
||||||
|
|
||||||
r"""
|
r"""
|
||||||
Parses fb2 file as xml document.
|
Parses fb2 file as xml document.
|
||||||
It puts book metadata, its content and media to `tokens` dictionary and returns it.
|
It puts book metadata, its content and media to `tokens` dictionary and returns it.
|
||||||
@ -78,7 +77,8 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
|
|||||||
metadata = {}
|
metadata = {}
|
||||||
metadata["title"] = book_info.find("./book-title", namespaces).text
|
metadata["title"] = book_info.find("./book-title", namespaces).text
|
||||||
metadata["author"] = get_author(book_info.find("./author", namespaces))
|
metadata["author"] = get_author(book_info.find("./author", namespaces))
|
||||||
metadata["cover"] = get_cover(book_info.find("./coverpage", namespaces))
|
metadata["cover"] = get_cover(
|
||||||
|
book_info.find("./coverpage", namespaces))
|
||||||
if "cover" not in metadata.keys():
|
if "cover" not in metadata.keys():
|
||||||
metadata.pop("cover")
|
metadata.pop("cover")
|
||||||
|
|
||||||
@ -104,7 +104,6 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
|
|||||||
|
|
||||||
|
|
||||||
def get_author(author: Element) -> str:
|
def get_author(author: Element) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Converts author xml structure to string
|
Converts author xml structure to string
|
||||||
"""
|
"""
|
||||||
@ -127,7 +126,6 @@ def get_author(author: Element) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def get_cover(coverpage: Optional[Element]) -> Optional[str]:
|
def get_cover(coverpage: Optional[Element]) -> Optional[str]:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Extracts cover image id if exists
|
Extracts cover image id if exists
|
||||||
"""
|
"""
|
||||||
@ -148,7 +146,6 @@ def set_cover(tokens: DocumentTokens) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def fb2body2html(tokens: DocumentTokens) -> str:
|
def fb2body2html(tokens: DocumentTokens) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Convert fb2 xml to html, joins bodies into one string
|
Convert fb2 xml to html, joins bodies into one string
|
||||||
"""
|
"""
|
||||||
@ -163,7 +160,6 @@ def fb2body2html(tokens: DocumentTokens) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def process_section(body: Element, tokens: DocumentTokens) -> str:
|
def process_section(body: Element, tokens: DocumentTokens) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Processes individual sections, recursively goes through the sections tree
|
Processes individual sections, recursively goes through the sections tree
|
||||||
"""
|
"""
|
||||||
@ -191,7 +187,6 @@ def process_section(body: Element, tokens: DocumentTokens) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def children_to_html(root: Element) -> str:
|
def children_to_html(root: Element) -> str:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Converts xml tag children to string
|
Converts xml tag children to string
|
||||||
"""
|
"""
|
||||||
@ -205,7 +200,6 @@ def children_to_html(root: Element) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def process_image(element: Element, tokens: DocumentTokens) -> None:
|
def process_image(element: Element, tokens: DocumentTokens) -> None:
|
||||||
|
|
||||||
r"""
|
r"""
|
||||||
Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
|
Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
|
||||||
"""
|
"""
|
||||||
@ -236,7 +230,6 @@ tag_with_class = {
|
|||||||
|
|
||||||
|
|
||||||
def process_content(root: Element, tokens: DocumentTokens) -> None:
|
def process_content(root: Element, tokens: DocumentTokens) -> None:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Converts fb2 xml tag names to html equivalents and my own styled elements.
|
Converts fb2 xml tag names to html equivalents and my own styled elements.
|
||||||
Resolves binary data dependencies
|
Resolves binary data dependencies
|
||||||
|
Loading…
x
Reference in New Issue
Block a user