Compare commits

12 commits:

- f80673ade2
- 87e5a16a06
- ca0a10e7b7
- a1a4d15e4e
- dcab64c78d
- d2adf23936
- 5b4a4cc75d
- a52520c4e2
- 5155790357
- 2f4a683cb4
- bcc0de495c
- 3ca47d915e

.github/workflows/format.yml (vendored): 8 changed lines

@@ -19,12 +19,10 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install flake8 black
+          pip install -r requirements/dev.txt

-      - name: Lint with flake8
-        run: |
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Lint with pylint
+        run: pylint app --extension-pkg-allow-list=lxml

       - name: Format with black
         run: black .

.github/workflows/main-ci.yml (vendored): 11 changed lines

@@ -13,12 +13,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2

-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v3
-        with:
-          images: ghcr.io/${{ github.repository }}
-
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@v1
@@ -44,9 +38,10 @@ jobs:
         with:
           context: .
           file: ./Dockerfile

           builder: ${{ steps.buildx.outputs.name }}
           cache-from: type=local,src=/tmp/.buildx-cache
           cache-to: type=local,dest=/tmp/.buildx-cache

           push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          tags: ghcr.io/${{ github.repository }}:${{ github.sha }},ghcr.io/${{ github.repository }}:latest

.github/workflows/release-ci.yml (vendored): 2 changed lines

@@ -50,9 +50,11 @@ jobs:
         with:
           context: .
           file: ./Dockerfile

           builder: ${{ steps.buildx.outputs.name }}
           cache-from: type=local,src=/tmp/.buildx-cache
           cache-to: type=local,dest=/tmp/.buildx-cache

           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}

.gitignore (vendored): 3 changed lines

@@ -1,3 +1,4 @@
 .venv
 __pycache__/
 .vscode
+.vercel

Dockerfile: 10 changed lines

@@ -1,13 +1,11 @@
-FROM python
+FROM python:alpine

 WORKDIR /srv

-COPY ./requirements.txt /srv/requirements.txt
+COPY ./requirements /srv/requirements

-RUN pip install -r requirements.txt
+RUN pip install -r requirements/prod.txt

-EXPOSE 80
-
 COPY ./app /srv/app

-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
+CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8081}

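Note: the switch from exec-form to shell-form `CMD` is what makes `${PORT:-8081}` work; the shell expands it at container start, so a host platform can inject its own `PORT` while local runs fall back to 8081. A minimal sketch of the same fallback logic in Python (names here are illustrative, not part of this diff):

```python
import os

# Shell form `${PORT:-8081}` means: use $PORT if it is set, otherwise 8081.
port = int(os.environ.get("PORT", "8081"))
print(f"uvicorn would bind to 0.0.0.0:{port}")
```
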
README.md: 41 changed lines

@@ -10,19 +10,54 @@ Backend for online ebook viewer publite

 ## Deploy

+Run app locally (development only!)
+
+```bash
+# install requirements
+pip install -r requirements/dev.txt
+
+# run app with uvicorn
+uvicorn app.main:app --reload --port <port>
+```
+
+Run app locally (test prod)
+
+```bash
+# install requirements
+pip install -r requirements/prod.txt
+
+# run app with uvicorn
+uvicorn app.main:app --port <port>
+
+# or
+
+# run with python script
+python run.py
+```
+
 Simple docker deployment

 ```bash
 # build docker image
 docker build . -t publite_backend

 # run it with docker
-docker run -p <port>:80 publite_backend
+docker run -p <port>:8081 publite_backend
 ```

 Dokku deployment with image from Docker Hub

 ```bash
 dokku apps:create publitebackend

+# increase file size limit to be able to upload bigger books
+dokku nginx:set publitebackend client_max_body_size 50m
+
 dokku git:from-image publitebackend publite/backend:latest
 ```

+# TODO
+
+- Separate epub and fb2 files to python modules
+- Rewrite own `.opf` file parsing to get rid of dependency on EbookLib
+- Add cli interfaces for epub and fb2 libs

app/__init__.py: new empty file (0 lines)

app/epub.py: 248 changed lines

@@ -1,16 +1,28 @@
-import aiofiles as aiof
+"""
+Module for EPUB file conversion to html
+"""
+
+import html
+import os
 from base64 import b64encode
-from fastapi import HTTPException
-
-import ebooklib
-from ebooklib import epub
+from functools import cache

 from tempfile import SpooledTemporaryFile

-from .utils import Document_Tokens, strip_whitespace
+import aiofiles
+import ebooklib
+from ebooklib import epub
+from fastapi import HTTPException
+from lxml import etree
+
+from .utils import DocumentTokens, HTMLBook, strip_whitespace
+
+parser = etree.XMLParser(recover=True)
+
+IMAGE = "{http://www.w3.org/2000/svg}image"
+HREF = "{http://www.w3.org/1999/xlink}href"
+

-async def epub2html(file: SpooledTemporaryFile) -> str:
+async def epub2html(file: SpooledTemporaryFile) -> HTMLBook:

     """
     Splits epub to tokens and joins them to one html file
@@ -22,40 +34,43 @@ async def epub2html(file: SpooledTemporaryFile) -> str:

         html_content = epub_tokens2html(spine, tokens)

-        return {**(tokens["metadata"]), "content": strip_whitespace(html_content)}
+        return {
+            **(tokens["metadata"]),
+            "content": html_content,
+        }

-    except Exception as e:
+    except Exception as err:
         raise HTTPException(
-            status_code=500, detail="Error! Wrong epub file format: " + str(e)
-        )
+            status_code=500, detail="Error! Wrong epub file format: " + str(err)
+        ) from err


 async def epub_to_tokens(
     file: SpooledTemporaryFile,
-) -> tuple[Document_Tokens, list[tuple[str, str]]]:
+) -> tuple[DocumentTokens, list[tuple[str, str]]]:

-    """
-    Passes file content to EbookLib library and parses epub tokens into dict of the following format:
+    r"""
+    Passes file content to EbookLib library and parses epub tokens into dict of
+    the following format:

     { "\<file_name\>": "\<file_content\>" }

-    Where file content is either plain text for xhtml or base64 encoded data for other formats, prepared for embeding to html
+    Where file content is either plain text for xhtml or base64 encoded data
+    for other formats, prepared for embeding to html
     """

     tokens = {}

-    async with aiof.tempfile.NamedTemporaryFile() as tmp:
+    async with aiofiles.tempfile.NamedTemporaryFile() as tmp:
         await tmp.write(file.read())

-        book = epub.read_epub(tmp.name)
+        # Reading book file
+        reader = epub.EpubReader(tmp.name)
+        book = reader.load()
+        reader.process()

-        # Adding book metadata to tokens list
-
-        metadata = {}
-        metadata["title"] = convert_list(book.get_metadata("DC", "title"))
-        metadata["author"] = convert_list(book.get_metadata("DC", "creator"))
-
-        tokens["metadata"] = metadata.copy()
+        tokens["metadata"] = read_metadata(book)
+        tokens["toc"] = {}

         # Iterating over Items
@@ -63,34 +78,52 @@ async def epub_to_tokens(
         item: epub.EpubItem

         item_type = item.get_type()
+        file_path = os.path.join(reader.opf_dir, item.get_name())

         if item_type == ebooklib.ITEM_DOCUMENT:
             # Adding book chapters to tokens list
-            name = item.id
-            tokens[name] = item.get_body_content()
+            name = item.get_id()
+            tokens[file_path] = strip_whitespace(item.get_body_content())
+            tokens["toc"][name] = file_path

         elif item_type in (
+            ebooklib.ITEM_AUDIO,
             ebooklib.ITEM_COVER,
             ebooklib.ITEM_IMAGE,
-            ebooklib.ITEM_STYLE,
-            ebooklib.ITEM_VIDEO,
             ebooklib.ITEM_VECTOR,
+            ebooklib.ITEM_VIDEO,
         ):
             # Adding assets to tokens list
-            name = item.get_name()
             content = item.get_content()
             media_type = item.media_type
             b64_content = b64encode(content).decode()

-            tokens[name] = f"data:{media_type};base64,{b64_content}"
+            tokens[file_path] = f"data:{media_type};base64,{b64_content}"

             if item_type == ebooklib.ITEM_COVER:
-                tokens["metadata"]["cover"] = name
+                tokens["metadata"]["cover"] = file_path

     return tokens, book.spine.copy()


-def convert_list(titles_list: list[tuple[str, dict[str, str]]]):
+def read_metadata(book: epub.EpubBook) -> dict[str, str]:
+    """
+    Reads metadata from xml to dict
+    """
+
+    metadata = {}
+    metadata["title"] = book.get_metadata("DC", "title")[0][0]
+    metadata["author"] = convert_list(book.get_metadata("DC", "creator"))
+
+    return metadata.copy()
+
+
+def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
+    """
+    Joins titles list to one string
+    """
+
     res = []
     for title_obj in titles_list:
         res.append(title_obj[0])
@@ -98,24 +131,147 @@ def convert_list(titles_list: list[tuple[str, dict[str, str]]]):
     return "; ".join(res)


-def set_cover(tokens: Document_Tokens):
-    cover_name = tokens["metadata"]["cover"]
+def set_cover(tokens: DocumentTokens) -> None:
+    """
+    Converts cover file name to base64 image stored in `tokens`
+    """
+
+    cover_name = tokens["metadata"].get("cover")
     if cover_name in tokens.keys():
         tokens["metadata"]["cover"] = tokens[cover_name]


-def epub_tokens2html(spine: list[tuple[str, str]], tokens: Document_Tokens):
+def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
+    """
+    Joins chapters in `spice` to one html string
+    """
+
+    res = ""
+
+    for name, _ in spine:
+        file_path = tokens["toc"].get(name)
+        if file_path:
+            res += process_xhtml(file_path, tokens)
+
+    return html.unescape(res)
+
+
+def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
+    """
+    Processes content of one xml body
+    """
+
+    xml: etree.Element = etree.fromstring(tokens[path], parser=parser)
+
+    if xml.tag == "body":
+        xml.tag = "div"
+
+    process_content(xml, path, tokens)
+
+    return (
+        f'<section id="b_{path_to_name(path)}">{etree.tostring(xml).decode()}</section>'
+    )
+
+
+def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
+    """
+    Recursive function for xml element convertion to valid html
+    """
+
+    # Process universal tags
+
+    if node.get("epub:type"):
+        node.attrib.pop("epub:type")
+
+    el_id = node.get("id")
+    if el_id:
+        node.set("id", f"{path_to_name(path)}_{el_id}")
+
+    # Tag processing
+
+    if node.tag == "a":
+        process_a_element(node, path)
+
+    elif node.tag == "hgroup":
+        node.tag = "div"
+
+    elif node.tag in ("img", "source", "video", "audio"):
+        process_media_element(node, path, tokens)
+
+    elif node.tag == IMAGE:
+        href = node.get(HREF)
+        media_path = rel_to_abs_path(path, href)
+        if media_path in tokens.keys():
+            node.set(HREF, tokens[media_path])
+
+    elif node.tag == "trigger":
+        node.getparent().remove(node)
+
+    # Recursively run for all children
+
+    for child in node:
+        process_content(child, path, tokens)
+
+
+def process_a_element(node: etree.Element, path: str):
+    r"""
+    Converts `filed` links to ids in \<a\> element
+    """
+
+    href = node.get("href")
+    if href.count(".xhtml") or href.count(".html"):
+        id_pos = href.rfind("#")
+        if id_pos != -1:
+            href_path, el_id = href[:id_pos], href[id_pos:]
+            node.set("href", f"#{path_to_name(href_path)}_{el_id[1:]}")
+        else:
+            node.set("href", f"#b_{path_to_name(href)}")
+    elif href.count("#"):
+        node.set("href", f"#{path_to_name(path)}_{href[1:]}")
+
+
+def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
+    """
+    Replaces file paths to base64 encoded media in `src` and `srcset` tags
+    """
+
+    src = node.get("src")
+    attr = "src"
+
+    if not src:
+        src = node.get("srcset")
+        attr = "srcset"
+
+    if src:
+        media_path = rel_to_abs_path(path, src)
+        if media_path in tokens.keys():
+            node.set(attr, tokens[media_path])
+
+
+def rel_to_abs_path(parent: str, rel: str):
+    """
+    Helper for relative path to media convertion to absolute
+    """
+
+    return os.path.normpath(os.path.join(os.path.dirname(parent), rel))
+
+
+@cache
+def path_to_name(path: str) -> str:
+    """
+    Helper function for getting file name
+    """
+
+    return os.path.basename(path).split(".")[0]
+
+
+def children_to_html(root: etree.Element) -> bytes:
+    """
+    Converts all xml children of element to string and joins them
+    """
+
     res = b""

-    print(spine)
-    for name, enabled in spine:
-        if name in tokens.keys():
-            res += process_xhtml(tokens[name], tokens)
+    for child in root:
+        res += etree.tostring(child)

     return res
-
-
-def process_xhtml(xhtml: bytes, tokens: Document_Tokens):
-    # TODO: Add xhtml procession
-    return xhtml

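Note: the two path helpers added above (`rel_to_abs_path`, `path_to_name`) are self-contained and show how chapter-relative asset references get resolved against the keys stored in `tokens`. A standalone check, with the function bodies copied from the diff and example paths invented for illustration (POSIX path separators assumed):

```python
import os
from functools import cache


def rel_to_abs_path(parent: str, rel: str):
    # Resolve `rel` against the directory of the referencing chapter file
    return os.path.normpath(os.path.join(os.path.dirname(parent), rel))


@cache
def path_to_name(path: str) -> str:
    # File name without extension, used to build stable section/anchor ids
    return os.path.basename(path).split(".")[0]


# An <img src="../images/cover.jpg"> inside OEBPS/text/ch01.xhtml resolves to
# the token key "OEBPS/images/cover.jpg":
assert rel_to_abs_path("OEBPS/text/ch01.xhtml", "../images/cover.jpg") == "OEBPS/images/cover.jpg"
assert path_to_name("OEBPS/text/ch01.xhtml") == "ch01"
```
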
app/fb2.py: 86 changed lines

@@ -1,11 +1,16 @@
-from tempfile import SpooledTemporaryFile
+"""
+Module for FB2 file conversion to html
+"""
+
+import html
 import xml.etree.ElementTree as ET
-from xml.etree.ElementTree import Element
+from tempfile import SpooledTemporaryFile
 from typing import Optional
+from xml.etree.ElementTree import Element

 from fastapi import HTTPException

-from .utils import Document_Tokens, strip_whitespace
+from .utils import DocumentTokens, HTMLBook, strip_whitespace


 namespaces = {
     "": "http://www.gribuser.ru/xml/fictionbook/2.0",
@@ -14,7 +19,7 @@ namespaces = {
 HREF = f"{{{namespaces['xlink']}}}href"


-async def fb22html(file: SpooledTemporaryFile) -> dict[str, str]:
+async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:

     """
     Splits fb2 to tokens and joins them to one html file
@@ -25,17 +30,19 @@ async def fb22html(file: SpooledTemporaryFile) -> dict[str, str]:
         set_cover(tokens)
         html_content = fb2body2html(tokens)

-        return {**(tokens["metadata"]), "content": strip_whitespace(html_content)}
+        return {
+            **(tokens["metadata"]),
+            "content": html.unescape(html_content.decode()),
+        }

-    except Exception as e:
+    except Exception as err:
         raise HTTPException(
-            status_code=500, detail="Error! Wrong fb2 file format: " + str(e)
-        )
+            status_code=500, detail="Error! Wrong fb2 file format: " + str(err)
+        ) from err


-def fb22tokens(file: SpooledTemporaryFile) -> Document_Tokens:
-    """
+def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
+    r"""
     Parses fb2 file as xml document.
     It puts book metadata, its content and media to `tokens` dictionary and returns it.
@@ -70,11 +77,12 @@ def fb22tokens(file: SpooledTemporaryFile) -> Document_Tokens:
     metadata = {}
     metadata["title"] = book_info.find("./book-title", namespaces).text
     metadata["author"] = get_author(book_info.find("./author", namespaces))
-    metadata["cover"] = get_cover(book_info.find("./coverpage", namespaces))
+    metadata["cover"] = get_cover(
+        book_info.find("./coverpage", namespaces))
     if "cover" not in metadata.keys():
         metadata.pop("cover")

-    if len(metadata.keys()):
+    if len(metadata.keys()) != 0:
         tokens["metadata"] = metadata.copy()

     # Reading book content
@@ -96,7 +104,6 @@ def fb22tokens(file: SpooledTemporaryFile) -> Document_Tokens:


 def get_author(author: Element) -> str:
-
     """
     Converts author xml structure to string
     """
@@ -107,9 +114,9 @@ def get_author(author: Element) -> str:
         "middle-name",
         "last-name",
     ):
-        el = author.find("./" + tag_name, namespaces)
-        if el is not None:
-            res.append(el.text)
+        tag = author.find("./" + tag_name, namespaces)
+        if tag is not None and tag.text is not None:
+            res.append(tag.text)
     if len(res) == 0:
         res = author.find("./nickname", namespaces).text
     else:
@@ -119,7 +126,6 @@ def get_author(author: Element) -> str:


 def get_cover(coverpage: Optional[Element]) -> Optional[str]:
-
     """
     Extracts cover image id if exists
     """
@@ -127,8 +133,11 @@ def get_cover(coverpage: Optional[Element]) -> Optional[str]:
     if coverpage:
         return coverpage.find("./image", namespaces).get(HREF)

+    return None
+

-def set_cover(tokens: Document_Tokens) -> None:
+def set_cover(tokens: DocumentTokens) -> None:
+    """Gets cover from book and sets it in metadata"""
     cover = tokens["metadata"]["cover"]
     if cover is None:
         tokens["metadata"]["cover"] = "none"
@@ -136,34 +145,32 @@ def set_cover(tokens: Document_Tokens) -> None:
         tokens["metadata"]["cover"] = tokens[cover[1:]]


-def fb2body2html(tokens: Document_Tokens) -> str:
+def fb2body2html(tokens: DocumentTokens) -> str:

     """
     Convert fb2 xml to html, joins bodies into one string
     """

     res = b""

-    xml_root = ET.fromstring(tokens["content"])
+    xml_root = ET.fromstring(strip_whitespace(tokens["content"]))
     for body in xml_root.iterfind("./body"):
         res += process_section(body, tokens)

     return res


-def process_section(body: Element, tokens: Document_Tokens) -> str:
+def process_section(body: Element, tokens: DocumentTokens) -> str:

     """
     Processes individual sections, recursively goes throw sections tree
     """

     res = b"<section>\n"

-    for tag in ("title", "epigraph", "annotation"):
-        el = body.find("./" + tag)
-        if el:
-            process_content(el, tokens)
-            res += children_to_html(el)
+    for tag_name in ("title", "epigraph", "annotation"):
+        tag = body.find("./" + tag_name)
+        if tag:
+            process_content(tag, tokens)
+            res += children_to_html(tag)
     image = body.find("./image")
     if image:
         process_image(image, tokens)
@@ -180,7 +187,6 @@ def process_section(body: Element, tokens: Document_Tokens) -> str:


 def children_to_html(root: Element) -> str:
-
     """
     Converts xml tag children to string
     """
@@ -193,18 +199,17 @@ def children_to_html(root: Element) -> str:
     return res


-def process_image(el: Element, tokens: Document_Tokens) -> None:
-    """
+def process_image(element: Element, tokens: DocumentTokens) -> None:
+    r"""
     Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
     """

-    el.tag = "img"
+    element.tag = "img"

-    href = el.get(HREF)
-    el.attrib.pop(HREF)
+    href = element.get(HREF)
+    element.attrib.pop(HREF)

-    el.set("src", tokens[href[1:]] if href[0] == "#" else href)
+    element.set("src", tokens[href[1:]] if href[0] == "#" else href)


 tag_replacement = {
@@ -219,15 +224,12 @@ tag_with_class = {
     "cite": "div",
     "poem": "div",
     "stanza": "div",
-    "poem": "div",
-    "poem": "div",
     "epigraph": "div",
     "text-author": "p",
 }


-def process_content(root: Element, tokens: Document_Tokens) -> None:
+def process_content(root: Element, tokens: DocumentTokens) -> None:

     """
     Converts fb2 xml tag names to html equivalents and my own styled elements.
     Resolves binary data dependencies

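Note: the `process_image` rewrite above renames `el` to `element` but keeps the core transformation: fb2 books carry binaries in `<binary id="...">` elements, `fb22tokens` stores them keyed by id, and `xlink:href="#id"` references are swapped for inline base64 data. A standalone sketch of that swap (the token value is fabricated):

```python
import xml.etree.ElementTree as ET

HREF = "{http://www.w3.org/1999/xlink}href"

# tokens normally comes from fb22tokens(); this stub stands in for a parsed book
tokens = {"cover.jpg": "data:image/jpeg;base64,/9j/4AAQ..."}

el = ET.Element("image", {HREF: "#cover.jpg"})

# Same transformation as process_image in the diff
el.tag = "img"
href = el.get(HREF)
el.attrib.pop(HREF)
el.set("src", tokens[href[1:]] if href[0] == "#" else href)

print(ET.tostring(el).decode())  # <img src="data:image/jpeg;base64,/9j/4AAQ..." />
```
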
app/main.py: 59 changed lines

@@ -1,23 +1,68 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException
+"""Webserver for epub and fb2 files convertation to html"""
+
+from datetime import datetime
+
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel  # pylint: disable=no-name-in-module

 from .epub import epub2html
 from .fb2 import fb22html
-from .utils import HTMLBook
+from .utils import HashedHTMLBook, add_hash

+origins = (
+    "*"
+)
+
+
+class DebugInfo(BaseModel):  # pylint: disable=too-few-public-methods
+    """Main handler return types"""
+
+    startup_time: str
+
+
 app = FastAPI()

+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
-@app.get("/")
+start_time = datetime.now()
+
+
+@app.get("/", response_model=DebugInfo)
 def root():
-    return "Hello, World!"
+    """
+    Test if server is running.
+
+    Returns startup time
+    """
+    return {"startup_time": start_time.isoformat()}


-@app.post("/uploadfile/", response_model=HTMLBook)
+@app.post("/uploadfile/", response_model=HashedHTMLBook)
 async def create_upload_file(file: UploadFile = File(...)):
+    """
+    Main api handler:
+
+    Accepts files with fb2 and epub extensions
+
+    Returns HTTP 415 error if file has unsupported format
+
+    Else returns object with book metadata and its html
+    """
     if file.filename.endswith(".fb2"):
         content = await fb22html(file.file)
     elif file.filename.endswith(".epub"):
         content = await epub2html(file.file)
     else:
-        raise HTTPException(status_code=415, detail="Error! Unsupported file type")
-    return content
+        raise HTTPException(
+            status_code=415, detail="Error! Unsupported file type")
+
+    h_content = add_hash(content)
+
+    return h_content

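Note: with the CORS middleware and response models in place, the upload handler can be exercised with any HTTP client. A hypothetical call using `requests` (the port and the `requests` dependency are assumptions, not part of this diff):

```python
import requests

# Upload an epub to a locally running instance of the service
with open("book.epub", "rb") as f:
    resp = requests.post(
        "http://localhost:8081/uploadfile/",
        files={"file": ("book.epub", f)},
    )

resp.raise_for_status()
book = resp.json()
# HashedHTMLBook fields: title, author, cover, content, hash
print(book["title"], book["hash"])
```
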
app/utils.py: 49 changed lines

@@ -1,27 +1,58 @@
-from typing import Union, Optional
-from pydantic import BaseModel
+"""
+Utils for publite_backend module
+"""

 import re
+from hashlib import sha256
+from typing import Optional, Union

-Document_Tokens = dict[str, Union[str, dict[str, str]]]
+from pydantic import BaseModel  # pylint: disable=no-name-in-module
+
+DocumentTokens = dict[str, Union[str, dict[str, str]]]


-class HTMLBook(BaseModel):
+class HTMLBook(BaseModel):  # pylint: disable=too-few-public-methods
+    """Transitional model for returned book data"""
+
     title: str
     author: str
-    cover: Optional[str]
+    cover: Optional[str] = None
     content: str


+class HashedHTMLBook(HTMLBook):  # pylint: disable=too-few-public-methods
+    """Model for returned book data with content hash"""
+
+    hash: str
+
+
 replacements = [
-    (" ", "\r"),
-    (">\s+?<", "><"),
+    (" ", ""),
+    ("", ""),
+    (r">\s+?<", "><"),
 ]


-def strip_whitespace(s: bytes) -> str:
-    res = s.decode()
+def strip_whitespace(string: bytes) -> str:
+    """Removes"""
+
+    res = string.decode()

     for old, new in replacements:
         res = re.sub(old, new, res)

     return res.strip()
+
+
+def add_hash(content: HTMLBook) -> HashedHTMLBook:
+    """
+    Adds hash of book content
+    """
+
+    h_content: HashedHTMLBook = content.copy()
+    h_content["hash"] = sha256(content["content"].encode()).hexdigest()
+
+    return h_content

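Note: `add_hash` exists so clients can cache converted books by content; the sha256 of the html `content` field travels with the response. The hashing step in isolation, over a plain dict with made-up sample data:

```python
from hashlib import sha256

content = {"title": "Example", "author": "Anon", "content": "<p>Hello</p>"}

# Copy the book data and attach a digest of its content field
h_content = content.copy()
h_content["hash"] = sha256(content["content"].encode()).hexdigest()

print(h_content["hash"])  # stable for identical content, changes with any edit
```
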
requirements.txt: 23 changed lines

@@ -1,23 +1 @@
-aiofiles==0.7.0
-appdirs==1.4.4
-asgiref==3.4.0
-black==21.6b0
-click==8.0.1
-EbookLib==0.17.1
-fastapi==0.65.2
-flake8==3.9.2
-h11==0.12.0
-lxml==4.6.3
-mccabe==0.6.1
-mypy-extensions==0.4.3
-pathspec==0.8.1
-pycodestyle==2.7.0
-pydantic==1.8.2
-pyflakes==2.3.1
-python-multipart==0.0.5
-regex==2021.7.1
-six==1.16.0
-starlette==0.14.2
-toml==0.10.2
-typing-extensions==3.10.0.0
-uvicorn==0.14.0
+-r requirements/prod.txt

requirements/dev.txt: new file, 4 lines

@@ -0,0 +1,4 @@
+-r prod.txt
+pylint
+rope
+black

requirements/prod.txt: new file, 7 lines

@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+aiofiles
+ebooklib
+python-multipart
+lxml
+pydantic

run.py: new file, 4 lines

@@ -0,0 +1,4 @@
+import uvicorn
+
+if __name__ == "__main__":
+    uvicorn.run("app.main:app")

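Note: `run.py` backs the README's "run with python script" path and uses uvicorn's defaults (127.0.0.1:8000). A variant pinned to the Docker port would look like this; the `host`/`port` values are an illustration, not part of this commit:

```python
import uvicorn

if __name__ == "__main__":
    # host/port are standard uvicorn.run parameters; 8081 mirrors the
    # Dockerfile's fallback port
    uvicorn.run("app.main:app", host="0.0.0.0", port=8081)
```
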
vercel.json: new file, 5 lines

@@ -0,0 +1,5 @@
+{
+    "rewrites": [
+        { "source": "/(.*)", "destination": "/api/main"}
+    ]
+}