Compare commits

..

No commits in common. "main" and "v1.0.1" have entirely different histories.
main ... v1.0.1

9 changed files with 33 additions and 44 deletions

3
.gitignore vendored
View File

@ -1,4 +1,3 @@
.venv
__pycache__/
.vscode
.vercel
.vscode

View File

@ -1,4 +1,4 @@
FROM python:alpine
FROM python
WORKDIR /srv
@ -6,6 +6,8 @@ COPY ./requirements /srv/requirements
RUN pip install -r requirements/prod.txt
EXPOSE 80
COPY ./app /srv/app
CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8081}
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]

View File

@ -42,22 +42,12 @@ Simple docker deployment
docker build . -t publite_backend
# run it with docker
docker run -p <port>:8081 publite_backend
docker run -p <port>:80 publite_backend
```
Dokku deployment with image from Docker Hub
```bash
dokku apps:create publitebackend
# increase file size limit to be able to upload bigger books
dokku nginx:set publitebackend client_max_body_size 50m
dokku git:from-image publitebackend publite/backend:latest
```
# TODO
- Separate epub and fb2 parsers into standalone python modules
- Write our own `.opf` file parser to get rid of the dependency on EbookLib
- Add cli interfaces for epub and fb2 libs

1
api
View File

@ -1 +0,0 @@
app

View File

@ -8,7 +8,7 @@ from base64 import b64encode
from functools import cache
from tempfile import SpooledTemporaryFile
import aiofiles
import aiofiles as aiof
import ebooklib
from ebooklib import epub
from fastapi import HTTPException
@ -61,7 +61,7 @@ async def epub_to_tokens(
tokens = {}
async with aiofiles.tempfile.NamedTemporaryFile() as tmp:
async with aiof.tempfile.NamedTemporaryFile() as tmp:
await tmp.write(file.read())
# Reading book file
@ -108,6 +108,7 @@ async def epub_to_tokens(
def read_metadata(book: epub.EpubBook) -> dict[str, str]:
"""
Reads metadata from xml to dict
"""
@ -120,6 +121,7 @@ def read_metadata(book: epub.EpubBook) -> dict[str, str]:
def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
"""
Joins titles list to one string
"""
@ -132,6 +134,7 @@ def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
def set_cover(tokens: DocumentTokens) -> None:
"""
Converts cover file name to base64 image stored in `tokens`
"""
@ -142,6 +145,7 @@ def set_cover(tokens: DocumentTokens) -> None:
def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
"""
Joins chapters in `spine` to one html string
"""
@ -153,10 +157,11 @@ def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> by
if file_path:
res += process_xhtml(file_path, tokens)
return html.unescape(res)
return html.escape(html.unescape(res))
def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
"""
Processes content of one xml body
"""
@ -174,6 +179,7 @@ def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
"""
Recursive function for xml element conversion to valid html
"""
@ -213,6 +219,7 @@ def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> N
def process_a_element(node: etree.Element, path: str):
r"""
Converts `filed` links to ids in \<a\> element
"""
@ -230,6 +237,7 @@ def process_a_element(node: etree.Element, path: str):
def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
"""
Replaces file paths with base64 encoded media in `src` and `srcset` attributes
"""
@ -248,6 +256,7 @@ def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens
def rel_to_abs_path(parent: str, rel: str):
"""
Helper for converting a relative media path to an absolute one
"""
@ -257,6 +266,7 @@ def rel_to_abs_path(parent: str, rel: str):
@cache
def path_to_name(path: str) -> str:
"""
Helper function for getting file name
"""
@ -265,6 +275,7 @@ def path_to_name(path: str) -> str:
def children_to_html(root: etree.Element) -> bytes:
"""
Converts all xml children of element to string and joins them
"""

View File

@ -32,7 +32,7 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
return {
**(tokens["metadata"]),
"content": html.unescape(html_content.decode()),
"content": html.escape(html.unescape(html_content.decode())),
}
except Exception as err:
@ -42,6 +42,7 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
r"""
Parses fb2 file as xml document.
It puts book metadata, its content and media to `tokens` dictionary and returns it.
@ -77,8 +78,7 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
metadata = {}
metadata["title"] = book_info.find("./book-title", namespaces).text
metadata["author"] = get_author(book_info.find("./author", namespaces))
metadata["cover"] = get_cover(
book_info.find("./coverpage", namespaces))
metadata["cover"] = get_cover(book_info.find("./coverpage", namespaces))
if "cover" not in metadata.keys():
metadata.pop("cover")
@ -104,6 +104,7 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
def get_author(author: Element) -> str:
"""
Converts author xml structure to string
"""
@ -115,7 +116,7 @@ def get_author(author: Element) -> str:
"last-name",
):
tag = author.find("./" + tag_name, namespaces)
if tag is not None and tag.text is not None:
if tag is not None:
res.append(tag.text)
if len(res) == 0:
res = author.find("./nickname", namespaces).text
@ -126,6 +127,7 @@ def get_author(author: Element) -> str:
def get_cover(coverpage: Optional[Element]) -> Optional[str]:
"""
Extracts cover image id if exists
"""
@ -146,6 +148,7 @@ def set_cover(tokens: DocumentTokens) -> None:
def fb2body2html(tokens: DocumentTokens) -> str:
"""
Convert fb2 xml to html, joins bodies into one string
"""
@ -160,6 +163,7 @@ def fb2body2html(tokens: DocumentTokens) -> str:
def process_section(body: Element, tokens: DocumentTokens) -> str:
"""
Processes individual sections, recursively goes through the sections tree
"""
@ -187,6 +191,7 @@ def process_section(body: Element, tokens: DocumentTokens) -> str:
def children_to_html(root: Element) -> str:
"""
Converts xml tag children to string
"""
@ -200,6 +205,7 @@ def children_to_html(root: Element) -> str:
def process_image(element: Element, tokens: DocumentTokens) -> None:
r"""
Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
"""
@ -230,6 +236,7 @@ tag_with_class = {
def process_content(root: Element, tokens: DocumentTokens) -> None:
"""
Converts fb2 xml tag names to html equivalents and my own styled elements.
Resolves binary data dependencies

View File

@ -3,17 +3,12 @@
from datetime import datetime
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .epub import epub2html
from .fb2 import fb22html
from .utils import HashedHTMLBook, add_hash
origins = (
"*"
)
class DebugInfo(BaseModel): # pylint: disable=too-few-public-methods
"""Main handler return types"""
@ -23,14 +18,6 @@ class DebugInfo(BaseModel): # pylint: disable=too-few-public-methods
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
start_time = datetime.now()
@ -60,8 +47,7 @@ async def create_upload_file(file: UploadFile = File(...)):
elif file.filename.endswith(".epub"):
content = await epub2html(file.file)
else:
raise HTTPException(
status_code=415, detail="Error! Unsupported file type")
raise HTTPException(status_code=415, detail="Error! Unsupported file type")
h_content = add_hash(content)

View File

@ -17,7 +17,7 @@ class HTMLBook(BaseModel): # pylint: disable=too-few-public-methods
title: str
author: str
cover: Optional[str] = None
cover: Optional[str]
content: str

View File

@ -1,5 +0,0 @@
{
"rewrites": [
{ "source": "/(.*)", "destination": "/api/main"}
]
}