Compare commits


8 Commits
v1.0.0 ... main

SHA1        Date                        Message
f80673ade2  2023-09-18 15:38:58 +03:00  Deployed to vercel
87e5a16a06  2023-09-17 16:40:26 +03:00  Fixed empty cover error
                                        Updated deployment instructions
                                        Switched to alpine python image
ca0a10e7b7  2022-10-01 11:30:40 +03:00  Updated container port setting
a1a4d15e4e  2021-07-17 23:41:35 +03:00  Added cors
dcab64c78d  2021-07-16 15:34:26 +05:00  Removed html escaping of book content
d2adf23936  2021-07-08 12:49:59 +05:00  Add TODO section to readme
5b4a4cc75d  2021-07-07 21:12:36 +05:00  Update README.md
a52520c4e2  2021-07-07 19:26:48 +05:00  Refactored dependencies description, switched from flake8 to pylint in format gh action
15 changed files with 104 additions and 78 deletions

GitHub Actions workflow (lint and format)

@@ -19,12 +19,10 @@ jobs:
       - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install flake8 black
-      - name: Lint with flake8
-        run: |
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+          pip install -r requirements/dev.txt
+      - name: Lint with pylint
+        run: pylint app --extension-pkg-allow-list=lxml
       - name: Format with black
        run: black .
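
The updated workflow's checks can also be run locally before pushing; a minimal sketch using only the commands from the diff above:

```bash
# install the dev dependencies the workflow now uses
pip install -r requirements/dev.txt

# lint the app package, allowing the lxml C extension (as in the workflow)
pylint app --extension-pkg-allow-list=lxml

# apply black formatting
black .
```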

.gitignore (vendored)

@@ -1,3 +1,4 @@
 .venv
 __pycache__/
 .vscode
+.vercel

Dockerfile

@@ -1,13 +1,11 @@
-FROM python
+FROM python:alpine
 WORKDIR /srv
-COPY ./requirements.txt /srv/requirements.txt
-RUN pip install -r requirements.txt
-EXPOSE 80
+COPY ./requirements /srv/requirements
+RUN pip install -r requirements/prod.txt
 COPY ./app /srv/app
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
+CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8081}
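
With the new shell-form CMD the container reads its listen port from the PORT environment variable and falls back to 8081. A minimal sketch of running the image under that assumption (image tag taken from the README):

```bash
# build the image
docker build . -t publite_backend

# map the default port 8081 to the host
docker run -p 8080:8081 publite_backend

# or override the port via PORT
docker run -e PORT=9000 -p 9000:9000 publite_backend
```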

README.md

@@ -10,19 +10,54 @@ Backend for online ebook viewer publite
 ## Deploy
+Run app locally (development only!)
+```bash
+# install requirements
+pip install -r requirements/dev.txt
+# run app with uvicorn
+uvicorn app.main:app --reload --port <port>
+```
+Run app locally (test prod)
+```bash
+# install requirements
+pip install -r requirements/prod.txt
+# run app with uvicorn
+uvicorn app.main:app --port <port>
+# or
+# run with python script
+python run.py
+```
 Simple docker deployment
 ```bash
 # build docker image
 docker build . -t publite_backend
 # run it with docker
-docker run -p <port>:80 publite_backend
+docker run -p <port>:8081 publite_backend
 ```
 Dokku deployment with image from Docker Hub
 ```bash
 dokku apps:create publitebackend
+# increase file size limit to be able to upload bigger books
+dokku nginx:set publitebackend client_max_body_size 50m
 dokku git:from-image publitebackend publite/backend:latest
 ```
+# TODO
+- Separate epub and fb2 files to python modules
+- Rewrite own `.opf` file parsing to get rid of dependency on EbookLib
+- Add cli interfaces for epub and fb2 libs

api (new symbolic link)

@@ -0,0 +1 @@
+app

app/__init__.py (new empty file)

app/epub.py

@@ -2,19 +2,19 @@
 Module for EPUB file conversion to html
 """
-from base64 import b64encode
-from functools import cache
 import html
 import os
+from base64 import b64encode
+from functools import cache
 from tempfile import SpooledTemporaryFile
-import aiofiles as aiof
-from fastapi import HTTPException
-from lxml import etree
+import aiofiles
 import ebooklib
 from ebooklib import epub
+from fastapi import HTTPException
+from lxml import etree
-from .utils import DocumentTokens, strip_whitespace, HTMLBook
+from .utils import DocumentTokens, HTMLBook, strip_whitespace
 parser = etree.XMLParser(recover=True)
@@ -61,7 +61,7 @@ async def epub_to_tokens(
     tokens = {}
-    async with aiof.tempfile.NamedTemporaryFile() as tmp:
+    async with aiofiles.tempfile.NamedTemporaryFile() as tmp:
         await tmp.write(file.read())
         # Reading book file
@@ -108,7 +108,6 @@ async def epub_to_tokens(
 def read_metadata(book: epub.EpubBook) -> dict[str, str]:
     """
     Reads metadata from xml to dict
     """
@@ -121,7 +120,6 @@ def read_metadata(book: epub.EpubBook) -> dict[str, str]:
 def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
     """
     Joins titles list to one string
     """
@@ -134,7 +132,6 @@ def convert_list(titles_list: list[tuple[str, dict[str, str]]]) -> str:
 def set_cover(tokens: DocumentTokens) -> None:
     """
     Converts cover file name to base64 image stored in `tokens`
     """
@@ -145,7 +142,6 @@ def set_cover(tokens: DocumentTokens) -> None:
 def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
     """
     Joins chapters in `spice` to one html string
     """
@@ -157,11 +153,10 @@ def epub_tokens2html(spine: list[tuple[str, str]], tokens: DocumentTokens) -> bytes:
         if file_path:
             res += process_xhtml(file_path, tokens)
-    return html.escape(html.unescape(res))
+    return html.unescape(res)
 def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
     """
     Processes content of one xml body
     """
@@ -179,7 +174,6 @@ def process_xhtml(path: str, tokens: DocumentTokens) -> bytes:
 def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
     """
     Recursive function for xml element convertion to valid html
     """
@@ -219,7 +213,6 @@ def process_content(node: etree.Element, path: str, tokens: DocumentTokens) -> None:
 def process_a_element(node: etree.Element, path: str):
     r"""
     Converts `filed` links to ids in \<a\> element
     """
@@ -237,7 +230,6 @@ def process_a_element(node: etree.Element, path: str):
 def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
     """
     Replaces file paths to base64 encoded media in `src` and `srcset` tags
     """
@@ -256,7 +248,6 @@ def process_media_element(node: etree.Element, path: str, tokens: DocumentTokens):
 def rel_to_abs_path(parent: str, rel: str):
     """
     Helper for relative path to media convertion to absolute
     """
@@ -266,7 +257,6 @@ def rel_to_abs_path(parent: str, rel: str):
 @cache
 def path_to_name(path: str) -> str:
     """
     Helper function for getting file name
     """
@@ -275,7 +265,6 @@ def path_to_name(path: str) -> str:
 def children_to_html(root: etree.Element) -> bytes:
     """
     Converts all xml children of element to string and joins them
     """

app/fb2.py

@@ -2,16 +2,15 @@
 Module for FB2 file conversion to html
 """
-from tempfile import SpooledTemporaryFile
-import xml.etree.ElementTree as ET
-from xml.etree.ElementTree import Element
-from typing import Optional
 import html
+import xml.etree.ElementTree as ET
+from tempfile import SpooledTemporaryFile
+from typing import Optional
+from xml.etree.ElementTree import Element
 from fastapi import HTTPException
-from .utils import DocumentTokens, strip_whitespace, HTMLBook
+from .utils import DocumentTokens, HTMLBook, strip_whitespace
 namespaces = {
     "": "http://www.gribuser.ru/xml/fictionbook/2.0",
@@ -33,7 +32,7 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
         return {
             **(tokens["metadata"]),
-            "content": html.escape(html.unescape(html_content.decode())),
+            "content": html.unescape(html_content.decode()),
         }
     except Exception as err:
@@ -43,7 +42,6 @@ async def fb22html(file: SpooledTemporaryFile) -> HTMLBook:
 def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
     r"""
     Parses fb2 file as xml document.
     It puts book metadata, its content and media to `tokens` dictionary and returns it.
@@ -79,7 +77,8 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
     metadata = {}
     metadata["title"] = book_info.find("./book-title", namespaces).text
     metadata["author"] = get_author(book_info.find("./author", namespaces))
-    metadata["cover"] = get_cover(book_info.find("./coverpage", namespaces))
+    metadata["cover"] = get_cover(
+        book_info.find("./coverpage", namespaces))
     if "cover" not in metadata.keys():
         metadata.pop("cover")
@@ -105,7 +104,6 @@ def fb22tokens(file: SpooledTemporaryFile) -> DocumentTokens:
 def get_author(author: Element) -> str:
     """
     Converts author xml structure to string
     """
@@ -117,7 +115,7 @@ def get_author(author: Element) -> str:
         "last-name",
     ):
         tag = author.find("./" + tag_name, namespaces)
-        if tag is not None:
+        if tag is not None and tag.text is not None:
             res.append(tag.text)
     if len(res) == 0:
         res = author.find("./nickname", namespaces).text
@@ -128,7 +126,6 @@ def get_author(author: Element) -> str:
 def get_cover(coverpage: Optional[Element]) -> Optional[str]:
     """
     Extracts cover image id if exists
     """
@@ -149,7 +146,6 @@ def set_cover(tokens: DocumentTokens) -> None:
 def fb2body2html(tokens: DocumentTokens) -> str:
     """
     Convert fb2 xml to html, joins bodies into one string
     """
@@ -164,7 +160,6 @@ def fb2body2html(tokens: DocumentTokens) -> str:
 def process_section(body: Element, tokens: DocumentTokens) -> str:
     """
     Processes individual sections, recursively goes throw sections tree
     """
@@ -192,7 +187,6 @@ def process_section(body: Element, tokens: DocumentTokens) -> str:
 def children_to_html(root: Element) -> str:
     """
     Converts xml tag children to string
     """
@@ -206,7 +200,6 @@ def children_to_html(root: Element) -> str:
 def process_image(element: Element, tokens: DocumentTokens) -> None:
     r"""
     Converts fb2 \<image /\> to html \<img /\>. Replaces xlink:href with src="\<base64_image_data\>"
     """
@@ -237,7 +230,6 @@ tag_with_class = {
 def process_content(root: Element, tokens: DocumentTokens) -> None:
     """
     Converts fb2 xml tag names to html equivalents and my own styled elements.
     Resolves binary data dependencies

app/main.py

@@ -2,13 +2,18 @@
 from datetime import datetime
-from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel  # pylint: disable=no-name-in-module
 from .epub import epub2html
 from .fb2 import fb22html
 from .utils import HashedHTMLBook, add_hash
+origins = (
+    "*"
+)
 class DebugInfo(BaseModel):  # pylint: disable=too-few-public-methods
     """Main handler return types"""
@@ -18,6 +23,14 @@ class DebugInfo(BaseModel):  # pylint: disable=too-few-public-methods
 app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 start_time = datetime.now()
@@ -47,7 +60,8 @@ async def create_upload_file(file: UploadFile = File(...)):
     elif file.filename.endswith(".epub"):
         content = await epub2html(file.file)
     else:
-        raise HTTPException(status_code=415, detail="Error! Unsupported file type")
+        raise HTTPException(
+            status_code=415, detail="Error! Unsupported file type")
     h_content = add_hash(content)
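
The 415 branch above can be exercised by uploading a file with an unsupported extension; a sketch assuming a hypothetical /uploadfile/ route (the actual path is not shown in this hunk) and a local instance on port 8081:

```bash
# hypothetical route; substitute the real upload path from app/main.py
curl -i -F "file=@notes.txt" http://localhost:8081/uploadfile/
# expected: HTTP 415 with detail "Error! Unsupported file type"
```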

app/utils.py

@@ -3,9 +3,9 @@ Utils for publite_backend module
 """
-from typing import Union, Optional
 import re
 from hashlib import sha256
+from typing import Optional, Union
 from pydantic import BaseModel  # pylint: disable=no-name-in-module
@@ -17,7 +17,7 @@ class HTMLBook(BaseModel):  # pylint: disable=too-few-public-methods
     title: str
     author: str
-    cover: Optional[str]
+    cover: Optional[str] = None
     content: str

requirements.txt

@@ -1,23 +1 @@
-aiofiles==0.7.0
-appdirs==1.4.4
-asgiref==3.4.0
-black==21.6b0
-click==8.0.1
-EbookLib==0.17.1
-fastapi==0.65.2
-flake8==3.9.2
-h11==0.12.0
-lxml==4.6.3
-mccabe==0.6.1
-mypy-extensions==0.4.3
-pathspec==0.8.1
-pycodestyle==2.7.0
-pydantic==1.8.2
-pyflakes==2.3.1
-python-multipart==0.0.5
-regex==2021.7.1
-six==1.16.0
-starlette==0.14.2
-toml==0.10.2
-typing-extensions==3.10.0.0
-uvicorn==0.14.0
+-r requirements/prod.txt

requirements/dev.txt (new file)

@@ -0,0 +1,4 @@
+-r prod.txt
+pylint
+rope
+black

requirements/prod.txt (new file)

@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+aiofiles
+ebooklib
+python-multipart
+lxml
+pydantic

run.py (new file)

@@ -0,0 +1,4 @@
+import uvicorn
+
+if __name__ == "__main__":
+    uvicorn.run("app.main:app")

vercel.json (new file)

@@ -0,0 +1,5 @@
+{
+  "rewrites": [
+    { "source": "/(.*)", "destination": "/api/main"}
+  ]
+}
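
This rewrite sends every request path to /api/main, which resolves to app/main.py through the new api symlink, so the whole FastAPI app is served as a single Vercel function. A sketch of checking a deployment, with the URL as a placeholder:

```bash
# any path is rewritten to /api/main and handled by the FastAPI app,
# e.g. FastAPI's default interactive docs page
curl https://<your-deployment>.vercel.app/docs
```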