Added epub file processing: currently epub file tokenization is ready
This commit is contained in:
parent
54e1dca653
commit
c24a26ab29
4
.gitignore
vendored
4
.gitignore
vendored
@ -1 +1,3 @@
|
||||
.venv
|
||||
.venv
|
||||
__pycache__/
|
||||
.vscode
|
59
app/epub.py
Normal file
59
app/epub.py
Normal file
@ -0,0 +1,59 @@
|
||||
import aiofiles as aiof
|
||||
from base64 import b64encode
|
||||
|
||||
import ebooklib
|
||||
from ebooklib import epub
|
||||
|
||||
from tempfile import SpooledTemporaryFile
|
||||
|
||||
|
||||
async def epub2html(file: SpooledTemporaryFile):
    """
    Convert an uploaded epub file into a single HTML document.

    The epub is first split into tokens via ``epub_to_tokens``. Joining
    those tokens into HTML is not implemented yet, so the returned HTML
    is currently always an empty string.
    """
    # Tokenize up front; the result stays unused until the join step exists.
    tokens = await epub_to_tokens(file)

    # TODO: join tokens to HTML
    html_content = ""
    return html_content
|
||||
|
||||
|
||||
async def epub_to_tokens(file: SpooledTemporaryFile) -> dict[str, str]:
    """
    Parse an epub file into a mapping of item name to embeddable content.

    The upload is copied into a named temporary file because
    ``epub.read_epub`` is called with a file-system path (``tmp.name``).

    The resulting dict has the following format:

        "<file_name>": "<file_content>"

    Where file content is text for document (xhtml) items, or a base64
    ``data:`` URI for cover, image, style and video items, prepared for
    embedding into html. Items of any other type are skipped.

    NOTE: if reading or parsing the epub raises, the exception is not
    propagated; a string starting with ``'Error! Wrong epub file format: '``
    is returned instead of the dict.
    """
    # Item types that are inlined into the page as base64 data URIs.
    embedded_types = (
        ebooklib.ITEM_COVER,
        ebooklib.ITEM_IMAGE,
        ebooklib.ITEM_STYLE,
        ebooklib.ITEM_VIDEO,
    )

    async with aiof.tempfile.NamedTemporaryFile() as tmp:
        await tmp.write(file.read())
        # Flush buffered bytes so the reader opening the file by path below
        # sees the complete archive.
        await tmp.flush()

        try:
            book = epub.read_epub(tmp.name)
            tokens = {}
            for item in book.get_items():
                item_type = item.get_type()
                if item_type in embedded_types:
                    b64_content = b64encode(item.get_content()).decode()
                    tokens[item.get_name()] = (
                        f'data:{item.media_type};base64,{b64_content}'
                    )
                elif item_type == ebooklib.ITEM_DOCUMENT:
                    document = item.get_content()
                    # Store text, not raw bytes, so values match the
                    # documented ``dict[str, str]`` contract.
                    if isinstance(document, (bytes, bytearray)):
                        document = bytes(document).decode('utf-8', errors='replace')
                    tokens[item.get_name()] = document
            return tokens
        except Exception as e:
            return 'Error! Wrong epub file format: ' + str(e)
|
17
app/main.py
17
app/main.py
@ -1,7 +1,20 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi import FastAPI, File, UploadFile
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from .epub import epub2html
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@app.get('/')
def root():
    """Return a fixed greeting for the index route."""
    greeting = "Hello, World!"
    return greeting
|
||||
|
||||
@app.post('/uploadfile/')
async def create_upload_file(file: UploadFile = File(...)):
    """
    Accept an uploaded e-book and respond with its HTML rendering.

    Files whose name ends with ``.epub`` (case-insensitive) are converted
    with ``epub2html``. Any other file yields an error message as the
    response body. The result is always wrapped in an ``HTMLResponse``.
    """
    # Normalise once so '.EPUB' is accepted and a missing name is handled.
    filename = (file.filename or '').lower()

    if filename.endswith('.epub'):
        content = await epub2html(file.file)
    elif filename.endswith('.fb2'):
        # No fb2 converter is imported in this module; calling one here
        # would raise NameError, so report the format as unsupported.
        content = 'Error! fb2 files are not supported yet'
    else:
        content = 'Error! Unsupported file type'

    return HTMLResponse(content=content)
|
@ -1,3 +1,4 @@
|
||||
aiofiles==0.7.0
|
||||
asgiref==3.4.0
|
||||
click==8.0.1
|
||||
EbookLib==0.17.1
|
||||
@ -5,6 +6,7 @@ fastapi==0.65.2
|
||||
h11==0.12.0
|
||||
lxml==4.6.3
|
||||
pydantic==1.8.2
|
||||
python-multipart==0.0.5
|
||||
six==1.16.0
|
||||
starlette==0.14.2
|
||||
typing-extensions==3.10.0.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user