Refactored code and added server upload
This commit is contained in:
parent
9def092281
commit
2841f29d2d
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
venv/
|
||||
tmp_*
|
||||
.vscode/
|
||||
__pycache__/
|
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
145
src/app.py
145
src/app.py
@ -1,131 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
''' Main module '''
|
||||
|
||||
import re
|
||||
import requests
|
||||
import sys
|
||||
import getopt
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
|
||||
def download_and_save(url, out_dir):
    '''Downloads file from url and saves it into out_dir

    The file name is the last '/'-separated component of url and the file is
    written to './' + out_dir + <file name>, so out_dir is expected to end
    with '/'.

    Returns the path of the written file.

    Raises IOError when the file cannot be written and whatever requests
    raises when the download fails (including HTTP error statuses).
    '''

    response = requests.get(url)
    # Without this check an HTTP error page would be saved as the image.
    response.raise_for_status()

    out_path = './' + out_dir + url.split('/')[-1]

    try:
        with open(out_path, 'wb') as out_file:
            out_file.write(response.content)
    except IOError as ex:
        print('Couldn\'t open file for write')
        # Raising a plain string is itself a TypeError in Python 3.
        raise IOError('Couldn\'t open file for write') from ex

    return out_path
|
||||
|
||||
|
||||
def get_paths(string):
    '''Collects the http(s) urls of all markdown images found in string

    Only ![alt](target) constructs are inspected, and only targets that
    start with http or https are returned, in order of appearance.
    '''

    image_pattern = re.compile(r'!\[.*?\]\(.*?\)')
    url_pattern = re.compile(r'(?<=\()http[s]{0,1}.*?(?=\))')

    # One search per image markup; images with a non-http target yield None.
    matches = map(url_pattern.search, image_pattern.findall(string))

    return [match.group() for match in matches if match]
|
||||
|
||||
|
||||
def parse_args(argv):
    '''Extracts the input file and output directory from argv

    Recognised options are -h (print usage and exit), -i <inputfile> and
    -o <output directory>. When an option is invalid or either value is
    missing, usage is printed and the process exits with status 2.

    Returns (input_file, output_directory).
    '''

    usage_text = 'Usage: ./article_uploader -i <inputfile> -o <output directory>'

    try:
        parsed, _ = getopt.getopt(argv, 'hi:o:')
    except getopt.GetoptError:
        print(usage_text)
        sys.exit(2)

    # The last occurrence of an option wins.
    values = {'-i': '', '-o': ''}
    for flag, value in parsed:
        if flag == '-h':
            print(usage_text)
            sys.exit()
        if flag in values:
            values[flag] = value

    input_file = values['-i']
    output_directory = values['-o']

    if not input_file or not output_directory:
        print(usage_text)
        sys.exit(2)

    return input_file, output_directory
|
||||
|
||||
|
||||
def get_article_header(string):
    '''Builds an article file name from a leading markdown heading

    When the first line of string starts with '#', the leading '#' and
    space characters are dropped and '.md' is appended to what remains.

    Returns an empty string when string is empty, when its first line is
    not a heading, or when the heading has no text.
    '''

    first_line = string.split('\n')[0]

    if not first_line.startswith('#'):
        return ''

    # lstrip never indexes into the string, so an empty input or a heading
    # made only of '#' and spaces cannot raise IndexError.
    title = first_line.lstrip('# ')

    if not title:
        return ''

    return title + '.md'
|
||||
|
||||
|
||||
def replace_article_paths(string, orig_paths, res_paths):
    '''Swaps every original path in string for the saved file's name

    orig_paths[i] is replaced by the last '/'-separated component of
    res_paths[i]. Returns the updated string.
    '''

    for idx, saved_path in enumerate(res_paths):
        file_name = saved_path.split('/')[-1]
        string = string.replace(orig_paths[idx], file_name)

    return string
|
||||
from get_args import parse_args
|
||||
from article_process import get_article_title, get_paths, replace_article_paths, rm_first_line
|
||||
from netw_ops import download_and_save, upload_to_server, add_to_list_on_server
|
||||
|
||||
|
||||
def main(argv):
    '''Main function

    Reads the input article, derives the article file name from its title
    (falling back to the input file name), downloads every image returned
    by get_paths into a folder named after the article, rewrites the image
    links, and writes the article into that folder. When server
    credentials were supplied, the folder is uploaded, removed locally and
    the article list on the server is updated.
    '''

    input_file = ''
    server_cred = ''
    output_directory = ''

    res = parse_args(argv)

    if len(res) == 2:
        input_file, output_directory = res  # pylint: disable=unbalanced-tuple-unpacking
    elif len(res) == 3:
        input_file, server_cred, output_directory = res

    try:
        with open(input_file, "r") as ifile:
            text = ifile.read()
    except IOError as ex:
        print("Couldn't open input file")
        print(ex)
        sys.exit(2)

    # endswith is safe for an empty string, unlike indexing with [-1].
    if not output_directory.endswith('/'):
        output_directory += '/'

    article_filename = get_article_title(text)

    if not article_filename:
        article_filename = input_file.split('/')[-1]
    else:
        # The title became the file name; drop it from the body.
        text = rm_first_line(text)

    article_folder = article_filename.split('.')[0] + '/'

    if not os.path.exists(article_folder):
        os.makedirs(article_folder)

    paths = get_paths(text)

    res_paths = []

    for url in paths:
        try:
            res_paths.append(download_and_save(url, article_folder))
        except Exception as ex:
            print("Couldn't process image:", ex, '\nurl:', url)
            # Abort with a real exception: raising a plain string is a
            # TypeError, and removing url from paths while iterating over
            # it would make the loop skip the following element.
            raise RuntimeError("Couldn't process image") from ex

    text = replace_article_paths(text, paths, res_paths)

    with open(article_folder + article_filename, "w") as ofile:
        ofile.write(text)

    if server_cred:
        upload_to_server(server_cred, article_folder, output_directory)

        shutil.rmtree(article_folder)

        add_to_list_on_server(
            server_cred, article_folder, output_directory)
||||
if __name__ == '__main__':
|
||||
|
52
src/article_process.py
Normal file
52
src/article_process.py
Normal file
@ -0,0 +1,52 @@
|
||||
'''Provides some article content operations'''
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def get_paths(string):
    '''Returns the http(s) targets of the markdown images in string

    Every ![alt](target) construct is examined in order of appearance;
    targets that do not start with http or https are skipped.
    '''

    markup_re = re.compile(r'!\[.*?\]\(.*?\)')
    target_re = re.compile(r'(?<=\()http[s]{0,1}.*?(?=\))')

    # search() gives None for images whose target is not an http(s) url.
    hits = (target_re.search(markup) for markup in markup_re.findall(string))

    return [hit.group() for hit in hits if hit]
|
||||
|
||||
|
||||
def rm_first_line(string):
    '''Removes first line from string

    Returns everything after the first newline. A string without any
    newline consists of a single line, so the result is then empty.
    '''

    # partition yields '' as the tail when there is no newline; slicing
    # from find() + 1 would hand the whole string back unchanged.
    return string.partition('\n')[2]
|
||||
|
||||
|
||||
def get_article_title(string):
    '''Gets article title

    When the first line of string starts with '#', the leading '#' and
    space characters are dropped, '.md' is appended and the remaining
    spaces are replaced with underscores.

    Returns an empty string when string is empty, when its first line is
    not a heading, or when the heading has no text.
    '''

    first_line = string.split('\n')[0]

    if not first_line.startswith('#'):
        return ''

    # lstrip never indexes into the string, so an empty input or a heading
    # made only of '#' and spaces cannot raise IndexError.
    title = first_line.lstrip('# ')

    if not title:
        return ''

    return (title + '.md').replace(' ', '_')
|
||||
|
||||
|
||||
def replace_article_paths(string, orig_paths, res_paths):
    '''Replaces all web links with downloaded ones

    Each entry of orig_paths found in string is replaced with '/articles/'
    followed by the matching entry of res_paths, with a leading './'
    removed from the latter. Entries are paired by position; surplus
    entries on either side are ignored.

    Returns the updated string.
    '''

    for orig_path, res_path in zip(orig_paths, res_paths):
        # Only drop the './' prefix when it is really there instead of
        # blindly cutting the first two characters.
        if res_path.startswith('./'):
            res_path = res_path[2:]
        string = string.replace(orig_path, '/articles/' + res_path)

    return string
|
78
src/get_args.py
Normal file
78
src/get_args.py
Normal file
@ -0,0 +1,78 @@
|
||||
''' Module to get arguments passed into Main module'''
|
||||
|
||||
import getopt
|
||||
import sys
|
||||
|
||||
|
||||
def read_cfg_file(path):
    '''Reads config file

    The file holds one key=value pair per line. The 'output' key ('path'
    is accepted as an alias) gives the output directory and the 'host' key
    gives the server credentials. Whitespace around keys and values is
    ignored and lines without '=' are skipped.

    Returns (output_directory, server_cred). Prints a message and exits
    with status 2 when either value is missing.
    '''

    # Start from empty values so a missing key is reported below instead
    # of surfacing as an UnboundLocalError.
    output_directory = ''
    server_cred = ''

    with open(path, 'r') as cfg:
        for line in cfg.read().split('\n'):
            # partition keeps any further '=' inside the value and does
            # not fail on lines that contain no '=' at all.
            key, _, value = line.partition('=')
            key = key.strip()
            value = value.strip()
            if key in ('output', 'path'):
                output_directory = value
            elif key == 'host':
                server_cred = value

    if not (output_directory and server_cred):
        print("No config file provided")
        sys.exit(2)

    return output_directory, server_cred
|
||||
|
||||
|
||||
def usage():
    '''Prints usage instructions'''

    # The configuration key is spelled 'output' because that is the key
    # read_cfg_file looks for.
    print('''Usage: ./article_uploader -i <inputfile> -o <output directory>
    or ./article_uploader -i <inputfile> -u -o <output_directory> -s <server username and hostname in username@hostname notation>
    or ./article_uploader -i <inputfile> -u -c <configuration file>
    with configuration file such as
        output=<output path on server>
        host=<server username and hostname in username@hostname notation>''')
|
||||
|
||||
|
||||
def parse_args(argv):
    '''Parses arguments provided by user

    Options: -h prints usage and exits, -i <input file>,
    -o <output directory>, -u requests upload to a server,
    -s <user@host> and -c <configuration file>.

    Returns (input_file, output_directory) when no upload was requested
    and (input_file, server_cred, output_directory) when -u was given.
    With -u, values missing from -s / -o are read from the configuration
    file named by -c.

    Prints usage and exits with status 2 when an option is invalid, when
    -i is missing, or when the destination cannot be determined.
    '''

    input_file = ''
    output_directory = ''
    upload_to_server = False
    server_cred = ''
    cfg_path = ''

    try:
        opts = getopt.getopt(argv, 'hi:o:us:c:')[0]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt == '-i':
            input_file = arg
        elif opt == '-o':
            output_directory = arg
        elif opt == '-u':
            upload_to_server = True
        elif opt == '-s':
            server_cred = arg
        elif opt == '-c':
            cfg_path = arg

    # Every mode needs an input file.
    if not input_file:
        usage()
        sys.exit(2)

    if upload_to_server:
        if not (server_cred and output_directory):
            # Incomplete command line: the config file must fill the gap.
            if not cfg_path:
                usage()
                sys.exit(2)
            output_directory, server_cred = read_cfg_file(cfg_path)
        return input_file, server_cred, output_directory

    # Local mode only needs somewhere to write to.
    if not output_directory:
        usage()
        sys.exit(2)

    return input_file, output_directory
|
71
src/netw_ops.py
Normal file
71
src/netw_ops.py
Normal file
@ -0,0 +1,71 @@
|
||||
'''Provides network related operations'''
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
|
||||
|
||||
def download_and_save(url, out_dir):
    '''Downloads file from url and saves it into out_dir

    The file name is the last '/'-separated component of url and the file is
    written to './' + out_dir + <file name>, so out_dir is expected to end
    with '/'.

    Returns the path of the written file.

    Raises IOError when the file cannot be written and whatever requests
    raises when the download fails (including HTTP error statuses).
    '''

    response = requests.get(url)
    # Without this check an HTTP error page would be saved as the image.
    response.raise_for_status()

    out_path = './' + out_dir + url.split('/')[-1]

    try:
        with open(out_path, 'wb') as out_file:
            out_file.write(response.content)
    except IOError as ex:
        print(ex)
        # Raising a plain string is itself a TypeError in Python 3.
        raise IOError('Couldn\'t open file for write') from ex

    return out_path
|
||||
|
||||
|
||||
def scp_wrap(recursively, from_path, to_path):
    '''Downloads/uploads files from/to server using scp

    Runs 'scp [-r] from_path to_path' and waits for it to finish; -r is
    passed when recursively is true.

    Raises subprocess.CalledProcessError when scp exits with a non-zero
    status, so callers do not carry on after a failed transfer.
    '''

    command = ["scp"]
    if recursively:
        command.append("-r")
    command.extend([from_path, to_path])

    # run() waits on the child through the subprocess module itself and
    # check=True turns a failed copy into an exception.
    subprocess.run(command, check=True)
|
||||
|
||||
|
||||
def upload_to_server(server_cred, local_path, server_path):
    '''Copies local_path recursively to server_path on the server

    The scp destination is built as server_cred + ':' + server_path.
    '''

    destination = ':'.join((server_cred, server_path))

    scp_wrap(True, local_path, destination)
|
||||
|
||||
|
||||
def add_to_list_on_server(server_cred, local_path, server_path):
    '''Reads list of articles on server and add new article to it

    Downloads server_path + 'list.db' into the current directory, refreshes
    the leading timestamp of every line that contains the article name (or
    appends a new '<timestamp> <article name>' line when none does), drops
    empty lines and uploads the file back to server_path.

    The article name is local_path without its last character.
    '''

    article_name = local_path[:-1]

    scp_wrap(False, server_cred + ':' + server_path + 'list.db', './')

    timestamp = str(int(time.time()))

    with open('list.db', 'r+') as articles_list_file:
        articles_list_s = articles_list_file.read().split('\n')

        is_new_article = True
        for i, val in enumerate(articles_list_s):
            # NOTE(review): substring match, so a name contained in another
            # article's line also refreshes that line - confirm intended.
            if article_name in val:
                line_s = val.split(' ')
                line_s[0] = timestamp
                articles_list_s[i] = ' '.join(line_s)

                is_new_article = False

        if is_new_article:
            articles_list_s.append(timestamp + ' ' + article_name)

        articles_list = '\n'.join(filter(None, articles_list_s))

        articles_list_file.seek(0)
        articles_list_file.write(articles_list)
        # The rewritten list can be shorter than the old content (empty
        # lines are dropped); cut off whatever is left of the old tail.
        articles_list_file.truncate()

    scp_wrap(False, 'list.db', server_cred + ':' + server_path)
|
Loading…
x
Reference in New Issue
Block a user