Refactored code and added server upload
This commit is contained in:
parent
9def092281
commit
2841f29d2d
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
venv/
|
venv/
|
||||||
tmp_*
|
tmp_*
|
||||||
.vscode/
|
.vscode/
|
||||||
|
__pycache__/
|
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
145
src/app.py
145
src/app.py
@ -1,131 +1,76 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
''' Main module '''
|
||||||
|
|
||||||
import re
|
|
||||||
import requests
|
|
||||||
import sys
|
|
||||||
import getopt
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from get_args import parse_args
|
||||||
def download_and_save(url, out_dir):
|
from article_process import get_article_title, get_paths, replace_article_paths, rm_first_line
|
||||||
|
from netw_ops import download_and_save, upload_to_server, add_to_list_on_server
|
||||||
file = requests.get(url)
|
|
||||||
|
|
||||||
out_path = './' + out_dir + url.split('/')[-1]
|
|
||||||
|
|
||||||
try:
|
|
||||||
open(out_path, 'wb').write(file.content)
|
|
||||||
return out_path
|
|
||||||
except:
|
|
||||||
print('Couldn\'t open file for write')
|
|
||||||
raise 'Couldn\'t open file for write'
|
|
||||||
|
|
||||||
|
|
||||||
def get_paths(string):
|
|
||||||
img_reg = re.compile(r'!\[.*?\]\(.*?\)')
|
|
||||||
path_reg = re.compile(r'(?<=\()http[s]{0,1}.*?(?=\))')
|
|
||||||
|
|
||||||
imgs = img_reg.findall(string)
|
|
||||||
|
|
||||||
paths = []
|
|
||||||
|
|
||||||
for img in imgs:
|
|
||||||
res = path_reg.search(img)
|
|
||||||
if res:
|
|
||||||
paths.append(res.group())
|
|
||||||
|
|
||||||
return paths
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args(argv):
|
|
||||||
input_file = ''
|
|
||||||
output_directory = ''
|
|
||||||
|
|
||||||
try:
|
|
||||||
opts = getopt.getopt(argv, 'hi:o:')[0]
|
|
||||||
except getopt.GetoptError:
|
|
||||||
print('Usage: ./article_uploader -i <inputfile> -o <output directory>')
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
for opt, arg in opts:
|
|
||||||
if opt == '-h':
|
|
||||||
print('Usage: ./article_uploader -i <inputfile> -o <output directory>')
|
|
||||||
sys.exit()
|
|
||||||
elif opt == '-i':
|
|
||||||
input_file = arg
|
|
||||||
elif opt == '-o':
|
|
||||||
output_directory = arg
|
|
||||||
|
|
||||||
if not (input_file and output_directory):
|
|
||||||
print('Usage: ./article_uploader -i <inputfile> -o <output directory>')
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
return input_file, output_directory
|
|
||||||
|
|
||||||
|
|
||||||
def get_article_header(string):
|
|
||||||
|
|
||||||
header = ''
|
|
||||||
|
|
||||||
if string[0] == '#':
|
|
||||||
header = string.split('\n')[0]
|
|
||||||
while header[0] == '#' or header[0] == ' ':
|
|
||||||
header = header[1:]
|
|
||||||
header += '.md'
|
|
||||||
|
|
||||||
return header
|
|
||||||
|
|
||||||
|
|
||||||
def replace_article_paths(string, orig_paths, res_paths):
|
|
||||||
for i in range(len(res_paths)):
|
|
||||||
string = string.replace(orig_paths[i], (res_paths[i]).split('/')[-1])
|
|
||||||
|
|
||||||
return string
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
input_file, output_directory = parse_args(argv)
|
'''Main function'''
|
||||||
|
|
||||||
|
input_file = ''
|
||||||
|
server_cred = ''
|
||||||
|
output_directory = ''
|
||||||
|
|
||||||
|
res = parse_args(argv)
|
||||||
|
|
||||||
|
if len(res) == 2:
|
||||||
|
input_file, output_directory = res # pylint: disable=unbalanced-tuple-unpacking
|
||||||
|
elif len(res) == 3:
|
||||||
|
input_file, server_cred, output_directory = res
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ifile = open(input_file, "r")
|
ifile = open(input_file, "r")
|
||||||
except:
|
except IOError as ex:
|
||||||
print("Couldn't open file")
|
print("Couldn't open input file")
|
||||||
|
print(ex)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
string = ifile.read()
|
text = ifile.read()
|
||||||
|
|
||||||
if output_directory[-1] != '/':
|
if output_directory[-1] != '/':
|
||||||
output_directory += '/'
|
output_directory += '/'
|
||||||
|
|
||||||
article_filename = get_article_header(string)
|
article_filename = get_article_title(text)
|
||||||
|
|
||||||
if not article_filename:
|
if not article_filename:
|
||||||
article_filename = input_file.split('/')[-1]
|
article_filename = input_file.split('/')[-1]
|
||||||
|
else:
|
||||||
|
text = rm_first_line(text)
|
||||||
|
|
||||||
article_path = output_directory + article_filename.split('.')[0] + '/'
|
article_folder = article_filename.split('.')[0] + '/'
|
||||||
|
|
||||||
if not os.path.exists(article_path):
|
if not os.path.exists(article_folder):
|
||||||
os.makedirs(article_path)
|
os.makedirs(article_folder)
|
||||||
|
|
||||||
paths = get_paths(string)
|
paths = get_paths(text)
|
||||||
|
|
||||||
res_paths = []
|
res_paths = []
|
||||||
|
|
||||||
try:
|
for url in paths:
|
||||||
for url in paths:
|
try:
|
||||||
try:
|
res_paths.append(download_and_save(url, article_folder))
|
||||||
res_paths.append(download_and_save(url, article_path))
|
except Exception as ex:
|
||||||
except:
|
paths.remove(url)
|
||||||
paths.remove(url)
|
print("Couldn't process image:", ex, '\nurl:', url)
|
||||||
print("Couldn't process image" + url)
|
raise "Couldn't process image"
|
||||||
|
|
||||||
string = replace_article_paths(string, paths, res_paths)
|
text = replace_article_paths(text, paths, res_paths)
|
||||||
|
|
||||||
open(article_path + article_filename, "w").write(string)
|
open(article_folder + article_filename, "w").write(text)
|
||||||
|
|
||||||
except:
|
if server_cred:
|
||||||
print("Couldn't process article")
|
upload_to_server(server_cred, article_folder, output_directory)
|
||||||
sys.exit(2)
|
|
||||||
|
shutil.rmtree(article_folder)
|
||||||
|
|
||||||
|
add_to_list_on_server(
|
||||||
|
server_cred, article_folder, output_directory)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
52
src/article_process.py
Normal file
52
src/article_process.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
'''Provides some article content operations'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def get_paths(string):
    '''Gets remote image paths in article

    Scans ``string`` for Markdown image tags (``![alt](target)``) and returns
    the targets that are http(s) URLs, in order of appearance (duplicates are
    kept). Only the link target is inspected, so a parenthesised URL inside
    the alt text is never mistaken for the image location, and an optional
    Markdown title (``![alt](url "title")``) is left out of the result.
    '''

    # Group 1 is the link target: everything between "](" and the next ")".
    img_reg = re.compile(r'!\[.*?\]\((.*?)\)')

    paths = []

    for target in img_reg.findall(string):
        # The URL is the first whitespace-separated token of the target;
        # whatever follows it is the optional title.
        parts = target.split()
        if parts and parts[0].startswith('http'):
            paths.append(parts[0])

    return paths
|
||||||
|
|
||||||
|
|
||||||
|
def rm_first_line(string):
    '''Removes first line from string

    Returns everything that follows the first newline. A string consisting of
    a single line (no newline at all) yields an empty string, because its
    only line is the one being removed.
    '''

    return string.partition('\n')[2]
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_title(string):
    '''Gets article title

    When the article starts with a Markdown heading (``#``), returns a file
    name built from the heading text: leading ``#`` markers and spaces are
    stripped, trailing whitespace is dropped, remaining spaces become
    underscores and ``.md`` is appended.

    Returns an empty string when the article is empty, does not start with a
    heading, or the heading carries no text.
    '''

    if not string.startswith('#'):
        return ''

    first_line = string.split('\n')[0]
    # lstrip removes any mix of '#' and ' ' and is safe on an all-marker line.
    title = first_line.lstrip('# ').rstrip()
    if not title:
        return ''

    return title.replace(' ', '_') + '.md'
|
||||||
|
|
||||||
|
|
||||||
|
def replace_article_paths(string, orig_paths, res_paths):
    '''Swaps every original web link for the link of its downloaded copy

    ``res_paths[n]`` is the saved location for ``orig_paths[n]``. The first
    two characters of each saved location are dropped and the remainder is
    prefixed with ``/articles/`` to form the new link. Each shortened
    location is also printed.
    '''

    result = string

    for position, saved in enumerate(res_paths):
        relative = saved[2:]
        print(relative)
        new_link = '/articles/' + relative
        result = result.replace(orig_paths[position], new_link)

    return result
|
78
src/get_args.py
Normal file
78
src/get_args.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
''' Module to get arguments passed into Main module'''
|
||||||
|
|
||||||
|
import getopt
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def read_cfg_file(path):
    '''Reads config file

    The file holds ``key=value`` lines. ``output`` (or ``path``, the name
    shown in the usage text) sets the output directory and ``host`` sets the
    server credentials; every other line is ignored. Keys and values are
    stripped of surrounding whitespace.

    Returns an ``(output_directory, server_cred)`` tuple. Prints a message
    and exits with status 2 when either value is missing or empty.
    '''

    output_directory = ''
    server_cred = ''

    with open(path, 'r') as cfg:
        for line in cfg.read().split('\n'):
            # Split on the first '=' only, so values may contain '=' and
            # lines without '=' simply produce an empty value.
            key, _, value = line.partition('=')
            key = key.strip()
            if key in ('output', 'path'):
                output_directory = value.strip()
            elif key == 'host':
                server_cred = value.strip()

    if not (output_directory and server_cred):
        print("Config file must define both output and host")
        sys.exit(2)

    return output_directory, server_cred
|
||||||
|
|
||||||
|
|
||||||
|
def usage():
    '''Prints usage instructions'''

    # Built from separate lines so no stray quote or indentation leaks into
    # the printed text. The config key shown is 'output', the key the config
    # reader looks for.
    print('\n'.join((
        'Usage: ./article_uploader -i <inputfile> -o <output directory>',
        'or ./article_uploader -i <inputfile> -u -o <output_directory> '
        '-s <server username and hostname in username@hostname notation>',
        'or ./article_uploader -i <inputfile> -u -c <configuration file>',
        'with configuration file such as',
        'output=<output path on server>',
        'host=<server username and hostname in username@hostname notation>',
    )))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv):
    '''Parses arguments provided by user

    Recognised options:
      -h          print usage and exit
      -i <file>   input article (required)
      -o <dir>    output directory
      -u          upload to server
      -s <cred>   server credentials
      -c <file>   configuration file supplying output directory and
                  server credentials

    Without -u an output directory is required. With -u both the output
    directory and the server credentials are required; whichever is missing
    on the command line is taken from the configuration file.

    Returns ``(input_file, server_cred, output_directory)`` when both server
    credentials and output directory are known, otherwise
    ``(input_file, output_directory)``. Prints usage and exits with status 2
    on invalid or incomplete arguments.
    '''

    input_file = ''
    output_directory = ''
    upload_to_server = False
    server_cred = ''
    cfg_path = ''

    try:
        opts = getopt.getopt(argv, 'hi:o:us:c:')[0]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt == '-i':
            input_file = arg
        elif opt == '-o':
            output_directory = arg
        elif opt == '-u':
            upload_to_server = True
        elif opt == '-s':
            server_cred = arg
        elif opt == '-c':
            cfg_path = arg

    if not input_file:
        usage()
        sys.exit(2)

    if upload_to_server:
        if not (server_cred and output_directory):
            if not cfg_path:
                usage()
                sys.exit(2)
            cfg_output, cfg_cred = read_cfg_file(cfg_path)
            # Values given explicitly on the command line take precedence.
            output_directory = output_directory or cfg_output
            server_cred = server_cred or cfg_cred
    elif not output_directory:
        usage()
        sys.exit(2)

    if server_cred and output_directory:
        return input_file, server_cred, output_directory

    return input_file, output_directory
|
71
src/netw_ops.py
Normal file
71
src/netw_ops.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
'''Provides network related operations'''
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def download_and_save(url, out_dir, timeout=30):
    '''Downloads file from url and saves it into out_dir

    url     -- address of the file; the text after its last '/' becomes the
               file name
    out_dir -- target directory relative to the current one; it is
               concatenated as-is, so it is expected to end with '/'
    timeout -- seconds to wait on the server before giving up

    Returns the path of the saved file ('./' + out_dir + file name).

    Raises a requests exception when the download fails, times out or the
    server answers with an error status, and IOError when the file cannot
    be written.
    '''

    response = requests.get(url, timeout=timeout)
    # Do not store an HTTP error page in place of the requested file.
    response.raise_for_status()

    out_path = './' + out_dir + url.split('/')[-1]

    try:
        with open(out_path, 'wb') as out_file:
            out_file.write(response.content)
    except IOError as ex:
        print(ex)
        # Raising a plain string is a TypeError in Python 3; raise a real
        # exception and keep the original cause attached.
        raise IOError('Couldn\'t open file for write') from ex

    return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def scp_wrap(recursively, from_path, to_path):
    '''Downloads/uploads files from/to server using scp

    recursively -- when true, '-r' is passed to scp
    from_path   -- source argument handed to scp
    to_path     -- destination argument handed to scp

    Blocks until scp finishes. Raises subprocess.CalledProcessError when scp
    exits with a non-zero status, so callers do not carry on after a failed
    transfer.
    '''

    command = ["scp"]
    if recursively:
        command.append("-r")
    command += [from_path, to_path]

    # run() waits for the child itself and check=True surfaces failures.
    subprocess.run(command, check=True)
|
||||||
|
|
||||||
|
|
||||||
|
def upload_to_server(server_cred, local_path, server_path):
    '''Copies the local folder to the server recursively through scp_wrap

    The destination handed to scp_wrap is server_cred and server_path joined
    with a colon.
    '''

    remote_target = ':'.join((server_cred, server_path))
    scp_wrap(True, local_path, remote_target)
|
||||||
|
|
||||||
|
|
||||||
|
def _refresh_article_entry(lines, article_name, timestamp):
    '''Returns the non-empty lines with article_name's entry stamped anew'''

    refreshed = []
    found = False

    for line in lines:
        # An entry is '<timestamp> <name>'; compare the name field itself so
        # an article whose name merely contains article_name is not touched.
        rest = line.partition(' ')[2]
        if rest == article_name or rest.startswith(article_name + ' '):
            line = timestamp + ' ' + rest
            found = True
        if line:
            refreshed.append(line)

    if not found:
        refreshed.append(timestamp + ' ' + article_name)

    return refreshed


def add_to_list_on_server(server_cred, local_path, server_path):
    '''Reads list of articles on server and add new article to it

    Fetches 'list.db' from server_path on the server into the current
    directory, gives the entry named after local_path (trailing '/' removed)
    the current Unix time, appending the entry when it is not listed yet,
    and uploads the file back. Empty lines are dropped from the list.
    '''

    article_name = local_path.rstrip('/')
    remote_dir = server_cred + ':' + server_path
    timestamp = str(int(time.time()))

    scp_wrap(False, remote_dir + 'list.db', './')

    with open('list.db', 'r+') as articles_list_file:
        lines = articles_list_file.read().split('\n')
        articles_list = '\n'.join(
            _refresh_article_entry(lines, article_name, timestamp))

        articles_list_file.seek(0)
        articles_list_file.write(articles_list)
        # The rewritten list can be shorter than the old content; cut off
        # whatever would otherwise remain past the new end.
        articles_list_file.truncate()

    scp_wrap(False, 'list.db', remote_dir)
|
Loading…
x
Reference in New Issue
Block a user