diff --git a/.gitignore b/.gitignore index cec508c..362d16f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ venv/ tmp_* .vscode/ +__pycache__/ \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/app.py b/src/app.py index 975ee8b..7d13e5d 100644 --- a/src/app.py +++ b/src/app.py @@ -1,131 +1,76 @@ #!/usr/bin/env python3 +''' Main module ''' -import re -import requests -import sys -import getopt import os +import sys +import shutil - -def download_and_save(url, out_dir): - - file = requests.get(url) - - out_path = './' + out_dir + url.split('/')[-1] - - try: - open(out_path, 'wb').write(file.content) - return out_path - except: - print('Couldn\'t open file for write') - raise 'Couldn\'t open file for write' - - -def get_paths(string): - img_reg = re.compile(r'!\[.*?\]\(.*?\)') - path_reg = re.compile(r'(?<=\()http[s]{0,1}.*?(?=\))') - - imgs = img_reg.findall(string) - - paths = [] - - for img in imgs: - res = path_reg.search(img) - if res: - paths.append(res.group()) - - return paths - - -def parse_args(argv): - input_file = '' - output_directory = '' - - try: - opts = getopt.getopt(argv, 'hi:o:')[0] - except getopt.GetoptError: - print('Usage: ./article_uploader -i -o ') - sys.exit(2) - - for opt, arg in opts: - if opt == '-h': - print('Usage: ./article_uploader -i -o ') - sys.exit() - elif opt == '-i': - input_file = arg - elif opt == '-o': - output_directory = arg - - if not (input_file and output_directory): - print('Usage: ./article_uploader -i -o ') - sys.exit(2) - - return input_file, output_directory - - -def get_article_header(string): - - header = '' - - if string[0] == '#': - header = string.split('\n')[0] - while header[0] == '#' or header[0] == ' ': - header = header[1:] - header += '.md' - - return header - - -def replace_article_paths(string, orig_paths, res_paths): - for i in range(len(res_paths)): - string = string.replace(orig_paths[i], (res_paths[i]).split('/')[-1]) - - return string +from get_args import parse_args +from article_process import get_article_title, get_paths, replace_article_paths, rm_first_line +from netw_ops import download_and_save, upload_to_server, add_to_list_on_server def main(argv): - input_file, output_directory = parse_args(argv) + '''Main function''' + + input_file = '' + server_cred = '' + output_directory = '' + + res = parse_args(argv) + + if len(res) == 2: + input_file, output_directory = res # pylint: disable=unbalanced-tuple-unpacking + elif len(res) == 3: + input_file, server_cred, output_directory = res try: ifile = open(input_file, "r") - except: - print("Couldn't open file") + except IOError as ex: + print("Couldn't open input file") + print(ex) sys.exit(2) - string = ifile.read() + text = ifile.read() if output_directory[-1] != '/': output_directory += '/' - article_filename = get_article_header(string) + article_filename = get_article_title(text) if not article_filename: article_filename = input_file.split('/')[-1] + else: + text = rm_first_line(text) - article_path = output_directory + article_filename.split('.')[0] + '/' + article_folder = article_filename.split('.')[0] + '/' - if not os.path.exists(article_path): - os.makedirs(article_path) + if not os.path.exists(article_folder): + os.makedirs(article_folder) - paths = get_paths(string) + paths = get_paths(text) res_paths = [] - try: - for url in paths: - try: - res_paths.append(download_and_save(url, article_path)) - except: - paths.remove(url) - print("Couldn't process image" + url) + for url in paths: + try: + res_paths.append(download_and_save(url, article_folder)) + except Exception as ex: + paths.remove(url) + print("Couldn't process image:", ex, '\nurl:', url) + raise "Couldn't process image" - string = replace_article_paths(string, paths, res_paths) + text = replace_article_paths(text, paths, res_paths) - open(article_path + article_filename, "w").write(string) + open(article_folder + article_filename, "w").write(text) - except: - print("Couldn't process article") - sys.exit(2) + if server_cred: + upload_to_server(server_cred, article_folder, output_directory) + + shutil.rmtree(article_folder) + + add_to_list_on_server( + server_cred, article_folder, output_directory) if __name__ == '__main__': diff --git a/src/article_process.py b/src/article_process.py new file mode 100644 index 0000000..867e8c0 --- /dev/null +++ b/src/article_process.py @@ -0,0 +1,52 @@ +'''Provides some article content operations''' + +import re + + +def get_paths(string): + '''Gets images paths in article''' + + img_reg = re.compile(r'!\[.*?\]\(.*?\)') + path_reg = re.compile(r'(?<=\()http[s]{0,1}.*?(?=\))') + + imgs = img_reg.findall(string) + + paths = [] + + for img in imgs: + res = path_reg.search(img) + if res: + paths.append(res.group()) + + return paths + + +def rm_first_line(string): + '''Removes first line from string''' + + return string[string.find('\n') + 1:] + + +def get_article_title(string): + '''Gets article title''' + + header = '' + + if string[0] == '#': + header = string.split('\n')[0] + while header[0] == '#' or header[0] == ' ': + header = header[1:] + header += '.md' + header = header.replace(' ', '_') + + return header + + +def replace_article_paths(string, orig_paths, res_paths): + '''Replaces all web links with downloaded ones''' + + for i, val in enumerate(res_paths): + print(val[2:]) + string = string.replace(orig_paths[i], '/articles/' + val[2:]) + + return string diff --git a/src/get_args.py b/src/get_args.py new file mode 100644 index 0000000..c7860d4 --- /dev/null +++ b/src/get_args.py @@ -0,0 +1,78 @@ +''' Module to get arguments passed into Main module''' + +import getopt +import sys + + +def read_cfg_file(path): + '''Reads config file''' + + cfg = open(path, 'r') + buff = cfg.read() + for line in buff.split('\n'): + if line.split('=')[0] == 'output': + output_directory = line.split('=')[1] + elif line.split('=')[0] == 'host': + server_cred = line.split('=')[1] + if not (output_directory and server_cred): + print("No config file provided") + sys.exit(2) + + return output_directory, server_cred + + +def usage(): + '''Prints usage instructions''' + + print(''''Usage: ./article_uploader -i -o + or ./article_uploader -u -o -s + or ./article_uploader -u -c + with configuration file such as + path= + host=''') + + +def parse_args(argv): + '''Parses arguments provided by user''' + + input_file = '' + output_directory = '' + upload_to_server = False + server_cred = '' + cfg_path = '' + + try: + opts = getopt.getopt(argv, 'hi:o:us:c:')[0] + except getopt.GetoptError: + usage() + sys.exit(2) + + for opt, arg in opts: + if opt == '-h': + usage() + sys.exit() + elif opt == '-i': + input_file = arg + elif opt == '-o': + output_directory = arg + elif opt == '-u': + upload_to_server = True + elif opt == '-s': + server_cred = arg + elif opt == '-c': + cfg_path = arg + + if not (input_file and (output_directory or upload_to_server) or cfg_path): + usage() + sys.exit(2) + + if upload_to_server and not (server_cred and output_directory) and cfg_path: + output_directory, server_cred = read_cfg_file(cfg_path) + else: + usage() + sys.exit(2) + + if server_cred and output_directory: + return input_file, server_cred, output_directory + else: + return input_file, output_directory diff --git a/src/netw_ops.py b/src/netw_ops.py new file mode 100644 index 0000000..3fe0a56 --- /dev/null +++ b/src/netw_ops.py @@ -0,0 +1,71 @@ +'''Provides network related operations''' + +import subprocess +import os +import time +import requests + + +def download_and_save(url, out_dir): + '''Downloads file from url and saves it into out_dir''' + + file = requests.get(url) + + out_path = './' + out_dir + url.split('/')[-1] + + try: + open(out_path, 'wb').write(file.content) + return out_path + except IOError as ex: + print(ex) + raise 'Couldn\'t open file for write' + + +def scp_wrap(recursively, from_path, to_path): + '''Downloads/uploads files from/to server using scp''' + + if recursively: + proc = subprocess.Popen(["scp", "-r", from_path, to_path]) + else: + proc = subprocess.Popen(["scp", from_path, to_path]) + + sts = os.waitpid(proc.pid, 0) # pylint: disable=unused-variable + + +def upload_to_server(server_cred, local_path, server_path): + '''Uploads selected folder to server using scp''' + + scp_wrap(True, local_path, server_cred + ':' + server_path) + + +def add_to_list_on_server(server_cred, local_path, server_path): + '''Reads list of articles on server and add new article to it''' + + article_name = local_path[:-1] + + scp_wrap(False, server_cred + ':' + server_path + 'list.db', './') + + articles_list_file = open('list.db', 'r+') + articles_list = articles_list_file.read() + + articles_list_s = articles_list.split('\n') + + flag = True + for i, val in enumerate(articles_list_s): + if article_name in val: + line_s = val.split(' ') + line_s[0] = str(int(time.time())) + articles_list_s[i] = ' '.join(line_s) + + flag = False + + if flag: + articles_list_s.append(str(int(time.time())) + ' ' + article_name) + + articles_list = '\n'.join(filter(None, articles_list_s)) + + articles_list_file.seek(0) + articles_list_file.write(articles_list) + articles_list_file.close() + + scp_wrap(False, 'list.db', server_cred + ':' + server_path)