From da84adf37cf5a75ff05ce654aad3c47b49217f59 Mon Sep 17 00:00:00 2001
From: Maboroshy
Date: Mon, 14 Aug 2017 19:11:51 +0400
Subject: [PATCH] Added Inbox script

---
 Inbox/inbox.py     | 183 +++++++++++++++++++++++++++++++++++++++++
 Inbox/inbox.qml    |  75 +++++++++++++++++
 Inbox/info.json    |  31 +++++++
 Inbox/md_link.py   | 197 +++++++++++++++++++++++++++++++++++++++++++++
 Inbox/safe_path.py |  24 ++++++
 5 files changed, 510 insertions(+)
 create mode 100644 Inbox/inbox.py
 create mode 100644 Inbox/inbox.qml
 create mode 100644 Inbox/info.json
 create mode 100644 Inbox/md_link.py
 create mode 100644 Inbox/safe_path.py

diff --git a/Inbox/inbox.py b/Inbox/inbox.py
new file mode 100644
index 0000000..a0bb06e
--- /dev/null
+++ b/Inbox/inbox.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+import time
+import platform
+import collections
+import multiprocessing.dummy
+
+import md_link
+import safe_path
+
+
+def text_to_md(file_attrs):
+    """
+    Process a text file: collect its tags and replace urls with markdown links (favicon and title where possible)
+    :param file_attrs: File_attrs named tuple
+    :return: list of Note_attrs named tuples, one for each note the text should be written to
+    """
+    filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]
+    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))
+
+    with open(file_attrs.file_path, 'r') as text_file:
+        text = text_file.read()
+
+    tags = re.findall(re.escape(file_attrs.tag_marker) + r'(\w+)', text)  # marker is user text, not a regex
+    text = re.sub(re.escape(file_attrs.tag_marker) + r'\w*[ ]?', '', text).strip()
+
+    if re.match(r'^https?://\S+$', text):
+        is_bookmark = True
+    else:
+        is_bookmark = False
+
+    # URLs are replaced in a single pass, so repeated or overlapping URLs can't corrupt an earlier replacement
+    bookmark_title = (md_link.URL(text, file_attrs.folder_dir_path).title or filename) if is_bookmark else ''
+    text = re.sub(r'(?<!\S)https?://\S+',
+                  lambda match: md_link.URL(match.group(0), file_attrs.folder_dir_path).md,
+                  text, flags=re.IGNORECASE)
+
+    if file_attrs.inbox_file:
+        output_files = [file_attrs.inbox_file]
+        headline_title = ''
+    elif tags:
+        output_files = [tag + '.md' for tag in sorted(set(tags))]  # a repeated tag must not duplicate the entry
+        headline_title = ''
+    elif is_bookmark:
+        output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + bookmark_title + '.md']
+        headline_title = '# {}\n'.format(bookmark_title)
+    else:
+        output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + filename + '.md']
+        headline_title = '# {}\n'.format(filename)
+
+    output = []
+    for output_file in output_files:
+        output.append(Note_attrs(input_file_path=file_attrs.file_path,
+                                 output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),
+                                 text=text,
+                                 mtime='**{}**  \n'.format(time.strftime('%x %a %X', mtime)),
+                                 title=headline_title))
+    return output
+
+
+def file_to_md(file_attrs, media_dir_name):
+    """
+    This will move specified file to media_dir_name and put note with a reference to that file instead
+    :param file_attrs: File_attrs named tuple
+    :param media_dir_name: name of sub-directory in folder_dir_path where file will be moved (for non-text files)
+    :return: Note_attrs named tuple
+    """
+    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))
+    new_filename = str(int(time.mktime(mtime))) + '_' + os.path.basename(file_attrs.file_path)
+    new_path = os.path.join(file_attrs.folder_dir_path, media_dir_name, new_filename)
+
+    try:
+        os.rename(file_attrs.file_path, new_path)
+    except OSError:
+        pass
+
+    file = md_link.File(new_path, file_attrs.folder_dir_path, os.path.splitext(os.path.basename(file_attrs.file_path))[0])
+
+    if file_attrs.inbox_file:
+        output_file = file_attrs.inbox_file
+    else:
+        output_file = time.strftime('%m-%d %H:%M', mtime) + ' ' + file.title + '.md'
+
+    return Note_attrs(input_file_path=file_attrs.file_path,
+                      output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),
+                      text=file.md,
+                      mtime='**{}**  \n'.format(time.strftime('%x %a %X', mtime)),
+                      title='# {}\n'.format(file.title))
+
+
+if __name__ == '__main__':
+
+    File_attrs = collections.namedtuple('File_attrs', 'file_path folder_dir_path output_dir_path tag_marker inbox_file')
+    """
+    A named tuple which functions use to pass input data - data of files to be processed
+    :param file_path: full absolute path to the file to process
+    :param folder_dir_path: full absolute path to directory where 'media' and 'attachment' directories are
+    :param output_dir_path: full absolute path to directory where resulting text file will be stored
+    :param tag_marker: symbol(s) which start the tag word (for text files)
+    :param inbox_file: name of the text file in output_dir_path which will be appended with a new entry,
+                       if empty the entry will go to a new standalone text file
+    """
+
+    Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title')
+    '''A named tuple which functions use to pass output data - data of notes to be written.
+    :param input_file_path: full absolute path to the file which was processed to this tuple
+    :param output_file_path: full absolute path to the output text file which should be written
+    :param text: content of the text file which should be written
+    :param mtime: modification time of input file as markdown headline to optionally prepend a text
+    :param title: title of a input file as markdown headline to optionally prepend a text'''
+
+    def process_by_ext(file_attrs):
+        """
+        This will run different functions to process specified File_attrs tuple based on file extension
+        :param file_attrs: File_attrs named tuple
+        :return: Note_attrs named tuple, or a list of them for text files
+        """
+        if file_attrs.file_path.endswith('.txt') or not os.path.splitext(file_attrs.file_path)[1]:
+            return text_to_md(file_attrs)
+        elif file_attrs.file_path.endswith(('.jpg', '.png', '.gif')):
+            return file_to_md(file_attrs, 'media')
+        else:
+            return file_to_md(file_attrs, 'attachments')
+
+
+    inbox_dir = sys.argv[1]
+    folder_dir = sys.argv[2]
+    tag_marker = sys.argv[3]
+
+    os.makedirs(inbox_dir, exist_ok=True)
+    os.makedirs(folder_dir + os.sep + 'media', exist_ok=True)
+    os.makedirs(folder_dir + os.sep + 'attachments', exist_ok=True)
+
+    # Prepare a list of File_attrs tuples for process_by_ext function, based on file location, older files first
+    file_list = []
+    for file_path in sorted([inbox_dir + os.sep + path for path in os.listdir(inbox_dir)], key=os.path.getmtime):
+        if os.path.isdir(file_path) and not os.path.basename(file_path).startswith('.'):
+            for sub_file in sorted([file_path + os.sep + path for path in os.listdir(file_path)], key=os.path.getmtime):
+                if not sub_file.endswith('.md') and not os.path.basename(sub_file).startswith('.'):
+                    file_list.append([File_attrs(file_path=sub_file, folder_dir_path=folder_dir, output_dir_path=inbox_dir,
+                                                 tag_marker=tag_marker, inbox_file=os.path.basename(file_path) + '.md')])
+        else:
+            if not file_path.endswith('.md') and not os.path.basename(file_path).startswith('.'):
+                file_list.append([File_attrs(file_path=file_path, folder_dir_path=folder_dir, output_dir_path=inbox_dir,
+                                             tag_marker=tag_marker, inbox_file='')])
+
+    # Run process_by_ext for each File_attrs tuple putting resulted Note_attrs tuples to write_list
+    write_list = multiprocessing.dummy.Pool().starmap(process_by_ext, file_list)
+
+    # Since text_to_md returns a list of Note_attrs tuples, write_list has to be flattened
+    flat_write_list = []
+    for result in write_list:
+        if isinstance(result, list):
+            for item in result:
+                flat_write_list.append(item)
+        else:
+            flat_write_list.append(result)
+
+    # Create or append existing text files based on Note_attrs tuples data
+    for note_attrs in flat_write_list:
+        try:
+            with open(note_attrs.output_file_path, 'r') as source:
+                content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read()
+        except OSError:
+            if note_attrs.title:
+                content = note_attrs.title + note_attrs.text
+            else:
+                content = note_attrs.mtime + note_attrs.text
+
+        with open(note_attrs.output_file_path, 'w') as output:
+            output.write(content)
+
+        if os.path.isfile(note_attrs.output_file_path):
+            try:
+                os.remove(note_attrs.input_file_path)
+            except OSError:
+                pass
+
+    if platform.system() == 'Linux':
+        os.system('notify-send "-a" "Inbox script" "Your inbox is organized"')  # TODO maybe change to gi.repository: Notify
\ No newline at end of file
diff --git a/Inbox/inbox.qml b/Inbox/inbox.qml
new file mode 100644
index 0000000..7396c88
--- /dev/null
+++ b/Inbox/inbox.qml
@@ -0,0 +1,75 @@
+import QtQml 2.2
+import QOwnNotesTypes 1.0
+
+/* Registers the 'inbox' custom action, which runs inbox.py on the inbox folder of the current note folder.
+ */
+
+Script {
+    property string scriptDirPath
+    property string inboxFolder
+    property string tagMarker
+    property string pyBin
+
+    function getPyCommand() {
+        var pyVer = script.startSynchronousProcess('python3', '-V', '').toString()
+        if (pyVer.indexOf('Python 3') !== -1) {
+            return 'python3'
+        }
+        pyVer = script.startSynchronousProcess('python', '-V', '').toString()
+        if (pyVer.indexOf('Python 3') !== -1) {
+            return 'python'
+        }
+        pyVer = script.startSynchronousProcess('py', '-V', '').toString()
+        if (pyVer.indexOf('Python 3') !== -1) {
+            return 'py'
+        }
+        return ''
+    }
+
+    property variant settingsVariables: [
+        {
+            'identifier': 'inboxFolder',
+            'name': 'Inbox folder name',
+            'description': 'Name of inbox folder located in the root of note folder. It is single for all note folders\n' +
+                           'An empty inbox folder will be created if none exists.',
+            'type': 'string',
+            'default': 'Inbox',
+        },
+        {
+            'identifier': 'tagMarker',
+            'name': 'Tag word marker',
+            'description': 'A symbol or group of symbols which start a "tag" word for txt notes. \n' +
+                           'For example a txt note with "@tag" word will go to "tag.md" note',
+            'type': 'string',
+            'default': '@',
+        },
+        {
+            'identifier': 'pyBin',
+            'name': 'Command/path to run Python 3 Interpreter',
+            'description': "Put a command or path for Python 3 interpreter here.",
+            'type': 'file',
+            'default': getPyCommand(),
+        }
+    ]
+
+    function init() {
+        if (pyBin == '') {
+            script.informationMessageBox("Can't find Python 3 interpreter.\n" +
+                                         'Please set the correct path to its binary in the script settings.',
+                                         'Inbox script')
+        }
+        else {
+            script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg')
+        }
+    }
+
+    function customActionInvoked(action) {
+        if (action == 'inbox') {
+            var pyScriptPath = scriptDirPath + script.dirSeparator() + 'inbox.py'
+            var inboxPath = script.currentNoteFolderPath() + script.dirSeparator() + inboxFolder
+
+            script.startDetachedProcess(pyBin, [pyScriptPath, inboxPath, script.currentNoteFolderPath(), tagMarker])
+            script.log('Processing inbox...')
+        }
+    }
+}
diff --git a/Inbox/info.json b/Inbox/info.json
new file mode 100644
index 0000000..26394bf
--- /dev/null
+++ b/Inbox/info.json
@@ -0,0 +1,31 @@
+{
+    "name": "Inbox [beta]",
+    "identifier": "inbox",
+    "script": "inbox.qml",
+    "authors": ["@Maboroshy"],
+    "platforms": ["linux", "macos", "windows"],
+    "version": "0.0.1",
+    "minAppVersion": "17.05.8",
+    "description" : ["Inbox is a complex script to organize data you put to inbox folder from different devices and applications.",
+                     "",
+                     "The script alters files in inbox folder you set. It's currently at beta stage, so using it for unrecoverable data is discouraged.",
+                     "",
+                     "Features:",
+                     "- The script turns all .txt files and files with no extension to .md note with modification time in the file name.",
+                     "- If there's a word that starts with a '@' (configurable) in file text, content of that file will be added to .md note named like that word. The content of .txt file with '@tag' in text will be added to 'tag.md' file.",
+                     "- Every URL in text file will be converted to markdown link, with web page favicon and title if possible.",
+                     "- Any image file placed to inbox folder will be moved to media folder. The script will put .md note with the in-line image instead.",
+                     "- Any other file placed to inbox folder will be moved to attachments folder. The script will put .md note with a link to the file instead. On Linux the file icon will be put before link.",
+                     "- Text/link of file placed to sub-folder of inbox folder will be added to .md note named like the sub-folder. Everything put to 'topic' sub-folder will be added to 'topic.md' file.",
+                     "- (Linux only) The script will put thumbnail for .pdf file as a link to it.",
+                     "- (Linux only) The script will replace large in-line images with a smaller ones as a link to original ones.",
+                     "",
+                     "Dependencies:",
+                     "Python 3.3+ Interpreter;",
+                     "(Linux only, pdf thumbnails) ghostscript;",
+                     "(Linux only, image thumbnails) imagemagick;",
+                     "(Linux only, file icons) python-gobject.",
+                     "",
+                     "Usage:",
+                     "Run the script by toolbar button or menu item."]
+}
diff --git a/Inbox/md_link.py b/Inbox/md_link.py
new file mode 100644
index 0000000..8519889
--- /dev/null
+++ b/Inbox/md_link.py
@@ -0,0 +1,197 @@
+import os
+import re
+import sys
+import shutil
+import hashlib
+import platform
+import subprocess
+import urllib.request
+
+
+if platform.system() == 'Linux':
+    try:
+        import gi  # TODO doesn't work on early Python 3 versions
+        gi.require_version('Gtk', '3.0')
+        from gi.repository import Gio, Gtk
+    except ImportError:
+        pass
+
+
+class URL:
+    def __init__(self, url, folder_dir_path):
+        """
+        :param url: http(s)://...
+        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
+        """
+        self.url = url.strip()
+        self.folder_dir_path = folder_dir_path
+
+    @property
+    def icon(self):
+        """
+        Get URL favicon from google's service
+        :return: full absolute path to favicon image this function saved to media/favicons or nothing
+        """
+        try:
+            favicon = urllib.request.urlopen('http://www.google.com/s2/favicons?domain=' + urllib.parse.quote(self.url, safe=''), timeout=5)
+        except OSError:  # URLError, HTTPError and socket timeouts are all OSError subclasses
+            return
+
+        favicon_content = favicon.read()
+        favicon_hash = hashlib.md5(favicon_content).hexdigest()
+
+        if favicon_hash == '3ca64f83fdcf25135d87e08af65e68c9':  # google's dummy icon
+            return
+        else:
+            favicon_dir_path = self.folder_dir_path + os.sep + 'media' + os.sep + 'favicons'
+            os.makedirs(favicon_dir_path, exist_ok=True)
+            favicon_path = favicon_dir_path + os.sep + favicon_hash + '.png'
+            if not os.path.isfile(favicon_path):
+                with open(favicon_path, 'wb') as favicon_file:
+                    favicon_file.write(favicon_content)
+            return favicon_path
+
+    @property
+    def title(self):
+        """
+        Try to parse title from the web page located at URL
+        :return: web page title for URL or an empty string if the page can't be fetched or has no title
+        """
+        try:
+            html_text = urllib.request.urlopen(self.url, timeout=10).read().decode('utf-8')  # TODO encodings other that utf-8, https fails on early Python 3 versions
+        except (OSError, UnicodeDecodeError):  # OSError covers URLError, HTTPError and socket timeouts
+            return ''
+        title_match = re.search('<title>(.+?)</title>', html_text, re.IGNORECASE | re.DOTALL)
+        return title_match.group(1).strip() if title_match else ''
+
+    @property
+    def md(self):
+        """
+        Get the most informative markdown syntax link for URL, with favicon and title if available
+        :return: Markdown syntax link for URL
+        """
+        icon = self.icon  # the icon and title properties hit the network on every access, so read each only once
+        title = self.title
+        if icon:
+            md_favicon = '![](file://media/favicons/{}) '.format(os.path.basename(icon))
+        else:
+            md_favicon = ''
+        if title:
+            md_link = '[{}]({})'.format(title, self.url)
+        else:
+            md_link = '<{}>'.format(self.url)
+        return md_favicon + md_link
+
+
+class File:
+    def __init__(self, link_path, folder_dir_path, title=''):
+        """
+        :param link_path: full absolute path to the file
+        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
+        :param title: optionally specify the file's title, otherwise file name will be used as such
+        """
+        self.path = link_path.strip()
+        self.folder_dir_path = folder_dir_path
+        self.filename = os.path.basename(self.path)
+        self.ext = os.path.splitext(self.filename)[1]
+
+        if self.ext == '.pdf':
+            self.type = 'pdf'
+        elif self.ext in ('.jpg', '.png', '.gif'):
+            self.type = 'image'
+        else:
+            self.type = 'other'
+
+        if title:
+            self.title = title
+        else:
+            self.title = os.path.splitext(self.filename)[0]
+
+    @property
+    def icon(self, icon_size=16, save=True):
+        """
+        Get file type icon for File
+        :param icon_size: requested icon size
+        :param save: True to copy icon to media folder, otherwise will return path where OS stores the icon
+        :return: path to File icon or nothing
+        """
+        if platform.system() == 'Linux':
+            try:
+                file = Gio.File.new_for_path(self.path)
+            except NameError:
+                return ''
+
+            file_info = file.query_info('standard::icon', 0, Gio.Cancellable())
+            file_icon = file_info.get_icon().get_names()[0]
+
+            icon_theme = Gtk.IconTheme.get_default()
+            icon_info = icon_theme.lookup_icon(file_icon, icon_size, 0)
+            icon_path = icon_info.get_filename() if icon_info else None  # no icon_info if the theme lacks the icon
+
+            if icon_path and os.path.isfile(icon_path):
+                if save:
+                    icon_store_path = os.path.join(self.folder_dir_path, 'media', 'fileicons')
+                    if not os.path.isfile(icon_store_path + os.sep + os.path.basename(icon_path)):
+                        try:
+                            os.makedirs(icon_store_path, exist_ok=True)
+                            icon_path = shutil.copy(icon_path, icon_store_path)
+                            icon_path = 'media/fileicons/' + os.path.basename(icon_path)
+                        except OSError:
+                            pass
+
+                return icon_path
+
+    @property
+    def thumb(self):
+        """
+        Make a thumbnail for appropriate File
+        :return: full absolute path to File's thumbnail or nothing
+        """
+        if self.type == 'pdf':
+            if platform.system() == 'Linux':
+                dpi = 30
+                thumb_path = os.path.join(self.folder_dir_path, 'media', 'thumbnails', 'th_' + self.filename + '.png')
+                os.makedirs(os.path.dirname(thumb_path), exist_ok=True)
+                try:  # ghostscript is optional: if it is missing or hangs, the note simply gets no thumbnail
+                    subprocess.call(['gs', '-q', '-dNOPAUSE', '-dBATCH', '-sDEVICE=png16m', '-r' + str(dpi),
+                                     '-sOutputFile=' + thumb_path, '-dLastPage=1', self.path], timeout=10)
+                except (OSError, subprocess.SubprocessError):
+                    pass
+                if os.path.isfile(thumb_path):
+                    return thumb_path
+
+        if self.type == 'image':
+            if platform.system() == 'Linux':
+                target_width = 600
+                thumb_path = os.path.join(self.folder_dir_path, 'media', 'thumbnails', 'th_' + self.filename)
+                try:  # imagemagick is optional: if it is missing, fails or hangs, the original image is used
+                    img_width = int(subprocess.check_output(['identify', '-ping', '-format', '%w', self.path],
+                                                            timeout=5).decode(sys.stdout.encoding or 'utf-8'))
+                    if img_width > target_width:
+                        os.makedirs(os.path.dirname(thumb_path), exist_ok=True)
+                        subprocess.call(['convert', self.path, '-thumbnail', str(target_width),
+                                         '-auto-orient', '-unsharp', '0x.5', thumb_path], timeout=10)
+                        if os.path.isfile(thumb_path):
+                            return thumb_path
+                except (OSError, ValueError, subprocess.SubprocessError):
+                    pass
+
+    @property
+    def md(self):
+        """
+        Get the most informative markdown syntax link for File, as a thumbnail or with file icon if available
+        :return: Markdown syntax link for File
+        """
+        link_path = os.path.relpath(self.path, self.folder_dir_path).replace(os.sep, '/')
+        thumb = self.thumb
+        if thumb:
+            return '[![{}](file://{})](file://{})'.format(self.title, 'media/thumbnails/' + os.path.basename(thumb), link_path)
+        elif self.type == 'image':
+            return '![{}](file://{})'.format(self.title, link_path)
+        else:
+            icon = self.icon
+            if icon:
+                return '![](file://{})'.format('media/fileicons/' + os.path.basename(icon)) + ' ' + \
+                       '[{}](file://{})'.format(self.title, link_path)
+            else:
+                return '[{}](file://{})'.format(self.title, link_path)
\ No newline at end of file
diff --git a/Inbox/safe_path.py b/Inbox/safe_path.py
new file mode 100644
index 0000000..03e76fe
--- /dev/null
+++ b/Inbox/safe_path.py
@@ -0,0 +1,24 @@
+import platform
+
+
+def filename(filename):
+    """
+    Replace the characters in file name that are not allowed in current OS
+    :param filename: file name
+    :return: file name which is safe to use in current OS
+    """
+    if platform.system() == 'Linux':
+        safe_filename = filename.replace('/', '-')
+    elif platform.system() == 'Darwin':
+        safe_filename = filename.replace('/', '-').replace(':', '-')
+    else:
+        safe_filename = filename
+        for char in (':', '/', '\\', '|'):
+            safe_filename = safe_filename.replace(char, '-')
+        for char in ('?', '*'):
+            safe_filename = safe_filename.replace(char, '')
+        safe_filename = safe_filename.replace('<', '(')
+        safe_filename = safe_filename.replace('>', ')')
+        safe_filename = safe_filename.replace('"', "'")
+
+    return safe_filename
\ No newline at end of file