# An http(s) URL that starts a line or follows whitespace and runs up to the next whitespace
_URL_PATTERN = re.compile(r'(?<!\S)https?://\S+', re.IGNORECASE)


def text_to_md(file_attrs):
    """
    Process a text file: collect its tags and replace its URLs with markdown links.

    Tag words (tag marker followed by word characters) are removed from the text. A tag is only
    recognised at the start of a whitespace-delimited word, so a marker inside a URL or an e-mail
    address is left alone. Every URL is replaced with the markdown produced by md_link.URL.
    A text consisting of a single URL is treated as a bookmark and is named after the page title.

    :param file_attrs: File_attrs named tuple
    :return: list of Note_attrs named tuples, one per note the text has to be written to
    """
    filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]
    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))

    with open(file_attrs.file_path, 'r') as text_file:
        text = text_file.read()

    tags = []
    if file_attrs.tag_marker:  # an empty marker would turn every word into a tag
        # The marker is user supplied, so it has to be escaped before going into a pattern
        tag_pattern = re.compile(r'(?<!\S)' + re.escape(file_attrs.tag_marker) + r'(\w+) ?')
        for tag in tag_pattern.findall(text):
            if tag not in tags:  # a repeated tag must not append the same entry twice
                tags.append(tag)
        text = tag_pattern.sub('', text)
    text = text.strip()

    folder_dir_path = file_attrs.folder_dir_path
    bookmark_url = None
    if re.match(r'^http[s]?://\S*$', text):
        # The whole text is one URL
        bookmark_url = md_link.URL(text, folder_dir_path)
        text = bookmark_url.md
    else:
        # Substituting match by match keeps already converted links from being replaced again
        text = _URL_PATTERN.sub(lambda match: md_link.URL(match.group(0), folder_dir_path).md, text)

    if file_attrs.inbox_file:
        output_files = [file_attrs.inbox_file]
        headline_title = ''
    elif tags:
        output_files = [tag + '.md' for tag in tags]
        headline_title = ''
    else:
        # Bookmarks are named after the page title; fall back to the file name when there is none
        title = bookmark_url.title if bookmark_url is not None else ''
        title = ' '.join(title.split()) or filename
        output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + title + '.md']
        headline_title = '# {}\n'.format(title)

    # Two trailing spaces make a markdown hard line break between the date and the text
    mtime_line = '**{}**  \n'.format(time.strftime('%x %a %X', mtime))

    return [Note_attrs(input_file_path=file_attrs.file_path,
                       output_file_path=os.path.join(file_attrs.output_dir_path,
                                                     safe_path.filename(output_file)),
                       text=text,
                       mtime=mtime_line,
                       title=headline_title)
            for output_file in output_files]
def file_to_md(file_attrs, media_dir_name):
    """
    Move a file to media_dir_name and build a note which references the moved file.

    :param file_attrs: File_attrs named tuple
    :param media_dir_name: name of sub-directory in folder_dir_path where file will be moved (for non-text files)
    :return: Note_attrs named tuple
    :raises OSError: if the file can't be moved; the file is then left where it was, so the caller
                     never gets a note that points to a missing file
    """
    import shutil  # local import: shutil.move also works across filesystems, unlike os.rename

    source_path = file_attrs.file_path
    source_name = os.path.basename(source_path)
    mtime = time.localtime(os.path.getmtime(source_path))

    # Prefix the name with the modification time as whole seconds
    new_filename = '{}_{}'.format(int(time.mktime(mtime)), source_name)
    target_dir_path = os.path.join(file_attrs.folder_dir_path, media_dir_name)
    new_path = os.path.join(target_dir_path, new_filename)

    os.makedirs(target_dir_path, exist_ok=True)
    # Not wrapped in try/except on purpose: a swallowed failure would leave the note linking to
    # a file that isn't there while the original is removed once the note is written.
    shutil.move(source_path, new_path)

    moved_file = md_link.File(new_path, file_attrs.folder_dir_path, os.path.splitext(source_name)[0])

    if file_attrs.inbox_file:
        output_file = file_attrs.inbox_file
    else:
        output_file = time.strftime('%m-%d %H:%M', mtime) + ' ' + moved_file.title + '.md'

    return Note_attrs(input_file_path=source_path,
                      output_file_path=os.path.join(file_attrs.output_dir_path,
                                                    safe_path.filename(output_file)),
                      text=moved_file.md,
                      # Two trailing spaces make a markdown hard line break after the date
                      mtime='**{}**  \n'.format(time.strftime('%x %a %X', mtime)),
                      title='# {}\n'.format(moved_file.title))
if __name__ == '__main__':
    import subprocess  # only used for the desktop notification at the end

    # Input data - describes one file to be processed:
    #   file_path:       full absolute path to the file to process
    #   folder_dir_path: full absolute path to directory where 'media' and 'attachments' directories are
    #   output_dir_path: full absolute path to directory where resulting text file will be stored
    #   tag_marker:      symbol(s) which start the tag word (for text files)
    #   inbox_file:      name of the note which will be appended with a new entry,
    #                    if empty the entry will go to a new standalone text file
    File_attrs = collections.namedtuple('File_attrs',
                                        'file_path folder_dir_path output_dir_path tag_marker inbox_file')

    # Output data - describes one note to be written:
    #   input_file_path:  full absolute path to the file which was processed to this tuple
    #   output_file_path: full absolute path to the output text file which should be written
    #   text:             content of the text file which should be written
    #   mtime:            modification time of input file as markdown line to optionally prepend a text
    #   title:            title of input file as markdown headline to optionally prepend a text
    Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title')

    def process_by_ext(file_attrs):
        """
        Run the function that fits the file extension (compared case-insensitively)
        :param file_attrs: File_attrs named tuple
        :return: Note_attrs named tuple or a list of them
        """
        ext = os.path.splitext(file_attrs.file_path)[1].lower()
        if ext in ('', '.txt'):
            return text_to_md(file_attrs)
        elif ext in ('.jpg', '.png', '.gif'):
            return file_to_md(file_attrs, 'media')
        else:
            return file_to_md(file_attrs, 'attachments')

    def process_safely(file_attrs):
        """
        Run process_by_ext so that one broken file can't abort the whole run
        :param file_attrs: File_attrs named tuple
        :return: list of Note_attrs named tuples, empty if processing failed
        """
        try:
            result = process_by_ext(file_attrs)
        except Exception as error:  # top-level boundary: report and carry on with other files
            print('inbox: failed to process {}: {}'.format(file_attrs.file_path, error), file=sys.stderr)
            return []
        return result if isinstance(result, list) else [result]

    def entries_by_mtime(dir_path):
        """
        :param dir_path: directory to list
        :return: full paths of non-hidden entries of dir_path, older entries first
        """
        paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path) if not name.startswith('.')]
        return sorted(paths, key=os.path.getmtime)

    if len(sys.argv) < 4:
        sys.exit('Usage: inbox.py INBOX_DIR NOTE_FOLDER_DIR TAG_MARKER')

    inbox_dir = sys.argv[1]
    folder_dir = sys.argv[2]
    tag_marker = sys.argv[3]

    os.makedirs(inbox_dir, exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'media'), exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'attachments'), exist_ok=True)

    # Prepare a list of File_attrs tuples, older files first. Files of an inbox sub-folder all go
    # to the note named like that sub-folder, files in the inbox root get notes of their own.
    file_list = []
    for entry_path in entries_by_mtime(inbox_dir):
        if os.path.isdir(entry_path):
            candidates = entries_by_mtime(entry_path)
            inbox_file = os.path.basename(entry_path) + '.md'
        else:
            candidates = [entry_path]
            inbox_file = ''

        for candidate in candidates:
            # Existing notes are skipped; so are nested directories, which can't be read as files
            if os.path.isfile(candidate) and not candidate.endswith('.md'):
                file_list.append(File_attrs(file_path=candidate, folder_dir_path=folder_dir,
                                            output_dir_path=inbox_dir, tag_marker=tag_marker,
                                            inbox_file=inbox_file))

    # Process the files in a thread pool; the with block releases the worker threads afterwards
    with multiprocessing.dummy.Pool() as pool:
        note_groups = pool.map(process_safely, file_list)

    flat_write_list = [note_attrs for group in note_groups for note_attrs in group]

    # Create or append existing text files based on Note_attrs tuples data
    for note_attrs in flat_write_list:
        try:
            with open(note_attrs.output_file_path, 'r') as source:
                # The note exists: the new entry goes on top of the old content
                content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read()
        except OSError:
            # New note: start with the title if there is one, otherwise with the date
            content = (note_attrs.title or note_attrs.mtime) + note_attrs.text

        with open(note_attrs.output_file_path, 'w') as output:
            output.write(content)

        if os.path.isfile(note_attrs.output_file_path):
            try:
                os.remove(note_attrs.input_file_path)
            except OSError:
                # Expected: non-text files were already moved away and a text with several tags
                # is removed when its first note is written
                pass

    if platform.system() == 'Linux':
        try:
            subprocess.call(['notify-send', '-a', 'Inbox script', 'Your inbox is organized'])
        except OSError:
            pass  # the notification is optional, notify-send may be missing
/*
 * Registers an "Inbox" custom action which runs inbox.py from the script directory
 * with the configured Python 3 interpreter on the inbox folder of the current note folder.
 */
Script {
    property string scriptDirPath
    property string inboxFolder
    property string tagMarker
    property string pyBin

    /**
     * Probe the usual interpreter commands and return the first one that reports
     * a "Python 3" version, or an empty string if none of them does.
     */
    function getPyCommand() {
        var candidates = ['python3', 'python', 'py']

        for (var i = 0; i < candidates.length; i++) {
            var pyVer = script.startSynchronousProcess(candidates[i], '-V', '').toString()
            // indexOf returns a number, so compare it with the number -1
            if (pyVer.indexOf('Python 3') !== -1) {
                return candidates[i]
            }
        }

        return ''
    }

    property variant settingsVariables: [
        {
            'identifier': 'inboxFolder',
            'name': 'Inbox folder name',
            'description': 'Name of inbox folder located in the root of note folder. It is single for all note folders\n' +
                           'An empty inbox folder will be created if no exists.',
            'type': 'string',
            'default': 'Inbox',
        },
        {
            'identifier': 'tagMarker',
            'name': 'Tag word marker',
            'description': 'A symbol or group of symbols which start a "tag" word for txt notes. \n' +
                           'For example a txt note with "@tag" word will go to "tag.md" note',
            'type': 'string',
            'default': '@',
        },
        {
            'identifier': 'pyBin',
            'name': 'Command/path to run Python 3 Interpreter',
            'description': "Put a command or path for Python 3 interpreter here.",
            'type': 'file',
            'default': getPyCommand(),
        }
    ]

    /**
     * Register the custom action, or tell the user that no interpreter is configured.
     */
    function init() {
        if (pyBin == '') {
            script.informationMessageBox("Can't find Python 3 interpreter.\n" +
                                         'Please set the correct path to its binary in the script settings.',
                                         'Inbox script')
        }
        else {
            script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg')
        }
    }

    /**
     * Start inbox.py as a detached process with the inbox path, the note folder path
     * and the tag marker as its arguments.
     */
    function customActionInvoked(action) {
        if (action == 'inbox') {
            var noteFolderPath = script.currentNoteFolderPath()
            var pyScriptPath = scriptDirPath + script.dirSeparator() + 'inbox.py'
            var inboxPath = noteFolderPath + script.dirSeparator() + inboxFolder

            script.startDetachedProcess(pyBin, [pyScriptPath, inboxPath, noteFolderPath, tagMarker])
            script.log('Processing inbox...')
        }
    }
}
class URL:
    """
    A web link which can describe itself as markdown, with a favicon and the page title if available.

    The favicon and the title are fetched from the network on first access and remembered,
    so reading `icon`, `title` or `md` several times doesn't repeat the requests.
    """

    _UNSET = object()                # marks a value that wasn't fetched yet
    _TIMEOUT = 5                     # seconds to wait for each request
    _TITLE_READ_LIMIT = 512 * 1024   # bytes of a page to read while looking for the title
    _TITLE_PATTERN = re.compile(r'<title[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL)
    _DUMMY_FAVICON_MD5 = '3ca64f83fdcf25135d87e08af65e68c9'  # google's dummy icon

    def __init__(self, url, folder_dir_path):
        """
        :param url: http(s)://...
        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
        """
        self.url = url.strip()
        self.folder_dir_path = folder_dir_path
        self._icon = self._UNSET
        self._title = self._UNSET

    @staticmethod
    def _parse_title(html_text):
        """
        :param html_text: text of a web page
        :return: content of the first <title> element with whitespace runs collapsed, or '' if there is none
        """
        match = URL._TITLE_PATTERN.search(html_text)
        if match is None:
            return ''
        return ' '.join(match.group(1).split())

    def _fetch_icon(self):
        """
        Download the favicon and save it to media/favicons under the md5 of its content
        :return: full absolute path to the saved favicon or None
        """
        try:
            # The service is queried by host name; quoting keeps odd characters out of the query
            domain = urllib.parse.urlsplit(self.url).hostname or self.url
            request_url = 'http://www.google.com/s2/favicons?domain=' + urllib.parse.quote(domain, safe='')
            with urllib.request.urlopen(request_url, timeout=self._TIMEOUT) as favicon:
                favicon_content = favicon.read()
        except (OSError, ValueError):  # HTTP/URL errors and timeouts are OSError, bad URLs are ValueError
            return None

        favicon_hash = hashlib.md5(favicon_content).hexdigest()
        if favicon_hash == self._DUMMY_FAVICON_MD5:
            return None

        favicon_dir_path = os.path.join(self.folder_dir_path, 'media', 'favicons')
        favicon_path = os.path.join(favicon_dir_path, favicon_hash + '.png')
        if not os.path.isfile(favicon_path):
            try:
                os.makedirs(favicon_dir_path, exist_ok=True)
                with open(favicon_path, 'wb') as favicon_file:
                    favicon_file.write(favicon_content)
            except OSError:
                return None  # a link to an icon that couldn't be saved would be broken
        return favicon_path

    def _fetch_title(self):
        """
        Download the beginning of the page and parse its title
        :return: web page title or ''
        """
        try:
            with urllib.request.urlopen(self.url, timeout=self._TIMEOUT) as response:
                charset = response.headers.get_content_charset() or 'utf-8'
                # The title sits near the top of a page, so a huge download isn't read in full
                raw_html = response.read(self._TITLE_READ_LIMIT)
            html_text = raw_html.decode(charset, errors='replace')
        except (OSError, ValueError, LookupError):  # LookupError: the page declares an unknown charset
            return ''
        return self._parse_title(html_text)

    @property
    def icon(self):
        """
        Get URL favicon from google's service
        :return: full absolute path to favicon image saved to media/favicons or None
        """
        if self._icon is self._UNSET:
            self._icon = self._fetch_icon()
        return self._icon

    @property
    def title(self):
        """
        Try to parse title from the web page located at URL
        :return: web page title for URL or ''
        """
        if self._title is self._UNSET:
            self._title = self._fetch_title()
        return self._title

    @property
    def md(self):
        """
        Get the most informative markdown syntax link for URL, with favicon and title if available
        :return: Markdown syntax link for URL
        """
        icon = self.icon
        title = self.title

        if icon:
            md_favicon = '![](file://media/favicons/{}) '.format(os.path.basename(icon))
        else:
            md_favicon = ''

        if title:
            md_link = '[{}]({})'.format(title, self.url)
        else:
            md_link = '<{}>'.format(self.url)

        return md_favicon + md_link
class File:
    """
    A local file which can describe itself as markdown: as a thumbnail, as an in-line image
    or as a link with a file type icon, whichever is available.

    Thumbnails and icons are made on Linux only and rely on optional tools (gs, identify, convert,
    python-gobject). When a tool is missing or fails the plain link is used instead.
    """

    _IMAGE_EXTENSIONS = ('.jpg', '.png', '.gif')
    _PDF_THUMB_DPI = 30
    _IMAGE_THUMB_WIDTH = 600

    def __init__(self, link_path, folder_dir_path, title=''):
        """
        :param link_path: full absolute path to the file
        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
        :param title: optionally specify the file's title, otherwise file name will be used as such
        """
        self.path = link_path.strip()
        self.folder_dir_path = folder_dir_path
        self.filename = os.path.basename(self.path)
        self.ext = os.path.splitext(self.filename)[1]

        # The type doesn't depend on the extension's letter case ('.PDF', '.JPG')
        lower_ext = self.ext.lower()
        if lower_ext == '.pdf':
            self.type = 'pdf'
        elif lower_ext in self._IMAGE_EXTENSIONS:
            self.type = 'image'
        else:
            self.type = 'other'

        self.title = title or os.path.splitext(self.filename)[0]

    @property
    def icon(self, icon_size=16, save=True):
        """
        Get file type icon for File. Being a property it is always read with the default arguments.
        :param icon_size: requested icon size
        :param save: True to copy icon to media folder, otherwise will return path where OS stores the icon
        :return: full absolute path to File icon or ''
        """
        if platform.system() != 'Linux':
            return ''

        try:
            file_info = Gio.File.new_for_path(self.path).query_info('standard::icon', 0, Gio.Cancellable())
            icon_names = file_info.get_icon().get_names()
            icon_theme = Gtk.IconTheme.get_default()

            # Names go from the most specific to the most generic, take the first the theme has
            icon_path = ''
            for icon_name in icon_names:
                icon_info = icon_theme.lookup_icon(icon_name, icon_size, 0)
                if icon_info is not None and icon_info.get_filename():
                    icon_path = icon_info.get_filename()
                    break
        except (NameError, AttributeError):
            # NameError: Gio/Gtk were not imported; AttributeError: no icon theme or icon names
            return ''

        if not icon_path or not os.path.isfile(icon_path):
            return ''
        if not save:
            return icon_path

        icon_store_path = os.path.join(self.folder_dir_path, 'media', 'fileicons')
        stored_icon_path = os.path.join(icon_store_path, os.path.basename(icon_path))
        if not os.path.isfile(stored_icon_path):
            try:
                os.makedirs(icon_store_path, exist_ok=True)
                shutil.copy(icon_path, stored_icon_path)
            except OSError:
                return ''  # a link to an icon that wasn't stored would be broken
        return stored_icon_path

    def _pdf_thumb(self, thumb_dir_path):
        """Render the first page of a pdf with ghostscript, return the png path or None."""
        thumb_path = os.path.join(thumb_dir_path, 'th_' + self.filename + '.png')
        os.makedirs(thumb_dir_path, exist_ok=True)

        subprocess.call(['gs', '-q', '-dNOPAUSE', '-dBATCH', '-sDEVICE=png16m', '-r' + str(self._PDF_THUMB_DPI),
                         '-sOutputFile=' + thumb_path, '-dLastPage=1', self.path],
                        timeout=10)

        return thumb_path if os.path.isfile(thumb_path) else None

    def _image_thumb(self, thumb_dir_path):
        """Make a smaller copy of an image wider than the target width, return its path or None."""
        # One width per line: a multi-frame gif reports a width for every frame, the first is used.
        # The output is ASCII digits, so it is parsed without decoding.
        widths = subprocess.check_output(['identify', '-ping', '-format', r'%w\n', self.path],
                                         stderr=subprocess.DEVNULL, timeout=5)
        if int(widths.split()[0]) <= self._IMAGE_THUMB_WIDTH:
            return None

        thumb_path = os.path.join(thumb_dir_path, 'th_' + self.filename)
        os.makedirs(thumb_dir_path, exist_ok=True)

        subprocess.call(['convert', self.path, '-thumbnail', str(self._IMAGE_THUMB_WIDTH),
                         '-auto-orient', '-unsharp', '0x.5', thumb_path],
                        timeout=10)

        return thumb_path if os.path.isfile(thumb_path) else None

    @property
    def thumb(self):
        """
        Make a thumbnail for appropriate File
        :return: full absolute path to File's thumbnail or None
        """
        if platform.system() != 'Linux':
            return None

        thumb_dir_path = os.path.join(self.folder_dir_path, 'media', 'thumbnails')
        try:
            if self.type == 'pdf':
                return self._pdf_thumb(thumb_dir_path)
            if self.type == 'image':
                return self._image_thumb(thumb_dir_path)
        except (OSError, subprocess.SubprocessError, ValueError, IndexError):
            # The tools are optional: a missing binary, a timeout, a failed run or unexpected
            # output only means there is no thumbnail
            return None
        return None

    @property
    def md(self):
        """
        Get the most informative markdown syntax link for File, as a thumbnail or with file icon if available
        :return: Markdown syntax link for File
        """
        link_path = os.path.relpath(self.path, self.folder_dir_path).replace(os.sep, '/')

        thumb = self.thumb
        if thumb:
            return '[![{}](file://{})](file://{})'.format(self.title, 'media/thumbnails/' + os.path.basename(thumb),
                                                          link_path)
        if self.type == 'image':
            return '![{}](file://{})'.format(self.title, link_path)

        link = '[{}](file://{})'.format(self.title, link_path)
        icon = self.icon  # read once, every access looks the icon up again
        if icon:
            return '![](file://{})'.format('media/fileicons/' + os.path.basename(icon)) + ' ' + link
        return link
def filename(filename):
    """
    Make a file name safe for the OS the script runs on
    :param filename: file name
    :return: the file name with characters the current OS doesn't allow replaced or removed
    """
    system = platform.system()

    if system == 'Linux':
        substitutes = {'/': '-'}
    elif system == 'Darwin':
        substitutes = {'/': '-', ':': '-'}
    else:
        # Any other OS gets the most restrictive set of rules
        substitutes = {':': '-', '/': '-', '\\': '-', '|': '-',
                       '?': '', '*': '',
                       '<': '(', '>': ')', '"': "'"}

    # No substitute is itself a replaced character, so one pass gives the same result
    # as replacing the characters one after another
    return filename.translate(str.maketrans(substitutes))