diff --git a/inbox/inbox.py b/inbox/inbox.py index 776621e..4720e64 100644 --- a/inbox/inbox.py +++ b/inbox/inbox.py @@ -12,11 +12,39 @@ import md_link import md_convert import safe_path +try: + import watchdog.events + import watchdog.observers +except ImportError: + pass -def text_to_md(file_attrs): + +File_attrs = collections.namedtuple('File_attrs', 'file_path folder_dir_path output_dir_path output_file') +""" +A named tuple which functions use to pass input data - data of files to be processed +:param file_path: full absolute path to the file to process +:param folder_dir_path: full absolute path to directory where 'media' and 'attachment' directories are +:param output_dir_path: full absolute path to directory where resulting text file will be stored +:param output_file: empty for new standalone text file with mtime in the name, + '*no mtime*' for or new standalone text file without mtime in the name + or full absolute path to the text file which will be appended with a new entry +""" + + +Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title') +'''A named tuple which functions use to pass output data - data of notes to be written. +:param input_file_path: full absolute path to the file which was processed to this tuple +:param output_file_path: full absolute path to the output text file which should be written +:param text: content of the text file which should be written +:param mtime: modification time of input file as markdown headline to optionally prepend a text +:param title: title of a input file as markdown headline to optionally prepend a text''' + + +def text_to_md(file_attrs, topic_marker): """ This will process specified text file getting its topics and replacing urls with favicons and titles where possible :param file_attrs: File_attrs named tuple + :param topic_marker: symbol(s) which start the 'topic' word, if such word present in text, it will go to 'topic.md' :return: list of Note_attrs named tuple """ filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0] @@ -28,8 +56,8 @@ def text_to_md(file_attrs): except(UnicodeDecodeError): return - topics = re.findall(file_attrs.topic_marker + '(\w*)', text) - text = re.sub(file_attrs.topic_marker + '\w*[ ]?', '', text).strip() + topics = re.findall(topic_marker + '(\w*)', text) + text = re.sub(topic_marker + '\w*[ ]?', '', text).strip() if re.match('^http[s]?://[^\s]*$', text): is_bookmark = True @@ -70,6 +98,7 @@ def text_to_md(file_attrs): title=headline_title)) return output + def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''): """ This will move specified convert specified html file to markdown and move all in-line images to sub-folder at media directory @@ -89,6 +118,7 @@ def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''): mtime='**{}** \n'.format(time.strftime('%x %a %X', mtime)), title='') + def file_to_md(file_attrs, media_dir_name): """ This will move specified file to media_dir_name and put note with a reference to that file instead @@ -121,45 +151,100 @@ def file_to_md(file_attrs, media_dir_name): title='# {}\n'.format(file.title)) -if __name__ == '__main__': - - File_attrs = collections.namedtuple('File_attrs', 'file_path folder_dir_path output_dir_path topic_marker output_file') +def make_flat_list(mixed_list, target_item_type=tuple): """ - A named tuple which functions use to pass input data - data of files to be processed - :param file_path: full absolute path to the file to process - :param folder_dir_path: full absolute path to directory where 'media' and 'attachment' directories are - :param output_dir_path: full absolute path to directory where resulting text file will be stored - :param topic_marker: symbol(s) which start the 'topic' word (for text files) - :param output_file: empty for new standalone text file with mtime in the name, - '*no mtime*' for or new standalone text file without mtime in the name - or full absolute path to the text file which will be appended with a new entry + Make a list that has lists and 'target_item_type' as items flat, not recursive. + :param mixed_list: list to make flat + :param target_item_type: type of items in the flat list + :return: flat list of 'target_item_type' """ + flat_list = [] + for object in mixed_list: + if type(object) == list: + for item in object: + if type(item) == target_item_type: + flat_list.append(item) + elif type(object) == target_item_type: + flat_list.append(object) + return flat_list - Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title') - '''A named tuple which functions use to pass output data - data of notes to be written. - :param input_file_path: full absolute path to the file which was processed to this tuple - :param output_file_path: full absolute path to the output text file which should be written - :param text: content of the text file which should be written - :param mtime: modification time of input file as markdown headline to optionally prepend a text - :param title: title of a input file as markdown headline to optionally prepend a text''' - def process_by_ext(file_attrs): - """ - This will run different functions to process specified File_attrs tuple based on file extension - :param file_attrs: File_attrs named tuple - :return: Note_attrs named tuple - """ - # print(file_attrs.file_path) - if file_attrs.file_path.endswith('.txt') or not os.path.splitext(file_attrs.file_path)[1]: - return text_to_md(file_attrs) - elif args.pandoc_bin and args.pandoc_ver and file_attrs.file_path.endswith(('.htm', '.html')): - return html_to_md(file_attrs, args.pandoc_bin, args.pandoc_ver) - elif file_attrs.file_path.endswith(('.jpg', '.png', '.gif')): - return file_to_md(file_attrs, 'media') +def process_by_path(file_path): + """ + Checks if the file is valid for processing and returns File_attrs tuple depending on its path + :param file_path: Absolute file path + :return: File_attrs named tuple + """ + if file_path.endswith(('.md', 'notes.sqlite')) \ + or file_path.startswith((folder_dir + os.sep + 'media', folder_dir + os.sep + 'attachments')) \ + or os.sep + '.' in file_path[len(folder_dir):] \ + or '_files' + os.sep in file_path[len(folder_dir):]: + return + + if file_path[:len(inbox_dir)] == inbox_dir: + if os.path.dirname(file_path) == inbox_dir: + return File_attrs(file_path=file_path, folder_dir_path=folder_dir, + output_dir_path=inbox_dir, output_file='') else: - return file_to_md(file_attrs, 'attachments') + return File_attrs(file_path=file_path, folder_dir_path=folder_dir, + output_dir_path=inbox_dir, + output_file=os.path.dirname(file_path)[len(inbox_dir)+1:].replace(os.sep, ' - ') + '.md') + else: + return File_attrs(file_path=file_path, folder_dir_path=folder_dir, + output_dir_path=os.path.dirname(file_path), output_file='*no mtime*') +def process_by_ext(file_attrs): + """ + This will run different functions to process specified File_attrs tuple based on file extension + :param file_attrs: File_attrs named tuple + :return: Note_attrs named tuple + """ + if file_attrs.file_path.endswith('.txt') or not os.path.splitext(file_attrs.file_path)[1]: + return text_to_md(file_attrs, args.topic_marker) + elif args.pandoc_bin and args.pandoc_ver and file_attrs.file_path.endswith(('.htm', '.html')): + return html_to_md(file_attrs, args.pandoc_bin, args.pandoc_ver) + elif file_attrs.file_path.endswith(('.jpg', '.png', '.gif')): + return file_to_md(file_attrs, 'media') + else: + return file_to_md(file_attrs, 'attachments') + + +def write_note_and_delete(note_attrs): # TODO Test + """ + Create or append existing note files based on Note_attrs tuples data, then delete the source file + :param note_attrs: Note_attrs named tuple + """ + if os.path.isfile(note_attrs.output_file_path): + if os.path.dirname(note_attrs.output_file_path) == inbox_dir: + note_file_path = note_attrs.output_file_path + with open(note_file_path, 'r') as source: + content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read() + else: + i = 1 + while os.path.isfile(os.path.splitext(note_attrs.output_file_path)[0] + '_' + str(i) + '.md'): + i += 1 + note_file_path = os.path.splitext(note_attrs.output_file_path)[0] + '_' + str(i) + '.md' + content = note_attrs.mtime + note_attrs.text + else: + note_file_path = note_attrs.output_file_path + if note_attrs.title: + content = note_attrs.title + note_attrs.text + else: + content = note_attrs.mtime + note_attrs.text + + with open(note_file_path, 'w') as output: + output.write(content) + + if os.path.isfile(note_file_path): + try: + os.remove(note_attrs.input_file_path) + except OSError: + pass + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser(description='A script to turn everything in the inbox directory to markdown notes.') arg_parser.add_argument('-i', '--inbox', action='store', dest='inbox_dir', required=True, help="Full absolute path to the inbox directory to organize") @@ -173,77 +258,88 @@ if __name__ == '__main__': help="Command/path to run pandoc") arg_parser.add_argument('-pv', '--pandoc-ver', action='store', dest='pandoc_ver', required=False, help="Installed pandoc version") + arg_parser.add_argument('-w', '--watch', action='store_true', dest='watch_fs', required=False, + help="Watch and process new files as they appear after initial scan") args = arg_parser.parse_args() inbox_dir = args.inbox_dir folder_dir = args.folder_dir - topic_marker = args.topic_marker os.makedirs(inbox_dir, exist_ok=True) os.makedirs(folder_dir + os.sep + 'media', exist_ok=True) os.makedirs(folder_dir + os.sep + 'attachments', exist_ok=True) - # Prepare a list of File_attrs tuples for process_by_ext function, based on file location, older files first - file_list = [] if args.scan_folder: - for subfolder, dirs, files in os.walk(folder_dir): - for file_path in sorted([subfolder + os.sep + file for file in files], key=os.path.getmtime): - if os.path.isfile(file_path) \ - and not file_path.endswith(('.md', 'notes.sqlite')) \ - and not file_path.startswith((inbox_dir, folder_dir + os.sep + 'media', folder_dir + os.sep + 'attachments')) \ - and os.sep + '.' not in file_path.replace(folder_dir, '') \ - and '_files' + os.sep not in file_path.replace(folder_dir, ''): - file_list.append([File_attrs(file_path=file_path, folder_dir_path=folder_dir, output_dir_path=os.path.dirname(file_path), - topic_marker=topic_marker, output_file='*no mtime*')]) + scan_path = folder_dir + else: + scan_path = inbox_dir - for file_path in sorted([inbox_dir + os.sep + path for path in os.listdir(inbox_dir)], key=os.path.getmtime): - if os.path.isdir(file_path) \ - and not os.path.basename(file_path).startswith('.') \ - and not file_path.endswith('_files'): - for sub_file in sorted([file_path + os.sep + path for path in os.listdir(file_path)], key=os.path.getmtime): - if not sub_file.endswith('.md') \ - and not os.path.basename(sub_file).startswith('.'): - file_list.append([File_attrs(file_path=sub_file, folder_dir_path=folder_dir, output_dir_path=inbox_dir, - topic_marker=topic_marker, output_file=os.path.basename(file_path) + '.md')]) - else: - if os.path.isfile(file_path) \ - and not file_path.endswith('.md') \ - and not os.path.basename(file_path).startswith('.'): - file_list.append([File_attrs(file_path=file_path, folder_dir_path=folder_dir, output_dir_path=inbox_dir, - topic_marker=topic_marker, output_file='')]) + file_list = [] + for dir, subdirs, files in os.walk(scan_path): + for file_path in sorted([dir + os.sep + file for file in files], key=os.path.getmtime): + file_attrs = process_by_path(file_path) + if file_attrs: + file_list.append([file_attrs]) - # Run process_by_ext for each File_attrs tuple putting resulting Note_attrs tuples to write_list write_list = multiprocessing.dummy.Pool(100).starmap(process_by_ext, file_list) - # Due to text_to_md outputs list of Note_attrs tuples, this should turn write_list to a flat list - flat_write_list = [] - for object in write_list: - if type(object) == list: - for item in object: - if type(item) == Note_attrs: - flat_write_list.append(item) - elif type(object) == Note_attrs: - flat_write_list.append(object) + flat_write_list = make_flat_list(write_list, Note_attrs) - # Create or append existing text files based on Note_attrs tuples data for note_attrs in flat_write_list: + write_note_and_delete(note_attrs) + + if args.watch_fs: + try: - with open(note_attrs.output_file_path, 'r') as source: - content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read() - except OSError: - if note_attrs.title: - content = note_attrs.title + note_attrs.text - else: - content = note_attrs.mtime + note_attrs.text + import watchdog.events + import watchdog.observers + except ImportError: + print("Can't find Watchdog module. Watching for changes won't work.") + exit(1) - with open(note_attrs.output_file_path, 'w') as output: - output.write(content) - if os.path.isfile(note_attrs.output_file_path): - try: - os.remove(note_attrs.input_file_path) - except OSError: - pass + class FsEventHandler(watchdog.events.FileSystemEventHandler): + def on_any_event(self, event): + if event.is_directory: + return + elif event.event_type == 'created': + file_path = event.src_path + elif event.event_type == 'moved': + file_path = event.dest_path + else: + return - if platform.system() == 'Linux': - os.system('notify-send "-a" "Inbox script" "Your inbox is organized"') # TODO maybe change to gi.repository: Notify \ No newline at end of file + file_attrs = process_by_path(file_path) + + if file_attrs: + # Wait for all the web page resources saved/synced + if file_path.endswith(('.htm', '.html')): time.sleep(2) + obj_to_write = process_by_ext(file_attrs) + else: + return + + if type(obj_to_write) == list: + for note_attrs in obj_to_write: + write_note_and_delete(note_attrs) + else: + write_note_and_delete(obj_to_write) + + + event_handler = FsEventHandler() + observer = watchdog.observers.Observer() + observer.schedule(event_handler, scan_path, recursive=True) + observer.start() + + try: + while True: + time.sleep(5) + except: + observer.stop() + + observer.join() + + + + + # if platform.system() == 'Linux': + # os.system('notify-send "-a" "Inbox script" "Your inbox is organized"') # TODO maybe change to gi.repository: Notify \ No newline at end of file diff --git a/inbox/inbox.qml b/inbox/inbox.qml index 5a37ef4..5425c00 100644 --- a/inbox/inbox.qml +++ b/inbox/inbox.qml @@ -9,6 +9,7 @@ Script { return '' } + /// TODO Change to platform dependant defaults without checking function setDefaultPyCommand() { if (script.getPersistentVariable('MdNT/pyCommand', '') == '') { script.setPersistentVariable('MdNT/pyCommand', checkPyCommand()) @@ -19,10 +20,10 @@ Script { property string scriptDirPath property string inboxFolder property bool scanFolder + property bool watchFS property string tagMarker property string pyCommand property string pandocCommand - property string pandocVersion property variant settingsVariables: [ @@ -37,16 +38,25 @@ Script { { 'identifier': 'scanFolder', 'name': 'Scan whole folder rather than only Inbox folder', - 'description': 'If true the script will convert any non-".md" file in folder to note. \n' + + 'description': 'If true the script will convert any non-".md" file in folder to note.\n' + '"Sub-folder to single note" and modification times in note titles will still be only for Inbox.', 'type': 'boolean', 'default': 'false', }, + { + 'identifier': 'watchFS', + 'name': 'Continuously watch for new files and process them as they appear', + 'description': 'If true the script will continuously watch inbox/folder (depending on above setting)\n' + + 'for new files and process them as soon as they appear.\n' + + 'The script will start working on load, no toolbar button will appear.', + 'type': 'boolean', + 'default': 'false', + }, { 'identifier': 'tagMarker', 'name': 'Tag word marker', - 'description': 'A symbol or group of symbols which start a "topic" word for ".txt" notes. \n' + - 'For example a txt note with "@tag" word will go to "tag.md" note', + 'description': 'A symbol or string of symbols which start a "topic" word for ".txt" notes. \n' + + 'For example, if set to "@", a ".txt" file with "@tag" word will go to "tag.md" note', 'type': 'string', 'default': '@', }, @@ -66,10 +76,33 @@ Script { }, ] + function runInbox() { + var pyScriptPath = scriptDirPath + script.dirSeparator() + 'inbox.py' + var inboxPath = script.currentNoteFolderPath() + script.dirSeparator() + inboxFolder + + var args = [pyScriptPath, + '--inbox', inboxPath, + '--folder', script.currentNoteFolderPath(), + '--marker', tagMarker] + + if (scanFolder == true) { + args.push('--scan-folder') + } + + if (watchFS == true) { + args.push('--watch') + } + + if (pandocVersion != '') { + args.push('--pandoc-bin', pandocCommand, + '--pandoc-ver', pandocVersion) + } + + script.startDetachedProcess(pyCommand, args) + script.log('Processing inbox...') + } function init() { - pandocVersion = script.getPersistentVariable('MdNT/pandocVersion', '') - /// Check if set pyCommand can run Python 3 if (script.getPersistentVariable('MdNT/pyCommand', '') != pyCommand) { @@ -84,30 +117,36 @@ Script { /// Get the version of pandoc if (script.getPersistentVariable('MdNT/pandocCommand', '') != pandocCommand) { var pandocCheck = script.startSynchronousProcess(pandocCommand, '-v', '').toString().split('\n')[0] + if (pandocCheck.indexOf('pandoc') != '-1') { script.setPersistentVariable('MdNT/pandocCommand', pandocCommand) script.setPersistentVariable('MdNT/pandocVersion', pandocCheck.slice(7)) - pandocVersion = pandocCheck.slice(7) } else { - script.setPersistentVariable('MdNT/pandocCommand', '') + script.setPersistentVariable('MdNT/pandocCommand', '') } } /// Issues alerts if (script.getPersistentVariable('MdNT/pandocCommand', '') == '') { - script.informationMessageBox('The command/path for pandoc in the script settings is not valid\n' + - 'Converting web pages will be disabled.', - 'Script') + script.informationMessageBox('The command/path for pandoc in the script settings is not valid.\n' + + 'Converting web pages to notes will be disabled.', + 'Inbox script') script.setPersistentVariable('MdNT/pandocCommand', pandocCommand) script.setPersistentVariable('MdNT/pandocVersion', '') pandocVersion = '' } + else { + pandocVersion = script.getPersistentVariable('MdNT/pandocVersion', '') + } if (script.getPersistentVariable('MdNT/pyCommand', '') == '') { - script.informationMessageBox('The command/path for Python 3 interpreter in the script settings is not valid\n' + + script.informationMessageBox('The command/path for Python 3 interpreter in the script settings is not valid.\n' + 'Please set the correct command/path.', - 'Script') + 'Inbox script') + } + else if (watchFS == true) { + runInbox() } else { script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg') @@ -116,25 +155,7 @@ Script { function customActionInvoked(action) { if (action == 'inbox') { - var pyScriptPath = scriptDirPath + script.dirSeparator() + 'inbox.py' - var inboxPath = script.currentNoteFolderPath() + script.dirSeparator() + inboxFolder - - var args = [pyScriptPath, - '--inbox', inboxPath, - '--folder', script.currentNoteFolderPath(), - '--marker', tagMarker] - - if (scanFolder == true) { - args.push('--scan-folder') - } - - if (pandocVersion != '') { - args.push('--pandoc-bin', pandocCommand, - '--pandoc-ver', pandocVersion) - } - - script.startDetachedProcess(pyCommand, args) - script.log('Processing inbox...') + runInbox() } } } \ No newline at end of file diff --git a/inbox/info.json b/inbox/info.json index f7eb90f..828c437 100644 --- a/inbox/info.json +++ b/inbox/info.json @@ -5,9 +5,7 @@ "resources": ["inbox.py", "md_link.py", "md_convert.py", "safe_path.py"], "authors": ["@Maboroshy"], "platforms": ["linux", "macos", "windows"], - "version": "0.1.1", + "version": "0.2.0", "minAppVersion": "17.05.8", - "description" : "Inbox is a complex script to organize data added from different devices and applications.\n\nIt's currently at beta stage, so using it for unrecoverable data is discouraged.\n\nThe script processes files in inbox folder you set or whole note folder (depending on settings) as follows:\n- The script turns all .txt files and files with no extension to .md note with modification time in the file name.\n- If there's a word that starts with a '@' (configurable) in file text, content of that file will be added to .md note named like that word. The content of .txt file with '@tag' in text will be added to 'tag.md' file.\n- Every URL in text file will be converted to markdown link, with web page favicon and title if possible.\n- Any web page, saved as .htm file with '_files' folder, will be converted to .md note.\n- Any image file placed to inbox folder will be moved to media folder. The script will put .md note with the in-line image instead.\n- Any other file placed to inbox folder will be moved to attachments folder. The script will put .md note with a link to the file instead. On Linux the file icon will be put before link.\n- (Inbox folder only) Text/link of file placed to sub-folder of inbox folder will be added to .md note named like the sub-folder. Everything put to 'topic' sub-folder will be added to 'topic.md' file.\n- (Linux only) The script will put thumbnail for .pdf file as a link to it.\n- (Linux only) The script will replace large in-line images with a smaller ones as a link to original ones.\n\n Workflow examples - - Dependencies:\nPython 3.3+ Interpreter;\nPandoc;\n(Linux only, pdf thumbnails) ghostscript;\n(Linux only, image thumbnails) imagemagick;\n(Linux only, file icons) python-gobject.\n\nUsage:\nRun the script by toolbar button or menu item." + "description" : "Inbox is a complex script to organize data added from different devices and applications.\n\nIt's currently at beta stage, so using it for unrecoverable data is discouraged.\n\nThe script processes files in inbox folder you set or whole note folder (depending on settings) as follows:\n- The script turns all .txt files and files with no extension to .md notes with modification time in the file name.\n- If there's a word that starts with a '@' (configurable) in .txt file, content of that file will be added to .md note named like that word. The content of .txt file with '@topic' in text will be added to 'topic.md' file. Outside of inbox folder the text won't be added to note but will become a new standalone note.\n- Every URL in text file will be converted to markdown link, with web page favicon and title if possible.\n- Any web page, saved as .htm file with '_files' folder, will be converted to .md note.\n- Any image file placed to inbox folder will be moved to media folder. The script will put .md note with the in-line image instead.\n- Any other file placed to inbox folder will be moved to attachments folder. The script will put .md note with a link to the file instead. On Linux the file icon will be put before link.\n- (Inbox folder only) Text/link of file placed to sub-folder of inbox folder will be added to .md note named like the sub-folder. Everything put to 'topic' sub-folder will be added to 'topic.md' file. Topic words won't work in these sub-folders.\n- (Linux only) The script will put thumbnail for .pdf file as a link to it.\n- (Linux only) The script will replace large in-line image with a smaller one as a link to original.\n\n Workflow examples\n\nDependencies:\nPython 3.3+ Interpreter;\n(optional, continuous watch mode)Watchdog module;\n(optional, web page to note)Pandoc;\n(optional, Linux only, pdf thumbnails) ghostscript;\n(optional, Linux only, image thumbnails) imagemagick;\n(optional, Linux only, file icons) python-gobject.\n\nUsage:\nRun the script by toolbar button or menu item. Or, if set to 'continuous watch' mode, it will run itself on load." } diff --git a/inbox/md_convert.py b/inbox/md_convert.py index d61207b..29a703c 100644 --- a/inbox/md_convert.py +++ b/inbox/md_convert.py @@ -23,7 +23,7 @@ def html_text(html_text, pandoc_bin='pandoc', pandoc_ver='1.19.1'): '--atx-headers'] # Remove firefox reader mode panel if there's one - html_text = re.sub('', '', html_text, flags=re.DOTALL) + html_text = re.sub('', '', html_text, flags=re.DOTALL) ## TODO Maybe use html.parser try: pandoc_pipe = subprocess.Popen(pandoc_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -66,4 +66,4 @@ def saved_html(html_path, folder_dir_path, pandoc_bin='pandoc', pandoc_ver='1.19 shutil.rmtree(os.path.splitext(html_path)[0] + '_files', True) - return md_text \ No newline at end of file + return md_text diff --git a/inbox/md_link.py b/inbox/md_link.py index 5d61eac..a752c8d 100644 --- a/inbox/md_link.py +++ b/inbox/md_link.py @@ -61,9 +61,14 @@ class URL: try: html_text = urllib.request.urlopen(self.url).read().decode('utf-8') # TODO encodings other that utf-8, https fails on early Python 3 versions except (urllib.error.HTTPError, urllib.error.URLError, UnicodeDecodeError): - return '' + title = '' else: - return re.search('(.+?)', html_text, re.IGNORECASE | re.DOTALL).group(1) + title = re.search('(.+?)', html_text, re.IGNORECASE | re.DOTALL).group(1) + + if title: + return title + else: + return self.url.split('//')[-1].split('/')[0] @property def md(self):