qownnotes-scripts/inbox/inbox.py

#!/usr/bin/env python

import os
import re
import sys
import time
import argparse
import collections
import multiprocessing.dummy

import md_link
import md_convert
import safe_path

try:
    import watchdog.events
    import watchdog.observers
except ImportError:
    pass


# Input record passed between functions: describes one file waiting to be processed.
#   file_path       - full absolute path to the file to process
#   folder_dir_path - full absolute path to the directory where the 'media' and
#                     'attachments' directories are
#   output_dir_path - full absolute path to the directory where the resulting note is stored
#   output_file     - '' for a new standalone note with mtime in its name,
#                     '*no mtime*' for a new standalone note without mtime in its name,
#                     otherwise the name of the note file that receives the new entry
File_attrs = collections.namedtuple(
    'File_attrs',
    ['file_path', 'folder_dir_path', 'output_dir_path', 'output_file'],
)


# Output record passed between functions: describes one note to be written.
#   input_file_path  - full absolute path to the source file this note was produced from
#   output_file_path - full absolute path to the note file which should be written
#   text             - body of the note
#   mtime            - modification time of the input file, formatted as a markdown line
#                      that may be put in front of the text
#   title            - title of the input file as a markdown headline that may be put in
#                      front of the text ('' when there is none)
Note_attrs = collections.namedtuple(
    'Note_attrs',
    ['input_file_path', 'output_file_path', 'text', 'mtime', 'title'],
)


def text_to_md(file_attrs, topic_marker):
    """
    Turn a plain-text file into one or more markdown notes.

    Topic words (words prefixed with ``topic_marker``) are collected and stripped from the
    text, and every URL found in the text is replaced with the markdown that
    ``md_link.URL`` produces for it.

    :param file_attrs: File_attrs named tuple
    :param topic_marker: literal symbol(s) which start a 'topic' word; if such a word is
                         present in the text, the text goes to 'topic.md'
    :return: list of Note_attrs named tuples (one per output file),
             or None if the file cannot be decoded as text
    """
    filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]
    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))

    try:
        with open(file_attrs.file_path, 'r') as text_file:
            text = text_file.read()
    except UnicodeDecodeError:
        # Not decodable as text - leave the file alone
        return None

    # The marker is user input, so it must be matched literally ('+', '$', '.' etc.)
    marker = re.escape(topic_marker)
    # Keep each topic once (a repeated topic would write the same entry twice) and
    # skip empty matches produced by a lone marker (they would end up in a '.md' note)
    topics = []
    for topic in re.findall(marker + r'(\w*)', text):
        if topic and topic not in topics:
            topics.append(topic)
    text = re.sub(marker + r'\w*[ ]?', '', text).strip()

    # A text consisting of nothing but a single URL is treated as a bookmark
    is_bookmark = re.fullmatch(r'https?://\S+', text) is not None
    bookmark_url = text if is_bookmark else None

    # One md_link.URL object per distinct address
    urls = {}

    def _link_to_md(match):
        """Return the markdown for one matched URL, keeping the whitespace before it."""
        address = match.group(2)
        if address not in urls:
            urls[address] = md_link.URL(address, file_attrs.folder_dir_path)
        return match.group(1) + urls[address].md

    # A URL ends at the first whitespace. re.sub substitutes every match exactly once,
    # so already generated markdown is never scanned (and replaced) again.
    text = re.sub(r'(^|\s)(https?://\S+)', _link_to_md, text, flags=re.MULTILINE | re.IGNORECASE)

    if file_attrs.output_file and file_attrs.output_file != '*no mtime*':
        output_files = [file_attrs.output_file]
        headline_title = ''
    elif topics:
        output_files = [topic + '.md' for topic in topics]
        headline_title = ''
    else:
        # Standalone note: named after the bookmark title or after the source file
        note_title = urls[bookmark_url].title if is_bookmark else filename
        headline_title = '# {}\n'.format(note_title)
        if file_attrs.output_file == '*no mtime*':
            output_files = [note_title + '.md']
        else:
            output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + note_title + '.md']

    mtime_line = '**{}**  \n'.format(time.strftime('%x %a %X', mtime))
    return [Note_attrs(input_file_path=file_attrs.file_path,
                       output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),
                       text=text,
                       mtime=mtime_line,
                       title=headline_title)
            for output_file in output_files]


def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''):
    """
    Convert the specified saved html file to a markdown note using md_convert.saved_html
    (which, per the original description, also moves in-line images to a sub-folder of
    the media directory - TODO confirm against md_convert).
    :param file_attrs: File_attrs named tuple
    :param pandoc_bin: command/path to run pandoc, handed over to md_convert.saved_html
    :param pandoc_ver: pandoc version, handed over to md_convert.saved_html
    :return: Note_attrs named tuple, or None if the conversion produced no text
    """
    source_path = file_attrs.file_path

    # Modification time is read before the conversion is started
    modified = time.localtime(os.path.getmtime(source_path))

    markdown = md_convert.saved_html(source_path, file_attrs.folder_dir_path,
                                     pandoc_bin=pandoc_bin, pandoc_ver=pandoc_ver)
    if not markdown:
        return None

    # The note is named after the html file, with '.md' in place of the extension
    base_name, _ = os.path.splitext(os.path.basename(source_path))
    note_path = file_attrs.output_dir_path + os.sep + safe_path.filename(base_name + '.md')
    stamp = time.strftime('%x %a %X', modified)

    return Note_attrs(input_file_path=source_path,
                      output_file_path=note_path,
                      text=markdown,
                      mtime='**{}**  \n'.format(stamp),
                      title='')


def file_to_md(file_attrs, media_dir_name):
    """
    Move the specified file to media_dir_name and build a note which references it.

    The moved file is renamed to '<mtime in whole seconds>_<original name>'.

    :param file_attrs: File_attrs named tuple
    :param media_dir_name: name of sub-directory in folder_dir_path where file will be moved (for non-text files)
    :return: Note_attrs named tuple, or None if the file could not be moved
    """
    import shutil

    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))
    original_name = os.path.basename(file_attrs.file_path)
    new_filename = str(int(time.mktime(mtime))) + '_' + original_name
    new_path = os.path.join(file_attrs.folder_dir_path, media_dir_name, new_filename)

    try:
        # Unlike os.rename, shutil.move also works across file systems
        shutil.move(file_attrs.file_path, new_path)
    except OSError:
        # Without the moved file the note would link to nothing, and writing the note
        # would then delete the only copy of the source - so produce no note at all
        return None

    file = md_link.File(new_path, file_attrs.folder_dir_path, os.path.splitext(original_name)[0])

    if file_attrs.output_file == '*no mtime*':
        output_file = file.title + '.md'
    elif file_attrs.output_file:
        output_file = file_attrs.output_file
    else:
        output_file = time.strftime('%m-%d %H:%M', mtime) + ' ' + file.title + '.md'

    return Note_attrs(input_file_path=file_attrs.file_path,
                      output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),
                      text=file.md,
                      mtime='**{}**  \n'.format(time.strftime('%x %a %X', mtime)),
                      title='# {}\n'.format(file.title))


def make_flat_list(mixed_list, target_item_type=tuple):
    """
    Make a list that has lists and 'target_item_type' as items flat, not recursive.

    Anything that is neither a 'target_item_type' instance nor a list (e.g. None) is
    dropped, and so are non-matching items inside the nested lists. Instances of
    subclasses match too, so the default 'tuple' also accepts named tuples.

    :param mixed_list: list to make flat
    :param target_item_type: type of items in the flat list
    :return: flat list of 'target_item_type'
    """
    flat_list = []
    for obj in mixed_list:
        # Only one level of nesting is unpacked
        candidates = obj if isinstance(obj, list) else [obj]
        flat_list.extend(item for item in candidates if isinstance(item, target_item_type))
    return flat_list


def process_by_path(file_path):
    """
    Checks if the file is valid for processing and returns File_attrs tuple depending on its path.

    Relies on the module-level 'folder_dir' and 'inbox_dir' set by the script entry point.
    Skipped are: markdown notes and 'notes.sqlite', everything inside the 'media' and
    'attachments' directories, hidden files/directories and contents of '*_files' directories.

    :param file_path: Absolute file path
    :return: File_attrs named tuple, or None if the file must not be processed
    """
    if file_path.endswith(('.md', 'notes.sqlite')):
        return None

    # Prefixes end with a separator so that only real children match:
    # 'media_old/x' or a file called 'media.txt' are not part of 'media/'
    folder_prefix = folder_dir + os.sep
    path_in_folder = file_path[len(folder_dir):]
    if file_path.startswith((folder_prefix + 'media' + os.sep, folder_prefix + 'attachments' + os.sep)) \
            or os.sep + '.' in path_in_folder \
            or '_files' + os.sep in path_in_folder:
        return None

    # Same separator rule here: a sibling such as 'inbox_old' is not the inbox
    if not file_path.startswith(inbox_dir + os.sep):
        # Outside the inbox the note is created next to the file, without mtime in its name
        return File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                          output_dir_path=os.path.dirname(file_path), output_file='*no mtime*')

    parent_dir = os.path.dirname(file_path)
    if parent_dir == inbox_dir:
        # Directly in the inbox: standalone note
        output_file = ''
    else:
        # In a sub-directory of the inbox: the note is named after the
        # sub-directory path, e.g. 'a/b' -> 'a - b.md'
        output_file = parent_dir[len(inbox_dir) + 1:].replace(os.sep, ' - ') + '.md'

    return File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                      output_dir_path=inbox_dir, output_file=output_file)


def process_by_ext(file_attrs):
    """
    Dispatch the specified File_attrs tuple to the converter matching its file extension.
    Reads the parsed command line from the module-level 'args'.
    :param file_attrs: File_attrs named tuple
    :return: whatever the chosen converter returns (text_to_md, html_to_md or file_to_md)
    """
    path = file_attrs.file_path
    extension = os.path.splitext(path)[1]

    # '.txt' files and files without any extension are handled as text
    if path.endswith('.txt') or not extension:
        return text_to_md(file_attrs, args.topic_marker)

    # Html is only converted when both pandoc options were given on the command line
    pandoc_configured = args.pandoc_bin and args.pandoc_ver
    if pandoc_configured and path.endswith(('.htm', '.html')):
        return html_to_md(file_attrs, args.pandoc_bin, args.pandoc_ver)

    # Everything else is moved: images to 'media', the rest to 'attachments'
    target_dir = 'media' if path.endswith(('.jpg', '.png', '.gif')) else 'attachments'
    return file_to_md(file_attrs, target_dir)


def write_note_and_delete(note_attrs):
    """
    Create or extend a note file based on Note_attrs tuple data, then delete the source file.

    - Output file does not exist yet: it is created with the title (or, if there is no
      title, the mtime line) followed by the text.
    - Output file exists in the inbox directory (module-level 'inbox_dir'): the new entry
      (mtime line + text) is put in front of the existing content.
    - Output file exists elsewhere: a new file '<name>_<i>.md' with the first free index
      is created instead.

    :param note_attrs: Note_attrs named tuple; None (a converter produced nothing) is ignored
    """
    if note_attrs is None:
        return

    target_path = note_attrs.output_file_path

    if not os.path.isfile(target_path):
        note_file_path = target_path
        if note_attrs.title:
            content = note_attrs.title + note_attrs.text
        else:
            content = note_attrs.mtime + note_attrs.text
    elif os.path.dirname(target_path) == inbox_dir:
        note_file_path = target_path
        with open(note_file_path, 'r') as source:
            content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read()
    else:
        # Never touch an existing note outside the inbox - pick the first free numbered name
        base_path = os.path.splitext(target_path)[0]
        i = 1
        while os.path.isfile(base_path + '_' + str(i) + '.md'):
            i += 1
        note_file_path = base_path + '_' + str(i) + '.md'
        content = note_attrs.mtime + note_attrs.text

    with open(note_file_path, 'w') as output:
        output.write(content)

    # The source is only removed once the note is really there
    if os.path.isfile(note_file_path):
        try:
            os.remove(note_attrs.input_file_path)
        except OSError:
            # Best effort: the source may already be gone (e.g. moved by file_to_md)
            pass


if __name__ == '__main__':

    # abspath first: for a bare 'inbox.py' dirname() is '' and os.listdir('') raises
    script_path = os.path.dirname(os.path.abspath(sys.argv[0]))

    # Removing the lock files makes any watching instance started earlier stop
    # (each instance watches its own lock file, see the loop at the end)
    for lock_name in os.listdir(script_path):
        if lock_name.endswith('.lock'):
            os.remove(os.path.join(script_path, lock_name))

    arg_parser = argparse.ArgumentParser(description='A script to turn everything in the inbox directory to markdown notes.')
    arg_parser.add_argument('-i', '--inbox', action='store', dest='inbox_dir', required=True,
                            help="Full absolute path to the inbox directory to organize")
    arg_parser.add_argument('-f', '--folder', action='store', dest='folder_dir', required=True,
                            help="Full absolute path to directory where 'media' and 'attachment' directories are")
    arg_parser.add_argument('-m', '--marker', action='store', dest='topic_marker', required=False, default='@',
                            help="Symbol(s) which start the 'topic' word (for text files)")
    arg_parser.add_argument('-s', '--scan-folder', action='store_true', dest='scan_folder', required=False,
                            help="Process whole folder rather than only inbox")
    arg_parser.add_argument('-p', '--pandoc-bin', action='store', dest='pandoc_bin', required=False,
                            help="Command/path to run pandoc")
    arg_parser.add_argument('-pv', '--pandoc-ver', action='store', dest='pandoc_ver', required=False,
                            help="Installed pandoc version")
    arg_parser.add_argument('-w', '--watch', action='store_true', dest='watch_fs', required=False,
                            help="Watch and process new files as they appear after initial scan")
    args = arg_parser.parse_args()

    # Module-level names used by process_by_path() and write_note_and_delete().
    # Normalized so that a trailing separator does not break their path comparisons.
    inbox_dir = os.path.normpath(args.inbox_dir)
    folder_dir = os.path.normpath(args.folder_dir)

    os.makedirs(inbox_dir, exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'media'), exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'attachments'), exist_ok=True)

    scan_path = folder_dir if args.scan_folder else inbox_dir

    # Initial scan: collect the files to process, oldest first within each directory
    file_list = []
    for root, subdirs, files in os.walk(scan_path):
        for file_path in sorted((os.path.join(root, name) for name in files), key=os.path.getmtime):
            file_attrs = process_by_path(file_path)
            if file_attrs:
                file_list.append(file_attrs)

    # Conversion runs in a thread pool; map() blocks until all results are in
    # and the 'with' block shuts the pool down afterwards
    with multiprocessing.dummy.Pool(100) as pool:
        write_list = pool.map(process_by_ext, file_list)

    # Writing is done sequentially, converters that returned nothing are filtered out
    for note_attrs in make_flat_list(write_list, Note_attrs):
        write_note_and_delete(note_attrs)

    if args.watch_fs:

        try:
            import watchdog.events
            import watchdog.observers
        except ImportError:
            print("Can't find Watchdog module. Watching for changes won't work.")
            sys.exit(1)


        class FsEventHandler(watchdog.events.FileSystemEventHandler):
            """Processes every file which is created in or moved to the watched directory."""

            def on_any_event(self, event):
                if event.is_directory:
                    return

                if event.event_type == 'created':
                    file_path = event.src_path
                elif event.event_type == 'moved':
                    file_path = event.dest_path
                else:
                    return

                file_attrs = process_by_path(file_path)
                if not file_attrs:
                    return

                # Wait for all the web page resources saved/synced
                if file_path.endswith(('.htm', '.html')):
                    time.sleep(2)

                try:
                    # A converter returns a tuple, a list of tuples or None -
                    # make_flat_list() reduces all of these to a plain list
                    for note_attrs in make_flat_list([process_by_ext(file_attrs)], Note_attrs):
                        write_note_and_delete(note_attrs)
                except OSError as error:
                    # E.g. a temporary file which vanished before it could be processed;
                    # report it and keep watching
                    print("Can't process {}: {}".format(file_path, error))


        lockfile_path = os.path.join(script_path, str(int(time.time())) + '.lock')
        open(lockfile_path, 'w').close()

        event_handler = FsEventHandler()
        observer = watchdog.observers.Observer()
        observer.schedule(event_handler, scan_path, recursive=True)
        observer.start()

        try:
            # Keep watching until the lock file is removed (by a newly started instance)
            while os.path.isfile(lockfile_path):
                time.sleep(5)
        except KeyboardInterrupt:
            pass
        finally:
            observer.stop()
            observer.join()
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`#!/usr/bin/env python`

			`import os`
			`import re`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`import sys`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`import time`
New version 2017-09-03 16:29:59 +00:00			`import argparse`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`import collections`
			`import multiprocessing.dummy`

			`import md_link`
New version 2017-09-03 16:29:59 +00:00			`import md_convert`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`import safe_path`

Version 0.2 2017-09-13 18:10:38 +00:00			`try:`
			`import watchdog.events`
			`import watchdog.observers`
			`except ImportError:`
			`pass`


			`File_attrs = collections.namedtuple('File_attrs', 'file_path folder_dir_path output_dir_path output_file')`
			`"""`
			`A named tuple which functions use to pass input data - data of files to be processed`
			`:param file_path: full absolute path to the file to process`
			`:param folder_dir_path: full absolute path to directory where 'media' and 'attachment' directories are`
			`:param output_dir_path: full absolute path to directory where resulting text file will be stored`
			`:param output_file: empty for new standalone text file with mtime in the name,`
			`'no mtime' for or new standalone text file without mtime in the name`
			`or full absolute path to the text file which will be appended with a new entry`
			`"""`


			`Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title')`
			`'''A named tuple which functions use to pass output data - data of notes to be written.`
			`:param input_file_path: full absolute path to the file which was processed to this tuple`
			`:param output_file_path: full absolute path to the output text file which should be written`
			`:param text: content of the text file which should be written`
			`:param mtime: modification time of input file as markdown headline to optionally prepend a text`
			`:param title: title of a input file as markdown headline to optionally prepend a text'''`


			`def text_to_md(file_attrs, topic_marker):`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`"""`
New version 2017-09-03 16:29:59 +00:00			`This will process specified text file getting its topics and replacing urls with favicons and titles where possible`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`:param file_attrs: File_attrs named tuple`
Version 0.2 2017-09-13 18:10:38 +00:00			`:param topic_marker: symbol(s) which start the 'topic' word, if such word present in text, it will go to 'topic.md'`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`:return: list of Note_attrs named tuple`
			`"""`
			`filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]`
			`mtime = time.localtime(os.path.getmtime(file_attrs.file_path))`

Fixed folder scan 2017-09-08 20:34:13 +00:00			`try:`
			`with open(file_attrs.file_path, 'r') as text_file:`
			`text = text_file.read()`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`except UnicodeDecodeError:`
Fixed folder scan 2017-09-08 20:34:13 +00:00			`return`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00			`topics = re.findall(topic_marker + '(\w*)', text)`
			`text = re.sub(topic_marker + '\w*[ ]?', '', text).strip()`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
			`if re.match('^http[s]?://[^\s]*$', text):`
			`is_bookmark = True`
			`else:`
			`is_bookmark = False`

			`for link in re.findall('(^\|\s)(http[s]?://.*)(\s\|$)', text, re.MULTILINE \| re.IGNORECASE):`
			`url = md_link.URL(link[1], file_attrs.folder_dir_path)`
			`text = text.replace(link[1], url.md)`
			`if is_bookmark:`
			`bookmark_title = url.title`

New version 2017-09-03 16:29:59 +00:00			`if file_attrs.output_file and file_attrs.output_file != 'no mtime':`
			`output_files = [file_attrs.output_file]`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`headline_title = ''`
New version 2017-09-03 16:29:59 +00:00			`elif topics:`
			`output_files = [topic + '.md' for topic in topics]`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`headline_title = ''`
			`elif is_bookmark:`
			`headline_title = '# {}\n'.format(bookmark_title)`
New version 2017-09-03 16:29:59 +00:00			`if file_attrs.output_file == 'no mtime':`
			`output_files = [bookmark_title + '.md']`
			`else:`
			`output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + bookmark_title + '.md']`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`else:`
			`headline_title = '# {}\n'.format(filename)`
New version 2017-09-03 16:29:59 +00:00			`if file_attrs.output_file == 'no mtime':`
			`output_files = [filename + '.md']`
			`else:`
			`output_files = [time.strftime('%m-%d %H:%M', mtime) + ' ' + filename + '.md']`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
			`output = []`
			`for output_file in output_files:`
			`output.append(Note_attrs(input_file_path=file_attrs.file_path,`
			`output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),`
			`text=text,`
			`mtime='{} \n'.format(time.strftime('%x %a %X', mtime)),`
			`title=headline_title))`
			`return output`

Version 0.2 2017-09-13 18:10:38 +00:00
New version 2017-09-03 16:29:59 +00:00			`def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''):`
			`"""`
			`This will move specified convert specified html file to markdown and move all in-line images to sub-folder at media directory`
			`:param file_attrs: File_attrs named tuple`
			`:return: Note_attrs named tuple`
			`"""`
			`html_file_name_noext = os.path.splitext(os.path.basename(file_attrs.file_path))[0]`
			`mtime = time.localtime(os.path.getmtime(file_attrs.file_path))`
			`md_text = md_convert.saved_html(file_attrs.file_path, file_attrs.folder_dir_path,`
			`pandoc_bin=pandoc_bin, pandoc_ver=pandoc_ver)`
			`if not md_text:`
			`return`

			`return Note_attrs(input_file_path=file_attrs.file_path,`
			`output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(html_file_name_noext + '.md'),`
			`text=md_text,`
			`mtime='{} \n'.format(time.strftime('%x %a %X', mtime)),`
			`title='')`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`def file_to_md(file_attrs, media_dir_name):`
			`"""`
			`This will move specified file to media_dir_name and put note with a reference to that file instead`
			`:param file_attrs: File_attrs named tuple`
			`:param media_dir_name: name of sub-directory in folder_dir_path where file will be moved (for non-text files)`
			`:return: Note_attrs named tuple`
			`"""`
			`mtime = time.localtime(os.path.getmtime(file_attrs.file_path))`
			`new_filename = str(time.mktime(mtime))[:-2] + '_' + os.path.basename(file_attrs.file_path)`
			`new_path = os.path.join(file_attrs.folder_dir_path, media_dir_name, new_filename)`

			`try:`
			`os.rename(file_attrs.file_path, new_path)`
			`except OSError:`
			`pass`

			`file = md_link.File(new_path, file_attrs.folder_dir_path, os.path.splitext(os.path.basename(file_attrs.file_path))[0])`

New version 2017-09-03 16:29:59 +00:00			`if file_attrs.output_file == 'no mtime':`
			`output_file = file.title + '.md'`
			`elif file_attrs.output_file:`
			`output_file = file_attrs.output_file`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`else:`
			`output_file = time.strftime('%m-%d %H:%M', mtime) + ' ' + file.title + '.md'`

			`return Note_attrs(input_file_path=file_attrs.file_path,`
			`output_file_path=file_attrs.output_dir_path + os.sep + safe_path.filename(output_file),`
			`text=file.md,`
			`mtime='{} \n'.format(time.strftime('%x %a %X', mtime)),`
			`title='# {}\n'.format(file.title))`


Version 0.2 2017-09-13 18:10:38 +00:00			`def make_flat_list(mixed_list, target_item_type=tuple):`
			`"""`
			`Make a list that has lists and 'target_item_type' as items flat, not recursive.`
			`:param mixed_list: list to make flat`
			`:param target_item_type: type of items in the flat list`
			`:return: flat list of 'target_item_type'`
			`"""`
			`flat_list = []`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`for obj in mixed_list:`
			`if type(obj) == list:`
			`for item in obj:`
Version 0.2 2017-09-13 18:10:38 +00:00			`if type(item) == target_item_type:`
			`flat_list.append(item)`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`elif type(obj) == target_item_type:`
			`flat_list.append(obj)`
Version 0.2 2017-09-13 18:10:38 +00:00			`return flat_list`

Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00			`def process_by_path(file_path):`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`"""`
Version 0.2 2017-09-13 18:10:38 +00:00			`Checks if the file is valid for processing and returns File_attrs tuple depending on its path`
			`:param file_path: Absolute file path`
			`:return: File_attrs named tuple`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`"""`
Version 0.2 2017-09-13 18:10:38 +00:00			`if file_path.endswith(('.md', 'notes.sqlite')) \`
			`or file_path.startswith((folder_dir + os.sep + 'media', folder_dir + os.sep + 'attachments')) \`
			`or os.sep + '.' in file_path[len(folder_dir):] \`
			`or '_files' + os.sep in file_path[len(folder_dir):]:`
			`return`

			`if file_path[:len(inbox_dir)] == inbox_dir:`
			`if os.path.dirname(file_path) == inbox_dir:`
			`return File_attrs(file_path=file_path, folder_dir_path=folder_dir,`
			`output_dir_path=inbox_dir, output_file='')`
			`else:`
			`return File_attrs(file_path=file_path, folder_dir_path=folder_dir,`
			`output_dir_path=inbox_dir,`
			`output_file=os.path.dirname(file_path)[len(inbox_dir)+1:].replace(os.sep, ' - ') + '.md')`
			`else:`
			`return File_attrs(file_path=file_path, folder_dir_path=folder_dir,`
			`output_dir_path=os.path.dirname(file_path), output_file='no mtime')`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00
			`def process_by_ext(file_attrs):`
			`"""`
			`This will run different functions to process specified File_attrs tuple based on file extension`
			`:param file_attrs: File_attrs named tuple`
			`:return: Note_attrs named tuple`
			`"""`
			`if file_attrs.file_path.endswith('.txt') or not os.path.splitext(file_attrs.file_path)[1]:`
			`return text_to_md(file_attrs, args.topic_marker)`
			`elif args.pandoc_bin and args.pandoc_ver and file_attrs.file_path.endswith(('.htm', '.html')):`
			`return html_to_md(file_attrs, args.pandoc_bin, args.pandoc_ver)`
			`elif file_attrs.file_path.endswith(('.jpg', '.png', '.gif')):`
			`return file_to_md(file_attrs, 'media')`
			`else:`
			`return file_to_md(file_attrs, 'attachments')`


Forced single instance when watching 2017-09-14 18:08:58 +00:00			`def write_note_and_delete(note_attrs):`
Version 0.2 2017-09-13 18:10:38 +00:00			`"""`
			`Create or append existing note files based on Note_attrs tuples data, then delete the source file`
			`:param note_attrs: Note_attrs named tuple`
			`"""`
			`if os.path.isfile(note_attrs.output_file_path):`
			`if os.path.dirname(note_attrs.output_file_path) == inbox_dir:`
			`note_file_path = note_attrs.output_file_path`
			`with open(note_file_path, 'r') as source:`
			`content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read()`
			`else:`
			`i = 1`
			`while os.path.isfile(os.path.splitext(note_attrs.output_file_path)[0] + '_' + str(i) + '.md'):`
			`i += 1`
			`note_file_path = os.path.splitext(note_attrs.output_file_path)[0] + '_' + str(i) + '.md'`
			`content = note_attrs.mtime + note_attrs.text`
			`else:`
			`note_file_path = note_attrs.output_file_path`
			`if note_attrs.title:`
			`content = note_attrs.title + note_attrs.text`
Added inbox script... second try 2017-08-14 15:37:23 +00:00			`else:`
Version 0.2 2017-09-13 18:10:38 +00:00			`content = note_attrs.mtime + note_attrs.text`

			`with open(note_file_path, 'w') as output:`
			`output.write(content)`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00			`if os.path.isfile(note_file_path):`
			`try:`
			`os.remove(note_attrs.input_file_path)`
			`except OSError:`
			`pass`


			`if __name__ == '__main__':`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`script_path = os.path.dirname(sys.argv[0])`

			`for file in os.listdir(script_path):`
			`if file[-5:] == '.lock':`
			`os.remove(script_path + os.sep + file)`

New version 2017-09-03 16:29:59 +00:00			`arg_parser = argparse.ArgumentParser(description='A script to turn everything in the inbox directory to markdown notes.')`
			`arg_parser.add_argument('-i', '--inbox', action='store', dest='inbox_dir', required=True,`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`help="Full absolute path to the inbox directory to organize")`
New version 2017-09-03 16:29:59 +00:00			`arg_parser.add_argument('-f', '--folder', action='store', dest='folder_dir', required=True,`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`help="Full absolute path to directory where 'media' and 'attachment' directories are")`
New version 2017-09-03 16:29:59 +00:00			`arg_parser.add_argument('-m', '--marker', action='store', dest='topic_marker', required=False, default='@',`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`help="Symbol(s) which start the 'topic' word (for text files)")`
New version 2017-09-03 16:29:59 +00:00			`arg_parser.add_argument('-s', '--scan-folder', action='store_true', dest='scan_folder', required=False,`
			`help="Process whole folder rather than only inbox")`
			`arg_parser.add_argument('-p', '--pandoc-bin', action='store', dest='pandoc_bin', required=False,`
			`help="Command/path to run pandoc")`
			`arg_parser.add_argument('-pv', '--pandoc-ver', action='store', dest='pandoc_ver', required=False,`
			`help="Installed pandoc version")`
Version 0.2 2017-09-13 18:10:38 +00:00			`arg_parser.add_argument('-w', '--watch', action='store_true', dest='watch_fs', required=False,`
			`help="Watch and process new files as they appear after initial scan")`
New version 2017-09-03 16:29:59 +00:00			`args = arg_parser.parse_args()`

			`inbox_dir = args.inbox_dir`
			`folder_dir = args.folder_dir`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
			`os.makedirs(inbox_dir, exist_ok=True)`
			`os.makedirs(folder_dir + os.sep + 'media', exist_ok=True)`
			`os.makedirs(folder_dir + os.sep + 'attachments', exist_ok=True)`

New version 2017-09-03 16:29:59 +00:00			`if args.scan_folder:`
Version 0.2 2017-09-13 18:10:38 +00:00			`scan_path = folder_dir`
			`else:`
			`scan_path = inbox_dir`

			`file_list = []`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`for root, subdirs, files in os.walk(scan_path):`
			`for file_path in sorted([root + os.sep + file for file in files], key=os.path.getmtime):`
Version 0.2 2017-09-13 18:10:38 +00:00			`file_attrs = process_by_path(file_path)`
			`if file_attrs:`
			`file_list.append([file_attrs])`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
New version 2017-09-03 16:29:59 +00:00			`write_list = multiprocessing.dummy.Pool(100).starmap(process_by_ext, file_list)`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
Version 0.2 2017-09-13 18:10:38 +00:00			`flat_write_list = make_flat_list(write_list, Note_attrs)`
Added inbox script... second try 2017-08-14 15:37:23 +00:00
			`for note_attrs in flat_write_list:`
Version 0.2 2017-09-13 18:10:38 +00:00			`write_note_and_delete(note_attrs)`

			`if args.watch_fs:`

Added inbox script... second try 2017-08-14 15:37:23 +00:00			`try:`
Version 0.2 2017-09-13 18:10:38 +00:00			`import watchdog.events`
			`import watchdog.observers`
			`except ImportError:`
			`print("Can't find Watchdog module. Watching for changes won't work.")`
			`exit(1)`


			`class FsEventHandler(watchdog.events.FileSystemEventHandler):`
			`def on_any_event(self, event):`
			`if event.is_directory:`
			`return`
			`elif event.event_type == 'created':`
			`file_path = event.src_path`
			`elif event.event_type == 'moved':`
			`file_path = event.dest_path`
			`else:`
			`return`

			`file_attrs = process_by_path(file_path)`

			`if file_attrs:`
			`# Wait for all the web page resources saved/synced`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`if file_path.endswith(('.htm', '.html')):`
			`time.sleep(2)`
Version 0.2 2017-09-13 18:10:38 +00:00			`obj_to_write = process_by_ext(file_attrs)`
			`else:`
			`return`

			`if type(obj_to_write) == list:`
			`for note_attrs in obj_to_write:`
			`write_note_and_delete(note_attrs)`
			`else:`
			`write_note_and_delete(obj_to_write)`


Minor tweaks 2017-09-14 19:25:44 +00:00			`lockfile_path = script_path + os.sep + str(int(time.time())) + '.lock'`
			`open(lockfile_path, 'w').close()`

Version 0.2 2017-09-13 18:10:38 +00:00			`event_handler = FsEventHandler()`
			`observer = watchdog.observers.Observer()`
			`observer.schedule(event_handler, scan_path, recursive=True)`
			`observer.start()`

			`try:`
			`while True:`
Forced single instance when watching 2017-09-14 18:08:58 +00:00			`if os.path.isfile(lockfile_path):`
			`time.sleep(5)`
			`else:`
			`raise Exception`
Version 0.2 2017-09-13 18:10:38 +00:00			`except:`
			`observer.stop()`

			`observer.join()`