Version 0.2

This commit is contained in:
Maboroshy 2017-09-13 22:10:38 +04:00 committed by GitHub
parent 031666520f
commit 8a2a34c114
5 changed files with 249 additions and 129 deletions

View File

@ -12,11 +12,39 @@ import md_link
import md_convert import md_convert
import safe_path import safe_path
try:
import watchdog.events
import watchdog.observers
except ImportError:
pass
def text_to_md(file_attrs):
# File_attrs: input record which functions use to pass data of a file to be processed.
#   file_path       - full absolute path to the file to process
#   folder_dir_path - full absolute path to the directory where the 'media' and 'attachment'
#                     directories are
#   output_dir_path - full absolute path to the directory where the resulting text file is stored
#   output_file     - empty for a new standalone text file with mtime in the name,
#                     '*no mtime*' for a new standalone text file without mtime in the name,
#                     or the text file which will be appended with a new entry
#                     (originally documented as a full absolute path; process_by_path
#                     passes a bare '<sub-folder>.md' name - TODO confirm which is expected)
File_attrs = collections.namedtuple(
    'File_attrs',
    ['file_path', 'folder_dir_path', 'output_dir_path', 'output_file'],
)

# Note_attrs: output record which functions use to pass data of a note to be written.
#   input_file_path  - full absolute path to the file which was processed into this tuple
#   output_file_path - full absolute path to the output text file which should be written
#   text             - content of the text file which should be written
#   mtime            - modification time of the input file as a markdown headline,
#                      optionally prepended to the text
#   title            - title of the input file as a markdown headline,
#                      optionally prepended to the text
Note_attrs = collections.namedtuple(
    'Note_attrs',
    ['input_file_path', 'output_file_path', 'text', 'mtime', 'title'],
)
def text_to_md(file_attrs, topic_marker):
""" """
This will process specified text file getting its topics and replacing urls with favicons and titles where possible This will process specified text file getting its topics and replacing urls with favicons and titles where possible
:param file_attrs: File_attrs named tuple :param file_attrs: File_attrs named tuple
:param topic_marker: symbol(s) which start the 'topic' word, if such word present in text, it will go to 'topic.md'
:return: list of Note_attrs named tuple :return: list of Note_attrs named tuple
""" """
filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0] filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]
@ -28,8 +56,8 @@ def text_to_md(file_attrs):
except(UnicodeDecodeError): except(UnicodeDecodeError):
return return
topics = re.findall(file_attrs.topic_marker + '(\w*)', text) topics = re.findall(topic_marker + '(\w*)', text)
text = re.sub(file_attrs.topic_marker + '\w*[ ]?', '', text).strip() text = re.sub(topic_marker + '\w*[ ]?', '', text).strip()
if re.match('^http[s]?://[^\s]*$', text): if re.match('^http[s]?://[^\s]*$', text):
is_bookmark = True is_bookmark = True
@ -70,6 +98,7 @@ def text_to_md(file_attrs):
title=headline_title)) title=headline_title))
return output return output
def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''): def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''):
""" """
This will move specified convert specified html file to markdown and move all in-line images to sub-folder at media directory This will move specified convert specified html file to markdown and move all in-line images to sub-folder at media directory
@ -89,6 +118,7 @@ def html_to_md(file_attrs, pandoc_bin='pandoc', pandoc_ver=''):
mtime='**{}** \n'.format(time.strftime('%x %a %X', mtime)), mtime='**{}** \n'.format(time.strftime('%x %a %X', mtime)),
title='') title='')
def file_to_md(file_attrs, media_dir_name): def file_to_md(file_attrs, media_dir_name):
""" """
This will move specified file to media_dir_name and put note with a reference to that file instead This will move specified file to media_dir_name and put note with a reference to that file instead
@ -121,45 +151,100 @@ def file_to_md(file_attrs, media_dir_name):
title='# {}\n'.format(file.title)) title='# {}\n'.format(file.title))
def make_flat_list(mixed_list, target_item_type=tuple):
    """
    Flatten one level of nesting, keeping only items of 'target_item_type'.
    Top-level items of that type are kept, lists are unpacked one level deep (not recursively),
    anything else is dropped.
    :param mixed_list: list to make flat, its items are 'target_item_type' objects and/or lists
    :param target_item_type: type of items in the flat list, matched exactly (subclasses are dropped)
    :return: flat list of 'target_item_type'
    """
    flat_list = []
    for entry in mixed_list:
        # A lone entry is handled as a one-item group so both cases share the same filter
        candidates = entry if type(entry) == list else [entry]
        flat_list.extend(item for item in candidates if type(item) == target_item_type)
    return flat_list
Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title')
'''A named tuple which functions use to pass output data - data of notes to be written.
:param input_file_path: full absolute path to the file which was processed to this tuple
:param output_file_path: full absolute path to the output text file which should be written
:param text: content of the text file which should be written
:param mtime: modification time of input file as markdown headline to optionally prepend a text
:param title: title of a input file as markdown headline to optionally prepend a text'''
def process_by_path(file_path):
    """
    Checks if the file is valid for processing and returns File_attrs tuple depending on its path.
    Reads the module-level 'folder_dir' and 'inbox_dir' set by the script entry point.
    :param file_path: Absolute file path
    :return: File_attrs named tuple, or None if the file should be left alone
    """
    # Only the part of the path below the note folder is checked for hidden and '_files' items
    path_in_folder = file_path[len(folder_dir):]

    # The trailing separator makes the prefixes match the directories themselves only,
    # not look-alike siblings such as 'media_old' or a 'media.txt' file
    reserved_dirs = (folder_dir + os.sep + 'media' + os.sep,
                     folder_dir + os.sep + 'attachments' + os.sep)

    if file_path.endswith(('.md', 'notes.sqlite')) \
            or file_path.startswith(reserved_dirs) \
            or os.sep + '.' in path_in_folder \
            or '_files' + os.sep in path_in_folder:
        return None

    # Same boundary rule for the inbox: 'Inbox-archive' is not a part of 'Inbox'
    if not file_path.startswith(inbox_dir + os.sep):
        return File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                          output_dir_path=os.path.dirname(file_path), output_file='*no mtime*')

    parent_dir = os.path.dirname(file_path)
    if parent_dir == inbox_dir:
        # File right in the inbox becomes a standalone note with mtime in the name
        return File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                          output_dir_path=inbox_dir, output_file='')

    # File in an inbox sub-folder goes to a note named like that sub-folder,
    # nested sub-folders 'a/b' give 'a - b.md'
    sub_folder = parent_dir[len(inbox_dir) + 1:]
    return File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                      output_dir_path=inbox_dir,
                      output_file=sub_folder.replace(os.sep, ' - ') + '.md')
def process_by_ext(file_attrs):
    """
    Pick the converter for the specified File_attrs tuple based on its file extension and run it.
    Reads the topic marker and pandoc settings from the module-level 'args'.
    :param file_attrs: File_attrs named tuple
    :return: result of the chosen converter (Note_attrs named tuple, or a list of them for text files)
    """
    path = file_attrs.file_path

    # Plain text: '.txt' files and files with no extension at all
    if path.endswith('.txt') or not os.path.splitext(path)[1]:
        return text_to_md(file_attrs, args.topic_marker)

    # Web pages are converted only when both pandoc command and version are set
    if args.pandoc_bin and args.pandoc_ver and path.endswith(('.htm', '.html')):
        return html_to_md(file_attrs, args.pandoc_bin, args.pandoc_ver)

    # Everything else is moved away and referenced from a note: images to 'media', the rest to 'attachments'
    target_dir_name = 'media' if path.endswith(('.jpg', '.png', '.gif')) else 'attachments'
    return file_to_md(file_attrs, target_dir_name)
def write_note_and_delete(note_attrs):  # TODO Test
    """
    Create or append existing note files based on Note_attrs tuples data, then delete the source file.
    - No file at the output path: a new note is written, headed by the title if there is one, else by mtime.
    - Output file exists right in the inbox directory (module-level 'inbox_dir'): the new entry
      is put on top of the existing content.
    - Output file exists elsewhere: the note goes to the first free '<name>_<n>.md' instead.
    :param note_attrs: Note_attrs named tuple
    """
    target_path = note_attrs.output_file_path

    if not os.path.isfile(target_path):
        note_file_path = target_path
        heading = note_attrs.title if note_attrs.title else note_attrs.mtime
        content = heading + note_attrs.text
    elif os.path.dirname(target_path) == inbox_dir:
        note_file_path = target_path
        with open(note_file_path, 'r') as existing_note:
            content = note_attrs.mtime + note_attrs.text + '\n\n' + existing_note.read()
    else:
        # Never overwrite a note outside the inbox - look for an unused numbered name
        stem = os.path.splitext(target_path)[0]
        number = 1
        note_file_path = '{}_{}.md'.format(stem, number)
        while os.path.isfile(note_file_path):
            number += 1
            note_file_path = '{}_{}.md'.format(stem, number)
        content = note_attrs.mtime + note_attrs.text

    with open(note_file_path, 'w') as output:
        output.write(content)

    # The source is removed only once the note is confirmed to be on disk;
    # failing to remove it is not an error
    if os.path.isfile(note_file_path):
        try:
            os.remove(note_attrs.input_file_path)
        except OSError:
            pass
if __name__ == '__main__':
arg_parser = argparse.ArgumentParser(description='A script to turn everything in the inbox directory to markdown notes.') arg_parser = argparse.ArgumentParser(description='A script to turn everything in the inbox directory to markdown notes.')
arg_parser.add_argument('-i', '--inbox', action='store', dest='inbox_dir', required=True, arg_parser.add_argument('-i', '--inbox', action='store', dest='inbox_dir', required=True,
help="Full absolute path to the inbox directory to organize") help="Full absolute path to the inbox directory to organize")
@ -173,77 +258,88 @@ if __name__ == '__main__':
help="Command/path to run pandoc") help="Command/path to run pandoc")
arg_parser.add_argument('-pv', '--pandoc-ver', action='store', dest='pandoc_ver', required=False, arg_parser.add_argument('-pv', '--pandoc-ver', action='store', dest='pandoc_ver', required=False,
help="Installed pandoc version") help="Installed pandoc version")
arg_parser.add_argument('-w', '--watch', action='store_true', dest='watch_fs', required=False,
help="Watch and process new files as they appear after initial scan")
args = arg_parser.parse_args() args = arg_parser.parse_args()
inbox_dir = args.inbox_dir inbox_dir = args.inbox_dir
folder_dir = args.folder_dir folder_dir = args.folder_dir
topic_marker = args.topic_marker
os.makedirs(inbox_dir, exist_ok=True) os.makedirs(inbox_dir, exist_ok=True)
os.makedirs(folder_dir + os.sep + 'media', exist_ok=True) os.makedirs(folder_dir + os.sep + 'media', exist_ok=True)
os.makedirs(folder_dir + os.sep + 'attachments', exist_ok=True) os.makedirs(folder_dir + os.sep + 'attachments', exist_ok=True)
# Prepare a list of File_attrs tuples for process_by_ext function, based on file location, older files first
file_list = []
if args.scan_folder: if args.scan_folder:
for subfolder, dirs, files in os.walk(folder_dir): scan_path = folder_dir
for file_path in sorted([subfolder + os.sep + file for file in files], key=os.path.getmtime): else:
if os.path.isfile(file_path) \ scan_path = inbox_dir
and not file_path.endswith(('.md', 'notes.sqlite')) \
and not file_path.startswith((inbox_dir, folder_dir + os.sep + 'media', folder_dir + os.sep + 'attachments')) \
and os.sep + '.' not in file_path.replace(folder_dir, '') \
and '_files' + os.sep not in file_path.replace(folder_dir, ''):
file_list.append([File_attrs(file_path=file_path, folder_dir_path=folder_dir, output_dir_path=os.path.dirname(file_path),
topic_marker=topic_marker, output_file='*no mtime*')])
for file_path in sorted([inbox_dir + os.sep + path for path in os.listdir(inbox_dir)], key=os.path.getmtime): file_list = []
if os.path.isdir(file_path) \ for dir, subdirs, files in os.walk(scan_path):
and not os.path.basename(file_path).startswith('.') \ for file_path in sorted([dir + os.sep + file for file in files], key=os.path.getmtime):
and not file_path.endswith('_files'): file_attrs = process_by_path(file_path)
for sub_file in sorted([file_path + os.sep + path for path in os.listdir(file_path)], key=os.path.getmtime): if file_attrs:
if not sub_file.endswith('.md') \ file_list.append([file_attrs])
and not os.path.basename(sub_file).startswith('.'):
file_list.append([File_attrs(file_path=sub_file, folder_dir_path=folder_dir, output_dir_path=inbox_dir,
topic_marker=topic_marker, output_file=os.path.basename(file_path) + '.md')])
else:
if os.path.isfile(file_path) \
and not file_path.endswith('.md') \
and not os.path.basename(file_path).startswith('.'):
file_list.append([File_attrs(file_path=file_path, folder_dir_path=folder_dir, output_dir_path=inbox_dir,
topic_marker=topic_marker, output_file='')])
# Run process_by_ext for each File_attrs tuple putting resulting Note_attrs tuples to write_list
write_list = multiprocessing.dummy.Pool(100).starmap(process_by_ext, file_list) write_list = multiprocessing.dummy.Pool(100).starmap(process_by_ext, file_list)
# Due to text_to_md outputs list of Note_attrs tuples, this should turn write_list to a flat list flat_write_list = make_flat_list(write_list, Note_attrs)
flat_write_list = []
for object in write_list:
if type(object) == list:
for item in object:
if type(item) == Note_attrs:
flat_write_list.append(item)
elif type(object) == Note_attrs:
flat_write_list.append(object)
# Create or append existing text files based on Note_attrs tuples data
for note_attrs in flat_write_list: for note_attrs in flat_write_list:
write_note_and_delete(note_attrs)
if args.watch_fs:
try: try:
with open(note_attrs.output_file_path, 'r') as source: import watchdog.events
content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read() import watchdog.observers
except OSError: except ImportError:
if note_attrs.title: print("Can't find Watchdog module. Watching for changes won't work.")
content = note_attrs.title + note_attrs.text exit(1)
else:
content = note_attrs.mtime + note_attrs.text
with open(note_attrs.output_file_path, 'w') as output:
output.write(content)
if os.path.isfile(note_attrs.output_file_path): class FsEventHandler(watchdog.events.FileSystemEventHandler):
try: def on_any_event(self, event):
os.remove(note_attrs.input_file_path) if event.is_directory:
except OSError: return
pass elif event.event_type == 'created':
file_path = event.src_path
elif event.event_type == 'moved':
file_path = event.dest_path
else:
return
if platform.system() == 'Linux': file_attrs = process_by_path(file_path)
os.system('notify-send "-a" "Inbox script" "Your inbox is organized"') # TODO maybe change to gi.repository: Notify
if file_attrs:
# Wait for all the web page resources saved/synced
if file_path.endswith(('.htm', '.html')): time.sleep(2)
obj_to_write = process_by_ext(file_attrs)
else:
return
if type(obj_to_write) == list:
for note_attrs in obj_to_write:
write_note_and_delete(note_attrs)
else:
write_note_and_delete(obj_to_write)
event_handler = FsEventHandler()
observer = watchdog.observers.Observer()
observer.schedule(event_handler, scan_path, recursive=True)
observer.start()
try:
while True:
time.sleep(5)
except:
observer.stop()
observer.join()
# if platform.system() == 'Linux':
# os.system('notify-send "-a" "Inbox script" "Your inbox is organized"') # TODO maybe change to gi.repository: Notify

View File

@ -9,6 +9,7 @@ Script {
return '' return ''
} }
/// TODO Change to platform dependant defaults without checking
function setDefaultPyCommand() { function setDefaultPyCommand() {
if (script.getPersistentVariable('MdNT/pyCommand', '') == '') { if (script.getPersistentVariable('MdNT/pyCommand', '') == '') {
script.setPersistentVariable('MdNT/pyCommand', checkPyCommand()) script.setPersistentVariable('MdNT/pyCommand', checkPyCommand())
@ -19,10 +20,10 @@ Script {
property string scriptDirPath property string scriptDirPath
property string inboxFolder property string inboxFolder
property bool scanFolder property bool scanFolder
property bool watchFS
property string tagMarker property string tagMarker
property string pyCommand property string pyCommand
property string pandocCommand property string pandocCommand
property string pandocVersion property string pandocVersion
property variant settingsVariables: [ property variant settingsVariables: [
@ -37,16 +38,25 @@ Script {
{ {
'identifier': 'scanFolder', 'identifier': 'scanFolder',
'name': 'Scan whole folder rather than only Inbox folder', 'name': 'Scan whole folder rather than only Inbox folder',
'description': 'If true the script will convert any non-".md" file in folder to note. \n' + 'description': 'If true the script will convert any non-".md" file in folder to note.\n' +
'"Sub-folder to single note" and modification times in note titles will still be only for Inbox.', '"Sub-folder to single note" and modification times in note titles will still be only for Inbox.',
'type': 'boolean', 'type': 'boolean',
'default': 'false', 'default': 'false',
}, },
{
'identifier': 'watchFS',
'name': 'Continuously watch for new files and process them as they appear',
'description': 'If true the script will continuously watch inbox/folder (depending on above setting)\n' +
'for new files and process them as soon as they appear.\n' +
'The script will start working on load, no toolbar button will appear.',
'type': 'boolean',
'default': 'false',
},
{ {
'identifier': 'tagMarker', 'identifier': 'tagMarker',
'name': 'Tag word marker', 'name': 'Tag word marker',
'description': 'A symbol or group of symbols which start a "topic" word for ".txt" notes. \n' + 'description': 'A symbol or string of symbols which start a "topic" word for ".txt" notes. \n' +
'For example a txt note with "@tag" word will go to "tag.md" note', 'For example, if set to "@", a ".txt" file with "@tag" word will go to "tag.md" note',
'type': 'string', 'type': 'string',
'default': '@', 'default': '@',
}, },
@ -66,10 +76,33 @@ Script {
}, },
] ]
function runInbox() {
    /// Build the inbox.py command line from the script settings and start it as a detached process
    var separator = script.dirSeparator()
    var noteFolderPath = script.currentNoteFolderPath()

    var args = [
        scriptDirPath + separator + 'inbox.py',
        '--inbox', noteFolderPath + separator + inboxFolder,
        '--folder', noteFolderPath,
        '--marker', tagMarker
    ]

    /// Optional flags follow the boolean settings
    if (scanFolder) {
        args.push('--scan-folder')
    }
    if (watchFS) {
        args.push('--watch')
    }

    /// Pandoc options are passed only when a pandoc version is known
    if (pandocVersion != '') {
        args.push('--pandoc-bin', pandocCommand)
        args.push('--pandoc-ver', pandocVersion)
    }

    script.startDetachedProcess(pyCommand, args)
    script.log('Processing inbox...')
}
function init() { function init() {
pandocVersion = script.getPersistentVariable('MdNT/pandocVersion', '')
/// Check if set pyCommand can run Python 3 /// Check if set pyCommand can run Python 3
if (script.getPersistentVariable('MdNT/pyCommand', '') != pyCommand) { if (script.getPersistentVariable('MdNT/pyCommand', '') != pyCommand) {
@ -84,30 +117,36 @@ Script {
/// Get the version of pandoc /// Get the version of pandoc
if (script.getPersistentVariable('MdNT/pandocCommand', '') != pandocCommand) { if (script.getPersistentVariable('MdNT/pandocCommand', '') != pandocCommand) {
var pandocCheck = script.startSynchronousProcess(pandocCommand, '-v', '').toString().split('\n')[0] var pandocCheck = script.startSynchronousProcess(pandocCommand, '-v', '').toString().split('\n')[0]
if (pandocCheck.indexOf('pandoc') != '-1') { if (pandocCheck.indexOf('pandoc') != '-1') {
script.setPersistentVariable('MdNT/pandocCommand', pandocCommand) script.setPersistentVariable('MdNT/pandocCommand', pandocCommand)
script.setPersistentVariable('MdNT/pandocVersion', pandocCheck.slice(7)) script.setPersistentVariable('MdNT/pandocVersion', pandocCheck.slice(7))
pandocVersion = pandocCheck.slice(7)
} }
else { else {
script.setPersistentVariable('MdNT/pandocCommand', '') script.setPersistentVariable('MdNT/pandocCommand', '')
} }
} }
/// Issues alerts /// Issues alerts
if (script.getPersistentVariable('MdNT/pandocCommand', '') == '') { if (script.getPersistentVariable('MdNT/pandocCommand', '') == '') {
script.informationMessageBox('The command/path for pandoc in the script settings is not valid\n' + script.informationMessageBox('The command/path for pandoc in the script settings is not valid.\n' +
'Converting web pages will be disabled.', 'Converting web pages to notes will be disabled.',
'Script') 'Inbox script')
script.setPersistentVariable('MdNT/pandocCommand', pandocCommand) script.setPersistentVariable('MdNT/pandocCommand', pandocCommand)
script.setPersistentVariable('MdNT/pandocVersion', '') script.setPersistentVariable('MdNT/pandocVersion', '')
pandocVersion = '' pandocVersion = ''
} }
else {
pandocVersion = script.getPersistentVariable('MdNT/pandocVersion', '')
}
if (script.getPersistentVariable('MdNT/pyCommand', '') == '') { if (script.getPersistentVariable('MdNT/pyCommand', '') == '') {
script.informationMessageBox('The command/path for Python 3 interpreter in the script settings is not valid\n' + script.informationMessageBox('The command/path for Python 3 interpreter in the script settings is not valid.\n' +
'Please set the correct command/path.', 'Please set the correct command/path.',
'Script') 'Inbox script')
}
else if (watchFS == true) {
runInbox()
} }
else { else {
script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg') script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg')
@ -116,25 +155,7 @@ Script {
function customActionInvoked(action) {
    /// Only the 'inbox' custom action is handled, any other action is ignored
    if (action != 'inbox') {
        return
    }
    runInbox()
}
} }

View File

@ -5,9 +5,7 @@
"resources": ["inbox.py", "md_link.py", "md_convert.py", "safe_path.py"], "resources": ["inbox.py", "md_link.py", "md_convert.py", "safe_path.py"],
"authors": ["@Maboroshy"], "authors": ["@Maboroshy"],
"platforms": ["linux", "macos", "windows"], "platforms": ["linux", "macos", "windows"],
"version": "0.1.1", "version": "0.2.0",
"minAppVersion": "17.05.8", "minAppVersion": "17.05.8",
"description" : "Inbox is a complex script to organize data added from different devices and applications.\n\n<b>It's currently at beta stage, so using it for unrecoverable data is discouraged.</b>\n\nThe script processes files in inbox folder you set or whole note folder (depending on settings) as follows:\n- The script turns all .txt files and files with no extension to .md note with modification time in the file name.\n- If there's a word that starts with a '@' (configurable) in file text, content of that file will be added to .md note named like that word. The content of .txt file with '@tag' in text will be added to 'tag.md' file.\n- Every URL in text file will be converted to markdown link, with web page favicon and title if possible.\n- Any web page, saved as .htm file with '_files' folder, will be converted to .md note.\n- Any image file placed to inbox folder will be moved to media folder. The script will put .md note with the in-line image instead.\n- Any other file placed to inbox folder will be moved to attachments folder. The script will put .md note with a link to the file instead. On Linux the file icon will be put before link.\n- (Inbox folder only) Text/link of file placed to sub-folder of inbox folder will be added to .md note named like the sub-folder. 
Everything put to 'topic' sub-folder will be added to 'topic.md' file.\n- (Linux only) The script will put thumbnail for .pdf file as a link to it.\n- (Linux only) The script will replace large in-line images with a smaller ones as a link to original ones.\n\n <a href=\"https://github.com/qownnotes/scripts/blob/master/inbox/workflow.md\">Workflow examples</a> "description" : "Inbox is a complex script to organize data added from different devices and applications.\n\n<b>It's currently at beta stage, so using it for unrecoverable data is discouraged.</b>\n\nThe script processes files in inbox folder you set or whole note folder (depending on settings) as follows:\n- The script turns all .txt files and files with no extension to .md notes with modification time in the file name.\n- If there's a word that starts with a '@' (configurable) in .txt file, content of that file will be added to .md note named like that word. The content of .txt file with '@topic' in text will be added to 'topic.md' file. Outside of inbox folder the text won't be added to note but will become a new standalone note.\n- Every URL in text file will be converted to markdown link, with web page favicon and title if possible.\n- Any web page, saved as .htm file with '_files' folder, will be converted to .md note.\n- Any image file placed to inbox folder will be moved to media folder. The script will put .md note with the in-line image instead.\n- Any other file placed to inbox folder will be moved to attachments folder. The script will put .md note with a link to the file instead. On Linux the file icon will be put before link.\n- (Inbox folder only) Text/link of file placed to sub-folder of inbox folder will be added to .md note named like the sub-folder. Everything put to 'topic' sub-folder will be added to 'topic.md' file. 
Topic words won't work in these sub-folders.\n- (Linux only) The script will put thumbnail for .pdf file as a link to it.\n- (Linux only) The script will replace large in-line image with a smaller one as a link to original.\n\n <a href=\"https://github.com/qownnotes/scripts/blob/master/inbox/workflow.md\">Workflow examples</a>\n\n<b>Dependencies:</b>\n<a href=\"https://www.python.org/downloads/\">Python 3.3+ Interpreter</a>;\n(optional, continuous watch mode)<a href=\"http://pythonhosted.org/watchdog/installation.html\">Watchdog module</a>;\n(optional, web page to note)<a href=\"http://pandoc.org/installing.html\">Pandoc</a>;\n(optional, Linux only, pdf thumbnails) ghostscript;\n(optional, Linux only, image thumbnails) imagemagick;\n(optional, Linux only, file icons) python-gobject.\n\n<b>Usage:</b>\nRun the script by toolbar button or menu item. Or, if set to 'continuous watch' mode, it will run itself on load."
<b>Dependencies:</b>\n<a href=\"https://www.python.org/downloads/\">Python 3.3+ Interpreter</a>;\n<a href=\"http://pandoc.org/installing.html\">Pandoc</a>;\n(Linux only, pdf thumbnails) ghostscript;\n(Linux only, image thumbnails) imagemagick;\n(Linux only, file icons) python-gobject.\n\n<b>Usage:</b>\nRun the script by toolbar button or menu item."
} }

View File

@ -23,7 +23,7 @@ def html_text(html_text, pandoc_bin='pandoc', pandoc_ver='1.19.1'):
'--atx-headers'] '--atx-headers']
# Remove firefox reader mode panel if there's one # Remove firefox reader mode panel if there's one
html_text = re.sub('<ul id="reader-toolbar" class="toolbar">.*</li></ul></ul>', '', html_text, flags=re.DOTALL) html_text = re.sub('<ul id="reader-toolbar" class="toolbar">.*</li></ul></ul>', '', html_text, flags=re.DOTALL) ## TODO Maybe use html.parser
try: try:
pandoc_pipe = subprocess.Popen(pandoc_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) pandoc_pipe = subprocess.Popen(pandoc_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
@ -66,4 +66,4 @@ def saved_html(html_path, folder_dir_path, pandoc_bin='pandoc', pandoc_ver='1.19
shutil.rmtree(os.path.splitext(html_path)[0] + '_files', True) shutil.rmtree(os.path.splitext(html_path)[0] + '_files', True)
return md_text return md_text

View File

@ -61,9 +61,14 @@ class URL:
try: try:
html_text = urllib.request.urlopen(self.url).read().decode('utf-8') # TODO encodings other that utf-8, https fails on early Python 3 versions html_text = urllib.request.urlopen(self.url).read().decode('utf-8') # TODO encodings other that utf-8, https fails on early Python 3 versions
except (urllib.error.HTTPError, urllib.error.URLError, UnicodeDecodeError): except (urllib.error.HTTPError, urllib.error.URLError, UnicodeDecodeError):
return '' title = ''
else: else:
return re.search('<title.*?>(.+?)</title>', html_text, re.IGNORECASE | re.DOTALL).group(1) title = re.search('<title.*?>(.+?)</title>', html_text, re.IGNORECASE | re.DOTALL).group(1)
if title:
return title
else:
return self.url.split('//')[-1].split('/')[0]
@property @property
def md(self): def md(self):