Added inbox script... second try

This commit is contained in:
Maboroshy 2017-08-14 19:37:23 +04:00 committed by GitHub
parent 090ee8c64f
commit 7e302ddffb
5 changed files with 489 additions and 0 deletions

183
inbox/inbox.py Normal file
View File

@ -0,0 +1,183 @@
#!/usr/bin/env python
import os
import re
import sys
import time
import platform
import collections
import multiprocessing.dummy
import md_link
import safe_path
def text_to_md(file_attrs):
    """
    Process a text file: collect its tags and replace URLs with markdown links (favicon and title where possible)

    :param file_attrs: File_attrs named tuple
    :return: list of Note_attrs named tuples, one per note file the text should be written to
    """
    filename = os.path.splitext(os.path.basename(file_attrs.file_path))[0]
    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))

    with open(file_attrs.file_path, 'r') as text_file:
        text = text_file.read()

    tags = []
    if file_attrs.tag_marker:
        # The marker comes from user settings, so it is escaped before being used in a pattern.
        # A tag must start a word (so 'user@example.com' is not a tag) and must not be empty
        # (a bare marker used to produce a note literally named '.md').
        tag_pattern = r'(?<!\w)' + re.escape(file_attrs.tag_marker) + r'(\w+)'
        for tag in re.findall(tag_pattern, text):
            if tag not in tags:  # a repeated tag must not append the same note twice
                tags.append(tag)
        text = re.sub(tag_pattern + r'[ ]?', '', text)
    text = text.strip()

    # A bookmark is a text file that holds nothing but a single URL
    is_bookmark = re.match(r'https?://\S+$', text, re.IGNORECASE) is not None

    urls = []

    def link_to_md(match):
        """Turn one matched URL into its markdown form, remembering the URL object"""
        url = md_link.URL(match.group(0), file_attrs.folder_dir_path)
        urls.append(url)
        return url.md

    # A URL ends at the first whitespace. Substituting match by match (instead of str.replace)
    # keeps URLs that share a common prefix from corrupting each other.
    text = re.sub(r'(?<!\S)https?://\S+', link_to_md, text, flags=re.IGNORECASE)

    bookmark_title = urls[0].title if is_bookmark and urls else ''
    timestamp = time.strftime('%m-%d %H:%M', mtime)

    if file_attrs.inbox_file:
        output_files = [file_attrs.inbox_file]
        headline_title = ''
    elif tags:
        output_files = [tag + '.md' for tag in tags]
        headline_title = ''
    else:
        # Fall back to the file name when the page title of a bookmark could not be fetched
        note_title = bookmark_title or filename
        output_files = [timestamp + ' ' + note_title + '.md']
        headline_title = '# {}\n'.format(note_title)

    mtime_line = '**{}** \n'.format(time.strftime('%x %a %X', mtime))
    return [Note_attrs(input_file_path=file_attrs.file_path,
                       output_file_path=os.path.join(file_attrs.output_dir_path, safe_path.filename(output_file)),
                       text=text,
                       mtime=mtime_line,
                       title=headline_title)
            for output_file in output_files]
def file_to_md(file_attrs, media_dir_name):
    """
    Move the file to media_dir_name and produce a note that references the moved file

    If the file cannot be moved, the note references it at its original location and the returned
    input_file_path is an empty string, so the caller's clean-up step has nothing to delete.

    :param file_attrs: File_attrs named tuple
    :param media_dir_name: name of sub-directory in folder_dir_path where file will be moved (for non-text files)
    :return: Note_attrs named tuple
    """
    import shutil  # local import: only this function needs it

    mtime = time.localtime(os.path.getmtime(file_attrs.file_path))
    original_name = os.path.basename(file_attrs.file_path)
    # Prefix with the modification time in whole seconds to keep moved file names distinct
    new_filename = '{}_{}'.format(int(time.mktime(mtime)), original_name)
    new_path = os.path.join(file_attrs.folder_dir_path, media_dir_name, new_filename)

    input_file_path = file_attrs.file_path
    try:
        # shutil.move also works across filesystems, where a plain os.rename fails
        shutil.move(file_attrs.file_path, new_path)
    except (OSError, shutil.Error):
        # The file is still where it was: link to it there and make sure it is not deleted
        # afterwards (previously the note pointed to a missing file and the original was removed).
        new_path = file_attrs.file_path
        input_file_path = ''

    file = md_link.File(new_path, file_attrs.folder_dir_path, os.path.splitext(original_name)[0])

    if file_attrs.inbox_file:
        output_file = file_attrs.inbox_file
    else:
        output_file = time.strftime('%m-%d %H:%M', mtime) + ' ' + file.title + '.md'

    return Note_attrs(input_file_path=input_file_path,
                      output_file_path=os.path.join(file_attrs.output_dir_path, safe_path.filename(output_file)),
                      text=file.md,
                      mtime='**{}** \n'.format(time.strftime('%x %a %X', mtime)),
                      title='# {}\n'.format(file.title))
if __name__ == '__main__':
    import subprocess  # local import: only the desktop notification at the end needs it

    # File_attrs: input data passed to the processing functions, one tuple per file to process
    #   file_path: path to the file to process
    #   folder_dir_path: path to the directory where 'media' and 'attachments' directories are
    #   output_dir_path: path to the directory where resulting notes are stored
    #   tag_marker: symbol(s) which start a tag word (for text files)
    #   inbox_file: file name of the note to append with a new entry,
    #               if empty the entry goes to a new standalone note
    File_attrs = collections.namedtuple('File_attrs', 'file_path folder_dir_path output_dir_path tag_marker inbox_file')

    # Note_attrs: output data returned by the processing functions, one tuple per note to write
    #   input_file_path: path to the file which was processed to this tuple (removed after the note is written)
    #   output_file_path: path to the note file which should be written
    #   text: content of the note
    #   mtime: modification time of the input file as a markdown line to optionally prepend the text
    #   title: title of the input file as a markdown headline to optionally prepend the text
    Note_attrs = collections.namedtuple('Note_attrs', 'input_file_path output_file_path text mtime title')

    def process_by_ext(file_attrs):
        """
        Run the processing function that fits the file extension of the specified File_attrs tuple

        :param file_attrs: File_attrs named tuple
        :return: Note_attrs named tuple, or a list of them for text files
        """
        # Compare in lower case so 'PHOTO.JPG' or 'NOTE.TXT' are recognised as well
        ext = os.path.splitext(file_attrs.file_path)[1].lower()
        if ext in ('', '.txt'):
            return text_to_md(file_attrs)
        elif ext in ('.jpg', '.png', '.gif'):
            return file_to_md(file_attrs, 'media')
        else:
            return file_to_md(file_attrs, 'attachments')

    def list_by_mtime(dir_path):
        """Return paths of all entries in dir_path, older entries first"""
        return sorted((os.path.join(dir_path, name) for name in os.listdir(dir_path)), key=os.path.getmtime)

    def is_processable(path):
        """Tell whether the entry is neither hidden nor an already produced .md note"""
        name = os.path.basename(path)
        return not name.startswith('.') and not name.endswith('.md')

    if len(sys.argv) < 4:
        sys.exit('Usage: inbox.py <inbox_dir> <folder_dir> <tag_marker>')
    inbox_dir, folder_dir, tag_marker = sys.argv[1:4]

    os.makedirs(inbox_dir, exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'media'), exist_ok=True)
    os.makedirs(os.path.join(folder_dir, 'attachments'), exist_ok=True)

    # Prepare a list of File_attrs tuples for process_by_ext function, based on file location, older files first
    file_list = []
    for file_path in list_by_mtime(inbox_dir):
        if not is_processable(file_path):
            continue
        if os.path.isdir(file_path):
            # Everything in a sub-folder is appended to the note named like that sub-folder
            for sub_file in list_by_mtime(file_path):
                # Nested directories are skipped: they cannot be read as a text note
                if is_processable(sub_file) and os.path.isfile(sub_file):
                    file_list.append(File_attrs(file_path=sub_file, folder_dir_path=folder_dir,
                                                output_dir_path=inbox_dir, tag_marker=tag_marker,
                                                inbox_file=os.path.basename(file_path) + '.md'))
        else:
            file_list.append(File_attrs(file_path=file_path, folder_dir_path=folder_dir,
                                        output_dir_path=inbox_dir, tag_marker=tag_marker, inbox_file=''))

    # Run process_by_ext for each File_attrs tuple in a thread pool, results keep the order of file_list
    with multiprocessing.dummy.Pool() as pool:
        write_list = pool.map(process_by_ext, file_list)

    # text_to_md returns a list of Note_attrs tuples, so write_list is turned into a flat list
    flat_write_list = []
    for result in write_list:
        if isinstance(result, list):
            flat_write_list.extend(result)
        else:
            flat_write_list.append(result)

    # Create or prepend existing note files based on Note_attrs tuples data
    for note_attrs in flat_write_list:
        try:
            with open(note_attrs.output_file_path, 'r') as source:
                content = note_attrs.mtime + note_attrs.text + '\n\n' + source.read()
        except FileNotFoundError:
            # Only a missing note starts a new file; any other read error must not
            # lead to overwriting an existing note
            if note_attrs.title:
                content = note_attrs.title + note_attrs.text
            else:
                content = note_attrs.mtime + note_attrs.text

        with open(note_attrs.output_file_path, 'w') as output:
            output.write(content)

        # The input file is removed only once its note exists; moved files are already gone
        if note_attrs.input_file_path and os.path.isfile(note_attrs.output_file_path):
            try:
                os.remove(note_attrs.input_file_path)
            except OSError:
                pass

    if platform.system() == 'Linux':
        # TODO maybe change to gi.repository: Notify
        try:
            subprocess.call(['notify-send', '-a', 'Inbox script', 'Your inbox is organized'])
        except OSError:
            pass  # the notification is optional, notify-send may not be installed

75
inbox/inbox.qml Normal file
View File

@ -0,0 +1,75 @@
import QtQml 2.2
import QOwnNotesTypes 1.0
/*
*/
Script {
    // Directory of this script, used to locate inbox.py.
    // NOTE(review): presumably filled in by the host application - confirm
    property string scriptDirPath

    // Values of the settings declared in settingsVariables below (matched by identifier)
    property string inboxFolder
    property string tagMarker
    property string pyBin

    /**
     * Find a command that starts a Python 3 interpreter
     *
     * Each candidate command is run with '-V' and accepted if its output contains 'Python 3'.
     *
     * @return the first matching command, or an empty string if none matched
     */
    function getPyCommand() {
        var candidates = ['python3', 'python', 'py']
        for (var i = 0; i < candidates.length; i++) {
            var pyVer = script.startSynchronousProcess(candidates[i], '-V', '').toString()
            if (pyVer.indexOf('Python 3') !== -1) {
                return candidates[i]
            }
        }
        return ''
    }

    // Settings offered to the user; each 'identifier' matches a property declared above
    property variant settingsVariables: [
        {
            'identifier': 'inboxFolder',
            'name': 'Inbox folder name',
            'description': 'Name of inbox folder located in the root of note folder. It is single for all note folders\n' +
                           'An empty inbox folder will be created if no exists.',
            'type': 'string',
            'default': 'Inbox',
        },
        {
            'identifier': 'tagMarker',
            'name': 'Tag word marker',
            'description': 'A symbol or group of symbols which start a "tag" word for txt notes. \n' +
                           'For example a txt note with "@tag" word will go to "tag.md" note',
            'type': 'string',
            'default': '@',
        },
        {
            'identifier': 'pyBin',
            'name': 'Command/path to run Python 3 Interpreter',
            'description': "Put a command or path for Python 3 interpreter here.",
            'type': 'file',
            'default': getPyCommand(),
        }
    ]

    /**
     * Register the 'inbox' custom action, or tell the user that no Python 3 interpreter is set
     */
    function init() {
        if (pyBin == '') {
            script.informationMessageBox("Can't find Python 3 interpreter.\n" +
                                         'Please set the correct path to its binary in the script settings.',
                                         'Inbox script')
        }
        else {
            script.registerCustomAction('inbox', 'Process inbox folder', 'Inbox', 'mail-receive.svg')
        }
    }

    /**
     * Start inbox.py as a detached process for the inbox folder of the current note folder
     *
     * @param action identifier of the invoked custom action; only 'inbox' is handled
     */
    function customActionInvoked(action) {
        if (action == 'inbox') {
            var pyScriptPath = scriptDirPath + script.dirSeparator() + 'inbox.py'
            var inboxPath = script.currentNoteFolderPath() + script.dirSeparator() + inboxFolder

            // Arguments in the order inbox.py reads them: inbox directory, note folder, tag marker
            script.startDetachedProcess(pyBin, [pyScriptPath, inboxPath, script.currentNoteFolderPath(), tagMarker])
            script.log('Processing inbox...')
        }
    }
}

10
inbox/info.json Normal file
View File

@ -0,0 +1,10 @@
{
  "name": "Inbox [beta]",
  "identifier": "inbox",
  "script": "inbox.qml",
  "authors": ["@Maboroshy"],
  "platforms": ["linux", "macos", "windows"],
  "version": "0.0.1",
  "minAppVersion": "17.05.8",
  "description" : "Inbox is a complex script that organizes data you put into the inbox folder from different devices and applications.\n\n<b>The script alters files in the inbox folder you set. It is currently in beta, so using it for unrecoverable data is discouraged.</b>\n\n<b>Features:</b>\n- The script turns all .txt files and files with no extension into .md notes with the modification time in the file name.\n- If the file text contains a word that starts with '@' (configurable), the content of that file is added to the .md note named after that word. For example, the content of a .txt file with '@tag' in its text is added to the 'tag.md' file.\n- Every URL in a text file is converted to a markdown link, with the web page favicon and title if possible.\n- Any image file placed in the inbox folder is moved to the media folder. The script puts a .md note with the in-line image in its place.\n- Any other file placed in the inbox folder is moved to the attachments folder. The script puts a .md note with a link to the file in its place. On Linux the file icon is put before the link.\n- The text/link of a file placed in a sub-folder of the inbox folder is added to the .md note named after the sub-folder. For example, everything put in the 'topic' sub-folder is added to the 'topic.md' file.\n- (Linux only) The script uses a thumbnail of a .pdf file as the link to it.\n- (Linux only) The script replaces large in-line images with smaller ones that link to the originals.\n\n<b>Dependencies:</b>\n<a href=\"https://www.python.org/downloads/\">Python 3.3+ Interpreter</a>;\n(Linux only, pdf thumbnails) ghostscript;\n(Linux only, image thumbnails) imagemagick;\n(Linux only, file icons) python-gobject.\n\n<b>Usage:</b>\nRun the script with the toolbar button or menu item."
}

197
inbox/md_link.py Normal file
View File

@ -0,0 +1,197 @@
import os
import re
import sys
import shutil
import hashlib
import platform
import subprocess
import urllib.request
if platform.system() == 'Linux':
try:
import gi # TODO doesn't work on early Python 3 versions
gi.require_version('Gtk', '3.0')
from gi.repository import Gio, Gtk
except ImportError:
pass
class URL:
    """
    A web link that can render itself as markdown, with favicon and page title where available

    Favicon and title are fetched over the network on first access and then remembered
    for the lifetime of the object, so every request is made at most once.
    """

    def __init__(self, url, folder_dir_path):
        """
        :param url: http(s)://...
        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
        """
        self.url = url.strip()
        self.folder_dir_path = folder_dir_path
        self._cache = {}  # results of network lookups, keyed by property name

    @staticmethod
    def _download(url, timeout):
        """
        Download the content located at url

        :param url: address to request
        :param timeout: seconds to wait for the server
        :return: content as bytes, or None if it can't be retrieved
        """
        import http.client  # local import: only needed to name the protocol error below

        try:
            with urllib.request.urlopen(url, timeout=timeout) as response:
                return response.read()
        except (OSError, ValueError, http.client.HTTPException):
            # OSError covers URLError/HTTPError and socket timeouts, ValueError covers malformed URLs
            return None

    @staticmethod
    def _parse_title(html_text):
        """
        Extract the content of the first <title> element

        :param html_text: HTML source of a web page
        :return: title with runs of whitespace collapsed to single spaces, or '' if there is none
        """
        match = re.search(r'<title[^>]*>(.+?)</title>', html_text, re.IGNORECASE | re.DOTALL)
        if match is None:
            return ''
        # Titles may span several lines; a line break would break markdown links and file names
        return ' '.join(match.group(1).split())

    @property
    def icon(self):
        """
        Get URL favicon from google's service

        :return: full absolute path to favicon image this function saved to media/favicons or nothing
        """
        if 'icon' not in self._cache:
            self._cache['icon'] = self._fetch_icon()
        return self._cache['icon']

    def _fetch_icon(self):
        """Download the favicon and store it under media/favicons, named by its content hash"""
        favicon_content = self._download('http://www.google.com/s2/favicons?domain=' + self.url, timeout=5)
        if favicon_content is None:
            return None

        favicon_hash = hashlib.md5(favicon_content).hexdigest()
        if favicon_hash == '3ca64f83fdcf25135d87e08af65e68c9':  # google's dummy icon
            return None

        favicon_dir_path = os.path.join(self.folder_dir_path, 'media', 'favicons')
        os.makedirs(favicon_dir_path, exist_ok=True)
        favicon_path = os.path.join(favicon_dir_path, favicon_hash + '.png')
        if not os.path.isfile(favicon_path):
            with open(favicon_path, 'wb') as favicon_file:
                favicon_file.write(favicon_content)
        return favicon_path

    @property
    def title(self):
        """
        Try to parse title from the web page located at URL

        :return: web page title for URL, or '' if the page can't be fetched, decoded or has no title
        """
        if 'title' not in self._cache:
            self._cache['title'] = self._fetch_title()
        return self._cache['title']

    def _fetch_title(self):
        """Download the page and parse its title"""
        html_bytes = self._download(self.url, timeout=10)
        if html_bytes is None:
            return ''
        try:
            html_text = html_bytes.decode('utf-8')  # TODO encodings other that utf-8, https fails on early Python 3 versions
        except UnicodeDecodeError:
            return ''
        return self._parse_title(html_text)

    @property
    def md(self):
        """
        Get the most informative markdown syntax link for URL, with favicon and title if available

        :return: Markdown syntax link for URL
        """
        icon = self.icon
        title = self.title

        if icon:
            md_favicon = '![](file://media/favicons/{}) '.format(os.path.basename(icon))
        else:
            md_favicon = ''

        if title:
            md_link = '[{}]({})'.format(title, self.url)
        else:
            md_link = '<{}>'.format(self.url)

        return md_favicon + md_link
class File:
    """
    A local file that can render itself as markdown: as a thumbnail, an in-line image or a link with file icon

    Thumbnails and file icons are only produced on Linux and only when the external tools
    they need are available; without them a plain link or in-line image is used.
    """

    def __init__(self, link_path, folder_dir_path, title=''):
        """
        :param link_path: full absolute path to the file
        :param folder_dir_path: full absolute path to the note folder, to know where media folder is
        :param title: optionally specify the file's title, otherwise file name will be used as such
        """
        self.path = link_path.strip()
        self.folder_dir_path = folder_dir_path
        self.filename = os.path.basename(self.path)
        self.ext = os.path.splitext(self.filename)[1]

        # The type is matched in lower case so '.PDF' or '.JPG' are recognised as well
        ext = self.ext.lower()
        if ext == '.pdf':
            self.type = 'pdf'
        elif ext in ('.jpg', '.png', '.gif'):
            self.type = 'image'
        else:
            self.type = 'other'

        self.title = title if title else os.path.splitext(self.filename)[0]

    @property
    def icon(self, icon_size=16, save=True):
        """
        Get file type icon for File

        :param icon_size: requested icon size
        :param save: True to copy icon to media folder, otherwise will return path where OS stores the icon
        :return: path to File icon ('media/fileicons/<name>' relative to the note folder when saved),
                 or '' if no icon is available
        """
        if platform.system() != 'Linux':
            return ''

        try:
            file = Gio.File.new_for_path(self.path)
        except NameError:
            # python-gobject could not be imported
            return ''

        file_info = file.query_info('standard::icon', 0, Gio.Cancellable())
        file_icon = file_info.get_icon().get_names()[0]
        icon_theme = Gtk.IconTheme.get_default()
        icon_info = icon_theme.lookup_icon(file_icon, icon_size, 0)
        if icon_info is None:  # the current icon theme has no such icon
            return ''

        icon_path = icon_info.get_filename()
        if not icon_path or not os.path.isfile(icon_path):
            return ''
        if not save:
            return icon_path

        icon_store_path = os.path.join(self.folder_dir_path, 'media', 'fileicons')
        stored_icon_path = os.path.join(icon_store_path, os.path.basename(icon_path))
        if not os.path.isfile(stored_icon_path):
            try:
                os.makedirs(icon_store_path, exist_ok=True)
                shutil.copy(icon_path, icon_store_path)
            except OSError:
                # Without a stored copy a link to media/fileicons would be broken
                return ''
        return 'media/fileicons/' + os.path.basename(icon_path)

    @property
    def thumb(self):
        """
        Make a thumbnail for appropriate File

        Uses ghostscript for pdf files and imagemagick for images wider than 600 px.

        :return: full absolute path to File's thumbnail, or None if no thumbnail was made
        """
        if platform.system() != 'Linux':
            return None

        thumb_dir = os.path.join(self.folder_dir_path, 'media', 'thumbnails')
        try:
            if self.type == 'pdf':
                dpi = 30
                thumb_path = os.path.join(thumb_dir, 'th_' + self.filename + '.png')
                os.makedirs(thumb_dir, exist_ok=True)
                subprocess.call(['gs', '-q', '-dNOPAUSE', '-dBATCH', '-sDEVICE=png16m', '-r' + str(dpi),
                                 '-sOutputFile=' + thumb_path, '-dLastPage=1', self.path],
                                timeout=10)
            elif self.type == 'image':
                target_width = 600
                width_output = subprocess.check_output(['identify', '-ping', '-format', '%w', self.path],
                                                       timeout=5)
                img_width = int(width_output.decode('ascii', 'replace').strip())
                if img_width <= target_width:
                    return None
                thumb_path = os.path.join(thumb_dir, 'th_' + self.filename)
                os.makedirs(thumb_dir, exist_ok=True)
                subprocess.call(['convert', self.path, '-thumbnail', str(target_width),
                                 '-auto-orient', '-unsharp', '0x.5', thumb_path],
                                timeout=10)
            else:
                return None
        except (OSError, subprocess.SubprocessError, ValueError):
            # The tools are optional dependencies: a missing binary, a failed or timed out call
            # or unexpected output just means there is no thumbnail
            return None

        return thumb_path if os.path.isfile(thumb_path) else None

    @property
    def md(self):
        """
        Get the most informative markdown syntax link for File, as a thumbnail or with file icon if available

        :return: Markdown syntax link for File
        """
        link_path = os.path.relpath(self.path, self.folder_dir_path).replace(os.sep, '/')

        thumb = self.thumb
        if thumb:
            return '[![{}](file://{})](file://{})'.format(self.title,
                                                          'media/thumbnails/' + os.path.basename(thumb),
                                                          link_path)
        if self.type == 'image':
            return '![{}](file://{})'.format(self.title, link_path)

        file_link = '[{}](file://{})'.format(self.title, link_path)
        icon = self.icon  # looked up once, the lookup copies files
        if icon:
            return '![](file://{})'.format('media/fileicons/' + os.path.basename(icon)) + ' ' + file_link
        return file_link

24
inbox/safe_path.py Normal file
View File

@ -0,0 +1,24 @@
import platform
def filename(filename, system=None):
    """
    Replace the characters in file name that are not allowed in current OS

    :param filename: file name
    :param system: OS name as returned by platform.system() whose rules should be applied,
                   the current OS if not specified
    :return: file name which is safe to use in that OS
    """
    if system is None:
        system = platform.system()

    if system == 'Linux':
        replacements = {'/': '-'}
    elif system == 'Darwin':
        replacements = {'/': '-', ':': '-'}
    else:
        # Windows rules, also used for any other OS as the most restrictive set
        replacements = {':': '-', '/': '-', '\\': '-', '|': '-',
                        '?': '', '*': '',
                        '<': '(', '>': ')',
                        '"': "'"}
        # Control characters (e.g. a line break inside a web page title) are not allowed either
        for code in range(32):
            replacements[chr(code)] = ''

    # A single pass over the name instead of one str.replace call per character
    return filename.translate(str.maketrans(replacements))