scripts: Added vidl and archive scripts

Added script for downloading videos with ytdl or any of its derivatives.
The script is adapted from one in my Raspberry Pi setup, which serves
videos remotely, and makes some hardcoded assumptions that might not be
the best for all situations.

Added archive script which takes a file and puts a hardlink to it into
an arbitrary archival folder. This means even if the original file is
deleted its data stays on disk because of its hardlinked archive
version.
This commit is contained in:
Marty Oehme 2021-12-06 10:47:17 +01:00
parent 63074ae46b
commit 9f1fe95662
Signed by: Marty
GPG key ID: B7538B8F50A1C800
2 changed files with 234 additions and 7 deletions

129
scripts/.local/bin/archive Executable file
View file

@ -0,0 +1,129 @@
#!/usr/bin/env bash
#
# Archives files and directories to a central depository via hardlinks
# Takes either a file or directory as argument and links them to the archive.
#
# Archiving does *not* duplicate the data, it just provides another permanent
# pointer to it, so the file will not be lost even if you delete it from the
# rest of your folders.
# Print the usage text of archive.sh on stdout.
# The text is emitted verbatim from a quoted here-document, so nothing in it
# is subject to expansion or format-string interpretation.
show_help() {
  cat <<'EOF'

archive.sh: Hard linking your stuff.
archive.sh is not backup software, rather it will create hardlinks for your files.
What this accomplishes is multiple links into the same data on your hard drive,
so wherever you put the original file another pointer to it will reside in your archive
directory. You can think of it almost like the opposite of backups: You still only
have one copy of the file, but multiple places where you link to it.
This allows you to make the file in question the single source of truth in your system,
good for stopping duplicate downloading or contributing continued upload to
decentralized download protocols while still organizing your files.
Usage: archive.sh [OPTION] <file|directory>
Point it to a file or directory that you want linked.
Options:
-h Display this help.
-d Directory to archive to.
-f Force archive to link. By default, will skip file entries already
existing in archive and not link the current file at all.
This forces the archive to link the files. It will not overwrite
the already existing entry, instead creating a new unique archive
entry by prepending the current unix timestamp.
-a Absolute linking. By default, will rebuild the same directory
structure as in the original in the archive. Using this flag tells
archive.sh to instead link everything directly in the root of the
archive directory, creating a flat directory without any nesting.
Can create a more coherent archive, but runs the danger of
accidentally running into file conflicts.
-n Dryrun. Do not link anything yet but print the commands that would
be executed.
EOF
}
# Parse the command line flags into the globals read by main() and its
# helpers (FORCE, ABSOLUTE, ARCHIVEDIR, DRYRUN), then drop the parsed flags
# so that only the file/directory operand remains in "$@".
while getopts "nahfd:" opt; do
  case "$opt" in
    # v) verbose=1
    # ;;
    f)
      FORCE="true"
      ;;
    a)
      ABSOLUTE="true"
      ;;
    d)
      ARCHIVEDIR="$OPTARG"
      ;;
    n)
      DRYRUN="true"
      ;;
    h)
      # help was asked for explicitly: print to stdout and succeed
      show_help
      exit 0
      ;;
    *)
      # unknown flag or missing argument (getopts already printed the reason):
      # show usage on stderr and signal the failure to the caller
      show_help >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))
# Link every regular file found below the given path into the archive.
#
# Globals read: ARCHIVEDIR (archive root, falls back to a hardcoded path),
#               ABSOLUTE   ("true": link flat into the archive root),
#               FORCE      ("true": link duplicates under a timestamped name),
#               DRYRUN     ("true": only print the commands)
# Arguments:    $1 - file or directory to archive (defaults to ".")
# Outputs:      in dry-run mode the mkdir/ln commands on stdout;
#               per-file problems on stderr
main() {
  local archivedir source file fname target err rc duplicatef
  archivedir="${ARCHIVEDIR:-/mnt/dietpi_userdata/minio-data/videos/archive}"
  source="${1:-"."}"
  _ensuredir "${archivedir}"

  # NUL-delimited so names with leading blanks, backslashes or newlines survive
  find "$source" -type f -print0 | while IFS= read -r -d '' file; do
    if [ "$ABSOLUTE" = "true" ]; then
      fname="$(basename "$file")"
    else
      fname="$file"
      _ensuredir "${archivedir}/$(dirname "$fname")"
    fi
    target="${archivedir}/${fname}"

    # In dry-run mode _link only echoes the command; do not capture it,
    # otherwise the user never sees what would be linked.
    if [ "$DRYRUN" = "true" ]; then
      _link "$file" "$target"
      continue
    fi

    err=$(_link "$file" "$target")
    rc="$?"
    if [ "$rc" -eq 0 ]; then
      continue
    fi

    # Decide by looking at the target instead of parsing ln's (localized)
    # error message.
    if [ ! -e "$target" ] && [ ! -L "$target" ]; then
      printf 'ERROR %s: Could not link file %s. Nothing done.\n' "$rc" "$fname" >&2
      printf '%s\n' "$err" >&2
    elif [ "$FORCE" = "true" ]; then
      # force mode: also add duplicates, but inform user
      # _prepend_date already returns the name including its directory part
      duplicatef=$(_prepend_date "${fname}")
      if err=$(_link "${file}" "${archivedir}/${duplicatef}"); then
        printf 'ERROR: File %s exists. Linked as %s. Check for duplicates.\n' "$fname" "$duplicatef" >&2
      else
        printf 'ERROR: File %s exists and could not be linked as %s.\n' "$fname" "$duplicatef" >&2
        printf '%s\n' "$err" >&2
      fi
    else
      printf 'ERROR %s: File %s exists. Nothing done. Use -f to force relinking new file.\n' "$rc" "$fname" >&2
      printf '%s\n' "$err" >&2
    fi
  done
}
# Build an alternative name for a path by prefixing its file name with the
# current unix timestamp, keeping the directory part in front.
#
# Arguments: $1 - path (a bare file name yields a "./" directory part)
# Outputs:   "<dirname>/<seconds since epoch>-<basename>" on stdout,
#            without a trailing newline
_prepend_date() {
  # locals, so calling this outside a subshell cannot clobber a caller's fname
  local path="$1" dir base
  dir=$(dirname -- "$path")
  base=$(basename -- "$path")
  printf "%s/%s-%s" "$dir" "$(date +%s)" "$base"
}
# Hard-link $1 to $2, or only print the command when DRYRUN is "true".
#
# Arguments: $1 - existing file, $2 - name of the link to create
# Outputs:   dry-run: the ln command line; otherwise whatever ln wrote to
#            stderr (its regular stdout is discarded)
# Returns:   exit status of ln (or of echo in dry-run mode)
_link() {
  local src="${1}" dest="${2}"
  if [ "$DRYRUN" != "true" ]; then
    # order matters: stderr goes to the caller's stdout first, then stdout is dropped
    # shellcheck disable=SC2069
    ln "$src" "$dest" 2>&1 >/dev/null
    return
  fi
  echo ln "$src" "$dest"
}
# Create directory $1 including missing parents, or only print the mkdir
# command when DRYRUN is "true".
_ensuredir() {
  case "$DRYRUN" in
    true)
      echo mkdir -p "${1}"
      ;;
    *)
      mkdir -p "${1}"
      ;;
  esac
}
# Entry point: run the archiver on the operands left after option parsing.
main "$@"

View file

@ -1,8 +1,106 @@
#!/usr/bin/env sh
#
# send video downloads to my raspberry machine
# where they will be automatically queued and
# archived when downloaded.
#!/usr/bin/env bash
# download a file if it does not exist in the archive already
# otherwise just re-link it from the archive
# Forwards all arguments to the vidl script on the host `dietpi` via ssh.
# The single quotes keep $HOME from being expanded locally, so it is sent
# literally to the remote side (hence the SC2029 suppression).
# NOTE(review): this line looks like the removed side of the diff (the old
# version of this script) — confirm it is not part of the new script.
# shellcheck disable=SC2029
ssh dietpi '$HOME/bin/vidl' "$@"
# Defaults; every one of them can be preset through the environment.
# Downloader command and the output template for the file name (no extension).
: "${YT_DL_CMD:=yt-dlp}"
: "${YT_DL_TITLE:=%(channel)s_%(title)s_%(id)s}"
# Download inbox and archive live below the videos directory. Note that
# "/inbox" and "/archive" are appended even to values taken from the
# environment.
DL_FOLDER="${DL_FOLDER:-${XDG_VIDEOS_DIR:-${HOME}/videos}}/inbox"
ARCHIVE_FOLDER="${ARCHIVE_FOLDER:-${XDG_VIDEOS_DIR:-${HOME}/videos}}/archive"
show_help() {
printf """
archive.sh: Hard linking your stuff.
vidl: Video downloader
Simple wrapper for youtube-dl (or yt-dlp or similar).
Usage: vidl [OPTION] <link>
Point it to a link you want downloaded.
Options:
-h Display this help.
-d Directory to check for existence of file and archive to if needed.
-f Directory to download to.
-t Point to youtube-dl command to use. Can be command or absolute link.
By default will use \`yt-dlp\` which is a more up to date fork of the
program.
"""
}
# Parse the command line flags; they override the DL_FOLDER, YT_DL_CMD and
# ARCHIVE_FOLDER defaults. Afterwards only the link operand(s) remain in "$@".
while getopts "t:f:d:h" opt; do
  case "$opt" in
    # v) verbose=1
    # ;;
    f)
      DL_FOLDER="$OPTARG"
      ;;
    t)
      YT_DL_CMD="$OPTARG"
      ;;
    d)
      ARCHIVE_FOLDER="$OPTARG"
      ;;
    h)
      # help was asked for explicitly: print to stdout and succeed
      show_help
      exit 0
      ;;
    *)
      # unknown flag or missing argument (getopts already printed the reason):
      # show usage on stderr and signal the failure to the caller
      show_help >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))
# Hand a file over to the `archive` command for flat ("-a") linking.
#
# Arguments: $1 - file to link, $2 - directory to link it into
# Exits the script with status 1 when no `archive` command is available.
_call_archive() {
  if ! command -v archive >/dev/null 2>&1; then
    echo "archive command not found."
    exit 1
  fi
  archive -d "$2" -a "$1"
}
# File extensions treated as video files, as an extended-regex alternation
# ("mpe?g" covers both mpg and mpeg).
vid_ext='(mp4|avi|webm|mkv|mpe?g|3gp|m4a)'

# List the files below a directory that are called "<fname>.<video extension>".
#
# The name is compared literally, so titles containing glob or regex
# characters such as [ ] * . are matched as they are. The extension is
# compared case-insensitively.
#
# Arguments: $1 - directory to search, $2 - file name without extension
# Outputs:   one matching path per line on stdout (nothing when none match)
_findfile() {
  local dir="$1" fname="$2" path base ext
  find "$dir" -type f -print0 | (
    # only affects the [[ =~ ]] below and stays confined to this subshell
    shopt -s nocasematch
    while IFS= read -r -d '' path; do
      base="${path##*/}"
      # strip the literal "<fname>." prefix; an unchanged value means the
      # file is called something else (prefix removal stays case-sensitive)
      ext="${base#"$fname".}"
      [ "$ext" != "$base" ] || continue
      if [[ "$ext" =~ ^${vid_ext}$ ]]; then
        printf '%s\n' "$path"
      fi
    done
  )
}
# Output template for the file name (without extension). Keep a template that
# is already set (e.g. through the environment) instead of overwriting it
# unconditionally; fall back to the default only when none is set.
YT_DL_TITLE="${YT_DL_TITLE:-%(channel)s_%(title)s_%(id)s}"
# Download the video behind the global $url into $DL_FOLDER using the
# configured downloader command.
#
# Globals read: YT_DL_CMD, DL_FOLDER, YT_DL_TITLE, url
# Returns:      exit status of the downloader
_download() {
  local -a opts=(
    # best single-file format up to 1080p
    -f 'best[height<=1080]'
    # file name from the title template, extension chosen by the downloader
    -o "$DL_FOLDER/$YT_DL_TITLE.%(ext)s"
    --write-sub --write-auto-sub --embed-subs --sub-lang en,de,es,fr
    --retries 15
  )
  "$YT_DL_CMD" "${opts[@]}" "$url"
}
# Main flow: ask the downloader for the file name of the link, look for a
# video of that name in the archive, and either download + archive it or
# link the archived copy back into the download folder.
url="$*"
if [ -z "$url" ]; then
  echo "No link given." >&2
  show_help >&2
  exit 1
fi

dl_fn=$("$YT_DL_CMD" --get-filename -o "$YT_DL_TITLE" "$url")
if [ -z "$dl_fn" ]; then
  echo "Could not get video filename, error with youtube-dl." >&2
  exit 1
fi
# name (without extension) handed to _findfile
video_file_regex="${dl_fn}"

alreadyexists=$(_findfile "$ARCHIVE_FOLDER" "$video_file_regex")
if [ -z "$alreadyexists" ]; then
  # download the video to download folder; whether it worked is decided by
  # looking for the resulting file below
  _download || echo "Downloader reported an error, checking for the file anyway." >&2
  # yt-dl never knows the exact filename in advance
  file=$(_findfile "$DL_FOLDER" "$video_file_regex")
  if [ -z "$file" ]; then
    echo "Could not find a downloaded file for $dl_fn in $DL_FOLDER." >&2
    exit 1
  fi
  # _findfile prints one path per line; archive each of them on its own
  while IFS= read -r found; do
    _call_archive "$found" "$ARCHIVE_FOLDER" </dev/null
  done <<<"$file"
else
  # only link old file if one exists
  while IFS= read -r file; do
    echo "file $file exists, not downloading duplicate"
    _call_archive "$file" "$DL_FOLDER" </dev/null
  done <<<"$alreadyexists"
fi