From 9f1fe956628ac0c7fa6d31fce321b046b20bb6f8 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Mon, 6 Dec 2021 10:47:17 +0100
Subject: [PATCH] scripts: Added vidl and archive scripts

Added script for downloading videos with ytdl or any of its derivatives.
The script is adapted from one in my raspberry pi setup which serves
videos remotely, and makes some hardcoded assumptions that might not be
the best for all situations.

Added archive script which takes a file and puts a hardlink to it into
an arbitrary archival folder. This means even if the original file is
deleted its data stays on disk because of its hardlinked archive
version.
---
 scripts/.local/bin/archive | 149 ++++++++++++++++++++++++++++++++++++
 scripts/.local/bin/vidl    | 154 ++++++++++++++++++++++++++++++++++--
 2 files changed, 296 insertions(+), 7 deletions(-)
 create mode 100755 scripts/.local/bin/archive

diff --git a/scripts/.local/bin/archive b/scripts/.local/bin/archive
new file mode 100755
index 0000000..d820e80
--- /dev/null
+++ b/scripts/.local/bin/archive
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Archives files and directories to a central depository via hardlinks
+# Takes either a file or directory as argument and links them to the archive.
+#
+# Archiving does *not* duplicate the data, it just provides another permanent
+# pointer to it, so the file will not be lost even if you delete it from the
+# rest of your folders.
+
+# Print the usage text to stdout.
+show_help() {
+  cat <<'EOF'
+
+archive.sh: Hard linking your stuff.
+
+archive.sh is not backup software, rather it will create hardlinks for your files.
+What this accomplishes is multiple links into the same data on your hard drive,
+so wherever you put the original file another pointer to it will reside in your archive
+directory. You can think of it almost like the opposite of backups: You still only
+have one copy of the file, but multiple places where you link to it.
+This allows you to make the file in question the single source of truth in your system,
+good for stopping duplicate downloading or contributing continued upload to
+decentralized download protocols while still organizing your files.
+
+Usage: archive.sh [OPTION]... [FILE|DIRECTORY]
+
+Point it to a file or directory that you want linked.
+
+Options:
+
+  -h          Display this help.
+
+  -d DIR      Directory to archive to.
+
+  -f          Force archive to link. By default, will skip file entries already
+              existing in archive and not link the current file at all.
+              This forces the archive to link the files. It will not overwrite
+              the already existing entry, instead creating a new unique archive
+              entry by prepending the current unix timestamp.
+
+  -a          Absolute linking. By default, will rebuild the same directory
+              structure as in the original in the archive. Using this flag tells
+              archive.sh to instead link everything directly in the root of the
+              archive directory, creating a flat directory without any nesting.
+              Can create a more coherent archive, but runs the danger of
+              accidentally running into file conflicts.
+
+  -n          Dryrun. Do not link anything yet but print the commands that would
+              be executed.
+EOF
+}
+
+while getopts "nahfd:" opt; do
+  case "$opt" in
+  # v) verbose=1
+  #   ;;
+  f)
+    FORCE="true"
+    ;;
+  a)
+    ABSOLUTE="true"
+    ;;
+  d)
+    ARCHIVEDIR="$OPTARG"
+    ;;
+  n)
+    DRYRUN="true"
+    ;;
+  h)
+    show_help
+    exit 0
+    ;;
+  *)
+    # unknown option or missing argument: a usage error, not a success
+    show_help >&2
+    exit 1
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+# Link every regular file found below $1 into the archive directory.
+#   $1  file or directory to archive (default: current directory)
+# Reads ARCHIVEDIR, ABSOLUTE, FORCE and DRYRUN as set by the options above.
+main() {
+  local archivedir source file fname target newname
+  archivedir="${ARCHIVEDIR:-/mnt/dietpi_userdata/minio-data/videos/archive}"
+  source="${1:-"."}"
+  _ensuredir "${archivedir}"
+
+  # IFS= keeps leading and trailing blanks of file names intact
+  find "$source" -type f | while IFS= read -r file; do
+    if [ "$ABSOLUTE" = "true" ]; then
+      fname="$(basename "$file")"
+    else
+      fname="$file"
+      _ensuredir "${archivedir}/$(dirname "$fname")"
+    fi
+    target="${archivedir}/${fname}"
+
+    # Check for the entry itself rather than parsing the localized error text
+    # of ln; -L also catches a dangling symlink occupying the name.
+    if [ ! -e "$target" ] && [ ! -L "$target" ]; then
+      _link "$file" "$target" ||
+        printf 'ERROR: Could not link %s into the archive.\n' "$fname" >&2
+    elif [ "$FORCE" = "true" ]; then
+      # force mode: also add duplicates, but inform user
+      newname=$(_prepend_date "$fname")
+      if _link "$file" "${archivedir}/${newname}"; then
+        printf 'ERROR: File %s exists. Linked as %s. Check for duplicates.\n' "$fname" "$newname" >&2
+      else
+        printf 'ERROR: Could not link %s into the archive.\n' "$fname" >&2
+      fi
+    else
+      printf 'ERROR: File %s exists. Nothing done. Use -f to force relinking new file.\n' "$fname" >&2
+    fi
+  done
+}
+
+# Print $1 with the current unix timestamp prepended to its file name,
+# e.g. "some/dir/file" becomes "some/dir/1638783437-file".
+_prepend_date() {
+  local dir base
+  dir=$(dirname "$1")
+  base=$(basename "$1")
+  printf "%s/%s-%s" "$dir" "$(date +%s)" "$base"
+}
+
+# Hard link $1 to $2, or only print the command when DRYRUN is set.
+# The error message of ln stays on stderr; returns the exit status of ln.
+_link() {
+  if [ "$DRYRUN" = "true" ]; then
+    echo ln "${1}" "${2}"
+    return
+  fi
+  ln -- "${1}" "${2}"
+}
+
+# Create directory $1 including parents, or only print the command when
+# DRYRUN is set.
+_ensuredir() {
+  if [ "$DRYRUN" = "true" ]; then
+    echo mkdir -p "${1}"
+    return
+  fi
+  mkdir -p -- "${1}"
+}
+
+main "$@"
diff --git a/scripts/.local/bin/vidl b/scripts/.local/bin/vidl
index 0758055..9d4dab3 100755
--- a/scripts/.local/bin/vidl
+++ b/scripts/.local/bin/vidl
@@ -1,8 +1,148 @@
-#!/usr/bin/env sh
-#
-# send video downloads to my raspberry machine
-# where they will be automatically queued and
-# archived when downloaded.
+#!/usr/bin/env bash
+# download a file if it does not exist in the archive already
+# otherwise just re-link it from the archive
 
-# shellcheck disable=SC2029
-ssh dietpi '$HOME/bin/vidl' "$@"
+# Settings, overridable from the environment. NOTE: an environment value is
+# used as the base directory and still gets /inbox or /archive appended,
+# whereas the -f and -d options below are taken verbatim.
+DL_FOLDER="${DL_FOLDER:-${XDG_VIDEOS_DIR:-$HOME/videos}}/inbox"
+ARCHIVE_FOLDER="${ARCHIVE_FOLDER:-${XDG_VIDEOS_DIR:-$HOME/videos}}/archive"
+YT_DL_CMD="${YT_DL_CMD:-yt-dlp}"
+# output file name template handed to the downloader, without extension
+YT_DL_TITLE="${YT_DL_TITLE:-%(channel)s_%(title)s_%(id)s}"
+
+# Print the usage text to stdout.
+show_help() {
+  cat <<'EOF'
+
+vidl: Video downloader
+
+Simple wrapper for youtube-dl (or yt-dlp or similar).
+
+Usage: vidl [OPTION]... URL
+
+Point it to a link you want downloaded.
+
+Options:
+
+  -h          Display this help.
+
+  -d DIR      Directory to check for existence of file and archive to if needed.
+
+  -f DIR      Directory to download to.
+
+  -t CMD      Point to youtube-dl command to use. Can be command or absolute link.
+              By default will use `yt-dlp` which is a more up to date fork of the
+              program.
+EOF
+}
+
+while getopts "t:f:d:h" opt; do
+  case "$opt" in
+  # v) verbose=1
+  #   ;;
+  f)
+    DL_FOLDER="$OPTARG"
+    ;;
+  t)
+    YT_DL_CMD="$OPTARG"
+    ;;
+  d)
+    ARCHIVE_FOLDER="$OPTARG"
+    ;;
+  h)
+    show_help
+    exit 0
+    ;;
+  *)
+    # unknown option or missing argument: a usage error, not a success
+    show_help >&2
+    exit 1
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+# Hard link file $1 flat into directory $2 with the `archive` script, which
+# has to be on PATH. Exits the script when it is missing.
+_call_archive() {
+  command -v archive >/dev/null 2>&1 || {
+    echo "archive command not found." >&2
+    exit 1
+  }
+  archive -d "$2" -a "$1"
+}
+
+# extensions accepted as video files: extended regex, matched ignoring case
+vid_ext="mp4|avi|webm|mkv|mpe?g|3gp|m4a"
+
+# Print every file below directory $1 that is named "$2.<video extension>",
+# one path per line. $2 is compared as a literal string (ignoring case), so
+# titles containing glob or regex characters such as [ ] * are matched safely.
+_findfile() {
+  local dir="$1" fname="$2"
+  [ -d "$dir" ] || return 0
+  # Values travel through the environment because `awk -v` would interpret
+  # backslash escapes inside them.
+  find "$dir" -type f |
+    VIDL_STEM="$fname" VIDL_EXT="$vid_ext" awk '
+      BEGIN {
+        stem = tolower(ENVIRON["VIDL_STEM"])
+        ext_re = "^(" ENVIRON["VIDL_EXT"] ")$"
+      }
+      {
+        base = $0
+        sub(/.*\//, "", base)
+        dot = match(base, /\.[^.]*$/)
+        if (dot == 0) next
+        if (tolower(substr(base, 1, dot - 1)) != stem) next
+        if (tolower(substr(base, dot + 1)) ~ ext_re) print
+      }'
+}
+
+# Download $url into DL_FOLDER, named after the YT_DL_TITLE template.
+_download() {
+  "$YT_DL_CMD" \
+    -f 'best[height<=1080]' \
+    -o "$DL_FOLDER/$YT_DL_TITLE.%(ext)s" \
+    --write-sub --write-auto-sub --embed-subs --sub-lang en,de,es,fr \
+    --retries 15 \
+    "$url"
+}
+
+url="$*"
+if [ -z "$url" ]; then
+  show_help >&2
+  exit 1
+fi
+
+dl_fn=$("$YT_DL_CMD" --get-filename -o "$YT_DL_TITLE" "$url")
+if [ -z "$dl_fn" ]; then
+  echo "Could not get video filename, error with youtube-dl." >&2
+  exit 1
+fi
+
+alreadyexists=$(_findfile "$ARCHIVE_FOLDER" "$dl_fn")
+
+if [ -z "$alreadyexists" ]; then
+  # download the video to download folder
+  _download
+
+  # yt-dl never knows the exact filename in advance
+  found=$(_findfile "$DL_FOLDER" "$dl_fn")
+  if [ -z "$found" ]; then
+    echo "Downloaded file not found in $DL_FOLDER." >&2
+    exit 1
+  fi
+  # a here-string keeps the loop in this shell, so the exit inside
+  # _call_archive really ends the script
+  while IFS= read -r file; do
+    _call_archive "$file" "$ARCHIVE_FOLDER"
+  done <<<"$found"
+else
+  # only link old file if one exists
+  while IFS= read -r file; do
+    echo "file $file exists, not downloading duplicate"
+    _call_archive "$file" "$DL_FOLDER"
+  done <<<"$alreadyexists"
+fi