From b62649cae03bab30ffebe54c92ee40d0a50713ea Mon Sep 17 00:00:00 2001 From: CroneKorkN Date: Mon, 9 Mar 2026 11:59:47 +0100 Subject: [PATCH] nc picsort in python --- .../nextcloud-picsort/files/nextcloud-picsort | 303 ++++++++++++------ 1 file changed, 201 insertions(+), 102 deletions(-) diff --git a/bundles/nextcloud-picsort/files/nextcloud-picsort b/bundles/nextcloud-picsort/files/nextcloud-picsort index c55b9df..4af20de 100644 --- a/bundles/nextcloud-picsort/files/nextcloud-picsort +++ b/bundles/nextcloud-picsort/files/nextcloud-picsort @@ -1,110 +1,209 @@ -#!/bin/bash -set -euo pipefail +#!/usr/bin/env python3 +import argparse +import base64 +import hashlib +import os +import shutil +import subprocess +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from pathlib import Path -if [[ $# -ne 4 ]]; then - echo "Usage: $0 " >&2 - exit 1 -fi -NC_USER="$1" -SOURCE_SUBDIR="$2" -DEST_SUBDIR="$3" -UNSORTABLE_SUBDIR="$4" - -REL_SOURCE_PATH="/$NC_USER/files/$SOURCE_SUBDIR" -ABS_SOURCE_PATH="/var/lib/nextcloud/$NC_USER/files/$SOURCE_SUBDIR" - -REL_DEST_PATH="/$NC_USER/files/$DEST_SUBDIR" -ABS_DEST_PATH="/var/lib/nextcloud/$NC_USER/files/$DEST_SUBDIR" - -REL_UNSORTABLE_PATH="/$NC_USER/files/$UNSORTABLE_SUBDIR" -ABS_UNSORTABLE_PATH="/var/lib/nextcloud/$NC_USER/files/$UNSORTABLE_SUBDIR" - -echo "STARTING..." - -chown -R www-data:www-data "$ABS_SOURCE_PATH" -chmod -R 770 "$ABS_SOURCE_PATH" - -process_file() { - local f="$1" - local DATETIME DATE TIME YEAR MONTH DAY HOUR MINUTE SECOND HASH EXT RAW FILE RELPATH DIRNAME - - echo "PROCESSING: $f" - - DATETIME="$( - exiftool -s -s -s -CreateDate "$f" 2>/dev/null | head -n1 - )" - - if [[ -z "$DATETIME" ]]; then - DATETIME="$( - exiftool -s -s -s -FileModifyDate "$f" 2>/dev/null | head -n1 | cut -d'+' -f1 | cut -d'-' -f1 - )" - fi - - if [[ -z "$DATETIME" ]]; then - RELPATH="$(realpath --relative-to="$ABS_SOURCE_PATH" "$f")" - DIRNAME="$(dirname "$ABS_UNSORTABLE_PATH/$RELPATH")" - echo "UNSORTABLE: $f" - mkdir -p "$DIRNAME" - mv -n -- "$f" "$DIRNAME/" - return 0 - fi - - DATE="$(cut -d' ' -f1 <<< "$DATETIME")" - TIME="$(cut -d' ' -f2 <<< "$DATETIME" | cut -d'+' -f1)" - - YEAR="$(cut -d':' -f1 <<< "$DATE")" - MONTH="$(cut -d':' -f2 <<< "$DATE")" - DAY="$(cut -d':' -f3 <<< "$DATE")" - HOUR="$(cut -d':' -f1 <<< "$TIME")" - MINUTE="$(cut -d':' -f2 <<< "$TIME")" - SECOND="$(cut -d':' -f3 <<< "$TIME")" - - HASH="$(sha256sum "$f" | awk '{print $1}' | xxd -r -p | base64 | head -c 6 | tr '/+' '_-')" - EXT="$(tr '[:upper:]' '[:lower:]' <<< "${f##*.}")" - - if [[ "$EXT" == "cr2" || "$EXT" == "cr3" ]]; then - RAW="raw/" - else - RAW="" - fi - - FILE="$ABS_DEST_PATH/$YEAR-$MONTH/${RAW}${YEAR}${MONTH}${DAY}-${HOUR}${MINUTE}${SECOND}_${HASH}.${EXT}" - echo "DESTINATION: $FILE" - mkdir -p "$(dirname "$FILE")" - mv -- "$f" "$FILE" +ALLOWED_EXTS = { + ".png", ".jpg", ".jpeg", ".heic", ".cr2", ".cr3", ".mp4", ".mov", + ".webp", ".avif", ".gif", } -mapfile -d '' -t FILES < <( - find "$ABS_SOURCE_PATH" -type f \( \ - -iname '*.PNG' -o \ - -iname '*.JPG' -o \ - -iname '*.JPEG' -o \ - -iname '*.HEIC' -o \ - -iname '*.CR2' -o \ - -iname '*.CR3' -o \ - -iname '*.MP4' -o \ - -iname '*.MOV' \ - \) -print0 -) +DATETIME_KEYS = [ + ("Composite", "SubSecDateTimeOriginal"), + ("Composite", "SubSecCreateDate"), + ("ExifIFD", "DateTimeOriginal"), + ("ExifIFD", "CreateDate"), + ("XMP-xmp", "CreateDate"), + ("Keys", "CreationDate"), + ("QuickTime", "CreateDate"), + ("XMP-photoshop", "DateCreated"), +] -if ((${#FILES[@]})); then - export -f process_file - export ABS_SOURCE_PATH ABS_DEST_PATH ABS_UNSORTABLE_PATH - printf '%s\0' "${FILES[@]}" | - xargs -0 -n1 -P"$(nproc)" bash -c 'process_file "$1"' _ +def run(command: list[str], check: bool = True) -> subprocess.CompletedProcess: + return subprocess.run(command, text=True, capture_output=True, check=check) - echo "SCANNING..." - chown -R www-data:www-data "$ABS_DEST_PATH" - chown -R www-data:www-data "$ABS_UNSORTABLE_PATH" - chmod -R 770 "$ABS_DEST_PATH" - chmod -R 770 "$ABS_UNSORTABLE_PATH" - sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_SOURCE_PATH" - sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_UNSORTABLE_PATH" - sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_DEST_PATH" -else - echo "NO MATCHING FILES FOUND." -fi -echo "FINISH." \ No newline at end of file +def exiftool_data(file: Path) -> dict | None: + result = run([ + "exiftool", + "-j", + "-a", + "-u", + "-g1", + "-time:all", + "-api", "QuickTimeUTC=1", + "-d", "%Y-%m-%dT%H:%M:%S%z", + str(file), + ], check=False) + if result.returncode != 0: + return None + try: + data = __import__("json").loads(result.stdout) + return data[0] if data else None + except Exception: + return None + + +def exiftool_timestamp(file: Path) -> datetime | None: + data = exiftool_data(file) + if not data: + return None + + for category, key in DATETIME_KEYS: + try: + value = data[category][key] + except (KeyError, TypeError): + continue + try: + return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S%z") + except ValueError: + continue + + return None + + +def short_hash(file: Path) -> str: + h = hashlib.sha256() + with file.open("rb") as fh: + for chunk in iter(lambda: fh.read(1024 * 1024), b""): + h.update(chunk) + digest = h.digest() + b64 = base64.b64encode(digest).decode("ascii") + return b64[:3].replace("/", "_").replace("+", "-") + + +def build_destination(dest_root: Path, file: Path, ts: datetime) -> Path: + ext = file.suffix.lower().lstrip(".") + year = ts.strftime("%Y") + month = ts.strftime("%m") + day = ts.strftime("%d") + hour = ts.strftime("%H") + minute = ts.strftime("%M") + second = ts.strftime("%S") + hash_part = short_hash(file) + + raw_subdir = "raw" if ext in {"cr2", "cr3"} else None + month_dir = dest_root / f"{year}-{month}" + if raw_subdir: + month_dir = month_dir / raw_subdir + + filename = f"{year}{month}{day}-{hour}{minute}{second}_{hash_part}.{ext}" + return month_dir / filename + + +def move_unsortable(file: Path, source_root: Path, unsortable_root: Path) -> None: + relpath = file.relative_to(source_root) + target_dir = (unsortable_root / relpath).parent + target_dir.mkdir(parents=True, exist_ok=True) + shutil.chown(str(target_dir), user="www-data", group="www-data") + target = target_dir / file.name + if target.exists(): + return + shutil.move(str(file), str(target)) + shutil.chown(str(target), user="www-data", group="www-data") + + +def move_sorted(file: Path, target: Path) -> None: + target.parent.mkdir(parents=True, exist_ok=True) + shutil.chown(str(target.parent), user="www-data", group="www-data") + shutil.move(str(file), str(target)) + shutil.chown(str(target), user="www-data", group="www-data") + +def process_file(file: Path, source_root: Path, dest_root: Path, unsortable_root: Path) -> tuple[Path, str]: + print(f"PROCESSING: {file}") + ts = exiftool_timestamp(file) + + if ts is None: + print(f"UNSORTABLE: {file}") + move_unsortable(file, source_root, unsortable_root) + return file, "unsortable" + + target = build_destination(dest_root, file, ts) + print(f"DESTINATION: {target}") + move_sorted(file, target) + return file, "sorted" + + +def scan_nextcloud(rel_source: str, rel_unsortable: str, rel_dest: str) -> None: + print("SCANNING...") + # run(["chown", "-R", "www-data:www-data", abs_source_path], check=True) + # run(["chmod", "-R", "770", abs_source_path], check=True) + + # run(["chown", "-R", "www-data:www-data", abs_dest_path], check=True) + # run(["chown", "-R", "www-data:www-data", abs_unsortable_path], check=True) + # run(["chmod", "-R", "770", abs_dest_path], check=True) + # run(["chmod", "-R", "770", abs_unsortable_path], check=True) + + run(["sudo", "-u", "www-data", "php", "/opt/nextcloud/occ", "files:scan", "--path", rel_source], check=True) + run(["sudo", "-u", "www-data", "php", "/opt/nextcloud/occ", "files:scan", "--path", rel_unsortable], check=True) + run(["sudo", "-u", "www-data", "php", "/opt/nextcloud/occ", "files:scan", "--path", rel_dest], check=True) + + run(["systemctl", "start", "nextcloud-generate-new-previews.service"], check=True) + + +def iter_files(source_root: Path): + for path in source_root.rglob("*"): + if path.is_file() and path.suffix.lower() in ALLOWED_EXTS: + yield path + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Sort Nextcloud media files by embedded timestamp." + ) + parser.add_argument("nc_user") + parser.add_argument("source_subdir") + parser.add_argument("dest_subdir") + parser.add_argument("unsortable_subdir") + parser.add_argument("--workers", type=int, default=os.cpu_count() or 1) + args = parser.parse_args() + + nc_user = args.nc_user + source_subdir = args.source_subdir + dest_subdir = args.dest_subdir + unsortable_subdir = args.unsortable_subdir + + rel_source_path = f"/{nc_user}/files/{source_subdir}" + abs_source_path = f"/var/lib/nextcloud/{nc_user}/files/{source_subdir}" + + rel_dest_path = f"/{nc_user}/files/{dest_subdir}" + abs_dest_path = f"/var/lib/nextcloud/{nc_user}/files/{dest_subdir}" + + rel_unsortable_path = f"/{nc_user}/files/{unsortable_subdir}" + abs_unsortable_path = f"/var/lib/nextcloud/{nc_user}/files/{unsortable_subdir}" + + source_root = Path(abs_source_path) + dest_root = Path(abs_dest_path) + unsortable_root = Path(abs_unsortable_path) + + print("STARTING...") + + run(["chown", "-R", "www-data:www-data", str(source_root)], check=True) + run(["chmod", "-R", "770", str(source_root)], check=True) + + files = list(iter_files(source_root)) + + if not files: + print("NO MATCHING FILES FOUND.") + print("FINISH.") + raise SystemExit(0) + + with ThreadPoolExecutor(max_workers=max(1, args.workers)) as executor: + futures = { + executor.submit(process_file, file, source_root, dest_root, unsortable_root): file + for file in files + } + for future in as_completed(futures): + future.result() + + scan_nextcloud(rel_source_path, rel_unsortable_path, rel_dest_path) + + print("FINISH.") \ No newline at end of file