nc picsort in python

This commit is contained in:
CroneKorkN 2026-03-09 11:59:47 +01:00
parent 60c2c42a49
commit b62649cae0
Signed by: cronekorkn
SSH key fingerprint: SHA256:v0410ZKfuO1QHdgKBsdQNF64xmTxOF8osF1LIqwTcVw

View file

@ -1,110 +1,209 @@
#!/bin/bash #!/usr/bin/env python3
set -euo pipefail import argparse
import base64
import hashlib
import os
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
if [[ $# -ne 4 ]]; then
echo "Usage: $0 <nc_user> <source_subdir> <dest_subdir> <unsortable_subdir>" >&2
exit 1
fi
NC_USER="$1" ALLOWED_EXTS = {
SOURCE_SUBDIR="$2" ".png", ".jpg", ".jpeg", ".heic", ".cr2", ".cr3", ".mp4", ".mov",
DEST_SUBDIR="$3" ".webp", ".avif", ".gif",
UNSORTABLE_SUBDIR="$4"
REL_SOURCE_PATH="/$NC_USER/files/$SOURCE_SUBDIR"
ABS_SOURCE_PATH="/var/lib/nextcloud/$NC_USER/files/$SOURCE_SUBDIR"
REL_DEST_PATH="/$NC_USER/files/$DEST_SUBDIR"
ABS_DEST_PATH="/var/lib/nextcloud/$NC_USER/files/$DEST_SUBDIR"
REL_UNSORTABLE_PATH="/$NC_USER/files/$UNSORTABLE_SUBDIR"
ABS_UNSORTABLE_PATH="/var/lib/nextcloud/$NC_USER/files/$UNSORTABLE_SUBDIR"
echo "STARTING..."
chown -R www-data:www-data "$ABS_SOURCE_PATH"
chmod -R 770 "$ABS_SOURCE_PATH"
process_file() {
local f="$1"
local DATETIME DATE TIME YEAR MONTH DAY HOUR MINUTE SECOND HASH EXT RAW FILE RELPATH DIRNAME
echo "PROCESSING: $f"
DATETIME="$(
exiftool -s -s -s -CreateDate "$f" 2>/dev/null | head -n1
)"
if [[ -z "$DATETIME" ]]; then
DATETIME="$(
exiftool -s -s -s -FileModifyDate "$f" 2>/dev/null | head -n1 | cut -d'+' -f1 | cut -d'-' -f1
)"
fi
if [[ -z "$DATETIME" ]]; then
RELPATH="$(realpath --relative-to="$ABS_SOURCE_PATH" "$f")"
DIRNAME="$(dirname "$ABS_UNSORTABLE_PATH/$RELPATH")"
echo "UNSORTABLE: $f"
mkdir -p "$DIRNAME"
mv -n -- "$f" "$DIRNAME/"
return 0
fi
DATE="$(cut -d' ' -f1 <<< "$DATETIME")"
TIME="$(cut -d' ' -f2 <<< "$DATETIME" | cut -d'+' -f1)"
YEAR="$(cut -d':' -f1 <<< "$DATE")"
MONTH="$(cut -d':' -f2 <<< "$DATE")"
DAY="$(cut -d':' -f3 <<< "$DATE")"
HOUR="$(cut -d':' -f1 <<< "$TIME")"
MINUTE="$(cut -d':' -f2 <<< "$TIME")"
SECOND="$(cut -d':' -f3 <<< "$TIME")"
HASH="$(sha256sum "$f" | awk '{print $1}' | xxd -r -p | base64 | head -c 6 | tr '/+' '_-')"
EXT="$(tr '[:upper:]' '[:lower:]' <<< "${f##*.}")"
if [[ "$EXT" == "cr2" || "$EXT" == "cr3" ]]; then
RAW="raw/"
else
RAW=""
fi
FILE="$ABS_DEST_PATH/$YEAR-$MONTH/${RAW}${YEAR}${MONTH}${DAY}-${HOUR}${MINUTE}${SECOND}_${HASH}.${EXT}"
echo "DESTINATION: $FILE"
mkdir -p "$(dirname "$FILE")"
mv -- "$f" "$FILE"
} }
mapfile -d '' -t FILES < <( DATETIME_KEYS = [
find "$ABS_SOURCE_PATH" -type f \( \ ("Composite", "SubSecDateTimeOriginal"),
-iname '*.PNG' -o \ ("Composite", "SubSecCreateDate"),
-iname '*.JPG' -o \ ("ExifIFD", "DateTimeOriginal"),
-iname '*.JPEG' -o \ ("ExifIFD", "CreateDate"),
-iname '*.HEIC' -o \ ("XMP-xmp", "CreateDate"),
-iname '*.CR2' -o \ ("Keys", "CreationDate"),
-iname '*.CR3' -o \ ("QuickTime", "CreateDate"),
-iname '*.MP4' -o \ ("XMP-photoshop", "DateCreated"),
-iname '*.MOV' \ ]
\) -print0
def run(command: list[str], check: bool = True) -> subprocess.CompletedProcess:
    """Execute *command* and hand back the finished process object.

    Both stdout and stderr are captured and decoded as text.

    Args:
        command: Program and arguments, passed as an argument list.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` describing the finished command.
    """
    options = {"capture_output": True, "text": True, "check": check}
    return subprocess.run(command, **options)
def exiftool_data(file: Path) -> dict | None:
    """Return the time-related metadata that exiftool reports for *file*.

    exiftool is asked for JSON output (``-j``) of all time tags
    (``-time:all``), including duplicates (``-a``) and unknown tags
    (``-u``), grouped by family-1 group name (``-g1``), with dates
    rendered as ``%Y-%m-%dT%H:%M:%S%z``.

    Args:
        file: Path of the media file to inspect.

    Returns:
        The first JSON object printed by exiftool, or ``None`` when
        exiftool exits non-zero, prints something that is not valid JSON,
        or prints anything other than a non-empty list whose first element
        is an object.
    """
    # Function-scope import so this block does not depend on the file header.
    import json

    result = run(
        [
            "exiftool",
            "-j",
            "-a",
            "-u",
            "-g1",
            "-time:all",
            "-api", "QuickTimeUTC=1",
            "-d", "%Y-%m-%dT%H:%M:%S%z",
            str(file),
        ],
        check=False,
    )
    if result.returncode != 0:
        return None

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        return None

    # Only accept the expected shape: a non-empty list of objects.
    if not isinstance(data, list) or not data:
        return None
    first = data[0]
    return first if isinstance(first, dict) else None
def exiftool_timestamp(file: Path) -> datetime | None:
    """Pick the best available capture timestamp for *file*.

    The ``(group, tag)`` pairs in ``DATETIME_KEYS`` are tried in order; the
    first value that parses as ``%Y-%m-%dT%H:%M:%S%z`` wins. Values without
    a UTC offset do not match that format and are skipped.

    Args:
        file: Path of the media file to inspect.

    Returns:
        A timezone-aware ``datetime``, or ``None`` when exiftool yields no
        metadata or none of the candidate tags holds a parseable value.
    """
    data = exiftool_data(file)
    if not data:
        return None

    for category, key in DATETIME_KEYS:
        try:
            value = data[category][key]
        except (KeyError, TypeError):
            # Group or tag absent, or the group entry is not a mapping.
            continue
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S%z")
        except (ValueError, TypeError):
            # ValueError: string does not match the format.
            # TypeError: value is not a string (JSON numbers are possible);
            # skip it instead of aborting the whole run.
            continue
    return None
def short_hash(file: Path, length: int = 3) -> str:
    """Return a short, filename-safe content fingerprint of *file*.

    The file is hashed with SHA-256 in 1 MiB chunks; the digest is encoded
    with the URL-safe base64 alphabet (``-`` and ``_`` instead of ``+`` and
    ``/``) and truncated to *length* characters.

    Args:
        file: Path of the file to hash.
        length: Number of leading base64 characters to keep. The default of
            3 matches the previously hard-coded value.

    Returns:
        The first *length* characters of the URL-safe base64 digest.

    Raises:
        ValueError: If *length* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if length < 1:
        raise ValueError(f"length must be >= 1, got {length}")

    h = hashlib.sha256()
    with file.open("rb") as fh:
        while chunk := fh.read(1024 * 1024):
            h.update(chunk)

    return base64.urlsafe_b64encode(h.digest()).decode("ascii")[:length]
def build_destination(dest_root: Path, file: Path, ts: datetime) -> Path:
    """Compute the sorted location for *file* from its timestamp.

    The result has the form
    ``<dest_root>/<YYYY-MM>/[raw/]<YYYYMMDD-HHMMSS>_<hash>.<ext>``, where
    ``<ext>`` is the lower-cased suffix of *file* without the dot, ``<hash>``
    comes from ``short_hash(file)``, and the ``raw`` level is inserted only
    for ``cr2`` and ``cr3`` files.

    Args:
        dest_root: Root directory of the sorted tree.
        file: Source file; read to compute the hash part of the name.
        ts: Timestamp used for the directory and file name.

    Returns:
        The destination path. Nothing is created on disk.
    """
    ext = file.suffix.lower().lstrip(".")
    stamp = ts.strftime("%Y%m%d-%H%M%S")
    digest = short_hash(file)

    folder = dest_root / ts.strftime("%Y-%m")
    if ext in {"cr2", "cr3"}:
        folder = folder / "raw"

    return folder / f"{stamp}_{digest}.{ext}"
def move_unsortable(file: Path, source_root: Path, unsortable_root: Path) -> None:
    """Move *file* into the unsortable tree, keeping its relative path.

    The path of *file* relative to *source_root* is recreated below
    *unsortable_root*. Every directory created on the way, not just the
    leaf, is handed to ``www-data``, as is the moved file. If a file already
    exists at the target location nothing is moved and *file* stays where
    it is.

    Args:
        file: File to move; must be located below *source_root*.
        source_root: Root the relative path is computed against.
        unsortable_root: Root of the tree receiving unsortable files.

    Raises:
        ValueError: If *file* is not located below *source_root*.
        OSError: If creating directories, moving, or changing ownership fails.
    """
    target = unsortable_root / file.relative_to(source_root)
    target_dir = target.parent

    # Record which levels mkdir() is about to create so all of them can be
    # chowned afterwards; chowning only the leaf would leave intermediate
    # directories owned by the invoking user.
    missing = []
    probe = target_dir
    while not probe.exists():
        missing.append(probe)
        probe = probe.parent

    target_dir.mkdir(parents=True, exist_ok=True)
    # When nothing was created, still chown the leaf as before.
    for directory in missing or [target_dir]:
        shutil.chown(str(directory), user="www-data", group="www-data")

    if target.exists():
        # Never overwrite an existing unsortable file.
        return
    shutil.move(str(file), str(target))
    shutil.chown(str(target), user="www-data", group="www-data")
def move_sorted(file: Path, target: Path) -> None:
    """Move *file* to *target* and hand the result to ``www-data``.

    Missing parent directories of *target* are created. Every directory
    created on the way, not just the immediate parent, is chowned to
    ``www-data``, as is the moved file.

    Args:
        file: File to move.
        target: Full destination path including the new file name.

    Raises:
        OSError: If creating directories, moving, or changing ownership fails.
    """
    target_dir = target.parent

    # Record which levels mkdir() is about to create so all of them can be
    # chowned afterwards; chowning only the immediate parent would leave
    # intermediate directories owned by the invoking user.
    missing = []
    probe = target_dir
    while not probe.exists():
        missing.append(probe)
        probe = probe.parent

    target_dir.mkdir(parents=True, exist_ok=True)
    # When nothing was created, still chown the parent as before.
    for directory in missing or [target_dir]:
        shutil.chown(str(directory), user="www-data", group="www-data")

    # NOTE(review): no existence check is done here, unlike in
    # move_unsortable — confirm that replacing an existing target is intended.
    shutil.move(str(file), str(target))
    shutil.chown(str(target), user="www-data", group="www-data")
def process_file(file: Path, source_root: Path, dest_root: Path, unsortable_root: Path) -> tuple[Path, str]:
    """Sort a single media file and report what happened to it.

    A file with a usable timestamp is moved to the location computed by
    ``build_destination``; a file without one is passed to
    ``move_unsortable``. Progress lines are printed to stdout.

    Args:
        file: File to process.
        source_root: Root of the tree being sorted.
        dest_root: Root of the sorted tree.
        unsortable_root: Root of the tree for files without a timestamp.

    Returns:
        ``(file, "sorted")`` or ``(file, "unsortable")``.
    """
    print(f"PROCESSING: {file}")

    ts = exiftool_timestamp(file)
    if ts is not None:
        target = build_destination(dest_root, file, ts)
        print(f"DESTINATION: {target}")
        move_sorted(file, target)
        return file, "sorted"

    print(f"UNSORTABLE: {file}")
    move_unsortable(file, source_root, unsortable_root)
    return file, "unsortable"
def scan_nextcloud(rel_source: str, rel_unsortable: str, rel_dest: str) -> None:
    """Rescan the three affected paths in Nextcloud and start preview generation.

    ``occ files:scan --path <path>`` is run as ``www-data`` for the source,
    unsortable and destination paths, in that order. Afterwards the
    ``nextcloud-generate-new-previews.service`` unit is started. This
    function does not change ownership or permissions itself.

    Args:
        rel_source: Scan path of the source folder.
        rel_unsortable: Scan path of the unsortable folder.
        rel_dest: Scan path of the destination folder.

    Raises:
        subprocess.CalledProcessError: If any command exits non-zero.
    """
    print("SCANNING...")

    occ_scan = [
        "sudo", "-u", "www-data",
        "php", "/opt/nextcloud/occ",
        "files:scan", "--path",
    ]
    for rel_path in (rel_source, rel_unsortable, rel_dest):
        run([*occ_scan, rel_path], check=True)

    run(["systemctl", "start", "nextcloud-generate-new-previews.service"], check=True)
def iter_files(source_root: Path):
    """Yield every media file found anywhere below *source_root*.

    A path qualifies when it is a regular file and its lower-cased suffix is
    listed in ``ALLOWED_EXTS``.

    Args:
        source_root: Directory to search recursively.

    Yields:
        Matching ``Path`` objects, in the order ``rglob`` produces them.
    """
    yield from (
        candidate
        for candidate in source_root.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in ALLOWED_EXTS
    )
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Sort Nextcloud media files by embedded timestamp."
) )
parser.add_argument("nc_user")
parser.add_argument("source_subdir")
parser.add_argument("dest_subdir")
parser.add_argument("unsortable_subdir")
parser.add_argument("--workers", type=int, default=os.cpu_count() or 1)
args = parser.parse_args()
if ((${#FILES[@]})); then nc_user = args.nc_user
export -f process_file source_subdir = args.source_subdir
export ABS_SOURCE_PATH ABS_DEST_PATH ABS_UNSORTABLE_PATH dest_subdir = args.dest_subdir
unsortable_subdir = args.unsortable_subdir
printf '%s\0' "${FILES[@]}" | rel_source_path = f"/{nc_user}/files/{source_subdir}"
xargs -0 -n1 -P"$(nproc)" bash -c 'process_file "$1"' _ abs_source_path = f"/var/lib/nextcloud/{nc_user}/files/{source_subdir}"
echo "SCANNING..." rel_dest_path = f"/{nc_user}/files/{dest_subdir}"
chown -R www-data:www-data "$ABS_DEST_PATH" abs_dest_path = f"/var/lib/nextcloud/{nc_user}/files/{dest_subdir}"
chown -R www-data:www-data "$ABS_UNSORTABLE_PATH"
chmod -R 770 "$ABS_DEST_PATH"
chmod -R 770 "$ABS_UNSORTABLE_PATH"
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_SOURCE_PATH"
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_UNSORTABLE_PATH"
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_DEST_PATH"
else
echo "NO MATCHING FILES FOUND."
fi
echo "FINISH." rel_unsortable_path = f"/{nc_user}/files/{unsortable_subdir}"
abs_unsortable_path = f"/var/lib/nextcloud/{nc_user}/files/{unsortable_subdir}"
source_root = Path(abs_source_path)
dest_root = Path(abs_dest_path)
unsortable_root = Path(abs_unsortable_path)
print("STARTING...")
run(["chown", "-R", "www-data:www-data", str(source_root)], check=True)
run(["chmod", "-R", "770", str(source_root)], check=True)
files = list(iter_files(source_root))
if not files:
print("NO MATCHING FILES FOUND.")
print("FINISH.")
raise SystemExit(0)
with ThreadPoolExecutor(max_workers=max(1, args.workers)) as executor:
futures = {
executor.submit(process_file, file, source_root, dest_root, unsortable_root): file
for file in files
}
for future in as_completed(futures):
future.result()
scan_nextcloud(rel_source_path, rel_unsortable_path, rel_dest_path)
print("FINISH.")