nc picsort in python
This commit is contained in:
parent
60c2c42a49
commit
b62649cae0
1 changed file with 201 additions and 102 deletions
|
|
@ -1,110 +1,209 @@
|
||||||
#!/bin/bash
|
#!/usr/bin/env python3
|
||||||
set -euo pipefail
|
import argparse
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
if [[ $# -ne 4 ]]; then
|
|
||||||
echo "Usage: $0 <nc_user> <source_subdir> <dest_subdir> <unsortable_subdir>" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
NC_USER="$1"
|
# File extensions (lowercase, dot included) that the sorter will process.
# Everything else under the source tree is left untouched.
ALLOWED_EXTS = {
    ".png", ".jpg", ".jpeg", ".heic", ".cr2", ".cr3", ".mp4", ".mov",
    ".webp", ".avif", ".gif",
}
|
||||||
|
|
||||||
mapfile -d '' -t FILES < <(
|
# Candidate capture-time tags, as (exiftool family-1 group, tag name) pairs.
# Ordered by decreasing trustworthiness: sub-second EXIF composites first,
# plain EXIF next, then XMP and QuickTime container dates as fallbacks.
DATETIME_KEYS = [
    ("Composite", "SubSecDateTimeOriginal"),
    ("Composite", "SubSecCreateDate"),
    ("ExifIFD", "DateTimeOriginal"),
    ("ExifIFD", "CreateDate"),
    ("XMP-xmp", "CreateDate"),
    ("Keys", "CreationDate"),
    ("QuickTime", "CreateDate"),
    ("XMP-photoshop", "DateCreated"),
]
|
||||||
\) -print0
|
|
||||||
)
|
|
||||||
|
|
||||||
if ((${#FILES[@]})); then
|
|
||||||
export -f process_file
|
|
||||||
export ABS_SOURCE_PATH ABS_DEST_PATH ABS_UNSORTABLE_PATH
|
|
||||||
|
|
||||||
printf '%s\0' "${FILES[@]}" |
|
def run(command: list[str], check: bool = True) -> subprocess.CompletedProcess:
    """Execute *command* and capture its stdout/stderr as text.

    When *check* is true, a non-zero exit status raises
    subprocess.CalledProcessError; otherwise the CompletedProcess is
    returned for the caller to inspect.
    """
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        check=check,
    )
    return completed
|
||||||
|
|
||||||
echo "SCANNING..."
|
|
||||||
chown -R www-data:www-data "$ABS_DEST_PATH"
|
|
||||||
chown -R www-data:www-data "$ABS_UNSORTABLE_PATH"
|
|
||||||
chmod -R 770 "$ABS_DEST_PATH"
|
|
||||||
chmod -R 770 "$ABS_UNSORTABLE_PATH"
|
|
||||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_SOURCE_PATH"
|
|
||||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_UNSORTABLE_PATH"
|
|
||||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_DEST_PATH"
|
|
||||||
else
|
|
||||||
echo "NO MATCHING FILES FOUND."
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "FINISH."
|
def exiftool_data(file: Path) -> dict | None:
    """Return exiftool's time-related metadata for *file*, or None.

    Invokes exiftool with JSON output grouped by family-1 tag group
    (so keys look like data["ExifIFD"]["CreateDate"]), restricted to
    timestamp tags, with dates formatted as ISO 8601 plus a numeric
    offset so they can be parsed with %Y-%m-%dT%H:%M:%S%z.

    Returns None when exiftool exits non-zero or its output cannot be
    interpreted.
    """
    # Local import keeps this fix self-contained; the original used the
    # __import__("json") hack instead of a real import.
    import json

    result = run([
        "exiftool",
        "-j",                       # JSON output
        "-a",                       # allow duplicate tags
        "-u",                       # include unknown tags
        "-g1",                      # group tags by family-1 group name
        "-time:all",                # restrict to time-related tags
        "-api", "QuickTimeUTC=1",   # interpret QuickTime dates as UTC
        "-d", "%Y-%m-%dT%H:%M:%S%z",
        str(file),
    ], check=False)
    if result.returncode != 0:
        return None
    try:
        data = json.loads(result.stdout)
        # exiftool -j emits a JSON array with one object per input file.
        return data[0] if data else None
    except (json.JSONDecodeError, LookupError, TypeError):
        # Unparseable output or an unexpected top-level shape.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def exiftool_timestamp(file: Path) -> datetime | None:
    """Return the best capture timestamp for *file*, or None.

    Tags listed in DATETIME_KEYS are tried in priority order; the first
    value that parses as %Y-%m-%dT%H:%M:%S%z wins. Returns None when no
    metadata is available or no candidate parses.
    """
    metadata = exiftool_data(file)
    if not metadata:
        return None

    for group, tag in DATETIME_KEYS:
        try:
            raw_value = metadata[group][tag]
        except (KeyError, TypeError):
            # Group absent, tag absent, or the group isn't a mapping.
            continue
        try:
            parsed = datetime.strptime(raw_value, "%Y-%m-%dT%H:%M:%S%z")
        except ValueError:
            # Tag present but not in the expected format — try the next one.
            continue
        return parsed

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def short_hash(file: Path) -> str:
    """Return a 3-character, filename-safe digest of *file*'s contents.

    The SHA-256 is computed in 1 MiB chunks (so large videos are not
    read into memory at once), base64-encoded, truncated to three
    characters, and the base64 characters '/' and '+' — unsafe or
    awkward in filenames — are mapped to '_' and '-'.
    """
    hasher = hashlib.sha256()
    with file.open("rb") as stream:
        while chunk := stream.read(1024 * 1024):
            hasher.update(chunk)
    encoded = base64.b64encode(hasher.digest()).decode("ascii")
    return encoded[:3].replace("/", "_").replace("+", "-")
|
||||||
|
|
||||||
|
|
||||||
|
def build_destination(dest_root: Path, file: Path, ts: datetime) -> Path:
    """Compute the sorted target path for *file* given timestamp *ts*.

    Layout: <dest_root>/<YYYY-MM>[/raw]/<YYYYMMDD-HHMMSS>_<hash>.<ext>
    Camera raw files (cr2/cr3) are placed in a "raw" subfolder of the
    month directory; the short content hash keeps same-second shots from
    colliding.
    """
    ext = file.suffix.lower().lstrip(".")

    # Month bucket, e.g. "2023-07"; raw files get their own subfolder.
    bucket = dest_root / ts.strftime("%Y-%m")
    if ext in {"cr2", "cr3"}:
        bucket = bucket / "raw"

    stamp = ts.strftime("%Y%m%d-%H%M%S")
    return bucket / f"{stamp}_{short_hash(file)}.{ext}"
|
||||||
|
|
||||||
|
|
||||||
|
def move_unsortable(file: Path, source_root: Path, unsortable_root: Path) -> None:
    """Move *file* into the unsortable tree, preserving its relative path.

    Behaves like `mv -n`: if a file with the same name already exists at
    the destination, nothing is moved. Created directories and the moved
    file are chowned to www-data so Nextcloud can manage them.
    """
    destination_dir = (unsortable_root / file.relative_to(source_root)).parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    shutil.chown(str(destination_dir), user="www-data", group="www-data")

    destination = destination_dir / file.name
    if destination.exists():
        # No-clobber: leave both the source file and the existing target alone.
        return
    shutil.move(str(file), str(destination))
    shutil.chown(str(destination), user="www-data", group="www-data")
|
||||||
|
|
||||||
|
|
||||||
|
def move_sorted(file: Path, target: Path) -> None:
    """Move *file* to *target*, creating parent directories as needed.

    The parent directory and the moved file are chowned to www-data.
    Unlike move_unsortable, an existing target IS overwritten (the
    content-hash filename makes accidental collisions a re-run of the
    same file).
    """
    parent = target.parent
    parent.mkdir(parents=True, exist_ok=True)
    shutil.chown(str(parent), user="www-data", group="www-data")
    shutil.move(str(file), str(target))
    shutil.chown(str(target), user="www-data", group="www-data")
|
||||||
|
|
||||||
|
def process_file(file: Path, source_root: Path, dest_root: Path, unsortable_root: Path) -> tuple[Path, str]:
    """Sort a single file by its embedded timestamp.

    Files with a readable timestamp are renamed into the destination
    tree; files without one are moved (path-preserving) into the
    unsortable tree. Returns (file, "sorted") or (file, "unsortable").
    """
    print(f"PROCESSING: {file}")

    timestamp = exiftool_timestamp(file)
    if timestamp is None:
        print(f"UNSORTABLE: {file}")
        move_unsortable(file, source_root, unsortable_root)
        return file, "unsortable"

    destination = build_destination(dest_root, file, timestamp)
    print(f"DESTINATION: {destination}")
    move_sorted(file, destination)
    return file, "sorted"
|
||||||
|
|
||||||
|
|
||||||
|
def scan_nextcloud(rel_source: str, rel_unsortable: str, rel_dest: str) -> None:
    """Refresh Nextcloud's file cache for the three affected paths.

    Runs `occ files:scan` as www-data for the source, unsortable, and
    destination paths (in that order), then kicks off preview
    generation via systemd.
    """
    print("SCANNING...")

    occ_scan = ["sudo", "-u", "www-data", "php", "/opt/nextcloud/occ", "files:scan", "--path"]
    for rel_path in (rel_source, rel_unsortable, rel_dest):
        run([*occ_scan, rel_path], check=True)

    run(["systemctl", "start", "nextcloud-generate-new-previews.service"], check=True)
|
||||||
|
|
||||||
|
|
||||||
|
def iter_files(source_root: Path):
    """Yield every regular file under *source_root* with a sortable extension.

    Extension matching is case-insensitive against ALLOWED_EXTS.
    """
    yield from (
        candidate
        for candidate in source_root.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in ALLOWED_EXTS
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Entry point: parse arguments, sort all media files, rescan Nextcloud.

    Expects the Nextcloud data layout /var/lib/nextcloud/<user>/files/...
    and must run with privileges sufficient to chown/chmod the source
    tree and to sudo as www-data.
    """
    parser = argparse.ArgumentParser(
        description="Sort Nextcloud media files by embedded timestamp."
    )
    parser.add_argument("nc_user")
    parser.add_argument("source_subdir")
    parser.add_argument("dest_subdir")
    parser.add_argument("unsortable_subdir")
    parser.add_argument("--workers", type=int, default=os.cpu_count() or 1)
    args = parser.parse_args()

    # Relative paths are what `occ files:scan --path` expects; the
    # absolute roots are where the files actually live on disk.
    rel_source_path = f"/{args.nc_user}/files/{args.source_subdir}"
    rel_dest_path = f"/{args.nc_user}/files/{args.dest_subdir}"
    rel_unsortable_path = f"/{args.nc_user}/files/{args.unsortable_subdir}"

    source_root = Path(f"/var/lib/nextcloud{rel_source_path}")
    dest_root = Path(f"/var/lib/nextcloud{rel_dest_path}")
    unsortable_root = Path(f"/var/lib/nextcloud{rel_unsortable_path}")

    print("STARTING...")

    # Normalize ownership/permissions up front so the later moves succeed.
    run(["chown", "-R", "www-data:www-data", str(source_root)], check=True)
    run(["chmod", "-R", "770", str(source_root)], check=True)

    files = list(iter_files(source_root))

    if not files:
        print("NO MATCHING FILES FOUND.")
        print("FINISH.")
        return

    # exiftool calls and file moves are I/O-bound, so threads parallelize
    # them well despite the GIL.
    with ThreadPoolExecutor(max_workers=max(1, args.workers)) as executor:
        futures = {
            executor.submit(process_file, file, source_root, dest_root, unsortable_root): file
            for file in files
        }
        for future in as_completed(futures):
            future.result()  # re-raise any exception from a worker

    scan_nextcloud(rel_source_path, rel_unsortable_path, rel_dest_path)

    print("FINISH.")


if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in a new issue