nc picsort in python
This commit is contained in:
parent
60c2c42a49
commit
b62649cae0
1 changed files with 201 additions and 102 deletions
|
|
@ -1,110 +1,209 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import base64
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
if [[ $# -ne 4 ]]; then
|
||||
echo "Usage: $0 <nc_user> <source_subdir> <dest_subdir> <unsortable_subdir>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
NC_USER="$1"
|
||||
SOURCE_SUBDIR="$2"
|
||||
DEST_SUBDIR="$3"
|
||||
UNSORTABLE_SUBDIR="$4"
|
||||
|
||||
REL_SOURCE_PATH="/$NC_USER/files/$SOURCE_SUBDIR"
|
||||
ABS_SOURCE_PATH="/var/lib/nextcloud/$NC_USER/files/$SOURCE_SUBDIR"
|
||||
|
||||
REL_DEST_PATH="/$NC_USER/files/$DEST_SUBDIR"
|
||||
ABS_DEST_PATH="/var/lib/nextcloud/$NC_USER/files/$DEST_SUBDIR"
|
||||
|
||||
REL_UNSORTABLE_PATH="/$NC_USER/files/$UNSORTABLE_SUBDIR"
|
||||
ABS_UNSORTABLE_PATH="/var/lib/nextcloud/$NC_USER/files/$UNSORTABLE_SUBDIR"
|
||||
|
||||
echo "STARTING..."
|
||||
|
||||
chown -R www-data:www-data "$ABS_SOURCE_PATH"
|
||||
chmod -R 770 "$ABS_SOURCE_PATH"
|
||||
|
||||
process_file() {
|
||||
local f="$1"
|
||||
local DATETIME DATE TIME YEAR MONTH DAY HOUR MINUTE SECOND HASH EXT RAW FILE RELPATH DIRNAME
|
||||
|
||||
echo "PROCESSING: $f"
|
||||
|
||||
DATETIME="$(
|
||||
exiftool -s -s -s -CreateDate "$f" 2>/dev/null | head -n1
|
||||
)"
|
||||
|
||||
if [[ -z "$DATETIME" ]]; then
|
||||
DATETIME="$(
|
||||
exiftool -s -s -s -FileModifyDate "$f" 2>/dev/null | head -n1 | cut -d'+' -f1 | cut -d'-' -f1
|
||||
)"
|
||||
fi
|
||||
|
||||
if [[ -z "$DATETIME" ]]; then
|
||||
RELPATH="$(realpath --relative-to="$ABS_SOURCE_PATH" "$f")"
|
||||
DIRNAME="$(dirname "$ABS_UNSORTABLE_PATH/$RELPATH")"
|
||||
echo "UNSORTABLE: $f"
|
||||
mkdir -p "$DIRNAME"
|
||||
mv -n -- "$f" "$DIRNAME/"
|
||||
return 0
|
||||
fi
|
||||
|
||||
DATE="$(cut -d' ' -f1 <<< "$DATETIME")"
|
||||
TIME="$(cut -d' ' -f2 <<< "$DATETIME" | cut -d'+' -f1)"
|
||||
|
||||
YEAR="$(cut -d':' -f1 <<< "$DATE")"
|
||||
MONTH="$(cut -d':' -f2 <<< "$DATE")"
|
||||
DAY="$(cut -d':' -f3 <<< "$DATE")"
|
||||
HOUR="$(cut -d':' -f1 <<< "$TIME")"
|
||||
MINUTE="$(cut -d':' -f2 <<< "$TIME")"
|
||||
SECOND="$(cut -d':' -f3 <<< "$TIME")"
|
||||
|
||||
HASH="$(sha256sum "$f" | awk '{print $1}' | xxd -r -p | base64 | head -c 6 | tr '/+' '_-')"
|
||||
EXT="$(tr '[:upper:]' '[:lower:]' <<< "${f##*.}")"
|
||||
|
||||
if [[ "$EXT" == "cr2" || "$EXT" == "cr3" ]]; then
|
||||
RAW="raw/"
|
||||
else
|
||||
RAW=""
|
||||
fi
|
||||
|
||||
FILE="$ABS_DEST_PATH/$YEAR-$MONTH/${RAW}${YEAR}${MONTH}${DAY}-${HOUR}${MINUTE}${SECOND}_${HASH}.${EXT}"
|
||||
echo "DESTINATION: $FILE"
|
||||
mkdir -p "$(dirname "$FILE")"
|
||||
mv -- "$f" "$FILE"
|
||||
# File suffixes (lower-case, leading dot included) that iter_files() accepts.
# iter_files() compares each candidate's ``path.suffix.lower()`` against this
# set, so matching is case-insensitive on the file name side.
ALLOWED_EXTS = {
    ".png", ".jpg", ".jpeg", ".heic", ".cr2", ".cr3", ".mp4", ".mov",
    ".webp", ".avif", ".gif",
}
|
||||
|
||||
mapfile -d '' -t FILES < <(
|
||||
find "$ABS_SOURCE_PATH" -type f \( \
|
||||
-iname '*.PNG' -o \
|
||||
-iname '*.JPG' -o \
|
||||
-iname '*.JPEG' -o \
|
||||
-iname '*.HEIC' -o \
|
||||
-iname '*.CR2' -o \
|
||||
-iname '*.CR3' -o \
|
||||
-iname '*.MP4' -o \
|
||||
-iname '*.MOV' \
|
||||
\) -print0
|
||||
# (group, tag) pairs looked up in exiftool's grouped JSON output as
# ``data[group][tag]``. Order matters: exiftool_timestamp() walks this list
# from top to bottom and returns the first entry that is present and parses.
DATETIME_KEYS = [
    ("Composite", "SubSecDateTimeOriginal"),
    ("Composite", "SubSecCreateDate"),
    ("ExifIFD", "DateTimeOriginal"),
    ("ExifIFD", "CreateDate"),
    ("XMP-xmp", "CreateDate"),
    ("Keys", "CreationDate"),
    ("QuickTime", "CreateDate"),
    ("XMP-photoshop", "DateCreated"),
]
|
||||
|
||||
|
||||
def run(command: list[str], check: bool = True) -> subprocess.CompletedProcess:
    """Execute *command* and return its completed-process record.

    stdout and stderr are both captured and decoded as text. When *check*
    is true, a non-zero exit status raises ``subprocess.CalledProcessError``;
    otherwise the caller inspects ``returncode`` itself.
    """
    completed = subprocess.run(
        command,
        check=check,
        capture_output=True,
        text=True,
    )
    return completed
|
||||
|
||||
|
||||
def exiftool_data(file: Path) -> dict | None:
    """Return exiftool's time-related metadata for *file*, or ``None``.

    exiftool is invoked with JSON output (``-j``), duplicate and unknown tags
    allowed (``-a``, ``-u``), tags grouped by family-1 group name (``-g1``),
    restricted to time tags (``-time:all``), QuickTime timestamps treated as
    UTC, and dates rendered as ``%Y-%m-%dT%H:%M:%S%z``.

    Args:
        file: Path of the media file to inspect.

    Returns:
        The first record of exiftool's JSON array (a dict keyed by group
        name), or ``None`` when exiftool exits non-zero, prints something
        that is not valid JSON, or prints no usable record.
    """
    # Local import: the module header does not import json.
    import json

    result = run([
        "exiftool",
        "-j",
        "-a",
        "-u",
        "-g1",
        "-time:all",
        "-api", "QuickTimeUTC=1",
        "-d", "%Y-%m-%dT%H:%M:%S%z",
        str(file),
    ], check=False)
    if result.returncode != 0:
        return None

    try:
        records = json.loads(result.stdout)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None

    # exiftool -j prints a JSON array with one object per input file; anything
    # else is treated as "no metadata" rather than passed on to callers.
    if not isinstance(records, list) or not records:
        return None
    first = records[0]
    return first if isinstance(first, dict) else None
|
||||
|
||||
|
||||
def exiftool_timestamp(file: Path) -> datetime | None:
    """Return the best available capture timestamp for *file*.

    The candidates in ``DATETIME_KEYS`` are tried in order against the
    grouped metadata from ``exiftool_data``; the first one that is present
    and parses as ``%Y-%m-%dT%H:%M:%S%z`` wins.

    Args:
        file: Path of the media file to inspect.

    Returns:
        A timezone-aware ``datetime``, or ``None`` when no metadata is
        available or no candidate tag holds a parseable value.
    """
    data = exiftool_data(file)
    if not data:
        return None

    for category, key in DATETIME_KEYS:
        try:
            value = data[category][key]
        except (KeyError, TypeError):
            # Group or tag missing, or the group is not a mapping.
            continue
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S%z")
        except (ValueError, TypeError):
            # ValueError: string in another format (e.g. an unformatted or
            # zero date). TypeError: JSON gave a non-string value such as a
            # number or list. Either way, fall through to the next candidate
            # instead of aborting the worker.
            continue

    return None
|
||||
|
||||
|
||||
def short_hash(file: Path) -> str:
    """Return a 3-character, filename-safe fingerprint of *file*'s content.

    The file is streamed through SHA-256 in 1 MiB reads; the digest is
    base64-encoded with ``-`` and ``_`` standing in for ``+`` and ``/``,
    and the first three characters are returned.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with file.open("rb") as stream:
        while chunk := stream.read(block_size):
            hasher.update(chunk)
    encoded = base64.urlsafe_b64encode(hasher.digest())
    return encoded[:3].decode("ascii")
|
||||
|
||||
|
||||
def build_destination(dest_root: Path, file: Path, ts: datetime) -> Path:
    """Compute where *file* should live under *dest_root*.

    Layout: ``<dest_root>/<YYYY-MM>/[raw/]<YYYYMMDD>-<HHMMSS>_<hash>.<ext>``.
    The extension is the lower-cased suffix of *file* without its dot, the
    hash comes from ``short_hash(file)``, and the ``raw`` sub-directory is
    used only for ``cr2`` / ``cr3`` files. Nothing is created on disk.
    """
    suffix = file.suffix.lower().lstrip(".")
    stamp = ts.strftime("%Y%m%d-%H%M%S")
    folder = dest_root / ts.strftime("%Y-%m")
    digest = short_hash(file)

    if suffix in {"cr2", "cr3"}:
        folder = folder / "raw"

    return folder / f"{stamp}_{digest}.{suffix}"
|
||||
|
||||
|
||||
def move_unsortable(file: Path, source_root: Path, unsortable_root: Path) -> None:
    """Move *file* into *unsortable_root*, mirroring its path under *source_root*.

    Every directory that has to be created for the move is handed to
    ``www-data:www-data``, not only the innermost one; ``mkdir(parents=True)``
    would otherwise leave the intermediate levels owned by the invoking user.
    If a file of the same name already exists at the destination, nothing is
    moved and *file* stays where it is.

    Args:
        file: File to move; must be located below *source_root*.
        source_root: Root that *file*'s relative path is computed against.
        unsortable_root: Root of the mirror tree for unsortable files.

    Raises:
        ValueError: If *file* is not below *source_root*.
        OSError, LookupError: Propagated from mkdir / move / chown.
    """
    relpath = file.relative_to(source_root)
    target_dir = (unsortable_root / relpath).parent

    # Record which ancestors are missing *before* creating them, so each new
    # level can be chowned afterwards.
    created = []
    probe = target_dir
    while not probe.exists():
        created.append(probe)
        probe = probe.parent

    target_dir.mkdir(parents=True, exist_ok=True)
    # `created` starts with target_dir when it was missing; an already
    # existing target_dir is still re-owned, as before.
    for directory in created or [target_dir]:
        shutil.chown(str(directory), user="www-data", group="www-data")

    target = target_dir / file.name
    if target.exists():
        # Never overwrite an existing unsortable file.
        return
    shutil.move(str(file), str(target))
    shutil.chown(str(target), user="www-data", group="www-data")
|
||||
|
||||
|
||||
def move_sorted(file: Path, target: Path) -> None:
    """Move *file* to *target* and hand ownership to ``www-data:www-data``.

    Every directory that has to be created on the way to *target* is
    chowned, not only the immediate parent. ``mkdir(parents=True)`` may
    create several levels at once (for example ``<YYYY-MM>/raw``), and the
    outer ones would otherwise stay owned by the invoking user.

    Args:
        file: Existing file to move.
        target: Full destination path, including the new file name.

    Raises:
        OSError, LookupError: Propagated from mkdir / move / chown.
    """
    parent = target.parent

    # Record which ancestors are missing *before* creating them, so each new
    # level can be chowned afterwards.
    created = []
    probe = parent
    while not probe.exists():
        created.append(probe)
        probe = probe.parent

    parent.mkdir(parents=True, exist_ok=True)
    # `created` starts with `parent` when it was missing; an already existing
    # parent is still re-owned, as before.
    for directory in created or [parent]:
        shutil.chown(str(directory), user="www-data", group="www-data")

    shutil.move(str(file), str(target))
    shutil.chown(str(target), user="www-data", group="www-data")
|
||||
|
||||
def process_file(file: Path, source_root: Path, dest_root: Path, unsortable_root: Path) -> tuple[Path, str]:
    """Sort one media file by its embedded timestamp.

    Files with a usable timestamp are moved to the path computed by
    ``build_destination``; all others are moved into the unsortable tree.
    Progress is printed to stdout.

    Returns:
        ``(file, "sorted")`` or ``(file, "unsortable")``, where *file* is the
        original (pre-move) path.
    """
    print(f"PROCESSING: {file}")
    ts = exiftool_timestamp(file)

    if ts is not None:
        target = build_destination(dest_root, file, ts)
        print(f"DESTINATION: {target}")
        move_sorted(file, target)
        return file, "sorted"

    print(f"UNSORTABLE: {file}")
    move_unsortable(file, source_root, unsortable_root)
    return file, "unsortable"
|
||||
|
||||
|
||||
def scan_nextcloud(rel_source: str, rel_unsortable: str, rel_dest: str) -> None:
    """Rescan the three given paths with Nextcloud's occ, then start preview generation.

    ``occ files:scan --path <path>`` is run as ``www-data`` for the source,
    unsortable and destination paths, in that order. Afterwards the
    ``nextcloud-generate-new-previews.service`` unit is started. Every
    command runs with ``check=True``, so the first failure raises
    ``subprocess.CalledProcessError`` and skips the remaining steps.
    """
    print("SCANNING...")

    occ_scan = [
        "sudo", "-u", "www-data",
        "php", "/opt/nextcloud/occ",
        "files:scan", "--path",
    ]
    for rel_path in (rel_source, rel_unsortable, rel_dest):
        run([*occ_scan, rel_path], check=True)

    run(["systemctl", "start", "nextcloud-generate-new-previews.service"], check=True)
|
||||
|
||||
|
||||
def iter_files(source_root: Path):
    """Yield every regular file below *source_root* with an allowed suffix.

    The tree is walked recursively; a path is yielded when it is a file and
    its lower-cased suffix is a member of ``ALLOWED_EXTS``.
    """
    yield from (
        entry
        for entry in source_root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in ALLOWED_EXTS
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Sort Nextcloud media files by embedded timestamp."
|
||||
)
|
||||
parser.add_argument("nc_user")
|
||||
parser.add_argument("source_subdir")
|
||||
parser.add_argument("dest_subdir")
|
||||
parser.add_argument("unsortable_subdir")
|
||||
parser.add_argument("--workers", type=int, default=os.cpu_count() or 1)
|
||||
args = parser.parse_args()
|
||||
|
||||
if ((${#FILES[@]})); then
|
||||
export -f process_file
|
||||
export ABS_SOURCE_PATH ABS_DEST_PATH ABS_UNSORTABLE_PATH
|
||||
nc_user = args.nc_user
|
||||
source_subdir = args.source_subdir
|
||||
dest_subdir = args.dest_subdir
|
||||
unsortable_subdir = args.unsortable_subdir
|
||||
|
||||
printf '%s\0' "${FILES[@]}" |
|
||||
xargs -0 -n1 -P"$(nproc)" bash -c 'process_file "$1"' _
|
||||
rel_source_path = f"/{nc_user}/files/{source_subdir}"
|
||||
abs_source_path = f"/var/lib/nextcloud/{nc_user}/files/{source_subdir}"
|
||||
|
||||
echo "SCANNING..."
|
||||
chown -R www-data:www-data "$ABS_DEST_PATH"
|
||||
chown -R www-data:www-data "$ABS_UNSORTABLE_PATH"
|
||||
chmod -R 770 "$ABS_DEST_PATH"
|
||||
chmod -R 770 "$ABS_UNSORTABLE_PATH"
|
||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_SOURCE_PATH"
|
||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_UNSORTABLE_PATH"
|
||||
sudo -u www-data php /opt/nextcloud/occ files:scan --path "$REL_DEST_PATH"
|
||||
else
|
||||
echo "NO MATCHING FILES FOUND."
|
||||
fi
|
||||
rel_dest_path = f"/{nc_user}/files/{dest_subdir}"
|
||||
abs_dest_path = f"/var/lib/nextcloud/{nc_user}/files/{dest_subdir}"
|
||||
|
||||
echo "FINISH."
|
||||
rel_unsortable_path = f"/{nc_user}/files/{unsortable_subdir}"
|
||||
abs_unsortable_path = f"/var/lib/nextcloud/{nc_user}/files/{unsortable_subdir}"
|
||||
|
||||
source_root = Path(abs_source_path)
|
||||
dest_root = Path(abs_dest_path)
|
||||
unsortable_root = Path(abs_unsortable_path)
|
||||
|
||||
print("STARTING...")
|
||||
|
||||
run(["chown", "-R", "www-data:www-data", str(source_root)], check=True)
|
||||
run(["chmod", "-R", "770", str(source_root)], check=True)
|
||||
|
||||
files = list(iter_files(source_root))
|
||||
|
||||
if not files:
|
||||
print("NO MATCHING FILES FOUND.")
|
||||
print("FINISH.")
|
||||
raise SystemExit(0)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max(1, args.workers)) as executor:
|
||||
futures = {
|
||||
executor.submit(process_file, file, source_root, dest_root, unsortable_root): file
|
||||
for file in files
|
||||
}
|
||||
for future in as_completed(futures):
|
||||
future.result()
|
||||
|
||||
scan_nextcloud(rel_source_path, rel_unsortable_path, rel_dest_path)
|
||||
|
||||
print("FINISH.")
|
||||
Loading…
Reference in a new issue