From aba9fa40257aaebe21254283dcd58e299ae55344 Mon Sep 17 00:00:00 2001 From: Nick Pegg Date: Sun, 18 May 2025 08:46:41 -0700 Subject: [PATCH] Reorganize command (#4) Adds a command to reorganize a folder of photos, renaming them so that each filename starts with the date and time the photo was taken and the files therefore sort chronologically. This also renames files associated with each photo, such as a description file (e.g. IMG_1234.md alongside IMG_1234.jpg). --- Cargo.lock | 69 +++++ Cargo.toml | 2 + resources/test_album/moon.jpg | Bin 51463 -> 51649 bytes .../test_album/with_description/moon.jpg | Bin 51463 -> 51649 bytes .../test_album/with_description/mountains.jpg | Bin 71986 -> 72172 bytes src/generate.rs | 28 +- src/lib.rs | 6 +- src/main.rs | 17 ++ src/reorganize.rs | 243 ++++++++++++++++++ src/test_util.rs | 25 ++ 10 files changed, 365 insertions(+), 25 deletions(-) create mode 100644 src/reorganize.rs create mode 100644 src/test_util.rs diff --git a/Cargo.lock b/Cargo.lock index 6b81d0f..e2dd689 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -349,6 +349,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "deranged" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +dependencies = [ + "powerfmt", +] + [[package]] name = "digest" version = "0.10.7" @@ -698,6 +707,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kamadak-exif" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1130d80c7374efad55a117d715a3af9368f0fa7a2c54573afc15a188cd984837" +dependencies = [ + "mutate_once", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -792,6 +810,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "mutate_once" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16cf681a23b4d0a43fc35024c176437f9dcd818db34e0f42ab456a0ee5ad497b" + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ 
-824,6 +848,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-derive" version = "0.4.2" @@ -937,6 +967,7 @@ dependencies = [ "fs_extra", "image", "indicatif", + "kamadak-exif", "log", "mktemp", "pulldown-cmark", @@ -945,6 +976,7 @@ dependencies = [ "serde_yml", "tera", "thiserror 2.0.12", + "time", ] [[package]] @@ -981,6 +1013,12 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1420,6 +1458,37 @@ dependencies = [ "weezl", ] +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" + +[[package]] +name = "time-macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "toml" version = "0.8.22" diff --git a/Cargo.toml b/Cargo.toml index b2dab69..f965a74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ env_logger = "^0.11.8" fs_extra = "^1.3.0" image = "^0.25.6" indicatif = "^0.17.11" +kamadak-exif = "^0.6.1" log = "^0.4.27" pulldown-cmark = "^0.13.0" rayon = "^1.10.0" @@ -21,6 +22,7 @@ 
serde = { version = "^1.0", features = ["derive"] } serde_yml = "^0.0.12" tera = { version = "^1.20", default-features = false } thiserror = "^2.0" +time = { version = "^0.3.41", features = ["formatting", "macros", "parsing"] } [dev-dependencies] mktemp = "^0.5.1" diff --git a/resources/test_album/moon.jpg b/resources/test_album/moon.jpg index ba2fc1153541c5e096b1f2f4df0a46e43de84cb6..51364934d39abfde309bb32cef619a1986103483 100644 GIT binary patch delta 198 zcmZpl#C&iv^90fQhYUMhD>Bm<7<_#hv=|r|I2c$Nr5IR&EJh&qVw8rngBUd!n8D&q z3=B-dP&QCidnN-5RDBeX1_2Ks2I+^;tP>a**nvD210!Rj3Cs*Y{R|>NJZB=KB}9Ue tiJ4&mOp&31!2(8z@&Eq=l>vdFrMZEXfgunn7#dp{n_C$eY!v=-0s!us7&nYn!u^90e2=RchQ03?wHQvd(} diff --git a/resources/test_album/with_description/moon.jpg b/resources/test_album/with_description/moon.jpg index ba2fc1153541c5e096b1f2f4df0a46e43de84cb6..aab6f80d1d5369c851b7bcde499762af133948da 100644 GIT binary patch delta 198 zcmZpl#C&iv^90fQhYUMhD>Bm<7<_#hv=|r|I2c$Nr5IR&EJh&qVw8rngBUd!n8D&q z3=B-dP&QCidnN-5RDBeX1_2Ks2I+^;tP>a**nvD210!Rj3Cs*Y{R|>NJZB=KB}9Ue uiJ4&mOp&31!2(8z@&Eq=l>vdFrMZEXfuWUwk%FPIm9e>%fx$-MFDC%*s2F$v delta 12 UcmX>&nYn!u^90e2=RchQ03?wHQvd(} diff --git a/resources/test_album/with_description/mountains.jpg b/resources/test_album/with_description/mountains.jpg index 59a3b1f8bf14d0103c790b45fa426b7a397cc3d6..085dfbe908815eb3fbe5b5dcaa9dfdae8389859f 100644 GIT binary patch delta 201 zcmdnAiRH~^mI anyhow::Res fs::hard_link(&img.path, &full_size_path) .with_context(|| format!("Error creating hard link at {}", full_size_path.display()))?; - let orig_image = ::image::open(&img.path)?; + let orig_image = ::image::open(&img.path) + .with_context(|| format!("Failed to read image {}", &img.path.display()))?; let thumb_path = output_path.join(&img.thumb_path); log::info!( "Resizing {} -> {}", @@ -303,12 +304,12 @@ struct SlideContext { #[cfg(test)] mod tests { use super::generate; - use crate::skel::make_skeleton; - use mktemp::Temp; use std::collections::{HashSet, VecDeque}; use 
std::ffi::OsStr; use std::path::{Path, PathBuf}; + use crate::test_util::{init, make_test_album}; + #[test] /// Test that the generate function creates a rendered site as we expect it fn test_generate() { @@ -319,27 +320,6 @@ mod tests { check_album(output_path).unwrap(); } - fn init() { - let _ = env_logger::builder().is_test(true).try_init(); - } - - /// Copies the test album to a tempdir and returns the path to it - fn make_test_album() -> Temp { - let tmpdir = Temp::new_dir().unwrap(); - let source_path = Path::new("resources/test_album"); - - log::info!("Creating test album in {}", tmpdir.display()); - make_skeleton(&tmpdir.to_path_buf()).unwrap(); - fs_extra::dir::copy( - &source_path, - &tmpdir, - &fs_extra::dir::CopyOptions::new().content_only(true), - ) - .unwrap(); - - tmpdir - } - /// Does basic sanity checks on an output album fn check_album(root_path: PathBuf) -> anyhow::Result<()> { log::debug!("Checking album dir {}", root_path.display()); diff --git a/src/lib.rs b/src/lib.rs index 1ca7b53..aa53f28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,7 @@ -pub mod config; +pub(crate) mod config; pub mod generate; +pub mod reorganize; pub mod skel; + +#[cfg(test)] +pub(crate) mod test_util; diff --git a/src/main.rs b/src/main.rs index cf1a837..c6b6cfe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use clap::{Parser, Subcommand}; use photojawn::generate::generate; +use photojawn::reorganize::reorganize; use photojawn::skel::make_skeleton; use std::path::Path; @@ -18,6 +19,9 @@ fn main() -> anyhow::Result<()> { let path = generate(&album_path.to_path_buf(), full)?; println!("Album site generated in {}", path.display()); } + Commands::Reorganize { path, dry_run } => { + reorganize(Path::new(&path), dry_run)?; + } } Ok(()) @@ -44,4 +48,17 @@ enum Commands { #[arg(long)] full: bool, }, + /// Reorganize photos in an album by date + Reorganize { + /// Directory of images you want to reorganize. Only image files will be moved. 
+ /// + /// The new image filenames will be the date and time taken, followed by the original + /// filename. For example: + /// original_filename.jpg -> YYYYMMDD_HHMMSS_original_filename.jpg + #[arg()] + path: String, + /// Don't actually reorganize, just say what renames would happen + #[arg(long)] + dry_run: bool, + }, } diff --git a/src/reorganize.rs b/src/reorganize.rs new file mode 100644 index 0000000..f96c089 --- /dev/null +++ b/src/reorganize.rs @@ -0,0 +1,243 @@ +use anyhow::{anyhow, Context}; +use image::ImageReader; +use std::ffi::OsStr; +use std::fs::{rename, File}; +use std::io::BufReader; +use std::path::{Path, PathBuf}; +use std::str::from_utf8; +use thiserror::Error; +use time::macros::format_description; +use time::{OffsetDateTime, PrimitiveDateTime, UtcDateTime}; + +#[derive(Error, Debug)] +pub enum OrganizeError { + #[error("These files are not supported, unable to parse EXIF data: {0:?}")] + ExifNotSupported(Vec), + #[error("File {0} is missing an EXIF DateTimeOriginal field")] + ExifNoDateTime(PathBuf), +} + +pub fn reorganize(dir: &Path, dry_run: bool) -> anyhow::Result<()> { + let renames = get_renames(dir)?; + + if renames.is_empty() { + println!("Nothing to rename"); + return Ok(()); + } + + // Either do the renames, or if dry-run print what the names would be + if dry_run { + for (src, dst) in renames { + println!("{} -> {}", src.display(), dst.display()); + } + println!("Would have renamed the above files"); + } else { + for (src, dst) in renames { + println!("{} -> {}", src.display(), dst.display()); + rename(&src, &dst).with_context(|| { + format!("Failed to rename {} to {}", src.display(), dst.display()) + })?; + } + } + + Ok(()) +} + +/// Returns a vec of tuples of all the renames that need to happen in a directory +fn get_renames(dir: &Path) -> anyhow::Result> { + let mut renames: Vec<(PathBuf, PathBuf)> = Vec::new(); + + // Run through all the images and figure out new names for them + for entry in dir.read_dir()? 
{ + let entry = entry?; + + if !entry.path().is_file() { + continue; + } + + // Only bother with image files, because those are the only hope for EXIF + let is_image: bool = ImageReader::open(entry.path())? + .with_guessed_format()? + .format() + .is_some(); + + let is_cover: bool = entry + .path() + .file_name() + .is_some_and(|n| n.to_string_lossy().starts_with("cover")); + + if is_image && !is_cover { + // TODO: Should we just skip over images with no EXIF data? Find datetime some other + // way? + let Ok(dt) = get_exif_datetime(entry.path()) else { + log::warn!( + "Unable to read datetime from EXIF for {}", + entry.path().display() + ); + continue; + }; + let orig_filename = entry + .path() + .file_name() + .unwrap_or(OsStr::new("")) + .to_string_lossy() + .into_owned(); + + let ext = entry + .path() + .extension() + .ok_or(anyhow!( + "{} is missing an extension", + entry.path().display() + ))? + .to_string_lossy() + .to_string(); + + let new_filename_base = dt.format(format_description!( + "[year][month][day]_[hour][minute][second]_" + ))?; + + // Renaming an already-renamed file should be a no-op + if orig_filename.starts_with(&new_filename_base) { + log::info!("{orig_filename} looks like it was already renamed, skiping"); + continue; + } + + let new_path = entry + .path() + .with_file_name(new_filename_base + &orig_filename) + .with_extension(ext); + + renames.push((entry.path(), new_path.clone())); + + // Check for files associated with this image and set them up to be renamed too, like + // description files that end with .txt or .md + for ext in ["txt", "md"] { + let side_file_path = entry.path().with_extension(ext); + if side_file_path.exists() { + let new_side_file_path = new_path.with_extension(ext); + renames.push((side_file_path, new_side_file_path)); + } + } + } + } + + // Sort renames by the destination + renames.sort_by_key(|(_, dst)| dst.clone()); + + Ok(renames) +} + +/// Tries to figure out the datetime that the image was created from EXIF 
metadata +fn get_exif_datetime(path: PathBuf) -> anyhow::Result { + let format_with_offset = format_description!( + "[year]:[month]:[day] [hour]:[minute]:[second][offset_hour]:[offset_minute]" + ); + let format_without_offset = + format_description!(version = 2, "[year]:[month]:[day] [hour]:[minute]:[second]"); + + let file = File::open(&path).with_context(|| format!("Couldn't open {}", path.display()))?; + let mut bufreader = BufReader::new(file); + let exif = exif::Reader::new() + .read_from_container(&mut bufreader) + .with_context(|| format!("Couldn't read EXIF data from {}", path.display()))?; + let field = exif + .get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY) + .ok_or(OrganizeError::ExifNoDateTime(path.clone()))?; + + let dt: UtcDateTime = match &field.value { + exif::Value::Ascii(v) => { + let s = from_utf8(&v[0])?; + log::debug!("Date string from file: {s}"); + + match OffsetDateTime::parse(s, format_with_offset) { + Ok(v) => v.to_utc(), + Err(_) => PrimitiveDateTime::parse(s, format_without_offset)?.as_utc(), + } + } + _ => return Err(OrganizeError::ExifNoDateTime(path).into()), + }; + + Ok(dt) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_util::{init, make_test_album}; + use time::{Date, Month, Time}; + + #[test] + /// Make sure we can get the datetime from one of our test photos + fn basic_datetime_read() { + init(); + let dt = get_exif_datetime("resources/test_album/moon.jpg".into()).unwrap(); + log::info!("Got dt: {dt}"); + assert_eq!( + dt, + UtcDateTime::new( + Date::from_calendar_date(1970, Month::January, 1).unwrap(), + Time::from_hms(13, 37, 0).unwrap(), + ) + ) + } + + #[test] + fn exif_datetime_missing() { + init(); + let result = get_exif_datetime("resources/test_album/mountains.jpg".into()); + assert!(result.is_err()); + //result.unwrap(); + } + + #[test] + fn test_basic_renames() { + init(); + let tmp_album_dir = make_test_album(); + let dir = tmp_album_dir.join("with_description"); + + log::debug!("Getting 
renames for {}", dir.display()); + let renames = get_renames(&dir).unwrap(); + + assert_eq!( + renames, + vec![ + (dir.join("moon.jpg"), dir.join("19700102_133700_moon.jpg")), + (dir.join("moon.txt"), dir.join("19700102_133700_moon.txt")), + ( + dir.join("mountains.jpg"), + dir.join("19700103_133700_mountains.jpg") + ), + ] + ); + } + + #[test] + /// get_renames() should ignore other stuff in the directory + fn test_other_junk() { + init(); + let tmp_album_dir = make_test_album(); + + let renames = get_renames(&tmp_album_dir).unwrap(); + // No mountains.jpg since it doesn't have EXIF data + assert_eq!( + renames, + vec![( + tmp_album_dir.join("moon.jpg"), + tmp_album_dir.join("19700101_133700_moon.jpg") + )] + ); + } + + #[test] + /// The rename function will prepend date and time to the original filenames. If we do it a + /// second time, it should be a no-op instead of continuing to prepend date and time. + fn test_rerename() { + let tmp_album_dir = make_test_album(); + let dir = tmp_album_dir.join("with_description"); + reorganize(&dir, false).unwrap(); + + let renames = get_renames(&dir).unwrap(); + assert_eq!(renames, Vec::new()); + } +} diff --git a/src/test_util.rs b/src/test_util.rs new file mode 100644 index 0000000..46a3d53 --- /dev/null +++ b/src/test_util.rs @@ -0,0 +1,25 @@ +use crate::skel::make_skeleton; +use mktemp::Temp; +use std::path::Path; + +pub fn init() { + let _ = env_logger::builder().is_test(true).try_init(); +} + +/// Copies the test album to a tempdir and returns the path to it. 
Returns a Temp object which +/// cleans up the directory on drop, so make sure to persist the variable until you're done with it +pub fn make_test_album() -> Temp { + let tmpdir = Temp::new_dir().unwrap(); + let source_path = Path::new("resources/test_album").canonicalize().unwrap(); + + log::info!("Creating test album in {}", tmpdir.display()); + make_skeleton(&tmpdir.to_path_buf()).unwrap(); + fs_extra::dir::copy( + &source_path, + &tmpdir, + &fs_extra::dir::CopyOptions::new().content_only(true), + ) + .unwrap(); + + tmpdir +}