dot

packages and services management
Log | Files | Refs | README

commit 78dc214d510b0e3cb4417bac9ccb7c7fde5f96df
parent ae0c4f1c9265c909b4aa3f3f53946bb5c0fc370d
Author: josuah <mail@josuah.net>
Date:   Wed, 28 Sep 2016 21:11:47 -0400

Added a draft for a lsync improvement using hashes.

Diffstat:
M cron/.config/cron/jobs/beepnow | 2 +-
A lsync | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M s-nail/.mailrc | 2 +-
3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/cron/.config/cron/jobs/beepnow b/cron/.config/cron/jobs/beepnow
@@ -1,2 +1,2 @@
 #!/bin/sh
-beep
+printf 'beep' > ~/beep
diff --git a/lsync b/lsync
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Case 1: files identical on same drive with different names
+# Case 2: files identical across drives with different names
+# Case 3: files identical across drives with same name
+# Case 4: files identical across drives with same and different names
+# Case 5: files identical across drives with same names and with different name on other drive
+# Case 6: files identical across drives with different names and with different name on other drive
+
+# Conclusion: When files are identical by their content, but differ by their name.
+
+# Solution to case 1: delete the oldest version.
+# Solution to case 2: rename to the latest version.
+# Solution to case 3: it is ok.
+# Solution to case 4: pick the latest of them all, duplicate it to all drive.
+# Solution to case 5: pick the latest of them all, duplicate it to all drive.
+# Solution to case 6: pick the latest of them all, duplicate it to all drive.
+
+# Conclusion: if file names are different, pick the latest of all files and propagates it to all missing place, deleting the old files.
+
+# Case 7: files are different on same drive with different name.
+# Case 8: files are different across drives with different name.
+# Case 9: files are different across drives with same name.
+
+# Case 7: Do not change: genuine different files.
+# Case 8: Do not change: genuine different files.
+# Case 9: Keep the latest version.
+
+# Conclusion: If files have the same file name, keep the latest.
+
+# Summary:
+# If some files are identical with different names: Update the names to the latests
+# If some files are different with the same name across drives: update the file content.
+
+# To check if they are identical, do uniq -d -w 32 (duplicate hash in the list)
+# To check if they have the same file name across drives, do sort -u on whole list, then split the hash and filename with read hash path, then remove the prefix of each file, then finally print the hash-file combo then uniq -u to check either the duplicate files or the files that are only on one drive. From this list, if there is a version on both drive, delete the older file, and then for both, copy the file to the other drive,
+
+tmp="$(mktemp lsync.XXXXXX)"
+
+trap "rm -f '$tmp'" KILL INT EXIT
+
+#
+# Generate md5 sums for all files of both drives.
+#
+find "$1" "$2" -type f -exec md5sum {} \; > "$tmp"
+
+#
+# Find the duplicates files, delete all but the latest, and copy it to the
+# other drive if missing.
+#
+printf '\n\nDuplicate files:\n\n'
+sort -t ' ' -k 1 "$tmp" \
+| uniq -d -w 32 \
+| sed 's/[^ ]* */' \
+| xargs -n 1 -I '{}' grep '{}' "$tmp" \
+| sed 's/[^ ]* */' \
+| xargs stat -c '%Y %n' \
+| sort \
+| sed 's/[^ ]* */' \
+| {
+	IFS= read -r latest
+	while IFS= read -r other
+	do
+		[ -f "$path/$1" ] && printf 'rm %s\n' "$1/$other"
+		[ -f "$path/$2" ] && printf 'rm %s\n' "$2/$other"
+	done
+
+	if [ -f "$1/$latest" ]
+	then printf 'cp %s %s\n' "$1/$latest" "$2/$latest"
+	elif [ -f "$2/$latest" ]
+	then printf 'cp %s %s\n' "$2/$latest" "$1/$latest"
+	fi
+}
diff --git a/s-nail/.mailrc b/s-nail/.mailrc
@@ -151,7 +151,7 @@ define filter {
 	move (header Received "vmsympa1.univ-rennes1.fr") ~/Mail/l.univ-rennes1
 	move (header Return-Path "listes.univ-rennes1.fr") ~/Mail/l.univ-rennes1
-	move (from "breizh-entropy.org") ~/Mail/l.breizh-entropy
+	move (header List-Id "breizh-entropy.org") ~/Mail/l.breizh-entropy
 # Services