From b38e01c72add1faf4ce0eb5c0f5e354e21a0f477 Mon Sep 17 00:00:00 2001 From: Julian Prein Date: Wed, 28 Dec 2022 01:43:01 +0100 Subject: [PATCH] zsh:funcs:finddup: Support filenames with spaces Previously, when filenames contained spaces, the function would break as `awk {print $2,$1}` would only print a part of the filename. The field swap was used as a workaround so that `uniq` only compares the sizes, and `uniq` unfortunately only has a flag to **skip** fields. Fix this issue by using a short awk script that mimics `uniq` but compares only the first field (i.e. the size). My awk-fu is unfortunately not very good, and that is why the one-liner prints out the first duplicated line multiple times. The `sort -u` pipe afterwards gets rid of those. --- .config/zsh/zshrc.d/40-functions.zsh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.config/zsh/zshrc.d/40-functions.zsh b/.config/zsh/zshrc.d/40-functions.zsh index abe5193..f5d745c 100644 --- a/.config/zsh/zshrc.d/40-functions.zsh +++ b/.config/zsh/zshrc.d/40-functions.zsh @@ -554,15 +554,17 @@ suffix() { finddup() { # find all files, filter the ones out with unique size, calculate md5 and # print duplicates + # TODO: Fix duplicate lines output in the awk script that currently `sort + # -u` handles find "$@" -type f -exec du '{}' '+' \ - | awk '{print $2,$1}' \ - | sort -k2 \ - | uniq -f1 -D \ - | awk '{print $1}' \ + | sort \ + | awk '{ if (!_[$1]) { _[$1] = $0 } else { print _[$1]; print $0; } }' \ + | sort -u \ + | cut -d$'\t' -f2- \ | xargs -d'\n' md5sum \ | sort \ | uniq -w32 --all-repeated=separate \ - | awk '{print $2}' + | cut -d' ' -f3- } # Wrapper around tmsu that searches for .tmsu/db in all parent directories and