Files
dotfiles/meta/git/hooks/pre-commit
Julian Prein 7f83427749 hooks:pre-commit: Broken link detection on delete
Until now the hook only checked newly added symlinks. This patch is a
first draft of also checking the worktree and index for any dangling
symlinks after staging a deletion.

The whole thing probably breaks when file-names contain newlines and
maybe also a mix of quotes. I plan on making this more robust in the
future but see no urgency for it since this repository has pretty simple
filenames.
2025-09-15 17:40:35 +02:00

181 lines
4.6 KiB
Bash
Executable File

#!/bin/sh
#
# A hook script to verify what is about to be committed.
# Called by "git commit" with no arguments. The hook should exit with non-zero
# status after issuing an appropriate message if it wants to stop the commit.
#
# To enable this hook, save this file in ".git/hooks/pre-commit".
# Load the shared helper library; this is where die() comes from.
. "$HOME"/.local/bin/helpers.sh

# Decide what the index is compared against: HEAD by default, or the empty
# tree object when the repository does not have a single commit yet.
against=HEAD
git rev-parse --verify HEAD >/dev/null 2>&1 \
	|| against=$(git hash-object -t tree /dev/null)

# hooks.allownonascii, read as a boolean. Setting it to true allows non-ASCII
# filenames.
allownonascii=$(git config --type=bool hooks.allownonascii)

# From here on everything written to stdout ends up on stderr.
exec >&2
# Check that all filenames include only ASCII characters.
case "$allownonascii" in
	true)
		# Non-ASCII names were explicitly allowed: nothing to verify.
		;;
	*)
		# Printable ASCII is exactly the range from the space character to
		# tilde. Deleting that range (and the NUL separators produced by -z)
		# from the list of added paths leaves only offending bytes, which are
		# then counted.
		# The brackets around the tr range are harmless because both bytes
		# lie inside the range, and Solaris 10's /usr/bin/tr requires them.
		num_nonascii=$(
			git diff-index --cached --name-only --diff-filter=A -z $against \
				| LC_ALL=C tr -d '[ -~]\0' \
				| wc -c
		)
		# $num_nonascii is left unquoted so that any padding around the
		# number printed by wc is dropped by word splitting.
		if [ $num_nonascii != 0 ]; then
			die "Rename files with ASCII characters only, or enable hooks.allownonascii"
		fi
		;;
esac
# Abort the commit when the staged changes introduce whitespace errors;
# git diff-index --check prints the offending lines itself.
git diff-index --check --cached $against -- || die
# Check that added symlinks are neither absolute nor broken

# check_added_symlinks
#
# Read worktree paths from stdin, one per line, and inspect every path that is
# a symlink. A line "<path>: Absolute symlink" and/or "<path>: Broken symlink"
# is written to stderr for each offender. Paths that are not symlinks are
# ignored.
#
# Returns 0 when no offending symlink was seen, non-zero otherwise.
# The body runs in a subshell so that no variable leaks into the caller.
check_added_symlinks() (
	abort=0
	# IFS= keeps leading and trailing blanks of a path intact; without it
	# such a path is mangled and the link silently skipped.
	while IFS= read -r line; do
		[ -h "$line" ] || continue
		# "--" protects against paths that start with a dash.
		target="$(readlink -- "$line")"
		case "$target" in
			/*)
				abort=1
				printf "%s\n" "$line: Absolute symlink" >&2
				;;
		esac
		# -e follows the link, so it fails exactly when the target is
		# missing.
		if [ ! -e "$line" ]; then
			abort=1
			printf "%s\n" "$line: Broken symlink" >&2
		fi
	done
	[ "$abort" -eq 0 ]
)

# core.quotePath=false makes git print non-ASCII path names verbatim instead
# of as C-quoted strings that would never match a file in the worktree.
# Names containing double quotes, backslashes or control characters are still
# quoted by git and therefore skipped.
git -c core.quotePath=false diff --staged --name-only --diff-filter=AT \
	"$against" \
	| check_added_symlinks \
	|| die
# Make sure that a deletion does not break any symlinks (including renaming a
# file)
# TODO: switch all these to null-terminated lines
#
# Symlinks are handled as "LINK<TAB>TARGET" records, one per line, so paths
# containing a tab or a newline are not supported.

# canonical_entry PATH
#
# Print PATH as an absolute path whose directory part is canonicalised
# (symlinks resolved, "." and ".." collapsed, missing components tolerated)
# while the final component is kept verbatim. The last component is left
# alone on purpose: what matters is which directory entry PATH names, not
# what that entry currently points to.
canonical_entry() {
	printf '%s/%s\n' \
		"$(realpath -m -- "$(dirname -- "$1")")" \
		"$(basename -- "$1")"
}

# link_broken_by DELETION
#
# Read "LINK<TAB>TARGET" records from stdin and look for a symlink whose
# target names DELETION, a path relative to the current directory.
#
# On the first hit "LINK<TAB>TARGET<TAB>RESOLVED" is printed, RESOLVED being
# the canonical absolute path of the target, and 0 is returned. A non-zero
# status is returned when no link points at DELETION.
# The body runs in a subshell so that no variable leaks into the caller.
link_broken_by() (
	deletion="$1"
	tab="$(printf '\t')"
	wanted="$(canonical_entry "$deletion")"
	# As a first heuristic only keep links whose target has the same
	# basename as the deleted file; the basename is escaped so that grep
	# takes it literally.
	name_re="$(basename -- "$deletion" | sed 's/[.[^$*\\]/\\&/g')"
	grep -e "[$tab/]$name_re\$" | {
		while IFS= read -r record; do
			link="${record%%"$tab"*}"
			target="${record#*"$tab"}"
			case "$target" in
				/*)
					# absolute link
					resolved="$(canonical_entry "$target")"
					;;
				*)
					# relative link: resolved from the directory holding
					# the link. "$link/.." must not be used here, because
					# realpath would expand the symlink itself first.
					resolved="$(
						canonical_entry "$(dirname -- "$link")/$target"
					)"
					;;
			esac
			if [ "$resolved" = "$wanted" ]; then
				printf '%s\t%s\t%s\n' "$link" "$target" "$resolved"
				exit 0
			fi
		done
		exit 1
	}
)

# core.quotePath=false keeps non-ASCII names unquoted so that they can be
# compared with real paths.
deleted_files="$(
	git -c core.quotePath=false diff-index --cached --name-only \
		--diff-filter=D "$against"
)"
if [ -n "$deleted_files" ]; then
	tab="$(printf '\t')"

	# First, check for broken symlinks in the tree. find prints the link
	# and its raw target itself, so no quoted output has to be parsed.
	worktree_links="$(find . -xtype l -printf '%p\t%l\n')"
	printf '%s\n' "$deleted_files" | while IFS= read -r deletion; do
		if hit="$(
			printf '%s\n' "$worktree_links" | link_broken_by "$deletion"
		)"; then
			link="${hit%%"$tab"*}"
			rest="${hit#*"$tab"}"
			target="${rest%"$tab"*}"
			die "You broke the symlink '$link' -> '$target'"
		fi
	done || exit

	# Second, check all symlinks in the index if they still point to the
	# deleted file. %(rest) makes cat-file echo the path handed in next to
	# the object name, so every target stays paired with its own link even
	# when several links share one blob.
	index_links="$(
		git -c core.quotePath=false ls-files \
			--format="%(objectmode) %(objectname) %(path)" \
			| sed -n 's/^120000 //p' \
			| git cat-file --batch='%(rest)' \
			| paste - -
	)"
	printf '%s\n' "$deleted_files" | while IFS= read -r deletion; do
		if hit="$(
			printf '%s\n' "$index_links" | link_broken_by "$deletion"
		)"; then
			source="${hit%%"$tab"*}"
			target="${hit##*"$tab"}"
			die "You broke the symlink \"$source\" -> \"$target\""
		fi
	done || exit
	# TODO: also check potential symlinks pointing to now empty directories
fi