#!/bin/bash
#
# Recursively go through a directory structure and replace duplicate files with
# symlinks. This cuts down our RPM repo size by ~25%.
#
# replace-dupes-with-symlinks.sh [DIR]
#
# DIR: Directory to traverse. Defaults to current directory if not specified.
#
# Two files are treated as duplicates only when they have the same basename
# AND are byte-for-byte identical. The first path seen for a given basename is
# kept as the real file; each later identical file with that basename is
# replaced by a relative symlink pointing at it.
#
# Requires bash 4+ (associative arrays) and an `ln` that supports -r
# (GNU coreutils).

#######################################
# Replace duplicate files below a directory with relative symlinks.
# Arguments: $1 - directory to traverse (defaults to "." if empty/unset)
# Returns:   0 on success; non-zero if the directory cannot be entered or a
#            symlink could not be created.
#######################################
# The body is a subshell so the directory change never leaks to the caller.
replace_dupes() (
  local src="${1:-.}"
  local -A first_seen=()
  local path name keeper
  local status=0

  # Bail out if the target cannot be entered; carrying on would rewrite files
  # in whatever directory we happened to be started from.
  cd -- "$src" || exit 1

  # NUL-delimited paths with IFS= and -r keep spaces, backslashes and newlines
  # in file names intact, and an empty tree yields zero iterations.
  while IFS= read -r -d '' path; do
    name="${path##*/}"
    keeper="${first_seen[$name]-}"
    if [[ -z "$keeper" ]]; then
      # First time this basename has been seen: remember it as the original.
      first_seen[$name]="$path"
    elif cmp -s -- "$path" "$keeper"; then
      # Byte-identical to the original: swap it for a relative symlink.
      # A single `ln -f` replaces the file, so there is no window in which
      # the file has been removed but the link was never created.
      ln -srf -- "$keeper" "$path" || status=1
    fi
  done < <(find . -type f -print0)

  exit "$status"
)

replace_dupes "${1:-.}"