#!/bin/csh -f
#
# Compare two directories and make identical files hard linked copies
# of one of the files.
#
# For every regular file under dir1 whose inode does not also appear under
# dir2, look for a file at the same relative path under dir2.  If it exists
# and has the same MD5 checksum, the dir1 file is replaced by a hard link to
# the dir2 file.
#
# usage: consolidate [-d|-dd] [-v] [-df] [-dots] [-s] dir1 dir2
#   e.g. ~/consolidate /var/tmp/watrous.110922.0959 /var/tmp/watrous
#
#   -d     dry run: echo the link/rename commands instead of running them
#          (also turns on -v)
#   -dd    like -d, and additionally print each inode as it is examined
#   -v     print timestamped progress messages
#   -df    run "df -k dir1" before and after
#   -dots  print a "." for each file that has no counterpart in dir2
#   -s     print a one-line summary at the start and at the end
#
# Limitations: path names are taken from the last whitespace-separated field
# of "find -ls", so names containing white space are not handled.  Names
# containing "[" or "*" are deliberately mangled so that they are skipped.
#
# daw; 9/23/11

# Per-platform settings:
#   ECHO   the echo binary that is invoked with -n to print progress dots
#   TPLUS  option prefix that makes tail(1) start at a given line number
switch (`uname -s``uname -r`)
case SunOS5*:
    set ECHO = /usr/ucb/echo
    set TPLUS = "+"
    breaksw
case Linux*:
    set ECHO = /bin/echo
    set TPLUS = "--lines=+"
    breaksw
default:
    exec echo ${0}: does not run on `uname -s` `uname -r`
endsw

# Parse the command line.  The first two arguments that name existing
# directories become DIR1 and DIR2 (one trailing "/" is stripped).
while ($#argv)
    switch ("$1")
    case -dd:
        set DDEBUG
        # fall through to -d
    case -d:
        set DEBUG = /bin/echo
        # fall through to verbose
    case -v:
        set VERBOSE
        shift
        breaksw
    case -df:
        set DF
        shift
        breaksw
    case -dots:
        set DOTS
        shift
        breaksw
    case -s:
        set SUMMARIZE
        shift
        breaksw
    default:
        if ( -d "$1" && ! $?DIR1) then
            set DIR1 = `echo $1 | sed 's;/$;;'`
            shift
            breaksw
        else if ( -d "$1" && ! $?DIR2) then
            set DIR2 = `echo $1 | sed 's;/$;;'`
            shift
            breaksw
        endif
        set DQ = '"'
        exec echo ${0}: "What do I do with $DQ$1$DQ?"
    endsw
end

if (! $?DIR2) exec echo usage: $0 "[-d|-dd] [-v] [-df] [-dots] [-s] dir1 dir2"

# When not in dry-run mode DEBUG is empty, so "$DEBUG cmd ..." just runs cmd.
if (! $?DEBUG) set DEBUG

set DONE = 0
set SKIPPED = 0

if ($?DF) then
    /bin/df -k $DIR1
endif

if ($?SUMMARIZE) echo `date +%T` Consolidating $DIR1 and $DIR2

# Scratch files.  The process id keeps concurrent runs from clobbering each
# other and lets the final cleanup remove only this run's files.
set BASE = `basename $0`
set TMP = /tmp/$BASE.$$

if ($?VERBOSE) echo `date +%T` Finding files in $DIR1
find $DIR1 -type f -ls > $TMP.1

if ($?VERBOSE) echo `date +%T` Finding files in $DIR2
find $DIR2 -type f -ls > $TMP.2

if ($?VERBOSE) echo `date +%T` Isolating inodes
# Plain sort rather than "sort -n": on richelieu, comm complained (5/14/15)
#   comm: file 1 is not in sorted order
#   comm: file 2 is not in sorted order
awk '{print $1}' $TMP.1 | sort > $TMP.1.inodes
awk '{print $1}' $TMP.2 | sort > $TMP.2.inodes

# comm -23 keeps the inodes that appear only under DIR1.  Inodes present in
# both lists already belong to files hard linked across the two trees, so
# there is nothing left to do for them.
if ($?VERBOSE) echo `date +%T` Finding inodes only in $DIR1
comm -23 $TMP.{1,2}.inodes > $TMP.inodes
set MAX = `cat $TMP.inodes | wc -l`
if ($?VERBOSE) echo `date +%T` $MAX inodes in $DIR1 not yet shared with $DIR2

# Walk the inode list CHUNK entries at a time so that no single foreach
# word list holds more than CHUNK inodes.
# OFF is the 1-based line number in $TMP.inodes where the next chunk starts.
@ OFF = 1
@ CHUNK = 1000

while ($OFF <= $MAX)
    foreach INODE ( `tail $TPLUS$OFF $TMP.inodes | head -$CHUNK` )
        if ($?DDEBUG) echo $INODE

        # An inode can have several names under DIR1; handle each of them.
        # The inode column of "find -ls" sometimes has leading spaces
        # (12/15/16).  Left square brackets and asterisks in names trip up
        # the shell, so they are rewritten to _LSQ_ / _ESC_; the resulting
        # path does not exist and the file ends up skipped.
        foreach ORIG ( `grep ^"[ ]*$INODE " $TMP.1 | awk '{print $NF}' | sed -e 's;\[;_LSQ_;g' -e 's;\*;_ESC_;g'` )
            # Potential duplicate: the same path with DIR1 replaced by DIR2.
            set PDUP = `echo "$ORIG" | sed "s;$DIR1;$DIR2;"`

            if (-e $PDUP) then
                if (-e /usr/local/bin/md5) then
                    set OMD5 = `/usr/local/bin/md5 $ORIG | awk '{print $NF}'`
                    set PMD5 = `/usr/local/bin/md5 $PDUP | awk '{print $NF}'`
                else
                    set OMD5 = `/usr/bin/md5sum $ORIG | awk '{print $1}'`
                    set PMD5 = `/usr/bin/md5sum $PDUP | awk '{print $1}'`
                endif

                # An empty checksum means the checksum command produced no
                # output; never treat that as a match.
                if ("$OMD5" != "" && "$OMD5" == "$PMD5") then
                    if ($?DOTS) /bin/echo ""
                    if ($?VERBOSE) echo `date +%T` consolidating $ORIG

                    # Create the link under a temporary name next to the
                    # original and rename it into place.  If the link cannot
                    # be made the original file is left untouched.
                    set LINKTMP = ${ORIG}.${BASE}.$$
                    $DEBUG /bin/ln $PDUP $LINKTMP
                    if ($status == 0) then
                        $DEBUG /bin/mv -f $LINKTMP $ORIG
                        if ($status == 0) then
                            @ DONE++
                        else
                            /bin/rm -f $LINKTMP
                        endif
                    endif
                endif
            else
                # No counterpart in DIR2.  Files whose checksums differ are
                # counted neither as consolidated nor as skipped.
                if ($?DOTS) $ECHO -n "."
                @ SKIPPED++
            endif
        end
    end
    @ OFF += $CHUNK
end

if ($?SUMMARIZE) echo `date +%T` $DONE files consolidated\; $SKIPPED skipped

if ($?DF) then
    /bin/df -k $DIR1
endif

# Remove only the scratch files created by this run.
/bin/rm -f $TMP.{1,2,1.inodes,2.inodes,inodes}