#!/bin/sh
. ./global.sh
# use_questionable - keep the apostrophe(s) of $questionable.
#
# Appends two lines to quotes.sed: a comment naming the word, and one sed
# substitution that swaps the word's leading and/or trailing ' for the marker
# character used in the replacement text, matching the word in any letter case.
#
# Globals read:    questionable - the word, with ' at its start and/or end
#                  pattern      - the same word with its body wrapped in ( ),
#                                 so the body can be referenced as \1
# Globals written: expr  - substitution template, {} stands for the word
#                  cipat - $pattern with every letter x turned into [xX]
#
# Prints "Internal error" and exits with status 1 when $questionable has no
# leading or trailing '.
# NOTE(review): when called from the piped while-loop in this file the exit
# presumably only ends that pipeline stage, not the whole script - confirm.
use_questionable()
{
    case "$questionable" in
        \'*\')
            # ' on both sides: neither outer edge may be a word boundary.
            expr="s/\B{}\B/�\1�/g"
            ;;
        \'*)
            # Leading ' only: the word body must end at a word boundary.
            expr="s/\B{}\>/�\1/g"
            ;;
        *\')
            # Trailing ' only: the word body must start at a word boundary.
            expr="s/\<{}\B/\1�/g"
            ;;
        *)
            echo "Internal error" >&2
            exit 1
            ;;
    esac
    printf '%s\n' "# Preserve apostrophe in $questionable" >> quotes.sed
    # Build the case-insensitive form of the pattern: letters become [xX],
    # everything else is copied through unchanged.
    cipat=$(printf '%s\n' "$pattern" | awk '{
        s = ""
        for (i = 1; i <= length($0); i++) {
            c = substr($0, i, 1)
            if (c ~ /[[:alpha:]]/) {
                s = s "[" tolower(c) toupper(c) "]"
            } else {
                s = s c
            }
        }
        print s
    }')
    # printf rather than echo: the template contains backslashes, which some
    # echo implementations would interpret. The second sed expression turns
    # the ( ) of the pattern into the \( \) group syntax.
    printf '%s\n' "$expr" | sed -e "s/{}/$cipat/" -e 's/[()]/\\&/g' >> quotes.sed
}
# use_alt - record that $questionable is the plain word $alt surrounded by
# quotation marks.
#
# Stores a substitution built from $pattern in the global `expr` and appends
# one explanatory comment line to quotes.sed. The substitution itself is not
# written to quotes.sed: the command that would emit it is disabled below.
#
# Globals read:    pattern, questionable, alt
# Globals written: expr
use_alt() {
    expr="s/\<${pattern}\>/\1/"
    >> quotes.sed echo "# Treat ${questionable} as ${alt} plus quotation marks"
    # Disabled rule emitter, kept as found:
    #echo $expr | awk '{ s=substr($0,1,1); for(i=2;i<=length;i++) { c=substr($0,i,1); if (c ~ /[[:alpha:]]/) s=s "[" tolower(c) toupper(c) "]"; else s=s c; } print s; }' | sed 's/[()]/\\&/g' >> quotes.sed
}
# Phase 1: identify all questionable words (words with ' at start or end)
# and attempt to deduce whether the terminal ' or 's is part of the word
# or not.
#
# Working files prepared here:
#   quotes.words  - output of words.sh for the input document ($1)
#   quotes.tmp    - the lines of quotes.words that start or end with '
#   quotes.alsoby - output of words.sh fed with the file(s) named by $alsoby
#   quotes.log    - decision log, started empty
#   quotes.sed    - generated sed script, started empty
sh words.sh "$1" > quotes.words
grep "^'" quotes.words > quotes.tmp
grep "'$" quotes.words >> quotes.tmp
# NOTE(review): $alsoby is not set in this file (presumably by global.sh) and
# is left unquoted so that it can expand to several file names - confirm.
cat $alsoby | sh words.sh > quotes.alsoby
# Truncate with ':' instead of "echo -n ''": not every /bin/sh echo accepts
# -n, and one that does not would write a literal "-n" line into the file.
: > quotes.log
: > quotes.sed
# Decide, for each distinct questionable word, whether its ' is an apostrophe
# (use_questionable) or a quotation mark around the plain word (use_alt).
# Every decision is appended to quotes.log; undecided words are logged with a
# trailing "?". Checks are made in this order:
#   1. word listed in quotes.good_words           -> use_questionable (manual)
#   2. word listed in quotes.bad_words            -> use_alt (manual)
#   3. plain form occurs in quotes.words:
#        word ends in s'                          -> undecided, logged with "?"
#        otherwise                                -> use_alt
#   4. otherwise:
#        word ends in s'                          -> use_questionable (suffix)
#        word ends in in' and `spell` prints
#        nothing for the plain form + "g"         -> use_questionable (suffix)
#        plain form occurs in quotes.alsoby       -> use_alt (also by)
#        `spell` prints something for the plain
#        form and /usr/share/dict/words has a
#        line matching the word with each ' as .* -> use_questionable (dict)
#        anything else                            -> undecided, logged with "?"
#
# Per-word variables (read by use_questionable / use_alt):
#   questionable - the word as read, pattern - escaped word with its body in
#   ( ), alt - the word without its leading/trailing '.
sort quotes.tmp | uniq | while read -r questionable; do
    pattern=$(printf '%s\n' "$questionable" | sh escape.sh |
        sed "s/^\('\?\)\(.*[^']\)\('\?\)$/\1(\2)\3/")
    alt=$(printf '%s\n' "$questionable" | sed -e "s/^'//" -e "s/'$//")
    # Number of times the plain form occurs as a whole line in quotes.words.
    count_un=$(grep -F -c -x -e "$alt" quotes.words)
    if grep -F -q -x -e "$questionable" quotes.good_words; then
        printf '%s\n' "$questionable (manual)" >> quotes.log
        use_questionable
    elif grep -F -q -x -e "$questionable" quotes.bad_words; then
        printf '%s\n' "$alt (manual)" >> quotes.log
        use_alt
    elif [ "${count_un:-0}" -gt 0 ]; then
        case "$questionable" in
            *s\')
                # No rule is written here; the "?" makes the later log check
                # count this word as unresolved.
                printf '%s\n' "$questionable $alt (genative) ?" >> quotes.log
                ;;
            *)
                printf '%s\n' "$alt $count_un" >> quotes.log
                use_alt
                ;;
        esac
    else
        unknown=""
        case "$questionable" in
            *s\')
                printf '%s\n' "$questionable (suffix)" >> quotes.log
                use_questionable
                ;;
            *in\')
                # Try the word with the dropped "g" put back.
                misspell=$(printf '%s\n' "${alt}g" | spell)
                if [ -z "$misspell" ]; then
                    printf '%s\n' "$questionable (suffix)" >> quotes.log
                    use_questionable
                else
                    unknown="y"
                fi
                ;;
            *)
                unknown="y"
                ;;
        esac
        if [ "$unknown" = "y" ]; then
            count_un=$(grep -F -c -x -e "$alt" quotes.alsoby)
            if [ "${count_un:-0}" -gt 0 ]; then
                printf '%s\n' "$alt $count_un (also by)" >> quotes.log
                use_alt
                unknown=""
            else
                misspell=$(printf '%s\n' "$alt" | spell)
                if [ -n "$misspell" ]; then
                    # Anchored regex with each outer ' replaced by .*
                    pat=$(printf '%s\n' "$questionable" | sh escape.sh |
                        sed -e "s/^'/.*/" -e "s/'$/.*/" -e "s/.*/^&$/")
                    # The dictionary match count is kept in count_qu, the
                    # name the log line below uses; it was previously stored
                    # under a different name, so the logged count was empty.
                    count_qu=$(grep -c -x -e "$pat" /usr/share/dict/words)
                    if [ "${count_qu:-0}" -gt 0 ]; then
                        printf '%s\n' "$questionable $count_qu (dict)" >> quotes.log
                        use_questionable
                        unknown=""
                    fi
                fi
            fi
        fi
        if [ "$unknown" = "y" ]; then
            printf '%s\n' "$questionable $alt ?" >> quotes.log
        fi
    fi
done
# Apply the generated rules in quotes.sed to the input document ($1), then
# count the quotes.log lines that contain "?" as a separate word. Any such
# line marks an undecided word, so report the count and stop with status 1.
sed -f quotes.sed "$1" > quotes.tmp
count=$(grep -F -c -w -e "?" quotes.log)
if [ "$count" -ne 0 ]; then
    echo "$count questionable word(s) left after quote analysis."
    echo "See quotes.log for details."
    exit 1
fi
# Phase 2: all quote characters in the middle of words are apostrophes
# Reads quotes.tmp and writes quotes.tmp2. A ' that directly follows a letter
# or hyphen and is directly followed by a letter or hyphen is replaced by the
# marker character. The following character is only looked at via (?=...),
# not consumed, so it can itself be the left-hand side of the next match.
# Earlier sed form of the same rule, kept as found:
# sed -e "s/\([a-zA-Z-]\)'\([a-zA-Z-]\)/\1�\2/g"
perl \
  -e "while(<>) { \$_ =~ s/([[:alpha:]-])'(?=[[:alpha:]-])/\1�/g; print; }" \
  quotes.tmp > quotes.tmp2
# Phase 3: all quote characters in metadata are special
# Reads quotes.tmp2 and writes quotes.tmp, line by line. A " or ' is rewritten
# when, later on the same line, a > follows with no < in between (the
# (?=[^<]*>) lookahead) - presumably meaning the quote sits inside a <...> tag.
# NOTE(review): the replacement side of both substitutions looks empty here;
# it presumably holds non-printing marker characters that the postamble maps
# back to " and ' - confirm against the raw bytes of this file.
perl \
  -e "while(<>) { \$_ =~ s/\"(?=[^<]*>)//g; \$_ =~ s/'(?=[^<]*>)//g; print; }" \
  quotes.tmp2 > quotes.tmp
# Phase 4: all other quotes are proper quotation marks
# Runs the fancy-quotes program from the current directory on quotes.tmp and
# stores its output in quotes.tmp2.
./fancy-quotes quotes.tmp > quotes.tmp2
# Postamble: restore symbols
# Turns the marker character back into a plain ' and writes the final result
# to the output file ($2).
# NOTE(review): the patterns of the second and third expressions look empty
# here; presumably they are the non-printing markers produced in Phase 3 -
# confirm against the raw bytes of this file.
# NOTE(review): $2 is unquoted, so an output path containing whitespace would
# not work as a redirection target.
sed -e "s/�/'/g" -e "s//\"/g" -e "s//'/g" quotes.tmp2 > $2
# Phase 5: validate
# Collect into quotes.err every line of the output file ($2) in which a
# closing curly quote follows a space or starts the line, or an opening curly
# quote is followed by a space or ends the line. A line is printed once per
# expression it matches. If anything was collected, show it and stop with
# status 1.
sed -n -e '/ [’”]/p' -e '/^[’”]/p' -e '/[‘“] /p' -e '/[‘“]$/p' "$2" > quotes.err
if [ -s quotes.err ]; then
    echo "Error: spaces inside quotation marks"
    cat quotes.err
    exit 1
fi
