#!/bin/sh
# Resolve "-*" hyphenation marks in a text file.
#
# Positional arguments:
#   $1  input text file
#   $2  output file (written by the last phase of the script)
#
# Files written in the current directory along the way: hyphen-tmp.txt,
# hyphen.log and hyphen.sed.
file="$1"
# global.sh is sourced from the current directory; its contents are not
# visible here.
# NOTE(review): $alsoby, used further down, is presumably set by it -- confirm.
. ./global.sh

# Phase 1: rejoin words split across pages
#
# rejoin_pages FILE
#   Copy FILE to standard output.  When a line ends in "-*", the line after
#   it is passed through unchanged (it is treated as the page separator) and
#   the first word of the line after that, minus one leading "*", is moved
#   up and appended to the "-*" line.  The remainder of that third line is
#   printed after the separator, with its fields re-joined by single spaces.
#   Returns 1 without reading anything when FILE is not readable.
rejoin_pages() {
    # With an empty file operand awk would fall back to standard input,
    # so refuse up front instead of waiting on the terminal.
    if [ ! -r "$1" ]; then
        printf '%s: cannot read input file "%s"\n' "$0" "$1" >&2
        return 1
    fi
    awk '
    /-\*$/ {
        lastline = $0
        # A failed getline leaves $0 untouched, so each read is checked.
        # If the input stops short, emit what was read, unchanged.
        if ((getline sepline) <= 0) {
            print lastline
            next
        }
        if ((getline) <= 0) {
            print lastline
            print sepline
            next
        }
        half = $1
        sub(/^\*/, "", half)
        # Blanking $1 rebuilds $0 with a leading separator; strip it.
        $1 = ""
        firstline = $0
        sub(/^ /, "", firstline)
        print lastline half
        print sepline
        print firstline
        next
    }
    { print }
    ' "$1"
}

# No exit on failure: the later phases still run, as they always have.
rejoin_pages "$file" > hyphen-tmp.txt
# Phase 2a: identify -*ed words
#
# For every word of the form first-*second in hyphen-tmp.txt, decide whether
# it should become "firstsecond" or "first-second" and record the outcome:
#   hyphen.log  one line per word: the chosen form, the counts, and how the
#               choice was made; undecided words list both forms
#   hyphen.sed  one s/\<word\>/replacement/ rule per decided word
# The tests are tried in this order until one is decisive:
#   1. whole-word line counts of both forms in the input file ($file, the
#      first argument of the script)
#   2. the same counts summed over the files listed in $alsoby
#   3. spell(1): exactly one of the two forms is reported as misspelled
#   4. an exact line match in hyphen.good_words (unhyphenated form first)
# NOTE(review): $alsoby is not assigned in this section; presumably it comes
# from the sourced global.sh -- confirm.

# Start both output files empty (portable, unlike "echo -n").
: > hyphen.log
: > hyphen.sed

# list_marked_words FILE
#   Print "first-*second firstsecond first-second" for every whitespace
#   separated field of FILE that contains "-*", wherever it sits on the
#   line.  Trailing punctuation is removed from the second half.
list_marked_words() {
    awk '
    /-\*/ {
        for (i = 1; i <= NF; i++) {
            if ($i !~ /-\*/)
                continue
            first = $i
            sub("-\\*.*$", "", first)
            second = $i
            sub(".*-\\*", "", second)
            gsub("[[:punct:]]+$", "", second)
            print first "-*" second, first second, first "-" second
        }
    }
    ' "$1"
}

# add_hyphen_rule REPLACEMENT
#   Append the sed rule that rewrites the current marked word (escaped
#   regex in $pattern) to REPLACEMENT.
add_hyphen_rule() {
    printf 's/\\<%s\\>/%s/\n' "$pattern" "$1" >> hyphen.sed
}

# count_alsoby WORD
#   Print the number of lines containing WORD as a whole word, summed over
#   the files listed in $alsoby.  stdin is /dev/null so that an empty
#   $alsoby yields 0 instead of swallowing the loop input below.
count_alsoby() {
    # shellcheck disable=SC2086 -- $alsoby is split into file names on purpose
    (grep -F -w -c -h -e "$1" -- $alsoby < /dev/null | tr '\012' '+'
     echo 0) | bc
}

list_marked_words hyphen-tmp.txt |
while read -r noted unhyphenated hyphenated; do
    # $noted always contains "*": it must stay quoted to avoid globbing.
    pattern=$(printf '%s\n' "$noted" | sh escape.sh)
    count_un=$(grep -F -w -c -e "$unhyphenated" -- "$file")
    count_hy=$(grep -F -w -c -e "$hyphenated" -- "$file")
    if [ "$count_un" -gt "$count_hy" ]; then
        printf '%s %s : %s\n' "$unhyphenated" "$count_un" "$count_hy" >> hyphen.log
        add_hyphen_rule "$unhyphenated"
    elif [ "$count_un" -lt "$count_hy" ]; then
        printf '%s %s : %s\n' "$hyphenated" "$count_hy" "$count_un" >> hyphen.log
        add_hyphen_rule "$hyphenated"
    else
        # Tie in the input file: the counts are replaced by the $alsoby
        # totals, which are also what the later log lines report.
        count_un=$(count_alsoby "$unhyphenated")
        count_hy=$(count_alsoby "$hyphenated")
        if [ "$count_un" -gt "$count_hy" ]; then
            printf '%s %s : %s (also by)\n' "$unhyphenated" "$count_un" "$count_hy" >> hyphen.log
            add_hyphen_rule "$unhyphenated"
        elif [ "$count_un" -lt "$count_hy" ]; then
            printf '%s %s : %s (also by)\n' "$hyphenated" "$count_hy" "$count_un" >> hyphen.log
            add_hyphen_rule "$hyphenated"
        else
            # spell prints the words it rejects: empty output = accepted.
            misspell_un=$(printf '%s\n' "$unhyphenated" | spell)
            misspell_hy=$(printf '%s\n' "$hyphenated" | spell)
            if [ -z "$misspell_un" ] && [ -n "$misspell_hy" ]; then
                printf '%s %s (spell)\n' "$unhyphenated" "$count_un" >> hyphen.log
                add_hyphen_rule "$unhyphenated"
            elif [ -n "$misspell_un" ] && [ -z "$misspell_hy" ]; then
                printf '%s %s (spell)\n' "$hyphenated" "$count_hy" >> hyphen.log
                add_hyphen_rule "$hyphenated"
            elif grep -F -x -e "$unhyphenated" hyphen.good_words > /dev/null; then
                printf '%s (manual)\n' "$unhyphenated" >> hyphen.log
                add_hyphen_rule "$unhyphenated"
            elif grep -F -x -e "$hyphenated" hyphen.good_words > /dev/null; then
                printf '%s (manual)\n' "$hyphenated" >> hyphen.log
                add_hyphen_rule "$hyphenated"
            else
                # Undecided: log both forms, emit no rule.
                printf '%s %s : %s %s\n' "$unhyphenated" "$count_un" "$count_hy" "$hyphenated" >> hyphen.log
            fi
        fi
    fi
done
# Phase 2b: resolve hyphenation where we can
# Run the substitution rules collected in hyphen.sed over the rejoined text
# in hyphen-tmp.txt and write the result to the path given as the second
# argument.  The target is quoted so that a path containing spaces or glob
# characters is used as-is.
sed -f hyphen.sed hyphen-tmp.txt > "$2"
