#!/bin/sh
# Text post-processing driver: builds the helper tools, runs each processing
# stage in turn, records every stage in projectID${projectid}/index.html and
# finally publishes that directory with lftp.

# Abort as soon as any command fails.
set -e

# Shared settings. NOTE(review): ${projectid}, used throughout this script,
# is presumably defined here -- confirm against global.sh.
. ./global.sh

# Build the helper programs. gutcheck and jeebies are run directly further
# down; fancy-quotes is presumably used by one of the stage scripts.
# The pkg-config output is deliberately left unquoted: it must split into
# separate compiler and linker flags.
# shellcheck disable=SC2046
gcc -o fancy-quotes fancy-quotes.c $(pkg-config --cflags --libs glib-2.0)
(cd tools/gutcheck && gcc -o gutcheck gutcheck.c)
(cd tools/jeebies && gcc -o jeebies jeebies.c)
# Start from an empty output directory. The :? guard aborts the script when
# projectid is unset or empty, so the recursive delete can never be aimed at
# a bare "projectID" path by accident.
rm -rf "projectID${projectid:?projectid is not set}"
mkdir "projectID${projectid}"

# Begin the HTML report: charset declaration, title, introduction and the
# first section heading. The quoted heredoc delimiter keeps the markup
# literal (no parameter expansion inside the text).
cat > "projectID${projectid}/index.html" <<'HTML'
<html>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<body>
<h1>The Camp Fire Girls Solve A Mystery—Text Post Processing Notes</h1>
<p>Some notes on the preparation of <a href="output-utf8.txt">a F2-style output text</a> and associated <a href="output-utf8.notes">transcriber's notes</a>.</p>
<h2>Text Post Processing</h2>
HTML
# Import stage: announce it on the console, describe it in the report, then
# unpack the project's _F1_saved archive and run import.sh over both the
# saved text and the good-words list.
echo "Importing..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Import</h3>
<p>Use UNIX line endings, convert to utf-8 handling simple transliterations (eg., --). Results: <a href="input-utf8.txt">text</a> and <a href="good_words.utf8">good word list</a>.</p>
HTML
# -o: overwrite files left over from a previous run without prompting.
unzip -o "projectID${projectid}_F1_saved.zip"
sh import.sh "projectID${projectid}_F1_saved.txt" \
  "projectID${projectid}/input-utf8.txt"
sh import.sh good_words.txt "projectID${projectid}/good_words.utf8"
# Round-correction stage: publish fixup.patch next to the report (the report
# links to it as a diff) and run fixup.sh on the imported text.
echo "Correcting rounds..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Correct the output of the rounds</h3>
<p>Fix any mistakes missed by P3/F2. Results: <a href="fixup-utf8.txt">text</a>, or as a <a href="fixup.patch">diff</a>.</p>
HTML
cp fixup.patch "projectID${projectid}"
sh fixup.sh "projectID${projectid}/input-utf8.txt" \
  "projectID${projectid}/fixup-utf8.txt"
# Printer's-errors stage: errors.sh takes the round-corrected text and
# writes errors-utf8.txt.
echo "Correct printer's errors..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Correct printer's errors</h3>
<p>Fix printer's errors as noted by proofreaders. Notes detailing fixed errors are transformed to <tt>&lt;corr&gt;</tt> form; notes detailing unfixed errors are left untransformed. Results: <a href="errors-utf8.txt">text</a>.</p>
HTML
sh errors.sh "projectID${projectid}/fixup-utf8.txt" \
  "projectID${projectid}/errors-utf8.txt"
# Validation stage: validate.sh reads the error-corrected text; its standard
# output is saved as the validation log that the report links to.
echo "Validating..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Validation</h3>
<p>Validate the resulting text. Result: <a href="validate-utf8.log">log</a>.</p>
HTML
sh validate.sh "projectID${projectid}/errors-utf8.txt" \
  > "projectID${projectid}/validate-utf8.log"
# Quotation-mark stage: run quotes.sh on the error-corrected text, then
# publish its log and the word lists the report links to.
echo "Fancy quotes..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Identify quotation marks</h3>
<p>Identify quotation marks and replace them with their curly forms. Apostrophes are left unchanged. Inputs: <a href="http://www.gutenberg.org/ebooks/10688">PG #10688</a>, <a href="http://www.gutenberg.org/ebooks/36485">PG #36485</a>, <a href="http://www.gutenberg.org/ebooks/36833">PG #36833</a>, <a href="quotes.good_words">good words</a>, <a href="quotes.bad_words">bad words</a>. Result: <a href="quotes-utf8.txt">text</a>, <a href="quotes-utf8.log">word analysis log</a>.</p>
HTML
sh quotes.sh "projectID${projectid}/errors-utf8.txt" \
  "projectID${projectid}/quotes-utf8.txt"
# quotes.log is presumably written by quotes.sh; it is published under the
# name the report uses.
cp quotes.log "projectID${projectid}/quotes-utf8.log"
cp quotes.good_words quotes.bad_words "projectID${projectid}"
# Hyphen stage: run hyphen.sh on the quote-processed text, then publish the
# resolution log and the good-words list the report links to.
echo "Handling hyphens..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Handle hyphens</h3>
<p>Rejoin words split across pages. Identify -*ed words and resolve them. Inputs: <a href="hyphen.good_words">good words</a>, Result: <a href="hyphen-utf8.txt">text</a>, <a href="hyphen-utf8.log">resolution log</a>.</p>
HTML
sh hyphen.sh "projectID${projectid}/quotes-utf8.txt" \
  "projectID${projectid}/hyphen-utf8.txt"
# hyphen.log is presumably written by hyphen.sh; it is published under the
# name the report uses.
cp hyphen.log "projectID${projectid}/hyphen-utf8.log"
cp hyphen.good_words "projectID${projectid}"
# Notes stage: run denote.sh on the hyphen-processed text and publish the
# list of handled notes that the report links to.
echo "Handling notes..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Handle notes</h3>
<p>Remove notes that we've handled. Inputs: <a href="denote.handled">handled notes</a>, Result: <a href="denote-utf8.txt">text</a>.</p>
HTML
sh denote.sh "projectID${projectid}/hyphen-utf8.txt" \
  "projectID${projectid}/denote-utf8.txt"
cp denote.handled "projectID${projectid}"
# Word-frequency stage: run wordfreq.sh on the de-noted text, then publish
# its log and the bad-words list the report links to.
echo "Word frequency analysis..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Word frequency analysis</h3>
<p>Generate a word list, and a matching base word by eliding diacritical marks and hyphens. Replace words identified as bad. Inputs: <a href="wordfreq.bad_words">bad words</a>, Result: <a href="wordfreq-utf8.txt">text</a>, <a href="wordfreq-utf8.log">log</a>.</p>
HTML
sh wordfreq.sh "projectID${projectid}/denote-utf8.txt" \
  "projectID${projectid}/wordfreq-utf8.txt"
# wordfreq.log is presumably written by wordfreq.sh; it is published under
# the name the report uses.
cp wordfreq.log "projectID${projectid}/wordfreq-utf8.log"
cp wordfreq.bad_words "projectID${projectid}"
# Transcriber's-notes stage: tnotes.sh receives the word-frequency output
# plus two destination paths, one for the text and one for the notes.
echo "Write transcriber's notes..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Transcriber's notes</h3>
<p>Use the <tt>&lt;corr&gt;</tt> markers to generate a list of changes made to the original text and remove the markers from the main text. Result: <a href="tnotes-utf8.notes">notes</a>, <a href="tnotes-utf8.txt">text</a>.</p>
HTML
sh tnotes.sh "projectID${projectid}/wordfreq-utf8.txt" \
  "projectID${projectid}/tnotes-utf8.txt" \
  "projectID${projectid}/tnotes-utf8.notes"
# Export stage: run export.sh once for the transcriber's notes and once for
# the text, producing the two final output-utf8.* files.
echo "Export..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Export</h3>
<p>Use DOS line endings. Final result: <a href="output-utf8.notes">notes</a> and <a href="output-utf8.txt">text</a>.</p>
HTML
sh export.sh "projectID${projectid}/tnotes-utf8.notes" \
  "projectID${projectid}/output-utf8.notes"
sh export.sh "projectID${projectid}/tnotes-utf8.txt" \
  "projectID${projectid}/output-utf8.txt"
# ASCII checking section: derive an ASCII plain-text rendering of the book.
# Each step appends its description to the report and then runs one stage
# script whose output file is the next step's input:
#   rejoin -> ascii -> text -> rewrap -> undiv -> export
echo "Generating ASCII text output for gutcheck..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h2>Checking (ASCII)</h2>
<h3>Rejoin pages</h3>
<p>Strip page markers &amp; blank pages, and join blocks that cross pages. Result: <a href="rejoin-utf8.txt">text</a>.</p>
HTML
sh rejoin.sh "projectID${projectid}/tnotes-utf8.txt" \
  "projectID${projectid}/rejoin-utf8.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>ASCII transliteration</h3>
<p>Convert to ASCII by transliterating unicode. Result: <a href="ascii-ascii.txt">text</a>.</p>
HTML
sh ascii.sh "projectID${projectid}/rejoin-utf8.txt" \
  "projectID${projectid}/ascii-ascii.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Convert to plain text</h3>
<p>Convert to plain text by indenting /# and /p blocks, and handling <tt>&lt;tb&gt;</tt>, <tt>&lt;sc&gt;</tt>, <tt>&lt;i&gt;</tt>, <tt>&amp; &lt;b&gt;</tt> (<tt>&lt;f&gt;</tt> and <tt>&lt;g&gt;</tt> are discarded). Result: <a href="text-ascii.txt">text</a>.</p>
HTML
sh text.sh "projectID${projectid}/ascii-ascii.txt" \
  "projectID${projectid}/text-ascii.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Re-wrap</h3>
<p>Re-wrap text to 72 columns honouring /*, /$ and /p no-wrap blocks. Result: <a href="rewrap-ascii.txt">text</a>.</p>
HTML
sh rewrap.sh "projectID${projectid}/text-ascii.txt" \
  "projectID${projectid}/rewrap-ascii.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Discard block markers</h3>
<p>Discard /*, /$, /p, and /# block markers. Result: <a href="undiv-ascii.txt">text</a>.</p>
HTML
sh undiv.sh "projectID${projectid}/rewrap-ascii.txt" \
  "projectID${projectid}/undiv-ascii.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Export</h3>
<p>Use DOS line endings. Result: <a href="ascii.txt">text</a>.</p>
HTML
sh export.sh "projectID${projectid}/undiv-ascii.txt" \
  "projectID${projectid}/ascii.txt"
# Run the two locally built checkers over the exported ASCII text. Each
# tool's standard output becomes the log file linked from the report.
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Run gutcheck</h3>
<p>Run gutcheck. Result: <a href="gutcheck.log">log</a>.</p>
HTML
./tools/gutcheck/gutcheck "projectID${projectid}/ascii.txt" \
  > "projectID${projectid}/gutcheck.log"

echo "Running jeebies"
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Run jeebies</h3>
<p>Run jeebies. Result: <a href="jeebies.log">log</a>.</p>
HTML
./tools/jeebies/jeebies "projectID${projectid}/ascii.txt" \
  > "projectID${projectid}/jeebies.log"
# UTF-8 checking section: derive a UTF-8 plain-text rendering for the spell
# check. Steps chain through their output files: text -> rewrap -> undiv.
echo "Generating plain text output for spell check..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h2>Checking (UTF-8)</h2>
<h3>Rejoin pages</h3>
<p>Strip page markers &amp; blank pages, and join blocks that cross pages. Result: <a href="rejoin-utf8.txt">text</a>.</p>
HTML
# Note: the rejoin step is documented in the report but not run again here;
# rejoin-utf8.txt was already produced by the ASCII checking section above.
#sh rejoin.sh projectID${projectid}/tnotes-utf8.txt \
#  projectID${projectid}/rejoin-utf8.txt

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Convert to plain text</h3>
<p>Convert to plain text by indenting /# and /p blocks, and handling <tt>&lt;tb&gt;</tt>, <tt>&lt;sc&gt;</tt>, <tt>&lt;i&gt;</tt>, <tt>&amp; &lt;b&gt;</tt> (<tt>&lt;f&gt;</tt> and <tt>&lt;g&gt;</tt> are discarded). Result: <a href="text-utf8.txt">text</a>.</p>
HTML
sh text.sh "projectID${projectid}/rejoin-utf8.txt" \
  "projectID${projectid}/text-utf8.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Re-wrap</h3>
<p>Re-wrap text to 72 columns honouring /*, /$ and /p no-wrap blocks. Result: <a href="rewrap-utf8.txt">text</a>.</p>
HTML
sh rewrap.sh "projectID${projectid}/text-utf8.txt" \
  "projectID${projectid}/rewrap-utf8.txt"

cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Discard block markers</h3>
<p>Discard /*, /$, /p, and /# block markers. Result: <a href="undiv-utf8.txt">text</a>.</p>
HTML
sh undiv.sh "projectID${projectid}/rewrap-utf8.txt" \
  "projectID${projectid}/undiv-utf8.txt"
# Spell-check stage: run spell.sh on the UTF-8 plain text, then publish
# misspellings.utf8 (presumably written by spell.sh into the working
# directory) alongside the report.
echo "Spell check..."
cat >> "projectID${projectid}/index.html" <<'HTML'
<h3>Spell check</h3>
<p>Run a spell check honouring the project's good words list. Result: <a href="misspellings.utf8">list of possible misspellings</a>.</p>
HTML
sh spell.sh "projectID${projectid}/undiv-utf8.txt"
cp misspellings.utf8 "projectID${projectid}"
# Close the HTML report, then publish from inside the output directory.
# lftp runs the command file publish.lftp, which lives one level up (next to
# this script); the subshell keeps the cd from leaking into the caller.
cat >> "projectID${projectid}/index.html" <<'HTML'
</body>
</html>
HTML
(cd "projectID${projectid}" && lftp -f ../publish.lftp)
