Web Site Publishing Submit Scripts

Submit

The “submit” script is used to “submit” my pages to the various web sites I maintain. It performs various checks on each page and then sends the page on its way.

Here’s the “submit” script:

#!/bin/sh
#Use this script to submit documents from the "prepsite" to the
#    "livesite".  Other scripts will be called when you do this.
#    Also the information from livesite will be pushed to other
#    sites as well that are to be kept in synchronization


#Please do not modify below this line


#Load the shared settings: site paths, backup location and the EXIT_* codes
. /scripts/wwwmanage/wwwmanage.conf




#Verify User
#Only root may run this for now; anybody else is turned away with the
#   EXIT_NOT_ROOT status that the configuration file defines.
case "$(whoami)" in
	root)
		;;
	*)
		echo "You must be root to run this command"
		exit $EXIT_NOT_ROOT
		;;
esac



#Parse the command line
#   --force or -f : skip the HTML tag checks
#   anything else : taken as the page to submit (the last one given wins)
ARG_FORCE=""

#With no arguments at all, FILENAME is explicitly set to the (empty) $1
[ $# -gt 0 ] || FILENAME=$1

until [ $# -eq 0 ]; do
	case $1 in
		-f|--force) ARG_FORCE=1 ;;
		*)          FILENAME=$1 ;;
	esac
	shift
done




#Unless --force was given, the page has to pass both HTML checkers before
#    it is published.  The first checker that fails aborts the submit
#    with that checker's own exit status.
if [ "$ARG_FORCE" != 1 ]; then

	#Are all the required tags present?
	/scripts/wwwmanage/checkreqtags.sh ${PREPSITE}$FILENAME
	EXIT=$?
	[ $EXIT -eq 0 ] || {
		echo Check for Required TAGS failed!
		exit $EXIT
	}

	#Do all tags open and close in proper order?
	/scripts/wwwmanage/openclosetags.sh ${PREPSITE}$FILENAME
	EXIT=$?
	[ $EXIT -eq 0 ] || {
		echo Check for Matching TAGS failed!
		exit $EXIT
	}
fi


#TAR_ADD - append a date-stamped copy of a live-site file to the backup tar
#    $1 = file name relative to LIVESITE
#    Reads LIVESITE and BACKUP, leaves the stamp it used in MODIFY.
#    The copy is stored in the archive as "<name>-<YYYYMonDD-HHMMSS>" and
#    the temporary copy is removed from LIVESITE afterwards.
TAR_ADD() {
	#"owner:group" is the documented separator; "owner.group" is an
	#    obsolescent spelling that is ambiguous for names containing dots.
	chown root:root "${LIVESITE}$1"
	chmod 0644 "${LIVESITE}$1"
	#Current GNU find prints %TS with a fractional part (SS.fffffffff);
	#    strip it so the stamp stays in the plain HHMMSS form.
	MODIFY=`find "${LIVESITE}$1" -printf "%TY%Tb%Td-%TH%TM%TS" | sed -e 's/\.[0-9]*$//'`
	#Everything is quoted so that names containing spaces survive
	cp -rp "${LIVESITE}$1" "${LIVESITE}$1-$MODIFY"
	(cd "$LIVESITE" && tar -rvf "$BACKUP" "$1-$MODIFY")
	rm "${LIVESITE}$1-$MODIFY"
}


#SITE_UPDATE - keep the "Site Updated <date><BR>" line of the live
#    index.html in step with the page being submitted.
#    Reads:  LIVESITE, PREPSITE, FILENAME, MODIFY_EN
#    Writes: VAR_TMP, CMP_DATE, TMP_MONTH, TMP_DAY, CMP_DATE_INDEX,
#            CMP_DATE_FILE, INDEX_MODIFY
#    Exits the whole script with status 30 when the month on the
#    "Site Updated" line is not recognised.
SITE_UPDATE() {

	#    It is possible we are only updating the format of the doc
	#    and are not changing the content and therefore I would probably
	#    want to keep the original modification date so that it is
	#    reflective of the last date the content changed, rather
	#    than just formatting changes.
	#
	#    We must compare between the current "Site Update" line in
	#    index.html and the date on the file being submitted.
	#
	#    We then use the date contained in the variable
	#    Modify_En to place on the index.html page.

	#Grab Date from "Site Update" line in index.html
	#    ("Site Updated June 19, 2001<BR>" becomes "June 19 2001")
	VAR_TMP=`grep "Site Updated" "${LIVESITE}index.html" | sed \
		-e 's/Site Updated //' -e 's/,//' -e 's/<BR>//'`

	#Do Year
	CMP_DATE=`echo $VAR_TMP | awk '{print $3}'`
	#There seems to be a trailing Carriage Return that is causing grief
	#    Remove the offending CR!
	CMP_DATE=`echo $CMP_DATE | awk '{sub ("\r$", ""); print}'`

	#Do Month
	TMP_MONTH=`echo $VAR_TMP | awk '{print $1}'`
	case $TMP_MONTH in
	  January) CMP_DATE=${CMP_DATE}01;;
	 February) CMP_DATE=${CMP_DATE}02;;
	    March) CMP_DATE=${CMP_DATE}03;;
	    April) CMP_DATE=${CMP_DATE}04;;
	      May) CMP_DATE=${CMP_DATE}05;;
	     June) CMP_DATE=${CMP_DATE}06;;
	     July) CMP_DATE=${CMP_DATE}07;;
	   August) CMP_DATE=${CMP_DATE}08;;
	September) CMP_DATE=${CMP_DATE}09;;
	  October) CMP_DATE=${CMP_DATE}10;;
	 November) CMP_DATE=${CMP_DATE}11;;
	 December) CMP_DATE=${CMP_DATE}12;;
	*) echo "Unknown Error in Case statement!"; exit 30;;
	#This is from a mangled "Site Updated" line in index.html
	#    We should put some sort of method here to repair that line.
	#    The line does get fixed; however, when we eventually push
	#    index.html
	esac

	#Do Day
	#    A hand-edited line such as "June 5, 2001" carries a one-digit
	#    day.  Pad it, otherwise the number built here has seven digits
	#    and always compares as older than the eight-digit file date.
	TMP_DAY=`echo $VAR_TMP | awk '{print $2}'`
	case $TMP_DAY in
	[0-9]) TMP_DAY=0$TMP_DAY;;
	esac
	CMP_DATE_INDEX=${CMP_DATE}${TMP_DAY}

	#Get Date of File we are updating
	#    Note: It is probably much faster to do this "find -printf"
	#    command rather than to translate the variable Modify_En
	#    to this format so we can compare.
	CMP_DATE_FILE=`find "${PREPSITE}$FILENAME" -printf "%TY%Tm%Td"`

	#echo Date in index.html is $CMP_DATE_INDEX
	#echo Date of file being updated is $CMP_DATE_FILE

	if [ "$CMP_DATE_INDEX" -gt "$CMP_DATE_FILE" ]; then
		#Date in header of index.html is newer than the file being
		#   submitted, do nothing
		:
	else
		#Date of file being pushed is newer than that in the header
		#   of index.html, update the "Site Updated" line in index.html

		#Save modification date of index.html in "touch -t" form
		#   (YYYYMMDDhhmm.SS).  Current GNU find prints %TS as
		#   SS.fffffffff, which touch rejects, so drop the fraction.
		INDEX_MODIFY=`find "${LIVESITE}index.html" -printf "%TY%Tm%Td%TH%TM.%TS" |
			sed -e 's/\(\.[0-9][0-9]\)\..*$/\1/'`

		#Update "Site Updated" line in index.html
		sed -e "s/Site Updated.*<BR>/Site Updated ${MODIFY_EN}<BR>/" \
			"${LIVESITE}index.html" >"${LIVESITE}index.tmp.html"
		mv "${LIVESITE}index.tmp.html" "${LIVESITE}index.html"

		#Restore modification date of index.html
		touch -t "$INDEX_MODIFY" "${LIVESITE}index.html"
	fi
}






#Check to see if backup file exists, or to see
#    if we can at least create a backup in the
#    directory mentioned.
#    Either an existing backup file, or an existing directory for it to
#    be created in, is good enough to carry on.
if [ ! -f "$BACKUP" ] && [ ! -d "`dirname "$BACKUP"`" ]; then
	echo FAILURE:Backup dir does not exist
	#Command names are case-sensitive: the upper-case "EXIT" that used
	#    to be here is not a command, so the script kept on running.
	exit $EXIT_BACKUP
fi


#Classify what we were asked to submit: only a regular file may proceed
[ -f ${PREPSITE}$FILENAME ] || {
	if [ -d ${PREPSITE}$FILENAME ]; then
		#Directories are recognised but not supported yet
		echo ${PREPSITE}$FILENAME is a directory
		echo Directories are not currently handled
		exit $EXIT_DIR
	fi
	#Neither a regular file nor a directory,
	#   boy are we in trouble now.
	echo ${PREPSITE}$FILENAME is of Unknown File Type
	exit $EXIT_UNK_FIL_TYP
}



#Check to see if the target directory exists on the live site, if not
#    then create it.
#    The test has to look under LIVESITE: testing the bare relative name
#    looked in whatever directory the script was started from, so the
#    mkdir could be skipped even though the live directory was missing.
DIRNAME=`dirname "$FILENAME"`
if [ ! -d "${LIVESITE}$DIRNAME" ]; then
	mkdir -p "${LIVESITE}$DIRNAME"
#work needed here to create directories on ftp sites we upload to.
#maybe set a flag that we look at later.
#    I probably need to have the ftp expect script generated from this
#    script, rather than being an existing script file.  That way I can more
#    easily control how many files I push in one shot, and if any
#    directories are to be created on the remote site.
fi



#
#Copy file from prepsite to livesite, replacing the placeholder date
#    "June 19, 2001" with the file's own modification date
#    (for example "June 19, 2001")
MODIFY_EN=`find "${PREPSITE}$FILENAME" -printf "%TB %Td, %TY"`
sed -e "s/June 19, 2001/$MODIFY_EN/" "${PREPSITE}$FILENAME" >"${LIVESITE}${FILENAME}"
#Reset file modification date to that of PrepSite
#    "touch -t" wants YYYYMMDDhhmm.SS.  Current GNU find prints %TS as
#    SS.fffffffff, which touch rejects, so drop the fraction.
FILE_MODIFY=`find "${PREPSITE}$FILENAME" -printf "%TY%Tm%Td%TH%TM.%TS" |
	sed -e 's/\(\.[0-9][0-9]\)\..*$/\1/'`
touch -t "$FILE_MODIFY" "${LIVESITE}${FILENAME}"

#Bring the "Site Updated" line in index.html up to date
SITE_UPDATE

#Back up the page just copied (date appended to its name)
TAR_ADD $FILENAME

#Rebuild datesort.html inside the live site, then back that up as well
(cd $LIVESITE && /scripts/wwwmanage/datesort.sh)
TAR_ADD datesort.html


#Push the same three files (the page, datesort.html and index.html),
#    each as a "local-path remote-name" pair, first to riblack6 ...
/scripts/wwwmanage/riblack6.ftp.expect ${LIVESITE}$FILENAME $FILENAME \
	${LIVESITE}datesort.html datesort.html ${LIVESITE}index.html index.html


#... and then to www.geocities.com/rlcomp_1999
/scripts/wwwmanage/geo-rlcomp.ftp.expect ${LIVESITE}$FILENAME $FILENAME \
	${LIVESITE}datesort.html datesort.html ${LIVESITE}index.html index.html

#Does this page also belong on linux.cca.cpqcorp.net?
#    Only the first matching line of linux.cca.files is looked at; its
#    two columns are the source name and the remote destination.
grep $FILENAME </scripts/wwwmanage/linux.cca.files | {
	read SOURCE DESTINATION
	[ -z "$DESTINATION" ] ||
		/scripts/wwwmanage/linux.cca.ftp.expect ${LIVESITE}$SOURCE $DESTINATION
}

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Datesort.sh Shell Script

datesort.sh is used for the main controlling and packaging of the do_datesort.sh script.

#!/bin/sh
#datesort.sh - wrap the listing produced by do_datesort.sh in a complete
#    html page called datesort.html (written to the current directory)
FILENAME=datesort.html
TMPFILE=/tmp/datesort.tmp

#Page header; the listing itself sits inside a <PRE> block
cat >$FILENAME <<EOF
<HTML>
<BODY>
<CENTER><H1>Web Pages - Sorted By Date</H1></CENTER>
<HR>
<PRE>
EOF

#The fresh listing first, followed by the saved older entries
{
	/scripts/wwwmanage/do_datesort.sh
	cat /scripts/wwwmanage/datesort.old
} >>$FILENAME

#Page footer carrying the time the page was generated
cat >>$FILENAME <<EOF
</PRE>
<HR>
<CENTER>This file generated at <BR>$(date +"%H:%M:%S on %b %d, %Y")</CENTER>

</BODY>
</HTML>
EOF

#Drop every line that mentions datesort.html itself
grep -v $FILENAME <$FILENAME >$TMPFILE
mv $TMPFILE $FILENAME

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Do_datesort.sh Shell Script

The do_datesort.sh script does the bulk of the work of generating the datesort.html document.

#!/bin/sh
#do_datesort.sh - print one html line per file found under the current
#    directory, newest first: hyperlinked name, modification time, size,
#    title and description, each padded to a fixed column width.
#    Output goes to stdout.
#
#Column widths (in characters) and the gaps that follow a column
NAME_FIELD=48
TIME_PAD=2
SIZE_FIELD=12
TITLE_PAD=2
TITLE_FIELD=80
DESCR_PAD=2
DESCR_FIELD=160

#Scratch file holding the sorted "time name" list
TEMPFILE=/tmp/sort.tmp

#Please don't modify below this line

#The following find command will generate
#    a tmpfile with last modification time (%T@) and name.
#    We do not care about the time format at this point.
#    Sort - this will be sorted in order of date and time,
#    newest first because of "sort -r".
find . -type f -printf "%T@ %p\n" | sort -r >$TEMPFILE

#Now that we have the web pages sorted, lets skip
#    over the first column (time) and pick out the
#    2nd column which are the web page names.
#    Then we will process each web page filename in order
#    of date (because we are already sorted)
#    The loop below walks the file word by word, so COUNTER simply
#    alternates: 0 = this word is a time (skip), 1 = this word is a name.
COUNTER=0
for X in `cat $TEMPFILE`; do
	if [ $COUNTER -eq 1 ]; then
		#
		#Grab name
		#    Left justify, pad with blanks to the right
		#    until end of NAME_FIELD is reached.
		#Also Add in Hyperlink for name
		#
		NAME=`find $X -printf "%p"`
		echo -n '<A HREF='
		echo -n $NAME
		echo -n '>'
		echo -n $NAME
		echo -n '</A>'
		#NAME grows by one "." for every space printed, so its length
		#    doubles as the column counter; the dots are never printed.
		while [ ${#NAME} -lt $NAME_FIELD ]; do
			NAME="${NAME}."
			echo -ne '\040'
		done
		#
		#Grab time
		#Time is a fixed field, pad some spaces at the end
		#
		TIME=`find $X -printf "%TY %Tb %Td - %TH:%TM:%TS\n"`
		echo -n $TIME
		TIME_COUNT=$TIME_PAD
		while [ $TIME_COUNT -ne 0 ]; do
			echo -ne '\040'
			TIME_COUNT=`expr $TIME_COUNT - 1`
		done
		#
		#Grab size
		#Size should be right justified, pad with
		#    spaces on the left.
		#
		SIZE=`find $X -printf "%s"`
		#SIZE_C is a scratch copy used only to count the padding
		SIZE_C=$SIZE
		while [ ${#SIZE_C} -lt $SIZE_FIELD ]; do
			SIZE_C=".${SIZE_C}"
			echo -ne '\040'
		done
		echo -n $SIZE




		#Lets add on the title of the page
		#First lets space the column over
		TITLE_COUNT=$TITLE_PAD
		while [ $TITLE_COUNT -ne 0 ]; do
			echo -ne '\040'
			TITLE_COUNT=`expr $TITLE_COUNT - 1`
		done

		#display title
		#    Take the line(s) containing <TITLE>, drop the opening and
		#    closing tags and any trailing carriage return.
		TITLE=`cat $X | grep \<TITLE\> | sed -e \
's,<TITLE>,,' | sed -e \
's,</TITLE>,,'|awk '{sub("\r$","");print}'`
		echo -n $TITLE

		#Have title field pad out with spaces
		while [ ${#TITLE} -lt $TITLE_FIELD ]; do
			TITLE="${TITLE}."
			echo -ne '\040'
		done

#A literal "." is printed after the title column
echo -n "."





		#Lets display the description as well

		#space over just a little
		DESCR_COUNT=$DESCR_PAD
		while [ $DESCR_COUNT -ne 0 ]; do
			echo -ne '\040'
			DESCR_COUNT=`expr $DESCR_COUNT - 1`
		done

		#display description
		#    Take the <META name="description" line, keep only what
		#    follows content=" and drop the closing "> and any CR.
		DESCRIPTION=`cat $X | grep '<META name="description"' | sed -e \
's,<META name="description" content=",,' | sed -e \
's/">//' | awk '{sub("\r$","");print}'`
		echo -n $DESCRIPTION

		#Pad to field size with spaces
		while [ ${#DESCRIPTION} -lt $DESCR_FIELD ]; do
			DESCRIPTION="${DESCRIPTION}."
			echo -ne '\040'
		done

#A literal "." is printed after the description column
echo -n "."

		#Last item on this line, give <BR> for line break
		echo '<BR>'


		#Now reset counter so we can skip
		#    the nextdate and time column.
		COUNTER=0

	else

		#This must be the first column which holds the time
		#    Now that we are sorted by time, we do not
		#    really care about the time field, so just
		#    skip over it.  Do nothing and increment counter.
		COUNTER=`expr $COUNTER + 1`
	fi
done

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Checkreqtags.sh Shell Script

The checkreqtags.sh script checks the html page for a list of tags that I require to be present. The tags are listed one at a time in a separate configuration file.

#!/bin/sh
#This script ("checkreqtags.sh") checks a document for the specific
#    html tags that must be present.  An error condition is raised if
#    any required tag is missing.  Zero is returned if the doc.
#    contains all of them.
#    $1 = the html file to check
#    The required tags are read, one per line, from
#    /scripts/wwwmanage/tags.conf


FILENAME=$1

#Verify HTML format
EXIT=
#"IFS= read -r" keeps each tag line exactly as written (no trimming, no
#    backslash processing); naming the variable is required by plain sh.
cat /scripts/wwwmanage/tags.conf | while IFS= read -r REPLY; do

	echo "Checking for tag $REPLY"

	#-F: the tag is literal text, not a regular expression
	if ! grep -F -q -e "$REPLY" "$FILENAME"; then
		echo Sorry, not enough tags present
		echo "Failed on: $REPLY"
		exit 1
	fi

done

#The loop is the last stage of the pipeline, so its "exit 1" shows up
#    here as the status of the pipeline.
EXIT=$?
if [ $EXIT -ne 0 ]; then
	echo Error, exiting, Error Code $EXIT
	exit $EXIT
fi

echo "All Necessary Tags are Present"

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Tags.conf Configuration File

This is the tags configuration file for the above checkreqtags.sh shell script. As I need more tags I can just add them to this file, one tag per line.

<HTML>
</HTML>
<TITLE>
</TITLE>
<HEAD>
</HEAD>
<META name="keywords" content="
<META name="description" content="
<BODY BGCOLOR="#FFFFFF" BACKGROUND="">
</BODY>

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Openclosetags.sh Shell Script

Check the html file for proper order on opening and closing tags. The tag to be closed should always be the last one that was opened.

#!/bin/sh

#The html file to scan is the first (and only) argument
FILENAME=$1

#Nothing can be scanned unless that names an existing regular file
if [ ! -f $FILENAME ]
then
	printf 'No File present,\nunable to continue\n'
	exit 1
fi

#gettag - read the next tag from FILENAME, starting at byte offset SEEK.
#    The file is examined one byte at a time with xxd; HEX holds the
#    second column of xxd's output, which the comparisons below treat as
#    the byte's hex value (3c, 3e, 20), and becomes empty once nothing
#    more can be read.
#    On return CURR_TAG holds the tag text: everything from "<" up to and
#    including either the closing ">" or the first space.  SEEK is left
#    on the last byte examined.
#    Returns 0 with EOFSTATUS=1 when the file ends before another "<" is
#    found, and 1 (also EOFSTATUS=1) when it ends in the middle of a tag.
gettag() {
#Find start of tag (starts with less than sign)
while [ -n "$HEX" ]; do
	HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
	if [ "$HEX" = 3c ]; then
		#3c is less than sign
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG=$CHAR
		break
	fi
	SEEK=`expr $SEEK + 1`
done
#check exit status of above loop - EOF reached?
if [ -z "$HEX" ]; then
	EOFSTATUS=1
	return 0
fi







#Get rest of tag up to space bar (important part of tag)
#  or up to greater than sign
SEEK=`expr $SEEK + 1`
while [ -n "$HEX" ]; do
	HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
	if [ "$HEX" = 20 ]; then
		#20 is space bar
		CURR_TAG="${CURR_TAG} "

		#Special Case - <A NAME="
		#    For an anchor, peek at the six bytes after the space; when
		#    they read NAME=" or HREF=" they are appended to the tag and
		#    SEEK is moved past them, so that the two kinds of anchor
		#    can be told apart later.
		SEEK_CHECK=$SEEK
		TESTCASE='<A '
		if [ "$CURR_TAG" = "$TESTCASE" ]; then
			SEEK_CHECK=`expr $SEEK_CHECK + 1`
			SP_CASE_CHECK=`xxd -l 6 -s $SEEK_CHECK ${FILENAME} | awk '{print $5}'`
			TESTCASE='NAME="'
			if [ "$SP_CASE_CHECK" = "$TESTCASE" ]; then
				SEEK=`expr $SEEK_CHECK + 5`
				CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
			fi
			TESTCASE='HREF="'
			if [ "$SP_CASE_CHECK" = "$TESTCASE" ]; then
				SEEK=`expr $SEEK_CHECK + 5`
				CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
			fi
		fi

		#Because the we have hit a space, lets discard the rest
		#    of the tag until we get to the greater than sign
		#Eat up the rest of the input until the greater than sign
		#    is reached (end of tag)
		SEEK=`expr $SEEK + 1`
		while [ -n "$HEX" ]; do
			HEX=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $2}'`
			if [ "$HEX" = 3e ]; then
				#3e is greater than sign
				break
			fi
			SEEK=`expr $SEEK + 1`
		done

		break
	elif [ "$HEX" = 3e ]; then
		#3e is greater than sign
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG="${CURR_TAG}$CHAR"
		break
	elif [ -n "$HEX" ]; then
		#Any other byte is part of the tag name, keep collecting
		CHAR=`xxd -l 1 -s $SEEK ${FILENAME} | awk '{print $3}'`
		CURR_TAG="${CURR_TAG}$CHAR"
	fi
	SEEK=`expr $SEEK + 1`
done
#check exit status of above loop - EOF reached?
if [ -z "$HEX" ]; then
	EOFSTATUS=1
	echo EOF reached before end of tag
	return 1
fi

}
#checktag - decide what should happen to a tag and record the answer in
#    TAGSTATUS:
#        IGNORE  tags that never get a closing tag (or are not tracked)
#        PULL    a closing tag: its opener must come off the stack
#        PUSH    anything else: an opening tag to remember
#    All arguments together are taken as the tag text (TAG_CHECKTAG).
#    Always returns 0.
checktag() {
TAG_CHECKTAG="$*"

case "$TAG_CHECKTAG" in
    '<A HREF="')
		#A link is remembered as a plain "<A " so that </A> can match it
		CURR_TAG="<A "
		TAGSTATUS=PUSH
	;;
    '<BR>' | '<HR>' | '<A NAME="' | '<IMG ' | \<\!* | \<LINK* | \<META*)
		#Line breaks, rules, named anchors, images, comments and other
		#    "<!" constructs, LINK and META
		TAGSTATUS=IGNORE
	;;
    \</*)
		TAGSTATUS=PULL
	;;
    *)
		TAGSTATUS=PUSH
	;;
esac
return 0
}

#dotag - apply one tag to TAGSTACK, the string of currently open tags.
#    $1 = the tag as returned by gettag
#    checktag classifies the tag first; then
#        IGNORE  nothing happens
#        PUSH    the tag is appended to the end of TAGSTACK
#        PULL    the matching opening tag must be the last thing on
#                TAGSTACK and is removed from it
#    Returns 0 on success, 1 when a closing tag does not match the end of
#    the stack (or TAGSTATUS holds something unexpected).
dotag() {
TAGSTATUS=
checktag "$1"
case "$TAGSTATUS" in
    IGNORE)     return 0
		;;
    PUSH)       if [ "$1" = '<A HREF="' ]; then
			#Links go on the stack as a plain "<A "
			TAGSTACK="${TAGSTACK}<A "
		else
			TAGSTACK="${TAGSTACK}$1"
		fi
		return 0
		;;
    PULL)

#Remember the stack as it was, so that we can tell whether anything
#    was actually removed.  "</X>" is turned into "<X>" to search for.
TAGCOMPARE="$TAGSTACK"
TAGKILL="`echo $1 | sed -e 's,</,<,'`"

#Try to remove the modified close tag from tagstack:
TAGSTACK=`echo "$TAGSTACK" | sed -e "s,${TAGKILL}$,,"`

if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then
	#If the above removal didn't work, then lets try one more time
	#    to remove TAGKill.
	#This time, lets remove the trailing greater than sign
	#    from tagkill.
	#This is the case for a tag that was opened with attributes and
	#    so sits on the stack as "<X " rather than "<X>".  Note that
	#    TAGSTACK is echoed unquoted here: the shell's word splitting
	#    drops the trailing space, which is what lets "<X" match at
	#    the end of the line.

	TAGKILL="`echo $TAGKILL | sed -e 's,>$,,'`"
	TAGSTACK="`echo $TAGSTACK | sed -e "s,${TAGKILL}$,,"`"
fi

if [ "$TAGCOMPARE" = "$TAGSTACK" ]; then

	#Removal of tag failed on both attempts.
	#    Tags are out of order
	echo Tagstack is as follows:
	echo "$TAGSTACK"
	echo Failed while attempting to remove "$1"
	echo   from the stack.
	echo Seek count is $SEEK bytes
	return 1
fi
		return 0
		;;
    *)          echo Unknown error in dotag
		echo TAGSTATUS is "$TAGSTATUS"
		return 1
		;;
esac
}


#Starting state: scan from byte 0 with an empty stack of open tags.
#    HEX only has to be non-empty for the loops to start.
SEEK=0
TAGS=
EOFSTATUS=0
CURR_TAG=
HEX=0
TAGSTACK=

#Go from beginning of file to end of file
while [ -n "$HEX" ]; do
	gettag
	if [ -z "$HEX" ]; then
		break
	fi
	echo "$CURR_TAG"
	dotag "$CURR_TAG"
	EXIT=$?
	if [ $EXIT -gt 0 ]; then
		echo Failure, exiting
		#This is top-level code, not a function: "return" is an
		#    error here and the loop would simply carry on with the
		#    next tag.  Use exit so that the failure really stops
		#    the script and reaches the caller.
		exit $EXIT
	fi
#Debug
echo Tagstack is "$TAGSTACK"

done
#check exit status of above loop - EOF reached?
if [ "$EOFSTATUS" -eq 1 ]; then
	echo End of File Reached
fi

#Check if tagstack is empty
if [ -n "$TAGSTACK" ]; then
	echo FAILURE: Failed to remove all items from tagstack
	echo Tags do not match.
	echo Tagstack contents are "$TAGSTACK"
	exit 1
fi
echo Successfully cleared the tagstack, all tags match.
echo Tagstack contents are "$TAGSTACK"
exit 0

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Wwwmanage.conf Configuration File

#Global Variables
#    This file is read with "." by the submit script, so it must stay
#    valid shell: plain NAME=value lines, no spaces around the "=".

#Livesite is a local version of the live web site.  This livesite
#    can be running a web server, or not, it doesn't matter.
#    Files will be copied to livesite, then other scripts will
#    be run to push files from here to other sites you may wish to
#    keep synchronized.  Also other scripts will be run after files
#    are copied here, such as datesort.
#    Keep the trailing slash: the submit script builds paths by writing
#    the file name straight after this value.
LIVESITE=/home/httpd/www-geocities-rlcomp_1999/


#Prepsite is your preparation site that is not accessible by others.
#    All your work is done in prepsite, once your page is ready
#    "submit" it to the "livesite" by using this "submit" script.
#    Keep the trailing slash here as well, for the same reason.
PREPSITE=/home/ftp/pub/techcd/pub/www_prep_site/www-geocities-rlcomp_1999-prep/

#backup is where you will save backups to.  Files will be added
#    with their modification date as part of their filename
#    files will keep appending to the end of that backup.
#    This is the tar file itself, not a directory.
BACKUP=/home/ftp/pub/techcd/pub/www_prep_site/www-geo-rlcomp_1999.tar

#DateSort Variables
#    NOTE(review): the do_datesort.sh shown on this page sets its own
#    NAME_FIELD, TIME_PAD, SIZE_FIELD and TEMPFILE and does not read
#    this file; TIME_FIELD here is called TIME_PAD there - confirm
#    whether these four are still used anywhere.
NAME_FIELD=48
TIME_FIELD=2
SIZE_FIELD=12
TEMPFILE=/tmp/sort.tmp


#Home directory for wwwmanage
WWWMANAGE=/scripts/wwwmanage


#Exit Error Codes
#    Exit statuses used by the submit script, one per failure cause
EXIT_NO_TAGS=7
EXIT_NOT_ROOT=6
#File Type is unknown; not a regular file, nor a directory
EXIT_UNK_FIL_TYP=5
EXIT_DIR=4
EXIT_BACKUP=3

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Ftp Expect Scripts

#!/usr/bin/expect -f
#
# This Expect script was generated by autoexpect on Wed Jun  6 18:10:55 2001
# Expect and autoexpect were both written by Don Libes, NIST.
#
# Note that autoexpect does not guarantee a working script.  It
# necessarily has to guess about certain things.  Two reasons a script
# might fail are:
#
# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet,
# etc.) and devices discard or ignore keystrokes that arrive "too
# quickly" after prompts.  If you find your new script hanging up at
# one spot, try adding a short sleep just before the previous send.
# Setting "force_conservative" to 1 (see below) makes Expect do this
# automatically - pausing briefly before sending each character.  This
# pacifies every program I know of.  The -c flag makes the script do
# this in the first place.  The -C flag allows you to define a
# character to toggle this mode off and on.

set force_conservative 0  ;# set to 1 to force conservative mode even if
			  ;# script wasn't run conservatively originally
# When the switch above is on, "send" is replaced by a wrapper that
# sleeps for 0.1 second and then passes its text to exp_send with the
# -s flag, with send_slow set to {1 .1}.
if {$force_conservative} {
	set send_slow {1 .1}
	proc send {ignore arg} {
		sleep .1
		exp_send -s -- $arg
	}
}

#
# 2) differing output - Some programs produce different output each time
# they run.  The "date" command is an obvious example.  Another is
# ftp, if it produces throughput statistics at the end of a file
# transfer.  If this causes a problem, delete these patterns or replace
# them with wildcards.  An alternative is to use the -p flag (for
# "prompt") which makes Expect only look for the last line of output
# (i.e., the prompt).  The -P flag allows you to define a character to
# toggle this mode off and on.
#
# Read the man page for more info.
#
# -Don


# The recorded ftp session: log in, switch to binary mode with hash
# marks, upload three files, then quit.
# The script arguments are read in pairs: arguments 0, 2 and 4 are used
# as the first word of each "put" (the local file) and arguments 1, 3
# and 5 as the second word (the remote name).  Exactly three pairs are
# sent.
# NOTE(review): timeout -1 presumably means "wait indefinitely" for each
# expect - confirm against the Expect man page.
set timeout -1
spawn ftp riblack6.americas.cpqcorp.net
match_max 100000
expect "Name (riblack6.americas.cpqcorp.net:*): "
send -- "geoweb\r"
expect "Password:"
# The password is stored in this file in plain text (masked here)
send -- "xxxxxxxxxxxx\r"
expect "ftp> "
send -- "bin\r"
expect "ftp> "
send -- "hash\r"
expect "ftp> "
send -- "put [lrange $argv 0 0] [lrange $argv 1 1]\r"
expect "ftp> "
send -- "put [lrange $argv 2 2] [lrange $argv 3 3]\r"
expect "ftp> "
send -- "put [lrange $argv 4 4] [lrange $argv 5 5]\r"
expect "ftp> "
send -- "quit\r"
expect "221  \r"

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Ftp Expect Configuration File

I have one server that doesn’t get all the files that I generate. For this one I have created this configuration file so that each time I “submit” a file, it is checked against this file. If it matches something in the first column, it will then be pushed to this special server using the 2nd column as the remote filename.

#filename-exactly-as-we-push-it destination-on-build-server
procedures/kickstart-rh70.html /home/httpd/html/linux/riblack/kickstart-rh70.html

SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES

Notes

I misuse variables and variable scope in these scripts. I also hardcode a lot of values that should really be variables. These errors may or may not be cleaned up in time; for now it works – and to quote the old saying, “If it isn’t broke, don’t fix it!”.

The openclose tags script is very slow (it seems to do about 1 tag per second). I do plan on migrating this one to C or C++. I feel that C or C++ could scan a whole html file in well under 10 seconds where it is taking more than a minute or two with the bash script.

I am currently – December 19, 2002 – not using any of the above scripts. I have converted over to a new set of scripts as shown below. Also I’m not currently checking for tag order using my own scripts — every once in a while I will download all my pages and use “weblint” to check the tags on my pages.

Here are my current scripts:

re-index.sh
site_index.sh
apply_template.sh
TEMPLATE_GUIDEBAR
TEMPLATE_HEAD
TEMPLATE_TAIL
savedates.sh
restoredates.sh

re-index.sh runs as a cron job once each night. Here are its contents:

site_index.sh
apply_template.sh

site_index.sh now takes the place of datesort.sh and indexer.sh (my two previous methods of creating a site map). Here’s the contents:

# Everything below works on file names relative to the published site.
# Stop here if that directory cannot be entered: otherwise the sitemap
# files would be created, and the .tmp files removed, in whatever
# directory the script happened to be started from.
cd ~/public_html || exit 1
# One sitemap page pair (ascending and "-r" descending) is built per key
SORT_KEYS="sitemap-filename sitemap-date sitemap-size sitemap-title sitemap-description"
# Every html page in the site except the generated sitemap pages
FILES_TO_PROCESS="`ls *.html | grep -v 'sitemap-.*\.html'`"
#FILENAME_FIELD_LENGTH=48
#DATE_FIELD_LENGTH=22
#DATE_FIELD_LENGTH=180
#SIZE_FIELD_LENGTH=12
#TITLE_FIELD_LENGTH=80
#TITLE_FIELD_LENGTH=280
#DESCR_FIELD_LENGTH=180
#DESCR_FIELD_LENGTH=360
generate_sort_keys()
{
	# Write one "<key> <filename>" line per page of FILES_TO_PROCESS into
	# sitemap-<kind>.tmp, for each of the five kinds of sort key.

	# filename: the name is its own key
	for X in $FILES_TO_PROCESS; do
		echo "$X $X"
	done > sitemap-filename.tmp

	# date: modification time in seconds since the epoch
	for X in $FILES_TO_PROCESS; do
		find $X -printf "%T@ %f\n"
	done > sitemap-date.tmp

	# size: file size in bytes
	for X in $FILES_TO_PROCESS; do
		find $X -printf "%s %f\n"
	done > sitemap-size.tmp

	# title: first word of the first <TITLE> line, "zzz" when there is none
	for X in $FILES_TO_PROCESS; do
		LINE="$(cat $X \
			| grep '<TITLE>' \
			| sed -e 's,^.*<TITLE> *,,' -e 's, .*$,,' \
			| head -1 \
			| tr '\n' ' ')"
		if [ -n "$LINE" ]; then
			echo -ne "$LINE"
		else
			echo -ne "zzz "
		fi
		echo $X
	done > sitemap-title.tmp

	# description: first word of the description content, "zzz" when none
	for X in $FILES_TO_PROCESS; do
		LINE="$(cat $X \
			| grep '" *description *"' \
			| grep content \
			| sed -e 's,^.*content *= *",,' -e 's, .*$,,' \
			| head -1 \
			| tr '\n' ' ')"
		if [ -n "$LINE" ]; then
			echo -ne "$LINE"
		else
			echo -ne "zzz "
		fi
		echo $X
	done > sitemap-description.tmp
}
print_left()
{
#Print one left-aligned table cell (no trailing newline)
#$1 = wrap mode: "nowrap" adds the NOWRAP attribute, anything else does not
#$2... = field value, the remaining arguments joined by single spaces
	case "$1" in
		nowrap) WRAP=NOWRAP ;;
		*)      WRAP="" ;;
	esac
	shift
	FIELD_VALUE="$@"
	echo -ne '<TD ALIGN="left" VALIGN="TOP" '"$WRAP"'>'
	echo -ne "$FIELD_VALUE"
	echo -ne '</TD>'
}
print_right()
{
#Print one right-aligned table cell (no trailing newline)
#$1 = wrap mode: "nowrap" adds the NOWRAP attribute, anything else does not
#$2... = field value, the remaining arguments joined by single spaces
	case "$1" in
		nowrap) WRAP=NOWRAP ;;
		*)      WRAP="" ;;
	esac
	shift
	FIELD_VALUE="$@"
	echo -ne '<TD ALIGN="right" VALIGN="TOP" '"$WRAP"'>'
	#The value is printed unquoted and without -e
	echo -n $FIELD_VALUE
	echo -ne '</TD>'
}
print_line_formatted()
{
# Print one table row built from the five RECORD_* variables, which the
# caller must have set: filename, date, size (right-aligned), title and
# description.  Only the description cell is allowed to wrap.
# The RECORD_* values are passed unquoted, so each is split into words
# and reaches print_left/print_right as several arguments.
echo -ne '<TR ALIGN="center">'
#print_left  $DESCR_FIELD_LENGTH $RECORD_DESCRIPTION
print_left nowrap $RECORD_FILENAME
print_left nowrap $RECORD_DATE
print_right nowrap $RECORD_SIZE
print_left nowrap $RECORD_TITLE
print_left  wrap $RECORD_DESCRIPTION
echo '</TR>'
}
print_header()
{
# Print the top of one sitemap page: the template comment block, the
# opening <TABLE> tag and the row of column headings.
# $1 = name of the page being generated, for example sitemap-date.html
#      or sitemap-date-r.html
# Each heading is a link that re-sorts the table by that column.  On the
# page that is already sorted by a column (ascending), that column's link
# leads to the reversed "-r" page; everywhere else it leads to the
# ascending page.
# Sets RECORD_FILENAME, RECORD_DATE, RECORD_SIZE, RECORD_TITLE,
# RECORD_DESCRIPTION and SORT_REF.
case "$1" in
	*sitemap-filename.html*)
		RECORD_FILENAME='<A HREF="https://cyanogenmods.org/sitemap-filename-r.html">Filename</A>' ;;
	*)
		RECORD_FILENAME='<A HREF="https://cyanogenmods.org/sitemap-filename.html">Filename</A>' ;;
esac
case "$1" in
	*sitemap-date.html*)
		RECORD_DATE='<A HREF="https://cyanogenmods.org/sitemap-date-r.html">Date</A>' ;;
	*)
		RECORD_DATE='<A HREF="https://cyanogenmods.org/sitemap-date.html">Date</A>' ;;
esac
case "$1" in
	*sitemap-size.html*)
		RECORD_SIZE='<A HREF="https://cyanogenmods.org/sitemap-size-r.html">Size</A>' ;;
	*)
		RECORD_SIZE='<A HREF="https://cyanogenmods.org/sitemap-size.html">Size</A>' ;;
esac
case "$1" in
	*sitemap-title.html*)
		RECORD_TITLE='<A HREF="https://cyanogenmods.org/sitemap-title-r.html">Title</A>' ;;
	*)
		RECORD_TITLE='<A HREF="https://cyanogenmods.org/sitemap-title.html">Title</A>' ;;
esac
case "$1" in
	*sitemap-description.html*)
		RECORD_DESCRIPTION='<A HREF="https://cyanogenmods.org/sitemap-description-r.html">Description</A>' ;;
	*)
		RECORD_DESCRIPTION='<A HREF="https://cyanogenmods.org/sitemap-description.html">Description</A>' ;;
esac

# Human-readable name of the sort column, used in the page description
# and title.  The match is on the key alone so that the "-r" pages are
# covered too: matching on "sitemap-title.html" left SORT_REF untouched
# for sitemap-title-r.html, which only worked because the value from the
# previous call was still lying around.
SORT_REF=""
case "$1" in
	*sitemap-filename*)    SORT_REF="Name" ;;
	*sitemap-date*)        SORT_REF="Date" ;;
	*sitemap-size*)        SORT_REF="Size" ;;
	*sitemap-title*)       SORT_REF="Title" ;;
	*sitemap-description*) SORT_REF="Description" ;;
esac

cat <<EOF
<!--
PLACE_CREATED_DATE_HERE = "December 16, 2002"
PLACE_DESCRIPTION_HERE = "CPQLINUX Site Map - sorted by $SORT_REF"
PLACE_TITLE_HERE = "cpqlinux Site Map - Sorted by $SORT_REF"
PLACE_NEXT_ITEM_HERE = "fixme"
PLACE_PREVIOUS_ITEM_HERE = "fixme"
-->
EOF
echo '<TABLE ALIGN="CENTER">'
print_line_formatted
}
print_record()
{
# Fill the five RECORD_* variables for one page and print its table row.
# $1 = file name of the page, relative to the current directory

# Hyperlinked file name
RECORD_FILENAME="$(
	echo -ne '<A HREF="https://cyanogenmods.org/'
	echo -ne "$1"
	echo -ne '">'
	echo -ne "$1"
	echo -ne '</A>'
)"

# Modification time and size, both straight from find
RECORD_DATE="$(find $1 -printf "%TY %Tb %Td - %TH:%TM:%TS\n")"
RECORD_SIZE="$(find $1 -printf "%s")"

# Text between <TITLE> and </TITLE> on the first line that has a <TITLE>
RECORD_TITLE="$(cat $1 \
	| grep '<TITLE>' \
	| sed -e 's,^.*<TITLE> *,,' -e 's,</TITLE>.*$,,' \
	| head -1 \
	| tr '\n' ' ')"

# Quoted content value of the first "description" line
RECORD_DESCRIPTION="$(cat $1 \
	| grep '" *description *"' \
	| grep content \
	| sed -e 's,^.*content *= *",,' -e 's,".*$,,' \
	| head -1 \
	| tr '\n' ' ')"

print_line_formatted
}
# Build the sort-key files, then one pair of pages per key
generate_sort_keys

for X in $SORT_KEYS; do
	# Start both pages with their header (this truncates old pages)
	print_header ${X}.html > ${X}.html
	print_header ${X}-r.html > ${X}-r.html
	# Append one row per page: ascending order for <key>.html,
	# descending order for <key>-r.html
	sort -g    ${X}.tmp | while read KEY FILE; do print_record $FILE; done >> ${X}.html
	sort -g -r ${X}.tmp | while read KEY FILE; do print_record $FILE; done >> ${X}-r.html
	rm ${X}.tmp
done
# Swap the two date pages, so that sitemap-date.html ends up holding the
# descending listing and sitemap-date-r.html the ascending one.  The sed
# commands rewrite the "Date" heading link in each page so that it still
# points at the other page after the swap.
cat sitemap-date.html | sed -e 's,sitemap-date-r.html">Date</A>,sitemap-date.html">Date</A>,' > sitemap-date.html.tmp
cat sitemap-date-r.html | sed -e 's,sitemap-date.html">Date</A>,sitemap-date-r.html">Date</A>,' > sitemap-date.html
mv sitemap-date.html.tmp sitemap-date-r.html
# The date page is also published under three other names
cp -a sitemap-date.html sitemap.html
cp -a sitemap-date.html datesort.html
cp -a sitemap-date.html indexer.html

Here’s the contents of apply_template.sh

# Where the template files live, where the site lives, and which
# templates have to exist before anything is touched
TEMPLATE_PATH=~/bin
WEBSITE_PATH=~/public_html
TEMPLATE_LIST="TEMPLATE_HEAD TEMPLATE_GUIDEBAR TEMPLATE_TAIL"

# Count the templates that are missing
FAILURE=0
for X in $TEMPLATE_LIST; do
	if [ ! -f $TEMPLATE_PATH/$X ]; then
		echo unable to find $TEMPLATE_PATH/$X
		FAILURE=`expr $FAILURE + 1`
	fi
done

# Several missing templates point at a wrong path, a single one at a
# wrong name in the list
if [ $FAILURE -gt 1 ]; then echo check TEMPLATE_PATH variable in $0; fi
if [ $FAILURE -eq 1 ]; then echo check TEMPLATE_LIST variable in $0; fi

# Leave with a failure status.  A bare "exit" returns the status of the
# last command run, which here is the successful test just before it,
# so the caller used to see status 0 (success).
if [ $FAILURE -gt 0 ]; then exit 1; fi

# template.hist marks the last run.  If this script or any template file
# is newer than the marker, a template may have changed, so _all_ the
# html files must be reprocessed: remove the marker to force that.

if [ -e $TEMPLATE_PATH/template.hist ]; then
	for X in $TEMPLATE_LIST $(basename $0); do
		TEMPLATES_CHANGED=$(find $TEMPLATE_PATH/$X -newer $TEMPLATE_PATH/template.hist)
		# Not newer than the marker: look at the next one
		[ -z "$TEMPLATES_CHANGED" ] && continue
		rm $TEMPLATE_PATH/template.hist
		break
	done
fi




# #
# # Let's do a file at a time -- here goes a big "for" loop
# #
cd $WEBSITE_PATH
# ---------------------------------------------------------------------------
# Apply the site templates (TEMPLATE_HEAD, TEMPLATE_GUIDEBAR, TEMPLATE_TAIL)
# to the HTML pages in the current directory.
#
# Reads:  TEMPLATE_PATH  directory holding the TEMPLATE_* files and the
#                        template.hist time marker
#         TEMPLATE_LIST  whitespace-separated marker names whose
#                        "<name> BEGIN" .. "<name> END" sections are removed
#                        from each page before the templates are re-inserted
#         "$@"           optional file-name patterns of pages to process even
#                        when they are not newer than template.hist
# (both variables are referenced here but set outside this section)
#
# Each page keeps its modification time, so the "updated" date reflects the
# last content change rather than the last template run.
# ---------------------------------------------------------------------------

# Print the pages to process, one path per line: every regular file directly
# in "." that is newer than template.hist (or every file when no marker
# exists yet), plus any file matching a pattern given as argument.  Only
# names ending in "html" are kept and ./index.html is always skipped.
template_list_pages() {
	local X
	{
		# -mindepth/-maxdepth go first: GNU find warns when these global
		# options follow a test such as -type.
		if [ -e "$TEMPLATE_PATH/template.hist" ]; then
			find . -mindepth 1 -maxdepth 1 -type f -newer "$TEMPLATE_PATH/template.hist"
		else
			find . -mindepth 1 -maxdepth 1 -type f
		fi
		if [ -n "$1" ]; then
			for X in "$@"; do
				# Quoted so a pattern such as *.html reaches find intact
				find . -mindepth 1 -maxdepth 1 -type f -name "$X"
			done
		fi
	} | grep 'html$' | grep -v "\./index.html" | awk '!SEEN[$0]++'
	# The final awk drops repeats (a page that is both newer and named on
	# the command line) while keeping the first-seen order.
}

# Filter stdin to stdout, dropping every line from one matching
# "<name> BEGIN" through the next line matching "<name> END", inclusive.
#   $1 - marker name, e.g. TEMPLATE_HEAD
template_strip_section() {
	awk -v NAME="$1" '
		BEGIN { PRINTVAR = 1; START_RE = NAME " BEGIN"; STOP_RE = NAME " END" }
		$0 ~ START_RE { PRINTVAR = 0 }
		$0 ~ STOP_RE { PRINTVAR = 1; next }
		PRINTVAR == 1 { print }
	'
}

# Print the text between the double quotes of the first
# '<item> = "<text>"' assignment found in a page (nothing when there is none).
# When the closing quote is missing, the rest of the line is printed.
#   $1 - placeholder name, e.g. PLACE_TITLE_HERE
#   $2 - page to search
template_lookup_value() {
	awk -v ITEM="$1" '
		BEGIN { ASSIGN_RE = ITEM " *= *\"" }
		match($0, ASSIGN_RE) {
			REST = substr($0, RSTART + RLENGTH)
			QUOTE = index(REST, "\"")
			if (QUOTE > 0) REST = substr(REST, 1, QUOTE - 1)
			print REST
			exit
		}
	' < "$2"
}

# Copy a template to an output file, replacing every PLACE_..._HERE
# placeholder with the value the page assigns to it.
#   $1 - template file
#   $2 - page supplying the 'PLACE_x_HERE = "value"' assignments
#   $3 - output file (a scratch file "$3.1" is used next to it)
#   $4 - 1: PLACE_UPDATED_DATE_HERE is filled from the page's modification
#           date instead of being looked up; anything else: plain lookup
# A placeholder whose value is "fixme" or missing is blanked out.  Missing
# ones are additionally listed as 'PLACE_x_HERE = "fixme"' lines inside an
# HTML comment appended to the output, ready to be filled in by hand.
# Returns 0 when every placeholder had a real value, 1 otherwise.
template_fill() {
	local TEMPLATE_FILE="$1"
	local PAGE="$2"
	local OUT="$3"
	local USE_DATE="$4"
	local REPLACEMENT_LIST REPLACEMENT_ITEM VALUE SAFE_VALUE
	local FIXME=0
	local INCOMPLETE=0

	# Every distinct PLACE_..._HERE token in the template, one per line
	REPLACEMENT_LIST=$(grep "PLACE_.*_HERE" "$TEMPLATE_FILE" \
		| sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' \
		| tr '~' '\n' \
		| grep "PLACE_.*_HERE" \
		| sort | uniq)

	cat "$TEMPLATE_FILE" > "$OUT"

	# Word splitting of the list is intentional: one token per iteration.
	for REPLACEMENT_ITEM in $REPLACEMENT_LIST; do
		VALUE=$(template_lookup_value "$REPLACEMENT_ITEM" "$PAGE")
		if [ "$USE_DATE" = 1 ] && [ "$REPLACEMENT_ITEM" = "PLACE_UPDATED_DATE_HERE" ]; then
			VALUE=$(find "$PAGE" -mindepth 0 -maxdepth 0 -printf "%TB %Td, %TY")
		fi

		if [ "$VALUE" = "fixme" ] || [ -z "$VALUE" ]; then
			INCOMPLETE=1
			SAFE_VALUE=""
		else
			# Protect the characters that are special on the right-hand side
			# of s~..~..~ : backslash, "&" (the whole match) and the "~"
			# delimiter.  Without this "Q&amp;A" would be mangled.
			SAFE_VALUE=$(printf '%s' "$VALUE" | sed -e 's/[\\&~]/\\&/g')
		fi

		# Only replace the output when sed succeeded, so a bad expression
		# cannot empty the file.
		sed -e "s~$REPLACEMENT_ITEM~$SAFE_VALUE~g" "$OUT" > "$OUT.1" \
			&& mv "$OUT.1" "$OUT"

		if [ -z "$VALUE" ]; then
			# First missing value opens the comment that collects the stubs
			if [ "$FIXME" -eq 0 ]; then
				printf '%s\n' '<!--' >> "$OUT"
				FIXME=1
			fi
			printf '%s = "fixme"\n' "$REPLACEMENT_ITEM" >> "$OUT"
		fi
	done

	# Close off the fixme section
	if [ "$FIXME" -ne 0 ]; then
		printf '%s\n' '-->' >> "$OUT"
	fi

	return "$INCOMPLETE"
}

# Re-template a single page in place, keeping its modification time.
#   $1 - page path
#   $2 - existing scratch directory for the intermediate template files
template_apply_to_page() {
	local PAGE="$1"
	local WORK="$2"
	local TEMPLATE_NAME LINECOUNT LASTHR GUIDEBAREMPTY

	printf '%s\n' "$PAGE"

	# Strip out existing templates, keep file date and time.
	# touch -r copies the timestamp directly; formatting it through
	# find's %TS breaks where %TS includes fractional seconds.
	for TEMPLATE_NAME in $TEMPLATE_LIST; do
		template_strip_section "$TEMPLATE_NAME" < "$PAGE" > "$PAGE.template.tmp"
		touch -r "$PAGE" "$PAGE.template.tmp"
		mv "$PAGE.template.tmp" "$PAGE"
	done

	# Build the three pieces.  Head and tail are used even when some of
	# their placeholders are unfilled, hence the ignored status.
	template_fill "$TEMPLATE_PATH/TEMPLATE_HEAD" "$PAGE" "$WORK/head" 1 || :
	if ! template_fill "$TEMPLATE_PATH/TEMPLATE_GUIDEBAR" "$PAGE" "$WORK/guidebar" 0; then
		# If the guidebar isn't finished it must not be displayed, so
		# strip its marked section and keep only what lies outside it.
		template_strip_section TEMPLATE_GUIDEBAR < "$WORK/guidebar" > "$WORK/guidebar.1"
		mv "$WORK/guidebar.1" "$WORK/guidebar"
	fi
	template_fill "$TEMPLATE_PATH/TEMPLATE_TAIL" "$PAGE" "$WORK/tail" 1 || :

	# How many lines in the guidebar?  Lines starting with PLACE_ (fixme
	# stubs) do not count.
	# NOTE(review): the "<!--" / "-->" lines wrapping fixme stubs DO count,
	# so a guidebar holding only stubs is still treated as not empty.  This
	# matches the previous behaviour; confirm whether it is intended.
	LINECOUNT=$(grep -c -v '^PLACE_' "$WORK/guidebar")
	if [ "$LINECOUNT" -eq 0 ]; then
		GUIDEBAREMPTY="empty"
	else
		GUIDEBAREMPTY="not_empty"
	fi

	# Put all the pieces together
	{
		# With a guidebar present, drop the last <HR> line of the head
		if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
			LASTHR=$(grep -c '<HR>' "$WORK/head")
			awk -v LAST="$LASTHR" 'BEGIN {COUNT=0} /<HR>/ {COUNT=COUNT+1; if (COUNT==LAST) {next}} {print}' < "$WORK/head"
		else
			cat "$WORK/head"
		fi

		# Send the guidebar
		cat "$WORK/guidebar"

		# Send the main html body - strip off any CRLF chars while we are at it
		awk '{gsub("\r$",""); print}' < "$PAGE"

		if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
			# Repeat the guidebar without its fixme stubs, then the tail
			# minus its first <HR> line
			grep -v '^PLACE_' "$WORK/guidebar"
			awk 'BEGIN {FOUND=0} /<HR>/ {if (FOUND == 0) {FOUND=1; next}} {print}' < "$WORK/tail"
		else
			# No guidebar: send the tail untouched
			cat "$WORK/tail"
		fi
	} > "$PAGE.template.tmp"
	touch -r "$PAGE" "$PAGE.template.tmp"
	mv "$PAGE.template.tmp" "$PAGE"

	# cleanup after putting all the pieces together
	rm -f "$WORK/head" "$WORK/guidebar" "$WORK/tail"
}

# Entry point: process every selected page, then refresh template.hist so
# the next run only picks up pages modified after this one.
#   "$@" - optional file-name patterns of extra pages to process
template_apply_all() {
	local WORK HTML_FILE

	# Private scratch directory instead of fixed names inside the site tree
	WORK=$(mktemp -d) || {
		echo "Cannot create a temporary work directory" >&2
		return 1
	}

	# Take the page list up front, before any page is rewritten
	template_list_pages "$@" > "$WORK/pages"

	# The list is read on descriptor 3 so stdin stays untouched in the loop
	while IFS= read -r HTML_FILE <&3; do
		template_apply_to_page "$HTML_FILE" "$WORK"
	done 3< "$WORK/pages"

	rm -rf -- "$WORK"

	# Here we keep track of which files we need to process
	touch "$TEMPLATE_PATH/template.hist"
}

template_apply_all "$@"




#Original for strip out templates from existing html file
#for Y in awk.html; do for X in $TEMPLATE_LIST; do cat $Y | awk "BEGIN { PRINTVAR = 1 } /$X BEGIN/ { PRINTVAR = 0 } /$X END/ { PRINTVAR = 1; next } { if ( PRINTVAR == 1 ) {print}}" > $Y.template.tmp; touch -t `find $Y -mindepth 0 -maxdepth 1 -printf "%TY%Tm%Td%TH%TM.%TS"` $Y.template.tmp; mv $Y.template.tmp $Y; done; done

#DO TEMPLATE_HEAD_DATESORT
#cat if.html | awk '{gsub("\r$",""); print}' > if.html.1

Here’s savedates.sh, which I use when modifying formatting rather than content:

# savedates.sh - record the modification time of every *.html page in
# ../public_html into ../allfiles.txt, one "<touch -t stamp> <name>" line
# per page, so the dates can be put back after a formatting-only edit.
# A progress dot per page goes to stderr.

# Stop rather than snapshot (and overwrite allfiles.txt from) the wrong place
cd ../public_html || exit 1

for X in *.html; do
	printf '.' >&2
	# "./" keeps a name starting with "-" from being read as an option and
	# %f prints the bare name again.  -type f skips anything that is not a
	# regular file; 2>/dev/null hides the error for an unmatched "*.html".
	# The sed trims the fractional part that find's %TS may append, since
	# touch -t only accepts whole seconds.
	find "./$X" -mindepth 0 -maxdepth 0 -type f -printf "%TY%Tm%Td%TH%TM.%TS %f\n" 2>/dev/null \
		| sed -e 's/^\([0-9]\{12\}\.[0-9][0-9]\)[.0-9]* /\1 /'
done > ../allfiles.txt
echo

And here’s restoredates.sh, which is the complement of savedates.sh:

# restoredates.sh - put back the modification times listed in
# ../allfiles.txt (lines of "<touch -t stamp> <name>") onto the pages in
# ../public_html.  Prints a progress dot per restored entry.

# Stop rather than touch files in the wrong directory
cd ../public_html || exit 1

# -r keeps backslashes literal; FILENAME receives the rest of the line, so
# names containing spaces survive.
while read -r DATE FILENAME; do
	# Skip blank or malformed lines
	[ -n "$FILENAME" ] || continue

	# Accept stamps saved with fractional seconds (YYYYMMDDhhmm.SS.fffff):
	# touch -t only understands whole seconds, so drop the fraction.
	case $DATE in
		*.*.*) DATE=${DATE%.*} ;;
	esac

	# -c: do not create an empty page for a file that no longer exists
	touch -c -t "$DATE" -- "$FILENAME"
	printf '.'
done < ../allfiles.txt
echo

Submit

Datesort.sh Shell Script

Do_datesort.sh Shell Script

Checkreqtags.sh Shell Script

Tags.conf Configuration File

Openclosetags.sh Shell Script

Wwwmanage.conf Configuration File

Ftp Expect Scripts

Ftp Expect Configuration File

Notes

Leave a Comment Cancel reply