Submit
The “submit” script is used to “submit” my pages to the various web sites I maintain. The “submit” script performs various checks on the page and then sends them on their way.
Here’s the “submit” script:
#!/bin/sh
#Use this script to submit documents from the "prepsite" to the
# "livesite".  Other scripts will be called when you do this.
# Also the information from livesite will be pushed to other
# sites as well that are to be kept in synchronization
#
# Usage: submit [--force|-f] FILENAME
#   FILENAME    document path, relative to $PREPSITE
#   --force|-f  skip the HTML tag checks
#
# Exit codes come from wwwmanage.conf (EXIT_*), from the tag checking
# scripts, or 30 when the "Site Updated" line in index.html is mangled.

#Please do not modify below this line

#Bring in configuration file variables and exit codes
. /scripts/wwwmanage/wwwmanage.conf

#Verify User
#For now we have to be root
# Later I'll see how we can get around this.
if [ "$(whoami)" != "root" ]; then
  echo "You must be root to run this command"
  exit "${EXIT_NOT_ROOT:-1}"
fi

#Set up Filename variable (the last non-option argument wins)
ARG_FORCE=""
FILENAME=""
while [ $# -gt 0 ]; do
  case $1 in
    --force | -f) ARG_FORCE=1 ;;
    *) FILENAME=$1 ;;
  esac
  shift
done

if [ "$ARG_FORCE" != 1 ]; then
  #Check tags...
  #Verify HTML format
  /scripts/wwwmanage/checkreqtags.sh "${PREPSITE}${FILENAME}"
  EXIT=$?
  if [ "$EXIT" -ne 0 ]; then
    echo Check for Required TAGS failed!
    exit "$EXIT"
  fi

  #Check tags...
  #Verify that all tags open and close in proper order
  /scripts/wwwmanage/openclosetags.sh "${PREPSITE}${FILENAME}"
  EXIT=$?
  if [ "$EXIT" -ne 0 ]; then
    echo Check for Matching TAGS failed!
    exit "$EXIT"
  fi
fi

# TAR_ADD NAME
# Fix ownership/mode of ${LIVESITE}NAME and append a copy of it to the
# $BACKUP tar archive under the name NAME-<modification time>.
TAR_ADD() {
  chown root:root "${LIVESITE}$1"
  chmod 0644 "${LIVESITE}$1"
  MODIFY=$(find "${LIVESITE}$1" -printf "%TY%Tb%Td-%TH%TM%TS")
  cp -rp "${LIVESITE}$1" "${LIVESITE}$1-$MODIFY"
  (cd "$LIVESITE" && tar -rvf "$BACKUP" "$1-$MODIFY")
  rm "${LIVESITE}$1-$MODIFY"
}

# SITE_UPDATE
# Reads the globals FILENAME and MODIFY_EN.
#
# It is possible we are only updating the format of the doc
# and are not changing the content and therefore I would probably
# want to keep the original modification date so that it is
# reflective of the last date the content changed, rather
# than just formatting changes.
#
# We must compare between the current "Site Updated" line in
# index.html and the date on the file being submitted.
#
# We then use the date contained in the variable
# MODIFY_EN to place on the index.html page.
SITE_UPDATE() {
  #Grab Date from "Site Updated" line in index.html ("Month DD YYYY")
  VAR_TMP=$(grep "Site Updated" "${LIVESITE}index.html" |
    sed -e 's/Site Updated //' -e 's/,//' -e 's/<BR>//')

  #Do Year
  # $VAR_TMP is left unquoted on purpose: word splitting flattens it
  # to a single line.  A trailing Carriage Return was causing grief,
  # so remove the offending CR from the year.
  CMP_DATE=$(echo $VAR_TMP | awk '{y = $3; sub("\r$", "", y); print y}')

  #Do Month
  TMP_MONTH=$(echo $VAR_TMP | awk '{print $1}')
  case $TMP_MONTH in
    January) CMP_DATE=${CMP_DATE}01 ;;
    February) CMP_DATE=${CMP_DATE}02 ;;
    March) CMP_DATE=${CMP_DATE}03 ;;
    April) CMP_DATE=${CMP_DATE}04 ;;
    May) CMP_DATE=${CMP_DATE}05 ;;
    June) CMP_DATE=${CMP_DATE}06 ;;
    July) CMP_DATE=${CMP_DATE}07 ;;
    August) CMP_DATE=${CMP_DATE}08 ;;
    September) CMP_DATE=${CMP_DATE}09 ;;
    October) CMP_DATE=${CMP_DATE}10 ;;
    November) CMP_DATE=${CMP_DATE}11 ;;
    December) CMP_DATE=${CMP_DATE}12 ;;
    *)
      #This is from a mangled "Site Updated" line in index.html
      # We should put some sort of method here to repair that line.
      echo "Unknown Error in Case statement!"
      exit 30
      ;;
  esac

  #Do Day (zero padded so YYYYMMDD compares correctly as a number)
  CMP_DATE_INDEX=${CMP_DATE}$(echo $VAR_TMP | awk '{printf "%02d", $2}')

  #Get Date of File we are updating, in the same YYYYMMDD form
  CMP_DATE_FILE=$(find "${PREPSITE}${FILENAME}" -printf "%TY%Tm%Td")

  if [ "$CMP_DATE_INDEX" -gt "$CMP_DATE_FILE" ]; then
    #Date in header of index.html is newer than the file being
    # submitted, do nothing
    return 0
  fi

  #Date of file being pushed is newer than that in the header
  # of index.html, update the "Site Updated" line in index.html
  sed -e "s/Site Updated.*<BR>/Site Updated ${MODIFY_EN}<BR>/" \
    "${LIVESITE}index.html" >"${LIVESITE}index.tmp.html"
  #Carry the modification date of index.html over to the new copy.
  # touch -r copies the timestamp directly, which avoids formatting a
  # "find -printf" time (its %TS may carry a fractional part that
  # touch -t rejects).
  touch -r "${LIVESITE}index.html" "${LIVESITE}index.tmp.html"
  mv "${LIVESITE}index.tmp.html" "${LIVESITE}index.html"
}

#Check to see if backup file exists, or to see
# if we can at least create a backup in the
# directory mentioned
if [ -f "$BACKUP" ] || [ -d "$(dirname "$BACKUP")" ]; then
  #Backup (or at least its directory) exists, life is good
  :
else
  echo FAILURE:Backup dir does not exist
  exit "${EXIT_BACKUP:-1}"
fi

#Check if we have a file, dir, or unknown
if [ -f "${PREPSITE}${FILENAME}" ]; then
  #$FILENAME is a file
  :
elif [ -d "${PREPSITE}${FILENAME}" ]; then
  #$FILENAME is a directory
  echo "${PREPSITE}${FILENAME}" is a directory
  echo Directories are not currently handled
  exit "${EXIT_DIR:-1}"
else
  #$FILENAME is not a file nor a directory
  # boy are we in trouble now.
  echo "${PREPSITE}${FILENAME}" is of Unknown File Type
  exit "${EXIT_UNK_FIL_TYP:-1}"
fi

#Check to see if the target directory exists on the livesite,
# if not then create it.
DIRNAME=$(dirname "$FILENAME")
if [ ! -d "${LIVESITE}${DIRNAME}" ]; then
  mkdir -p "${LIVESITE}${DIRNAME}"
  #work needed here to create directories on ftp sites we upload to.
  #maybe set a flag that we look at later.
  # I probably need to have the ftp expect script generated from this
  # script, rather than being an existing script file.  That way I can
  # more easily control how many files I push in one shot, and if any
  # directories are to be created on the remote site.
fi

#Copy file from prepsite to livesite, replacing the literal date
# "June 19, 2001" in the page with the file's modification date
MODIFY_EN=$(find "${PREPSITE}${FILENAME}" -printf "%TB %Td, %TY")
sed -e "s/June 19, 2001/$MODIFY_EN/" "${PREPSITE}${FILENAME}" \
  >"${LIVESITE}${FILENAME}"

#Reset file modification date to that of PrepSite
touch -r "${PREPSITE}${FILENAME}" "${LIVESITE}${FILENAME}"

#Update "Site Updated" line in index.html
SITE_UPDATE

#Add file to backup with date appended to filename
TAR_ADD "$FILENAME"

#Generate a new datesort.html file
(cd "$LIVESITE" && /scripts/wwwmanage/datesort.sh)

#Add file to backup with date appended to filename
TAR_ADD datesort.html

#Now push all 3 files (datesort.html, index.html and $FILENAME) to riblack6
/scripts/wwwmanage/riblack6.ftp.expect \
  "${LIVESITE}${FILENAME}" "$FILENAME" \
  "${LIVESITE}datesort.html" datesort.html \
  "${LIVESITE}index.html" index.html

#Now push all 3 files (datesort.html, index.html and $FILENAME)
# to www.geocities.com/rlcomp_1999
/scripts/wwwmanage/geo-rlcomp.ftp.expect \
  "${LIVESITE}${FILENAME}" "$FILENAME" \
  "${LIVESITE}datesort.html" datesort.html \
  "${LIVESITE}index.html" index.html

#Check if file ($FILENAME) needs to be updated on linux.cca.cpqcorp.net.
# Only an exact match on the first column of linux.cca.files counts;
# the second column is the remote file name.
awk -v f="$FILENAME" '$1 == f { print $1, $2; exit }' \
  /scripts/wwwmanage/linux.cca.files | (
  read -r SOURCE DESTINATION
  if [ -n "$DESTINATION" ]; then
    /scripts/wwwmanage/linux.cca.ftp.expect "${LIVESITE}${SOURCE}" "$DESTINATION"
  fi
)
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Datesort.sh Shell Script
datesort.sh is the controlling wrapper around the do_datesort.sh script: it writes the HTML header and footer of datesort.html and calls do_datesort.sh to generate the listing in between.
#!/bin/sh
# datesort.sh - build datesort.html in the current directory.
#
# Output is an HTML header, the listing produced by do_datesort.sh,
# the contents of /scripts/wwwmanage/datesort.old, and a footer that
# carries the generation time.  Any line mentioning datesort.html
# itself is dropped, so the page never lists itself.
FILENAME=datesort.html

# Build the page in a private temp file so the live page is replaced
# in one step and no predictable name in /tmp is written to.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/datesort.XXXXXX") || exit 1
trap 'rm -f "$TMPFILE"' EXIT

{
  cat <<EOF
<HTML>
<BODY>
<CENTER><H1>Web Pages - Sorted By Date</H1></CENTER>
<HR>
<PRE>
EOF
  /scripts/wwwmanage/do_datesort.sh
  cat /scripts/wwwmanage/datesort.old
  cat <<EOF
</PRE>
<HR>
<CENTER>This file generated at <BR>$(date +"%H:%M:%S on %b %d, %Y")</CENTER>
</BODY>
</HTML>
EOF
} | grep -vF -- "$FILENAME" >"$TMPFILE"

# mktemp creates the file mode 0600; a web page must be world readable.
chmod 0644 "$TMPFILE"
mv "$TMPFILE" "$FILENAME"
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Do_datesort.sh Shell Script
The do_datesort.sh script does the bulk of the work of generating the datesort.html document.
#!/bin/sh
# do_datesort.sh - print one fixed-width HTML line per file found under
# the current directory, newest modification time first.
#
# Each line is: linked name, modification time, size, page title and
# page description, ending in <BR>.  Output goes to stdout.
NAME_FIELD=48
TIME_PAD=2
SIZE_FIELD=12
TITLE_PAD=2
TITLE_FIELD=80
DESCR_PAD=2
DESCR_FIELD=160

#Please don't modify below this line

TAB=$(printf '\t')

# pad COUNT - print COUNT spaces; prints nothing when COUNT <= 0.
pad() {
  if [ "$1" -gt 0 ]; then
    printf '%*s' "$1" ''
  fi
}

# flatten - join stdin into one line: drops a trailing CR from each
# line, squeezes runs of blanks, and separates lines with one space.
flatten() {
  awk '{
    sub(/\r$/, "")
    $1 = $1
    if ($0 != "") { printf "%s%s", sep, $0; sep = " " }
  }'
}

#One find call emits everything we need per file, tab separated:
# sort key (seconds since the epoch), size, display time, name.
# sort -rn then puts the newest file first.
find . -type f -printf '%T@\t%s\t%TY %Tb %Td - %TH:%TM:%TS\t%p\n' |
  sort -rn |
  while IFS="$TAB" read -r STAMP SIZE TIME NAME; do
    #Newer versions of find append a fraction to %TS; drop it.
    TIME=${TIME%.[0-9]*}

    #Name with hyperlink, left justified, padded to NAME_FIELD
    printf '<A HREF=%s>%s</A>' "$NAME" "$NAME"
    pad $((NAME_FIELD - ${#NAME}))

    #Time is a fixed field, pad some spaces at the end
    printf '%s' "$TIME"
    pad "$TIME_PAD"

    #Size is right justified, pad with spaces on the left
    pad $((SIZE_FIELD - ${#SIZE}))
    printf '%s' "$SIZE"

    #Title of the page, padded to TITLE_FIELD and closed with a dot
    pad "$TITLE_PAD"
    TITLE=$(grep '<TITLE>' "$NAME" |
      sed -e 's,<TITLE>,,' -e 's,</TITLE>,,' | flatten)
    printf '%s' "$TITLE"
    pad $((TITLE_FIELD - ${#TITLE}))
    printf '.'

    #Description, padded to DESCR_FIELD and closed with a dot
    pad "$DESCR_PAD"
    DESCRIPTION=$(grep '<META name="description"' "$NAME" |
      sed -e 's,<META name="description" content=",,' -e 's/">//' | flatten)
    printf '%s' "$DESCRIPTION"
    pad $((DESCR_FIELD - ${#DESCRIPTION}))
    printf '.'

    #Last item on this line, give <BR> for line break
    printf '<BR>\n'
  done
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Checkreqtags.sh Shell Script
The checkreqtags.sh script checks the html page for a list of tags that I require to be present. The tags are listed one at a time in a separate configuration file.
#!/bin/sh
#Use this script, named "checktags", to check a document for
# specific html tags that should be present.  An error condition
# is raised if not enough tags.  Zero is returned if the doc.
# contains sufficient tags.
#
# Usage: checkreqtags.sh FILENAME
# The required tags are read from tags.conf, one tag per line; each
# is looked for in FILENAME as a literal string, not as a pattern.
FILENAME=$1
TAGS_CONF=/scripts/wwwmanage/tags.conf

if [ ! -f "$FILENAME" ]; then
  echo "No file to check: $FILENAME"
  exit 1
fi
#Without its tag list this check would pass every document, so a
# missing configuration file is an error rather than a success.
if [ ! -r "$TAGS_CONF" ]; then
  echo "Unable to read $TAGS_CONF"
  exit 1
fi

#Verify HTML format
EXIT=0
while IFS= read -r TAG || [ -n "$TAG" ]; do
  #A blank line matches every document, skip it
  [ -n "$TAG" ] || continue
  echo "Checking for tag $TAG"
  if ! grep -qF -- "$TAG" "$FILENAME"; then
    echo Sorry, not enough tags present
    echo "Failed on: $TAG"
    EXIT=1
    break
  fi
done <"$TAGS_CONF"

if [ "$EXIT" -ne 0 ]; then
  echo Error, exiting, Error Code "$EXIT"
  exit "$EXIT"
fi
echo "All Necessary Tags are Present"
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Tags.conf Configuration File
This is the tags configuration file for the checkreqtags.sh shell script above. As I need more tags I can just add them to this file, one tag per line.
<HTML>
</HTML>
<TITLE>
</TITLE>
<HEAD>
</HEAD>
<META name="keywords" content="
<META name="description" content="
<BODY BGCOLOR="#FFFFFF" BACKGROUND="">
</BODY>
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Openclosetags.sh Shell Script
Check the html file for proper order on opening and closing tags. The tag to be closed should always be the last one that was opened.
#!/bin/sh
# openclosetags.sh - check that the tags of an html file are closed in
# the reverse order they were opened.
#
# Usage: openclosetags.sh FILENAME
# The file is scanned one byte at a time with xxd.  Every tag found is
# echoed, then pushed on, or pulled off, a stack kept as one string in
# TAGSTACK.  Exits 0 when every tag matched and the stack ends empty,
# non-zero otherwise.
FILENAME=$1

if [ ! -f "${FILENAME}" ]; then
  echo No File present,
  echo unable to continue
  exit 1
fi

# readbyte - read the byte at offset SEEK.
# Sets HEX to its two hex digits and CHAR to the character as xxd
# displays it (empty for a space).  Both are empty at end of file.
readbyte() {
  BYTE=$(xxd -l 1 -s "$SEEK" "${FILENAME}" | awk '{print $2 " " $3}')
  HEX=${BYTE%% *}
  CHAR=${BYTE#* }
}

# gettag - advance SEEK to the next tag and store it in CURR_TAG.
# Only the part of the tag up to the first space or the closing
# greater than sign is kept; SEEK is left on the closing sign.
# Sets EOFSTATUS=1 at end of file; returns 1 if the file ends inside
# a tag.
gettag() {
  #Find start of tag (starts with less than sign)
  while [ -n "$HEX" ]; do
    readbyte
    if [ "$HEX" = 3c ]; then #3c is less than sign
      CURR_TAG=$CHAR
      break
    fi
    SEEK=$((SEEK + 1))
  done
  #check exit status of above loop - EOF reached?
  if [ -z "$HEX" ]; then
    EOFSTATUS=1
    return 0
  fi

  #Get rest of tag up to space bar (important part of tag)
  # or up to greater than sign
  SEEK=$((SEEK + 1))
  while [ -n "$HEX" ]; do
    readbyte
    if [ "$HEX" = 20 ]; then #20 is space bar
      CURR_TAG="${CURR_TAG} "
      #Special Case - <A NAME=" and <A HREF=" keep their attribute name
      if [ "$CURR_TAG" = '<A ' ]; then
        SEEK_CHECK=$((SEEK + 1))
        SP_CASE_CHECK=$(xxd -l 6 -s "$SEEK_CHECK" "${FILENAME}" |
          awk '{print $5}')
        case "$SP_CASE_CHECK" in
          'NAME="' | 'HREF="')
            SEEK=$((SEEK_CHECK + 5))
            CURR_TAG="${CURR_TAG}${SP_CASE_CHECK}"
            ;;
        esac
      fi
      #Because we have hit a space, discard the rest of the tag:
      # eat up the input until the greater than sign (end of tag)
      SEEK=$((SEEK + 1))
      while [ -n "$HEX" ]; do
        readbyte
        if [ "$HEX" = 3e ]; then #3e is greater than sign
          break
        fi
        SEEK=$((SEEK + 1))
      done
      break
    elif [ "$HEX" = 3e ]; then #3e is greater than sign
      CURR_TAG="${CURR_TAG}$CHAR"
      break
    elif [ -n "$HEX" ]; then
      CURR_TAG="${CURR_TAG}$CHAR"
    fi
    SEEK=$((SEEK + 1))
  done
  #check exit status of above loop - EOF reached?
  if [ -z "$HEX" ]; then
    EOFSTATUS=1
    echo EOF reached before end of tag
    return 1
  fi
}

# checktag TAG - classify TAG; the verdict is left in TAGSTATUS:
#   IGNORE  tag needs no closing tag (or is a comment/declaration)
#   PUSH    opening tag
#   PULL    closing tag
checktag() {
  TAG_CHECKTAG="$*"
  case "$TAG_CHECKTAG" in
    '<BR>' | '<HR>' | '<A NAME="' | '<IMG ') TAGSTATUS=IGNORE ;;
    '<A HREF="')
      CURR_TAG="<A "
      TAGSTATUS=PUSH
      ;;
    \<\!* | \<LINK* | \<META*) TAGSTATUS=IGNORE ;;
    \</*) TAGSTATUS=PULL ;;
    *) TAGSTATUS=PUSH ;;
  esac
  return 0
}

# dotag TAG - apply TAG to TAGSTACK.
# Returns 1 when a closing tag does not match the tag on top of the
# stack, 0 otherwise.
dotag() {
  TAGSTATUS=
  checktag "$1"
  case "$TAGSTATUS" in
    IGNORE)
      return 0
      ;;
    PUSH)
      if [ "$1" = '<A HREF="' ]; then
        TAGSTACK="${TAGSTACK}<A "
      else
        TAGSTACK="${TAGSTACK}$1"
      fi
      return 0
      ;;
    PULL)
      #Turn the close tag into the bare opening name: "</B>" -> "<B"
      TAGKILL="<${1#</}"
      TAGKILL=${TAGKILL%">"}
      TAGKILL=${TAGKILL%" "}
      #An opening tag sits on the stack either complete ("<B>") or
      # cut off at its first space ("<B ").  The stack is only ever
      # changed when one of those two forms is on top, so a failed
      # match can never be mistaken for a successful removal.
      case "$TAGSTACK" in
        *"${TAGKILL}>") TAGSTACK=${TAGSTACK%"${TAGKILL}>"} ;;
        *"${TAGKILL} ") TAGSTACK=${TAGSTACK%"${TAGKILL} "} ;;
        *)
          #Tags are out of order
          echo Tagstack is as follows:
          echo "$TAGSTACK"
          echo Failed while attempting to remove "$1"
          echo from the stack.
          echo Seek count is "$SEEK" bytes
          return 1
          ;;
      esac
      return 0
      ;;
    *)
      echo Unknown error in dotag
      echo TAGSTATUS is "$TAGSTATUS"
      return 1
      ;;
  esac
}

SEEK=0
EOFSTATUS=0
CURR_TAG=
HEX=0
TAGSTACK=

#Go from beginning of file to end of file
while [ -n "$HEX" ]; do
  gettag
  if [ -z "$HEX" ]; then
    break
  fi
  echo "$CURR_TAG"
  dotag "$CURR_TAG"
  EXIT=$?
  if [ "$EXIT" -gt 0 ]; then
    echo Failure, exiting
    #"return" is not valid outside a function; exit stops the scan
    exit "$EXIT"
  fi
  #Debug
  echo Tagstack is "$TAGSTACK"
done

#check exit status of above loop - EOF reached?
if [ "$EOFSTATUS" -eq 1 ]; then
  echo End of File Reached
fi

#Check if tagstack is empty
if [ -n "$TAGSTACK" ]; then
  echo FAILURE: Failed to remove all items from tagstack
  echo Tags do not match.
  echo Tagstack contents are "$TAGSTACK"
  exit 1
fi
echo Successfully cleared the tagstack, all tags match.
echo Tagstack contents are "$TAGSTACK"
exit 0
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Wwwmanage.conf Configuration File
# wwwmanage.conf - settings shared by the wwwmanage scripts.
# This file is sourced with ".", so it must contain only shell
# variable assignments and comments.

#Global Variables

#Livesite is a local version of the live web site.  This livesite
# can be running a web server, or not, it doesn't matter.
# Files will be copied to livesite, then other scripts will
# be run to push files from here to other sites you may wish to
# keep synchronized.  Also other scripts will be run after files
# are copied here, such as datesort.
# Keep the trailing slash: file names are appended directly.
LIVESITE=/home/httpd/www-geocities-rlcomp_1999/

#Prepsite is your preparation site that is not accessible by others.
# All your work is done in prepsite, once your page is ready
# "submit" it to the "livesite" by using the "submit" script.
# Keep the trailing slash: file names are appended directly.
PREPSITE=/home/ftp/pub/techcd/pub/www_prep_site/www-geocities-rlcomp_1999-prep/

#Backup is where you will save backups to.  Files will be added
# with their modification date as part of their filename;
# files will keep appending to the end of that backup.
BACKUP=/home/ftp/pub/techcd/pub/www_prep_site/www-geo-rlcomp_1999.tar

#DateSort Variables
NAME_FIELD=48
TIME_FIELD=2
SIZE_FIELD=12
TEMPFILE=/tmp/sort.tmp

#Home directory for wwwmanage
WWWMANAGE=/scripts/wwwmanage

#Exit Error Codes
EXIT_NO_TAGS=7
EXIT_NOT_ROOT=6
#File Type is unknown; not a regular file, nor a directory
EXIT_UNK_FIL_TYP=5
EXIT_DIR=4
EXIT_BACKUP=3
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Ftp Expect Scripts
#!/usr/bin/expect -f
#
# Upload files to riblack6.americas.cpqcorp.net over ftp.
#
# Usage: <this script> LOCAL REMOTE ?LOCAL REMOTE ...?
#   Arguments come in pairs: the local file to send and the name to
#   store it under on the server.  Each pair becomes one ftp "put".
#
# This Expect script was generated by autoexpect on Wed Jun  6 18:10:55 2001
# Expect and autoexpect were both written by Don Libes, NIST.
#
# Note that autoexpect does not guarantee a working script.  It
# necessarily has to guess about certain things.  Two reasons a script
# might fail are:
#
# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet,
# etc.) and devices discard or ignore keystrokes that arrive "too
# quickly" after prompts.  If you find your new script hanging up at
# one spot, try adding a short sleep just before the previous send.
# Setting "force_conservative" to 1 (see below) makes Expect do this
# automatically - pausing briefly before sending each character.
#
set force_conservative 0  ;# set to 1 to force conservative mode even if
                          ;# script wasn't run conservatively originally
if {$force_conservative} {
    set send_slow {1 .1}
    proc send {ignore arg} {
        sleep .1
        exp_send -s -- $arg
    }
}

#
# 2) differing output - Some programs produce different output each time
# they run.  ftp is one, if it produces throughput statistics at the
# end of a file transfer.  If this causes a problem, delete these
# patterns or replace them with wildcards.
#
# Read the man page for more info.
#
# -Don

# With no timeout, a bare "put" would leave ftp waiting for a file
# name forever, so refuse an empty or unpaired argument list up front.
if {[llength $argv] == 0 || [llength $argv] % 2 != 0} {
    send_user "usage: $argv0 LOCAL REMOTE ?LOCAL REMOTE ...?\n"
    exit 2
}

set timeout -1
spawn ftp riblack6.americas.cpqcorp.net
match_max 100000
expect "Name (riblack6.americas.cpqcorp.net:*): "
send -- "geoweb\r"
expect "Password:"
# NOTE(review): the password is stored in clear text in this script;
# keep the file readable by its owner only.
send -- "xxxxxxxxxxxx\r"
expect "ftp> "
send -- "bin\r"
expect "ftp> "
send -- "hash\r"
foreach {local remote} $argv {
    expect "ftp> "
    send -- "put $local $remote\r"
}
expect "ftp> "
send -- "quit\r"
expect "221 \r"
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Ftp Expect Configuration File
I have one server that doesn’t get all the files that I generate. For this one I have created this configuration file so that each time I “submit” a file, it is checked against this file. If it matches something in the first column, it will then be pushed to this special server using the 2nd column as the remote filename.
#filename-exactly-as-we-push-it destination-on-build-server
procedures/kickstart-rh70.html /home/httpd/html/linux/riblack/kickstart-rh70.html
SUBMIT
DATESORT_SH
DO_DATESORT_SH
CHECKREQTAGS
TAGSCONF
OPENCLOSETAGS
WWWMANAGECONF
FTPEXPECT
LINUXCCAFILES
NOTES
Notes
I misuse variables and variable scope in these scripts. I also hardcode a lot of values where they should really be variables. These errors may or may not be cleaned up in time, for now it works – and to quote the old saying “If it isn’t broke, don’t fix it!”.
The openclose tags script is very slow (it seems to do about 1 tag per second). I do plan on migrating this one to C or C++. I feel that C or C++ could scan a whole html file in well under 10 seconds where it is taking more than a minute or two with the bash script.
I am currently – December 19, 2002 – not using any of the above scripts. I have converted over to a new set of scripts as shown below. Also I’m not currently checking for tag order using my own scripts — every once in a while I will download all my pages and use “weblint” to check the tags on my pages.
Here are my current scripts:
re-index.sh
site_index.sh
apply_template.sh
TEMPLATE_GUIDEBAR
TEMPLATE_HEAD
TEMPLATE_TAIL
savedates.sh
restoredates.sh
re-index.sh runs as a cron job once each night. Here are its contents:
# re-index.sh - nightly cron job: rebuild the site map pages first,
# then apply the templates to the html files.
site_index.sh
apply_template.sh
site_index.sh now takes the place of datesort.sh and indexer.sh (my two previous methods of creating a site map). Here’s the contents:
#!/bin/sh
# site_index.sh - generate the site map pages in ~/public_html.
#
# For each sort key (filename, date, size, title, description) two
# table pages are written: sitemap-KEY.html and the reverse ordered
# sitemap-KEY-r.html.  The two date pages are swapped afterwards so
# that sitemap-date.html lists the newest page first, and that page is
# also copied to sitemap.html, datesort.html and indexer.html.
cd ~/public_html || exit 1

SORT_KEYS="sitemap-filename sitemap-date sitemap-size sitemap-title sitemap-description"

#Every html file here except the generated sitemap-* pages.
# The list is space separated, so file names must not contain blanks.
FILES_TO_PROCESS=""
for X in *.html; do
  [ -f "$X" ] || continue
  case "$X" in
    *sitemap-*.html) continue ;;
  esac
  FILES_TO_PROCESS="$FILES_TO_PROCESS $X"
done

# page_title FILE - text of the first <TITLE> line of FILE.
page_title() {
  grep '<TITLE>' "$1" |
    sed -e 's,^.*<TITLE> *,,' -e 's,</TITLE>.*$,,' | head -1 | tr '\n' ' '
}

# page_description FILE - content of the description META tag of FILE.
page_description() {
  grep '" *description *"' "$1" | grep content |
    sed -e 's,^.*content *= *",,' -e 's,".*$,,' | head -1 | tr '\n' ' '
}

# first_word - print the first word of stdin, or "zzz" when there is
# none, so pages without a title or description get a fixed sort key.
first_word() {
  awk '{ if (NF) { print $1; found = 1; exit } }
       END { if (!found) print "zzz" }'
}

# generate_sort_keys - write sitemap-KEY.tmp for every sort key.
# Each line is "KEY FILE"; only the first word of a title or
# description is used as its key.
generate_sort_keys() {
  for X in $SORT_KEYS; do
    : >"${X}.tmp"
  done
  for X in $FILES_TO_PROCESS; do
    printf '%s %s\n' "$X" "$X" >>sitemap-filename.tmp
    find "$X" -printf "%T@ %f\n" >>sitemap-date.tmp
    find "$X" -printf "%s %f\n" >>sitemap-size.tmp
    printf '%s %s\n' "$(page_title "$X" | first_word)" "$X" \
      >>sitemap-title.tmp
    printf '%s %s\n' "$(page_description "$X" | first_word)" "$X" \
      >>sitemap-description.tmp
  done
}

# print_cell ALIGN WRAPMODE VALUE... - print one <TD> table cell.
# WRAPMODE "nowrap" adds the NOWRAP attribute; anything else does not.
print_cell() {
  CELL_ALIGN=$1
  if [ "$2" = "nowrap" ]; then WRAP=NOWRAP; else WRAP=""; fi
  shift 2
  printf '<TD ALIGN="%s" VALIGN="TOP" %s>%s</TD>' "$CELL_ALIGN" "$WRAP" "$*"
}

#$1 = "nowrap" or "wrap"
#$2... = field value
print_left() {
  print_cell left "$@"
}

#$1 = "nowrap" or "wrap"
#$2... = field value
print_right() {
  print_cell right "$@"
}

# print_line_formatted - print one table row from the RECORD_* globals.
print_line_formatted() {
  printf '<TR ALIGN="center">'
  print_left nowrap "$RECORD_FILENAME"
  print_left nowrap "$RECORD_DATE"
  print_right nowrap "$RECORD_SIZE"
  print_left nowrap "$RECORD_TITLE"
  print_left wrap "$RECORD_DESCRIPTION"
  printf '</TR>\n'
}

# print_header PAGE - print the template variables, the table opening
# and the heading row for site map page PAGE.
# Every heading links to the page sorted by that column; the heading
# of the column PAGE itself is sorted by links to the reverse order.
# The links are relative: all site map pages live in one directory.
print_header() {
  RECORD_FILENAME='<A HREF="sitemap-filename.html">Filename</A>'
  RECORD_DATE='<A HREF="sitemap-date.html">Date</A>'
  RECORD_SIZE='<A HREF="sitemap-size.html">Size</A>'
  RECORD_TITLE='<A HREF="sitemap-title.html">Title</A>'
  RECORD_DESCRIPTION='<A HREF="sitemap-description.html">Description</A>'
  case "$1" in
    sitemap-filename.html)
      RECORD_FILENAME='<A HREF="sitemap-filename-r.html">Filename</A>'
      ;;
    sitemap-date.html)
      RECORD_DATE='<A HREF="sitemap-date-r.html">Date</A>'
      ;;
    sitemap-size.html)
      RECORD_SIZE='<A HREF="sitemap-size-r.html">Size</A>'
      ;;
    sitemap-title.html)
      RECORD_TITLE='<A HREF="sitemap-title-r.html">Title</A>'
      ;;
    sitemap-description.html)
      RECORD_DESCRIPTION='<A HREF="sitemap-description-r.html">Description</A>'
      ;;
  esac

  #Name of the sort order, for both the page and its reverse
  case "$1" in
    sitemap-filename*) SORT_REF="Name" ;;
    sitemap-date*) SORT_REF="Date" ;;
    sitemap-size*) SORT_REF="Size" ;;
    sitemap-title*) SORT_REF="Title" ;;
    sitemap-description*) SORT_REF="Description" ;;
  esac

  cat <<EOF
<!--
PLACE_CREATED_DATE_HERE = "December 16, 2002"
PLACE_DESCRIPTION_HERE = "CPQLINUX Site Map - sorted by $SORT_REF"
PLACE_TITLE_HERE = "cpqlinux Site Map - Sorted by $SORT_REF"
PLACE_NEXT_ITEM_HERE = "fixme"
PLACE_PREVIOUS_ITEM_HERE = "fixme"
-->
EOF
  echo '<TABLE ALIGN="CENTER">'
  print_line_formatted
}

# print_record FILE - print the table row describing FILE.
print_record() {
  RECORD_FILENAME="<A HREF=\"$1\">$1</A>"
  RECORD_DATE=$(find "$1" -printf "%TY %Tb %Td - %TH:%TM:%TS\n")
  RECORD_SIZE=$(find "$1" -printf "%s")
  RECORD_TITLE=$(page_title "$1")
  RECORD_DESCRIPTION=$(page_description "$1")
  print_line_formatted
}

generate_sort_keys

for X in $SORT_KEYS; do
  #Dates and sizes are numbers; every other key sorts as text
  case "$X" in
    sitemap-date | sitemap-size) SORT_FLAGS=-g ;;
    *) SORT_FLAGS="" ;;
  esac
  print_header "${X}.html" >"${X}.html"
  print_header "${X}-r.html" >"${X}-r.html"
  # $SORT_FLAGS is unquoted on purpose: when empty it must vanish
  sort $SORT_FLAGS "${X}.tmp" |
    while read -r KEY FILE; do print_record "$FILE"; done >>"${X}.html"
  sort $SORT_FLAGS -r "${X}.tmp" |
    while read -r KEY FILE; do print_record "$FILE"; done >>"${X}-r.html"
  rm "${X}.tmp"
done

#Swap the two date pages so sitemap-date.html is newest first,
# rewriting the "Date" heading link of each to point at the other.
sed -e 's,sitemap-date-r.html">Date</A>,sitemap-date.html">Date</A>,' \
  sitemap-date.html >sitemap-date.html.tmp
sed -e 's,sitemap-date.html">Date</A>,sitemap-date-r.html">Date</A>,' \
  sitemap-date-r.html >sitemap-date.html
mv sitemap-date.html.tmp sitemap-date-r.html

cp -a sitemap-date.html sitemap.html
cp -a sitemap-date.html datesort.html
cp -a sitemap-date.html indexer.html
Here’s the contents of apply_template.sh
#!/bin/sh
# apply_template.sh - wrap the html pages of the web site in the
# TEMPLATE_HEAD, TEMPLATE_GUIDEBAR and TEMPLATE_TAIL templates.
#
# Usage: apply_template.sh [NAME ...]
#   Without arguments, every top level *.html file of $WEBSITE_PATH
#   (except index.html) that is newer than $TEMPLATE_PATH/template.hist
#   is processed; if a template or this script changed, all files are.
#   Files matching a NAME argument are processed in any case.
#
# PLACE_..._HERE placeholders in a template are replaced by the value
# the page declares as PLACE_..._HERE = "value".  Each page keeps its
# modification time.
TEMPLATE_PATH=~/bin
WEBSITE_PATH=~/public_html
TEMPLATE_LIST="TEMPLATE_HEAD TEMPLATE_GUIDEBAR TEMPLATE_TAIL"

FAILURE=0
for X in $TEMPLATE_LIST; do
  if [ ! -f "$TEMPLATE_PATH/$X" ]; then
    echo "unable to find $TEMPLATE_PATH/$X"
    FAILURE=$((FAILURE + 1))
  fi
done
if [ "$FAILURE" -gt 1 ]; then echo "check TEMPLATE_PATH variable in $0"; fi
if [ "$FAILURE" -eq 1 ]; then echo "check TEMPLATE_LIST variable in $0"; fi
if [ "$FAILURE" -gt 0 ]; then exit 1; fi

# See if we have updated this file or any of the template files.  If
# any of that has been updated then we need to reprocess _all_ the html
# files -- maybe a template has changed.  So we remove template.hist
if [ -e "$TEMPLATE_PATH/template.hist" ]; then
  for X in $TEMPLATE_LIST "$(basename "$0")"; do
    TEMPLATES_CHANGED=$(find "$TEMPLATE_PATH/$X" \
      -newer "$TEMPLATE_PATH/template.hist")
    if [ -n "$TEMPLATES_CHANGED" ]; then
      rm "$TEMPLATE_PATH/template.hist"
      break
    fi
  done
fi

# strip_template NAME - copy stdin to stdout without the lines from
# one containing "NAME BEGIN" through one containing "NAME END".
strip_template() {
  awk -v begin="$1 BEGIN" -v end="$1 END" '
    index($0, begin) { skip = 1 }
    index($0, end)   { skip = 0; next }
    !skip            { print }
  '
}

# placeholders FILE - list the distinct PLACE_..._HERE names in FILE.
placeholders() {
  grep "PLACE_.*_HERE" "$1" |
    sed -e 's,PLACE_,~&,g' -e 's,_HERE,&~,g' | tr '~' '\n' |
    grep "PLACE_.*_HERE" | sort | uniq
}

# page_value ITEM FILE - print the value FILE declares for ITEM, i.e.
# the text between the quotes of the first ITEM = "..." it contains.
page_value() {
  grep "$1 *= *\"" "$2" | head -1 |
    sed -e 's,PLACE_,~&,g' | tr '~' '\n' | grep "$1" | head -1 |
    sed -e "s,^.*$1 *= *\",," -e 's,".*$,,'
}

# fill_template NAME OUTFILE DATEMODE
# Copy template NAME to OUTFILE with its placeholders replaced by the
# values declared in $HTML_FILE.
#   - the value "fixme" is replaced by nothing
#   - an undeclared placeholder is replaced by nothing and added to a
#     trailing <!-- ... --> comment as PLACEHOLDER = "fixme"
#   - with DATEMODE "with_date", PLACE_UPDATED_DATE_HERE becomes the
#     modification date of $HTML_FILE
# Sets TEMPLATE_UNRESOLVED=1 if any placeholder was "fixme" or
# undeclared, else 0.
fill_template() {
  FT_OUT=$2
  FT_COMMENT_OPEN=0
  TEMPLATE_UNRESOLVED=0
  cat "$TEMPLATE_PATH/$1" >"$FT_OUT"
  for FT_ITEM in $(placeholders "$TEMPLATE_PATH/$1"); do
    FT_VALUE=$(page_value "$FT_ITEM" "$HTML_FILE")
    if [ "$3" = "with_date" ] &&
      [ "$FT_ITEM" = "PLACE_UPDATED_DATE_HERE" ]; then
      FT_VALUE=$(find "$HTML_FILE" -mindepth 0 -maxdepth 0 \
        -printf "%TB %Td, %TY")
    fi
    FT_MISSING=0
    if [ "$FT_VALUE" = "fixme" ]; then
      TEMPLATE_UNRESOLVED=1
      FT_VALUE=""
    elif [ -z "$FT_VALUE" ]; then
      TEMPLATE_UNRESOLVED=1
      FT_MISSING=1
    fi
    # Protect the characters that are special in a sed replacement
    # ("~" is the delimiter used below) so any value is inserted as is.
    FT_ESCAPED=$(printf '%s' "$FT_VALUE" | sed -e 's/[\\&~]/\\&/g')
    sed -e "s~$FT_ITEM~$FT_ESCAPED~g" "$FT_OUT" >"$FT_OUT.1"
    mv "$FT_OUT.1" "$FT_OUT"
    if [ "$FT_MISSING" -eq 1 ]; then
      if [ "$FT_COMMENT_OPEN" -eq 0 ]; then
        echo '<!--' >>"$FT_OUT"
        FT_COMMENT_OPEN=1
      fi
      echo "$FT_ITEM"' = "fixme"' >>"$FT_OUT"
    fi
  done
  if [ "$FT_COMMENT_OPEN" -eq 1 ]; then
    # Close off the fixme section
    echo '-->' >>"$FT_OUT"
  fi
}

# list_candidates [NAME ...] - print the top level files to consider:
# those newer than template.hist (all of them when it is absent) plus
# those matching a NAME.
list_candidates() {
  if [ -e "$TEMPLATE_PATH/template.hist" ]; then
    find . -mindepth 1 -maxdepth 1 -type f \
      -newer "$TEMPLATE_PATH/template.hist"
  else
    find . -mindepth 1 -maxdepth 1 -type f
  fi
  for X in "$@"; do
    find . -mindepth 1 -maxdepth 1 -type f -name "$X"
  done
}

cd "$WEBSITE_PATH" || exit 1

# Scratch copies of the filled-in templates live in a private
# directory that is removed again however the script ends.
WORKDIR=$(mktemp -d) || exit 1
trap 'rm -rf "$WORKDIR"' EXIT
HEAD_TMP=$WORKDIR/template_head.tmp
GUIDEBAR_TMP=$WORKDIR/template_guidebar.tmp
TAIL_TMP=$WORKDIR/template_tail.tmp

#
# Let's do a file at a time -- here goes a big "for" loop
#
for HTML_FILE in $(list_candidates "$@" |
  grep 'html$' | grep -v "\./index.html" | sort -u); do
  echo "$HTML_FILE"

  #Strip out existing templates, keep file date and time
  for TEMPLATE_NAME in $TEMPLATE_LIST; do
    strip_template "$TEMPLATE_NAME" <"$HTML_FILE" >"$HTML_FILE.template.tmp"
    touch -r "$HTML_FILE" "$HTML_FILE.template.tmp"
    mv "$HTML_FILE.template.tmp" "$HTML_FILE"
  done

  #Fill in the new templates
  fill_template TEMPLATE_HEAD "$HEAD_TMP" with_date

  fill_template TEMPLATE_GUIDEBAR "$GUIDEBAR_TMP" no_date
  if [ "$TEMPLATE_UNRESOLVED" -gt 0 ]; then
    # If the guidebar isn't finished, I don't want to display it,
    # so strip it out
    strip_template TEMPLATE_GUIDEBAR <"$GUIDEBAR_TMP" >"$GUIDEBAR_TMP.1"
    mv "$GUIDEBAR_TMP.1" "$GUIDEBAR_TMP"
  fi

  fill_template TEMPLATE_TAIL "$TAIL_TMP" with_date

  # Put all the pieces together
  #How many lines in the guidebar, not counting "fixme" entries?
  LINECOUNT=$(grep -vc '^PLACE_' "$GUIDEBAR_TMP")
  if [ "$LINECOUNT" -eq 0 ]; then
    GUIDEBAREMPTY="empty"
  else
    GUIDEBAREMPTY="not_empty"
  fi
  {
    #Remove the last HR tag from the head if the guidebar is not
    # empty, else don't strip
    if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
      LASTHR=$(grep -c '<HR>' "$HEAD_TMP")
      awk -v LAST="$LASTHR" '
        /<HR>/ { COUNT = COUNT + 1; if (COUNT == LAST) { next } }
        { print }
      ' "$HEAD_TMP"
    else
      cat "$HEAD_TMP"
    fi

    #Send the guidebar
    cat "$GUIDEBAR_TMP"

    #Send the main html body - strip off any CR chars while we are at it
    awk '{ gsub("\r$", ""); print }' "$HTML_FILE"

    if [ "$GUIDEBAREMPTY" = "not_empty" ]; then
      #The guidebar is populated: send it again, without its "fixme"
      # entries, and remove the first HR tag from the tail
      grep -v '^PLACE_' "$GUIDEBAR_TMP"
      awk '
        /<HR>/ { if (FOUND == 0) { FOUND = 1; next } }
        { print }
      ' "$TAIL_TMP"
    else
      #The guidebar is empty: send the tail unchanged
      cat "$TAIL_TMP"
    fi
  } >"$HTML_FILE.template.tmp"

  #Keep the page's modification time.  touch -r copies it directly;
  # a "find -printf" time may carry a fractional %TS that touch -t
  # does not accept.
  touch -r "$HTML_FILE" "$HTML_FILE.template.tmp"
  mv "$HTML_FILE.template.tmp" "$HTML_FILE"
done
# End HTML_FILE big loop

# Here we keep track of which files we need to process
touch "$TEMPLATE_PATH/template.hist"
Here’s savedates.sh, which I use when modifying formatting rather than content:
# savedates.sh - record the modification time of every page before a
# formatting-only edit, so restoredates.sh can put the times back afterwards.
#
# Run from a directory that is a sibling of public_html.
# Output: ../allfiles.txt (relative to public_html), one line per regular
#         *.html file in the form "<touch -t timestamp> <filename>".
# A progress dot per file is written to stderr.

# Stop if the directory is missing; otherwise the loop would record the
# timestamps of whatever directory we happen to be in.
cd ../public_html || {
  echo "savedates: cannot cd to ../public_html" >&2
  exit 1
}

for page in *.html; do
  # An unmatched glob stays literal ("*.html"); skip it instead of passing
  # a non-existent name to find.
  [ -e "$page" ] || continue

  printf '.' >&2

  # Depth options go before -type so GNU find does not warn about ordering.
  find "$page" -mindepth 0 -maxdepth 0 -type f \
    -printf '%TY%Tm%Td%TH%TM.%TS %p\n'
done |
  # GNU find's %TS may include fractional seconds (SS.NNNNNNNNNN), while
  # touch -t only accepts [[CC]YY]MMDDhhmm[.SS]; drop the fraction so the
  # stamp can be fed straight back to touch.
  sed -e 's/^\([0-9]\{12\}\.[0-9][0-9]\)\.[0-9]*/\1/' > ../allfiles.txt

# Finish the line of progress dots.
echo
And here’s restoredates.sh, which is the complement to savedates.sh:
# restoredates.sh - complement of savedates.sh: re-apply the modification
# times recorded in ../allfiles.txt to the pages in public_html.
#
# Run from a directory that is a sibling of public_html.
# Input: ../allfiles.txt (relative to public_html), one line per file in the
#        form "<touch -t timestamp> <filename>".
# A progress dot per processed line is written to stdout.

# Stop if the directory is missing; otherwise touch would modify files of the
# same name in whatever directory we happen to be in.
cd ../public_html || {
  echo "restoredates: cannot cd to ../public_html" >&2
  exit 1
}

if [ ! -r ../allfiles.txt ]; then
  echo "restoredates: ../allfiles.txt not found; run savedates.sh first" >&2
  exit 1
fi

# -r keeps backslashes literal; the second variable receives the rest of the
# line, so file names containing spaces survive intact.
while read -r stamp page; do
  # Ignore blank or malformed lines.
  [ -n "$page" ] || continue

  # Stamps written with GNU find's %TS can look like
  # YYYYMMDDhhmm.SS.NNNNNNNNNN; touch -t only accepts [[CC]YY]MMDDhhmm[.SS],
  # so strip the fractional part when present.
  case $stamp in
    *.*.*) stamp=${stamp%.*} ;;
  esac

  touch -t "$stamp" -- "$page"
  printf '.'
done < ../allfiles.txt

# Finish the line of progress dots.
echo