#!/usr/bin/python3
# ===================================================================
# Build an HTML file (index.html) containing links to selected
# files in a directory. A simple HTML link is created.
# -------------------------------------------------------------------
# This script assumes that only a single directory will be
# searched/processed. Therefore, there will be no duplicate
# file names. If the script is modified to search/process
# more than one directory, duplicate file names are possible.
# -------------------------------------------------------------------
# The web server may try to execute the file pointed to by the
# link created by this script. This simple script does nothing
# about this problem.
# ===================================================================
import datetime
import html
import os
import re
import urllib.parse
# -------------------------------------------------------------------
# global variables, constants, etc.
# -------------------------------------------------------------------
AUTHOR = 'Tom Wolfe'            # HTML file author
CSSFILE = 'xxxx.css'            # HTML CSS file
DIRECTORY = './'                # directory to be searched/processed
                                # ending '/' character required
# file name patterns - match regular expressions
# (a file is selected when its name matches at least one of them)
FILEMATCHPATS = [
    r'\.html$',
    r'\.pdf$',
    r'\.png$',
    r'\.py$',
    r'\.txt$',
    r'\.css$',
    r'\.bat$',
]
# file name patterns - skip regular expressions
# (the '.' is escaped so that only the literal name 'index.html' is
# skipped, not every name of the form 'index?html')
FILESKIPPATS = [r'^index\.html$']
OUTFILE = './index.html'        # output file
# -------------------------------------------------------------------
# output start web page
# -------------------------------------------------------------------
def start_web_page(ofile, dir, author=None, stylesheet=None):
    """Write the opening part of the web page to an output file.

    ofile       output file (any object with a write() method)
    dir         directory name shown in the page header
    author      optional author name for the author <meta> tag
    stylesheet  optional CSS file referenced by a <link> tag

    The <meta> and <link> tags are only written when the matching
    argument is not None. All caller supplied text is HTML escaped so
    that characters such as '&', '<' or '"' cannot break the markup.
    The page is left open inside a <div>; the closing tags are not
    written here.
    """
    ofile.write('<!DOCTYPE html>\n')
    ofile.write('<html>\n')
    ofile.write('<head>\n')
    ofile.write('<meta charset="utf-8" />\n')
    if author is not None:
        # ---- quote=True (the default) also escapes '"' which is
        # ---- required inside a double quoted attribute value
        ofile.write('<meta name="author" content="{}" />\n'.
                    format(html.escape(str(author))))
    if stylesheet is not None:
        ofile.write('<link rel="stylesheet" href="{}" />\n'.
                    format(html.escape(str(stylesheet))))
    ofile.write('</head>\n')
    ofile.write('<body>\n')
    ofile.write('<header>\n')
    ofile.write('<center>Dir: {}</center>\n'.format(html.escape(str(dir))))
    ofile.write('</header>\n')
    ofile.write('<div class="indent12">\n')
# -------------------------------------------------------------------
# output end of web page
# -------------------------------------------------------------------
def end_web_page(ofile):
    """Write the closing part of the web page to an output file.

    ofile  output file (any object with a write() method)

    The footer carries a "Last Modified" stamp built from the current
    local date, formatted as month name and year.
    """
    stamp = datetime.datetime.now().strftime('%B %Y')
    closing_lines = (
        '</div>\n',
        '<footer>\n',
        '<modate>Last Modified: {}</modate>\n'.format(stamp),
        '</footer>\n',
        '</body>\n',
        '</html>',              # no newline after the final tag
    )
    for line in closing_lines:
        ofile.write(line)
# -------------------------------------------------------------------
# test if a string matches one of a list of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python’s usage of the same character for the same purpose
# in string literals. The solution is to use Python’s raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example, to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def string_match_pattern(patterns,str):
    """Report whether a string matches any of a list of patterns.

    patterns  list of regular expressions
    str       string to test

    Each pattern is tried in turn with re.search, ignoring case.
    Returns True at the first pattern that matches and False when
    none matches (including when the list is empty).
    """
    return any(re.search(pat, str, re.IGNORECASE) for pat in patterns)
# -------------------------------------------------------------------
# return a list (dictionary) of selected file names
#
# dir directory to search/process
# mpat list of file match regular expressions
# spat list of file skip regular expressions
# -------------------------------------------------------------------
def get_list_of_files(dir, mpat, spat):
    """Return a dictionary of the selected file names in a directory.

    dir   directory to search/process (its sub-directories are not
          searched); a missing trailing '/' is added when the file
          paths are built
    mpat  list of file match regular expressions
    spat  list of file skip regular expressions

    Returns a dictionary with key = file name and
    value = path + file name. A directory entry is selected only when
    its name does not start with '.', it is neither a symbolic link
    nor a directory, it matches none of the skip patterns and it
    matches at least one of the match patterns.
    """
    # ---- path prefix: must end in '/' (or be empty) so that
    # ---- prefix + file name is a usable path
    prefix = dir
    if prefix and not prefix.endswith('/'):
        prefix += '/'
    dct = {}                    # file dictionary (list of files)
    for f in os.listdir(dir):
        # ---- skip hidden files (file name starts with a '.')
        if f.startswith('.'):
            continue
        # ---- file path and name
        ff = prefix + f
        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue
        # ---- skip the file name?
        if string_match_pattern(spat, f):
            continue
        # ---- match the file name?
        if not string_match_pattern(mpat, f):
            continue
        # ---- save the selected file name in the dictionary
        dct[f] = ff
    # ---- return the dictionary (list of files)
    return dct
# -------------------------------------------------------------------
# add links to the output web page
#
# ofile output file
# dct is a dictionary containing selected file names
# dictionary key = file name
# dictionary value = path + file name
# -------------------------------------------------------------------
def create_web_page_links(ofile, dct):
    """Write one HTML link per selected file to the output web page.

    ofile  output file (any object with a write() method)
    dct    dictionary containing the selected file names
           dictionary key   = file name (used as the link text)
           dictionary value = path + file name (used as the link target)

    The links are written inside a single <p> element, sorted by file
    name and separated by <br>. The link target is percent-encoded and
    the link text is HTML escaped, so file names containing characters
    such as ' ', '#', '?', '&', '<' or '"' still give a working link
    and valid markup.
    """
    links = []
    for name in sorted(dct):
        # ---- quote() keeps '/' and encodes everything that is not
        # ---- safe in a URL path, including '"', '<' and '&'
        target = urllib.parse.quote(dct[name])
        links.append('<a href="{}">{}</a>\n'.format(target, html.escape(name)))
    ofile.write('<p>\n')
    ofile.write('<br>\n'.join(links))
    ofile.write('</p>\n')
# ===================================================================
# main
# ===================================================================
if __name__ == '__main__':
    # ---- does the directory to search/process exist?
    if not os.path.isdir(DIRECTORY):
        print()
        print('No directory found')
        print('Output file NOT created or modified')
        print('DIRECTORY : {}'.format(DIRECTORY))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        print()
        # ---- quit() is a helper of the site module intended for the
        # ---- interactive interpreter; raise SystemExit directly
        raise SystemExit
    # ---- fix the directory name string (if we need to)
    # ---- it must end in '/' or be empty
    # ---- (belt and suspenders - double check)
    directory = DIRECTORY
    if directory and not directory.endswith('/'):
        directory += '/'
    # ---- get a dictionary of selected files
    dct = get_list_of_files(directory, FILEMATCHPATS, FILESKIPPATS)
    # ---- any files found to process?
    if not dct:
        print()
        print('No files found in directory to process')
        print('Output file NOT created or modified')
        print('DIRECTORY : {}'.format(directory))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        for p in FILESKIPPATS:
            print('FILE SKIP PATTERN: {}'.format(p))
        for p in FILEMATCHPATS:
            print('FILE MATCH PATTERN: {}'.format(p))
        print()
        raise SystemExit
    # ---- create output file
    # ---- (the with statement closes the file even when a write fails)
    with open(OUTFILE, "w", encoding="utf-8") as ofile:
        start_web_page(ofile, directory, AUTHOR, CSSFILE)
        create_web_page_links(ofile, dct)
        end_web_page(ofile)
    print()
    print('{} links written to file'.format(len(dct)))
    print()