#!/usr/bin/python3
# ===================================================================
# Build a HTML file (index.html) containing links to selected
# files in a directory
# -------------------------------------------------------------------
# This script assumes that only a single directory will be
# searched/processed. Therefore, there will be no duplicate
# file names. If the script is modified to search/process
# more than one directory, duplicate file names are possible.
# -------------------------------------------------------------------
# The web server may try to execute the file pointed to by the link
# created by this script. (For example, python.) This script can
# copy and rename a file so it can be displayed as text.
# For example: xxxx.py -> xxxx_PY
# ===================================================================
import re
import os
import datetime
import shutil
# -------------------------------------------------------------------
# global variables, constants, etc.
# -------------------------------------------------------------------
AUTHOR = 'Tom Wolfe'              # HTML file author
CSSFILE = 'xxxx.css'              # HTML CSS file
DIRECTORY = './'                  # directory to be searched/processed
                                  # (ending '/' character required)

# file name patterns - match regular expressions
# (a file whose name matches one of these is linked to directly)
FILEMATCHPATS = [
    r'\.html$',
    r'\.pdf$',
    r'\.png$',
    r'\.txt$',
    r'\.css$',
]

# file name patterns - rename regular expressions
# (a file whose name matches one of these is copied to a new name
# before it is linked to)
FILERENAMEPATS = [
    r'\.py$',
    r'\.bat$',
]

# file name patterns - skip regular expressions
# (the '.' is escaped so it only matches a literal dot)
FILESKIPPATS = [r'^index\.html$']

# file name patterns - rename match and replace strings
# each entry is a tuple: (regular expression, replacement string)
REPLACEMENTSTRS = [
    (r'\.py$', '_PY'),
    (r'\.bat$', '_BAT'),
]

OUTFILE = './index.html'          # output file
VERBOSE = True                    # display the runtime settings?
# -------------------------------------------------------------------
# output start web page
# -------------------------------------------------------------------
def start_web_page(ofile, dir, author=None, stylesheet=None):
    """Write the opening part of the web page to the output file.

    ofile       output file (any object with a write() method)
    dir         directory name displayed in the page header
    author      value for the author <meta> tag (omitted when None)
    stylesheet  href for the stylesheet <link> tag (omitted when None)

    The page is left open inside <div class="indent12">; end_web_page()
    writes the matching closing tags.
    """
    lines = [
        '<!DOCTYPE html>\n',
        '<html>\n',
        '<head>\n',
        '<meta charset="utf-8" />\n',
    ]
    # ---- optional <head> entries
    if author is not None:
        lines.append('<meta name="author" content="{}" />\n'.format(author))
    if stylesheet is not None:
        lines.append('<link rel="stylesheet" href="{}" />\n'.format(stylesheet))
    # ---- end of <head>, start of <body>
    lines += [
        '</head>\n',
        '<body>\n',
        '<header>\n',
        '<center>Dir: {}</center>\n'.format(dir),
        '</header>\n',
        '<div class="indent12">\n',
    ]
    for line in lines:
        ofile.write(line)
# -------------------------------------------------------------------
# output end of web page
# -------------------------------------------------------------------
def end_web_page(ofile):
    """Write the closing part of the web page to the output file.

    ofile  output file (any object with a write() method)

    The footer shows the current month and year (for example
    'March 2024'). No newline is written after the final </html>.
    """
    stamp = datetime.datetime.now().strftime('%B %Y')
    closing = (
        '</div>\n',
        '<footer>\n',
        '<modate>Last Modified: {}</modate>\n'.format(stamp),
        '</footer>\n',
        '</body>\n',
        '</html>',
    )
    for piece in closing:
        ofile.write(piece)
# -------------------------------------------------------------------
# test if a string matches one of a list of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python’s usage of the same character for the same purpose
# in string literals. The solution is to use Python’s raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n',
# while "\n" is a one-character string containing a newline.
#
# For example, to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def string_match_pattern(patterns, str):
    """Test if a string matches any one of a list of regular expressions.

    patterns  iterable of regular expression strings
    str       string to test

    Matching uses re.search() and ignores case. Returns True on the
    first pattern that matches, otherwise False (an empty pattern
    list never matches).
    """
    return any(re.search(pat, str, re.IGNORECASE) for pat in patterns)
# -------------------------------------------------------------------
# return a list (dictionary) of selected file names
#
# dir directory to search/process
# spat list of file skip regular expressions
# mpat list of file match regular expressions
# rpat list of file rename regular expressions
# -------------------------------------------------------------------
def get_list_of_files(dir, spat, mpat, rpat):
    """Return two dictionaries of selected file names found in a directory.

    dir   directory to search/process; it is prepended to each file
          name as-is, so it must end in '/' (or be empty)
    spat  list of file skip regular expressions
    mpat  list of file match regular expressions
    rpat  list of file rename regular expressions

    Returns the tuple (mdct, rdct)
        mdct  files whose name matches one of the match patterns
        rdct  files whose name matches one of the rename patterns
    In both dictionaries
        key   = file name
        value = path + file name

    Hidden files (name starts with '.'), symbolic links, directories
    and names matching a skip pattern are ignored. The match patterns
    are tested first, so a file that matches both a match pattern and
    a rename pattern is only placed in mdct.
    """
    mdct = {}    # matched files
    rdct = {}    # files to be renamed

    for f in os.listdir(dir):
        # ---- skip hidden files (plain string test; no regular
        # ---- expression, and no escape sequence, is needed)
        if f.startswith('.'):
            continue

        # ---- file path and name
        ff = dir + f

        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file name?
        if string_match_pattern(spat, f):
            continue

        # ---- match pattern wins over rename pattern
        if string_match_pattern(mpat, f):
            mdct[f] = ff
        elif string_match_pattern(rpat, f):
            rdct[f] = ff

    # ---- return the dictionaries
    return (mdct, rdct)
# -------------------------------------------------------------------
# add links to the output web page
#
# ofile output file
# mdct is a dictionary containing matched file names
# dictionary key = file name
# dictionary value = path + file name
# rdct is a dictionary containing rename file names
# dictionary key = file name
# dictionary value = path + file name
# rstrs is a list of file name replacement strings
# (list entries are tuples)
# [0] regexp pattern (string to replace)
# [1] replacement string
# -------------------------------------------------------------------
def create_web_page_links(ofile, dir, mdct, rdct, rstrs):
    """Add links to the output web page.

    ofile  output file (any object with a write() method)
    dir    directory path of the files (must end in '/' or be empty)
    mdct   dictionary of matched files
               key = file name, value = path + file name
    rdct   dictionary of files to copy to a new name and then link to
               key = file name, value = path + file name
    rstrs  list of file name replacement tuples
               [0] regexp pattern (string to replace)
               [1] replacement string

    Regular links are written when mdct is not empty. Renamed links
    are written when rdct is not empty; replacement strings are only
    required in that case.

    Returns True on success. Returns False (after printing an error)
    when there are files to rename but no replacement strings.
    """
    # local import: only this function needs HTML escaping
    import html

    # ---------------------------------------------------------------
    # write one link to the output file
    # The file name is HTML-escaped so characters such as '&', '<'
    # and '"' can not break the generated markup.
    # ---------------------------------------------------------------
    def write_link(ofile, href, text):
        ofile.write('<a href="{}">{}</a>\n'.format(
            html.escape(href, quote=True), html.escape(text)))

    # ---------------------------------------------------------------
    # copy and rename a file into the same directory
    #    dir    directory path
    #    orgf   original file name
    #    newf   new file name
    # returns True if the file was copied, False if the original and
    # new names are the same (nothing is copied)
    # ----
    # note: if you want to just rename the file
    #       os.rename(dir+orgf, dir+newf)
    # ---------------------------------------------------------------
    def copy_and_rename_file(dir, orgf, newf):
        o = dir + orgf    # original file (path + name)
        n = dir + newf    # new file      (path + name)
        if o == n:
            print('Error: original and new files have the ' +
                  'same name ({})\n'.format(o))
            return False
        shutil.copy(o, n)
        return True

    # ---------------------------------------------------------------
    # write regular links to the output file
    #    mdct:
    #       dictionary key   = file name
    #       dictionary value = path + file name
    # ---------------------------------------------------------------
    def create_regular_links(ofile, dir, mdct):
        ofile.write('<p>\n')
        for c, k in enumerate(sorted(mdct)):
            if c != 0:
                ofile.write('<br>\n')
            write_link(ofile, mdct[k], k)
        ofile.write('</p>\n')
        return True

    # ---------------------------------------------------------------
    # create new file name
    #    fn     file name
    #    rstrs  replacement strings list (list entries are tuples)
    #              [0] regexp (string to replace)
    #              [1] replacement string
    # returns a tuple
    #    [0] found a match - True/False
    #    [1] new file name ('' when no match was found)
    # ---------------------------------------------------------------
    def create_new_file_name(fn, rstrs):
        for pat, repl in rstrs:
            # subn() reports how many replacements were made, so one
            # call both tests for a match and builds the new name
            nn, count = re.subn(pat, repl, fn, flags=re.IGNORECASE)
            if count:
                return (True, nn)
        return (False, '')

    # ---------------------------------------------------------------
    # write rename links to the output file
    #    rdct:
    #       dictionary key   = file name
    #       dictionary value = path + file name
    # A file that can not be renamed or copied is skipped; the
    # remaining files are still processed and the paragraph is
    # always closed.
    # ---------------------------------------------------------------
    def create_renamed_links(ofile, dir, rdct, rstrs):
        ofile.write('<h2>Rename these files</h2>\n')
        ofile.write('<p>\n')
        c = 0    # link count
        for k in sorted(rdct):
            # ---- new file name?
            found, nn = create_new_file_name(k, rstrs)
            if not found:
                print('Warning: no replacement string matches ' +
                      'file ({})'.format(k))
                continue
            # ---- copy original file to new file
            # ---- (no link to a file that was not created)
            if not copy_and_rename_file(dir, k, nn):
                continue
            # ---- write link to output file
            if c != 0:
                ofile.write('<br>\n')
            write_link(ofile, dir + nn, nn)
            c += 1
        ofile.write('</p>\n')

    # ---------------------------------------------------------------
    # ---- function's main code -------------------------------------
    # ---------------------------------------------------------------

    # ---- create regular links?
    if len(mdct) > 0:
        create_regular_links(ofile, dir, mdct)

    # ---- create replacement links?
    if len(rdct) > 0:
        # ---- replacement strings are only needed when renaming
        if rstrs is None or len(rstrs) < 1:
            print('Error: no file name replacement strings\n')
            return False
        create_renamed_links(ofile, dir, rdct, rstrs)

    return True
# -------------------------------------------------------------------
# verbose runtime
# -------------------------------------------------------------------
def verbose_runtime():
    """Print the runtime settings, one value per line.

    Displays the directory, the output file and every skip, match
    and rename pattern and replacement tuple held in the module
    level constants.
    """
    settings = (
        ('DIRECTORY : {}', [DIRECTORY]),
        ('OUTPUT FILE : {}', [OUTFILE]),
        ('FILE SKIP PATTERN : {}', FILESKIPPATS),
        ('FILE MATCH PATTERN : {}', FILEMATCHPATS),
        ('FILE RENAME PATTERN: {}', FILERENAMEPATS),
        ('REPLACEMENT STRS : {}', REPLACEMENTSTRS),
    )
    for template, values in settings:
        for value in values:
            print(template.format(value))
# ===================================================================
# main
# ===================================================================
if __name__ == '__main__':

    # ---- does the directory to search/process exist?
    if not os.path.isdir(DIRECTORY):
        print()
        print('No directory found')
        print('Output file NOT created or modified')
        print('DIRECTORY : {}'.format(DIRECTORY))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        print()
        # SystemExit is raised directly; quit() is a helper meant for
        # the interactive interpreter. The exit status is unchanged.
        raise SystemExit

    # ---- fix the directory name string (if we need to)
    # ---- it must end in '/' or be empty
    # ---- (belt and suspenders - double check)
    if DIRECTORY and not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    # ---- get lists (dictionaries) of selected files
    (mdct, rdct) = get_list_of_files(DIRECTORY, FILESKIPPATS,
                                     FILEMATCHPATS, FILERENAMEPATS)

    # ---- any files found to process?
    if not mdct and not rdct:
        print()
        print('No files found in directory to process')
        print('Output file NOT created or modified')
        if VERBOSE:
            verbose_runtime()
        print()
        raise SystemExit

    # ---- display verbose messages?
    if VERBOSE:
        verbose_runtime()

    # ---- create output file
    # ---- (the with statement closes the file even if writing
    # ---- the page raises an exception)
    with open(OUTFILE, "w", encoding="utf-8") as ofile:
        start_web_page(ofile, DIRECTORY, AUTHOR, CSSFILE)
        create_web_page_links(ofile, DIRECTORY, mdct, rdct, REPLACEMENTSTRS)
        end_web_page(ofile)

    print()
    print('{:3} match links written to output file'.format(len(mdct)))
    print('{:3} renamed links written to output file'.format(len(rdct)))
    print()