#! /usr/bin/python3
# ==================================================================
#
# ==================================================================
import os
import re
# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# (r"\n" is a two-character string.)
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def StringMatchPattern(patterns, str, ignorecase=True):
    '''
    Report whether a string matches at least one regular expression.

    Attributes:
    patterns   - list or tuple of RegExp patterns to try
    str        - string to be searched (re.search is used, so a
                 pattern may match anywhere unless it is anchored)
    ignorecase - when true the search is case-insensitive

    Returns True if any pattern matches, otherwise False.
    '''
    # ---- pick the search flags once instead of branching per pattern
    flags = re.IGNORECASE if ignorecase else 0
    return any(re.search(regex, str, flags) for regex in patterns)
# -------------------------------------------------------------------
# create a directory if it does not exist
# -------------------------------------------------------------------
def CreateDirectory(path):
    '''
    Make sure a directory exists at the given path.

    Attributes:
    path - directory to create (missing parent directories are
           created as well)

    Returns True if the directory already existed or was created,
    and False if the path exists but is not a directory.
    Errors raised by os.makedirs are not caught.
    '''
    # ---- nothing there yet: create it (and any missing parents)
    if not os.path.exists(path):
        os.makedirs(path)
        return True
    # ---- something is there: succeed only if it is a directory
    return os.path.isdir(path)
# -------------------------------------------------------------------
# get a list of regular files (not directories or links)
# that match a list of regular expressions
# -------------------------------------------------------------------
def GetListOfFiles(ifiles, sfiles, rdir, filelist, verbose=False, debug=False):
    '''
    Collect the files in a directory whose names match a list of
    regular expressions. Hidden entries (names starting with a
    period), symbolic links and directories are never collected.
    The search is not recursive.

    Attributes:
    ifiles   - list or tuple of files to be included
               They are RegExp patterns.
    sfiles   - list or tuple of files to be skipped
               They are RegExp patterns and take precedence
               over ifiles.
    rdir     - directory to be searched
    filelist - list that the matched files (path + file name)
               are appended to
    verbose  - accepted for interface compatibility; not used here
    debug    - print debug information

    Patterns are tested against the file name only (not the path)
    using StringMatchPattern with its default setting, so matching
    is case-insensitive.

    Returns True. Errors raised by os.listdir (for example a missing
    or unreadable directory) are not caught.
    '''
    if debug:
        print("GetListOfFiles({})".format(rdir))
    # ---- dir must end in a '/' character because the result paths
    # ---- are built by plain string concatenation
    if not rdir.endswith('/'):
        rdir = rdir + '/'
    # ---- examine every entry in the directory
    for f in os.listdir(rdir):
        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'
        if f.startswith('.'):
            continue
        ff = rdir + f
        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue
        # ---- skip the file?
        if StringMatchPattern(sfiles, f):
            continue
        # ---- is it one of the files to include?
        if not StringMatchPattern(ifiles, f):
            continue
        # ---- save file (path + name)
        filelist.append(ff)
    return True
# -------------------------------------------------------------------
# get a list of sub-directories in a directory (not files or links)
# that match a list of regular expressions
# -------------------------------------------------------------------
def GetListOfDirs(sdirs, rdir, dirlist, verbose=False, debug=False):
    '''
    Collect the sub-directories of a directory, leaving out those
    whose names match a list of regular expressions. Hidden entries
    (names starting with a period) and symbolic links are never
    collected. The search is not recursive.

    Attributes:
    sdirs   - list or tuple of directories to be skipped
              They are RegExp patterns.
    rdir    - directory to be searched (rootdir)
    dirlist - list that the sub-directories (path + directory name)
              are appended to
    verbose - accepted for interface compatibility; not used here
    debug   - print debug information

    Patterns are tested against the directory name only (not the
    path) using StringMatchPattern with its default setting, so
    matching is case-insensitive.

    Returns True. Errors raised by os.listdir (for example a missing
    or unreadable directory) are not caught.
    '''
    if debug:
        print("GetListOfDirs({})".format(rdir))
    # ---- dir must end in a '/' character because the result paths
    # ---- are built by plain string concatenation
    if not rdir.endswith('/'):
        rdir = rdir + '/'
    # ---- examine every entry in the directory
    for d in os.listdir(rdir):
        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'
        if d.startswith('.'):
            continue
        dd = rdir + d
        # ---- skip links; os.path.isdir follows them, so a link to a
        # ---- directory would otherwise be listed and a tree walk
        # ---- could loop through it
        if os.path.islink(dd):
            continue
        # ---- is it a directory?
        if not os.path.isdir(dd):
            continue
        # ---- skip directory?
        if StringMatchPattern(sdirs, d):
            continue
        # ---- save directory (path + name)
        dirlist.append(dd)
    return True
# ==================================================================
# main - testing
# ==================================================================
if __name__ == '__main__':
    # ---- self-test: list the matching files and the sub-directories
    # ---- found directly in ROOTDIR
    # ---- global variables
    DEBUG = False
    INCLUDEFILES = [ '\\.html$', r'\.py$' ]
    ROOTDIR = '/hodgepodge/' # should end in a '/' character
    SKIPDIRS = []
    SKIPFILES = [ r'^\.', '^my_', '^tk_' ]
    VERBOSE = False
    # ---- root directory exists?
    if not os.path.isdir(ROOTDIR):
        print("Root directory does no exist ({})".format(ROOTDIR))
        # ---- SystemExit works even when the site module (which
        # ---- provides quit) is not loaded; exit status is unchanged
        raise SystemExit
    # ---- get and display a list of files
    print('---- files --------------------------------------------')
    filelist = []
    if not GetListOfFiles(INCLUDEFILES, SKIPFILES, ROOTDIR,
                          filelist, VERBOSE, DEBUG):
        print("GetListOfFiles failed")
    elif not filelist:
        print("No files found in directory ({})".format(ROOTDIR))
    else:
        filelist.sort()
        for f in filelist:
            print(f)
        print()
        print("{} files found".format(len(filelist)))
        print()
    # ---- get and display a list of sub-directories
    dirlist = []
    print('---- sub-directories-----------------------------------')
    if not GetListOfDirs(SKIPDIRS, ROOTDIR, dirlist, VERBOSE, DEBUG):
        print("GetListOfDirs failed")
    elif not dirlist:
        print("No sub-directories found in directory ({})".format(ROOTDIR))
    else:
        dirlist.sort()
        for d in dirlist:
            print(d)
        print()
        print("{} sub-directories found".format(len(dirlist)))
        print()
# -------------------------------------------------------------------
# walk (process) a tree or sub-tree of directories and files
#
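# RootDir  - root directory of a tree or sub-tree
# DeltaDir - passed unchanged to every recursive call
# IncFiles - RegExp patterns of the file names to include
# SkpFiles - RegExp patterns of the file names to skip
# SkpDirs  - RegExp patterns of the sub-directory names to skip
# DirFunc  - function to call on every directory under the
#            root directory
# FileFunc - function to call on every file under the
#            root directory
# DirFirst - call DirFunc before FileFunc (True,False)
# TopDown  - process a directory's entries before walking its
#            sub-directories (True,False)
# TestMode - passed to DirFunc and FileFunc
# Verbose  - print verbose messages
# Debug    - print debug messages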
# -------------------------------------------------------------------
def WalkTheTree(RootDir, DeltaDir, IncFiles, SkpFiles, SkpDirs, DirFunc,
                FileFunc, DirFirst=True, TopDown=True, TestMode=False,
                Verbose=False, Debug=False):
    '''
    Walk a tree of directories and call a function on every
    sub-directory and every selected file found below RootDir.
    RootDir itself is never passed to DirFunc.

    Attributes:
    RootDir  - root directory of the tree or sub-tree
    DeltaDir - not used by the walk itself; passed unchanged to
               every recursive call
    IncFiles - list or tuple of RegExp patterns of files to include
    SkpFiles - list or tuple of RegExp patterns of files to skip
    SkpDirs  - list or tuple of RegExp patterns of sub-directories
               to skip
    DirFunc  - called as DirFunc(path,TestMode,Verbose,Debug) for
               every sub-directory
    FileFunc - called as FileFunc(path,TestMode,Verbose,Debug) for
               every selected file
    DirFirst - True: within one directory call DirFunc on the
               sub-directories before FileFunc on the files
               False: files first
    TopDown  - True: process a directory's entries before walking
               its sub-directories
               False: walk the sub-directories first (bottom up)
    TestMode - passed to DirFunc and FileFunc
    Verbose  - passed to the listing functions and to the callbacks
    Debug    - passed to the callbacks

    Returns True when the whole tree was processed. The walk stops
    and returns False as soon as a callback, a directory listing or
    a recursive walk returns a false value.
    '''
    # ---- get a list of sub-directories in the root directory
    dirlist = []
    if not GetListOfDirs(SkpDirs, RootDir, dirlist, Verbose, False):
        print("GetListOfDirs failed ({})".format(RootDir))
        return False
    # ---- get a list of files in the root directory
    filelist = []
    if not GetListOfFiles(IncFiles, SkpFiles, RootDir, filelist,
                          Verbose, False):
        print("GetListOfFiles failed ({})".format(RootDir))
        return False

    def process_level():
        # ---- call the callbacks on this directory's entries,
        # ---- stopping at the first one that fails
        dir_calls = [(DirFunc, d) for d in dirlist]
        file_calls = [(FileFunc, f) for f in filelist]
        if DirFirst:
            calls = dir_calls + file_calls
        else:
            calls = file_calls + dir_calls
        return all(func(path, TestMode, Verbose, Debug)
                   for func, path in calls)

    def walk_subdirs():
        # ---- recurse into every sub-directory; the options are
        # ---- passed by keyword so DirFirst and TopDown cannot be
        # ---- mixed up
        return all(WalkTheTree(d, DeltaDir, IncFiles, SkpFiles, SkpDirs,
                               DirFunc, FileFunc,
                               DirFirst=DirFirst, TopDown=TopDown,
                               TestMode=TestMode, Verbose=Verbose,
                               Debug=Debug)
                   for d in dirlist)

    # ---- process top down or bottom up
    if TopDown:
        steps = (process_level, walk_subdirs)
    else:
        steps = (walk_subdirs, process_level)
    # ---- all() stops at the first step that fails
    return all(step() for step in steps)