#! /usr/bin/python3
# ===================================================================
# get a list of directories in a directory tree
# ===================================================================
import re
import os
# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def StringMatchPattern(patterns, str):
    '''
    Report whether a string matches any of a collection of patterns.

    Arguments:
       patterns - list or tuple of regular expression patterns
       str      - the string to test (this parameter name hides the
                  builtin of the same name inside this function)

    Returns True if at least one pattern is found anywhere in the
    string (case-insensitive search), otherwise False. An empty
    collection of patterns therefore yields False.
    '''
    # any() stops at the first pattern that produces a match object
    return any(re.search(regex, str, re.IGNORECASE) for regex in patterns)
# -------------------------------------------------------------------
# get a list of directories
# -------------------------------------------------------------------
def GetListOfDirs(skipdirs,treedir,dirlist,level=None,verbose=False):
    '''
    Create a list of directories in all or part of a directory tree.
    (no regular files or links)

    Arguments:
       skipdirs - list or tuple of RegExp patterns. A directory whose
                  name (not its full path) matches any of them is
                  neither captured nor searched.
       treedir  - current directory in the directory tree
                  being processed
       dirlist  - the list of captured directories
                  (path + directory name). It is appended to in place.
       level    - The number of directory levels to capture
                  level = None -- capture all levels of directories
                  level < 0    -- capture nothing and return
                  level = 0    -- capture the directories found
                                  directly in treedir, do not descend
                  level > 0    -- decrement level and keep going
       verbose  - print messages describing what the code is doing

    Returns:
       True whenever the function returns normally. Errors raised by
       os.listdir (for example a missing or unreadable directory)
       are not caught here and propagate to the caller.
    '''
    if verbose:
        print("GetListOfDirs({},level={},verbose={})".
              format(treedir,level,verbose))
    # --- have we captured enough directory levels?
    if level is not None:
        if level < 0:
            return True
        level = level - 1
    # --- treedir must end in '/'
    # --- (plain string test; the old '\/$' literal was an invalid
    # ---  escape sequence in a non-raw string)
    if not treedir.endswith('/'):
        treedir = treedir + '/'
    if verbose:
        print("searching dir {}".format(treedir))
    # --- walk the entries in the directory, adding directories
    # --- to the list
    for d in os.listdir(treedir):
        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'
        if d.startswith('.'):
            continue
        if verbose:
            print("testing dir entry {}".format(d))
        dd = treedir + d
        # ---- skip links (checked first, because isdir() is also
        # ---- true for a link that points at a directory)
        if os.path.islink(dd):
            continue
        # ---- skip non-directories
        if not os.path.isdir(dd):
            continue
        # ---- skip the directory?
        if StringMatchPattern(skipdirs,d):
            if verbose:
                print("skipping dir {}".format(d))
            continue
        # ---- add the directory to the list
        dirlist.append(dd)
        # ---- search sub-directory (results accumulate in dirlist;
        # ---- the return value is always True so it is not checked)
        GetListOfDirs(skipdirs,dd,dirlist,level,verbose)
    return True
# ===================================================================
# main - testing
#
# skipdirs - a list or a tuple of RegEx search patterns
# treeroot - root of a directory tree to search
# ===================================================================
if __name__ == '__main__':
    # --- test settings: directory names to exclude and the tree to walk
    skipdirs = ["^wiki$", "^x$"]
    treeroot = '/var/www/html'
    # --- the root itself is placed in the result list before the walk
    treedirs = [treeroot]
    print('---- directories -----------------------------------')
    succeeded = GetListOfDirs(skipdirs, treeroot, treedirs)
    if not succeeded:
        print("GetListOfDirs failed")
    elif not treedirs:
        print('No directories found in tree ({})'.format(treeroot))
    else:
        # --- report the captured directories in sorted order
        for path in sorted(treedirs):
            print(path)
        print()
        print("{} directories found".format(len(treedirs)))
        print()