# =========================================================
# Read a text file and report statistics about the
# words found in it
# =========================================================
import math
import sys
import os
import platform
# ---------------------------------------------------------
# global variables
#
# punctuation str - punctuation characters removed from word ends
# wdict dict - holds words and word counts
# wline count - lines in file
# wtotal count - total number of words found in file
# wunique count - unique words in file
# ---------------------------------------------------------
# characters treated as punctuation when found at the end of a word
punctuation = '''.?!,;:-_()[]{}"'/\\'''
# word -> number of times the word has been seen
wdict = {}
# running counters, all starting at zero
wline = wtotal = wunique = 0
# ---------------------------------------------------------
# am I running Python 3?
# ---------------------------------------------------------
def RunningPython3():
    """Return True when the interpreter's major version is 3, else False."""
    major_version = sys.version_info[0]
    return major_version == 3
# ---------------------------------------------------------
# get user input (Python 2 or 3)
# ---------------------------------------------------------
def GetUserInput(prompt, py3):
    """Prompt the user and return the reply with surrounding whitespace removed.

    prompt -- text shown to the user
    py3    -- True to read with input(), False to read with raw_input()
    """
    # Only the selected branch is evaluated, so the name raw_input is
    # never looked up when py3 is true.
    read_line = input if py3 else raw_input
    return read_line(prompt).strip()
# ---------------------------------------------------------
# pause program
# ---------------------------------------------------------
def Pause(py3):
    """Print a blank line, then block until the user presses enter.

    py3 -- passed through to GetUserInput to select the input function
    """
    print('')
    # the typed text is irrelevant; the call is made only to wait
    _ = GetUserInput('Press enter to continue ', py3)
# ---------------------------------------------------------
# clear the screen
# ---------------------------------------------------------
def ClearScreen():
    """Clear the terminal using the command appropriate for the host OS.

    Windows uses 'cls'; every other system reported by platform.system()
    (Linux, Darwin, ...) uses 'clear'.  The original had the two commands
    swapped for Windows and for non-Linux systems.
    """
    command = 'cls' if platform.system() == 'Windows' else 'clear'
    os.system(command)
# ---------------------------------------------------------
# test for a punctuation character at the end of a string
# ---------------------------------------------------------
# punctuation characters are: period, question mark,
# exclamation mark, comma, semicolon, colon, dash,
# hyphen, parentheses, brackets, braces, apostrophe,
# quote marks and ellipsis.
# ---------------------------------------------------------
# Note: depending on OS, editor settings, etc., dashes,
# hyphens, apostrophes, ellipses, and quote marks
# may appear differently in the text file
# ---------------------------------------------------------
def HasPunctuation(str, py3):
    """Return True when the last character of the string is punctuation.

    str -- string to test (the name shadows the builtin; it is kept so
           existing callers are unaffected)
    py3 -- unused; kept for interface compatibility

    An empty string has no last character and yields False instead of
    raising IndexError.
    """
    if not str:
        return False
    # punctuation is the module-level string of punctuation characters
    return str[-1] in punctuation
# ---------------------------------------------------------
# process a text file
# ---------------------------------------------------------
def ProcessTextFile(file, py3):
    """Read a text file line by line and count the words on each line.

    file -- path of the text file to read
    py3  -- passed through to ProcessTextLine

    Every non-blank line (after stripping surrounding whitespace) is
    handed to ProcessTextLine.  The global wline is incremented per line.
    Errors raised by open() propagate to the caller.
    """
    global wline
    # the with-statement closes the file even if processing a line raises
    with open(file, 'r') as inFile:
        for line in inFile:
            line = line.strip()
            if line:
                ProcessTextLine(line, py3)
            # NOTE(review): counts every line read, blank or not, to match
            # the "lines in file" description of wline - confirm intent.
            wline += 1
# ---------------------------------------------------------
# process a line (string) of text
#
# convert words to lowercase and remove any punctuation at
# the end of words - count the words
# ---------------------------------------------------------
def ProcessTextLine(line, py3):
    """Count the words in one line of text and return how many were counted.

    line -- line of text; it is split on whitespace into tokens
    py3  -- passed through to HasPunctuation

    Each token is lowercased and all trailing punctuation is removed
    (the original removed only a single character, so 'end.)' became
    'end.').  A token that consists solely of punctuation, such as a lone
    dash, is skipped rather than counted as an empty word.  Leading
    punctuation is left in place, as before.

    Updates the globals wdict (per-word counts), wunique (distinct words)
    and wtotal (all words).
    """
    global wdict, wtotal, wunique
    wc = 0
    for w in line.split():
        w = w.lower()
        # the emptiness check keeps HasPunctuation from ever seeing ''
        while w and HasPunctuation(w, py3):
            w = w[:-1]
        if not w:
            continue
        if w in wdict:
            wdict[w] += 1
        else:
            wdict[w] = 1
            wunique += 1
        wc += 1
        wtotal += 1
    return wc
# ---------------------------------------------------------
# main
# ---------------------------------------------------------
if __name__ == '__main__':
    py3 = RunningPython3()

    # -----------------------------------------------------
    # text file to analyse: the first command-line argument
    # when one is given, otherwise the default file
    # -----------------------------------------------------
    ##file = 'gettysburg_address.txt'
    file = 'declaration_of_independence.txt'
    if len(sys.argv) > 1:
        file = sys.argv[1]

    # report an unreadable file plainly instead of with a traceback
    try:
        ProcessTextFile(file, py3)
    except (IOError, OSError) as err:
        print('Cannot read text file {}: {}'.format(file, err))
        sys.exit(1)

    # -----------------------------------------------------
    # display the words found in the text file
    # (debugging aid, disabled - remove the ## to enable)
    # -----------------------------------------------------
    ##print('')
    ##
    ### sort on key
    ##for k in sorted(wdict):
    ##    print('{}: {}'.format(k, wdict[k]))
    ##
    ### sort on value (count), highest first
    ##ranked = sorted(wdict.items(), reverse=True,
    ##                key=lambda kv: (kv[1], kv[0]))
    ##for i, (k, v) in enumerate(ranked):
    ##    print('[{:02}] {:>4}: {}'.format(i, v, k))
    ##Pause(py3)

    # -----------------------------------------------------
    # display text file statistics
    # -----------------------------------------------------
    print('')
    print('Text file: {}'.format(file))
    print('{} unique words in text file'.format(wunique))
    print('{} words in text file'.format(wtotal))
    Pause(py3)

    # -----------------------------------------------------
    # search the dictionary for a specific word
    # (an empty entry ends the search loop)
    # -----------------------------------------------------
    while True:
        ClearScreen()
        print('------ Search for Word ------')
        print('')
        w = GetUserInput('Enter search word: ', py3)
        if not w:
            break
        # dictionary keys are stored in lowercase
        w = w.lower()
        print('')
        if w in wdict:
            print('Found {}, word count is {}'.format(w, wdict[w]))
        else:
            print('{} not found'.format(w))
        Pause(py3)
    print('')