#!/usr/bin/env python3
import sys
import re
import pathlib
# Root directory that holds one sub-folder per manual.
path_to_files = '/projects/manuals'

# Running tally shared by the functions below: word -> number of occurrences.
words = {}

# Punctuation characters stripped from both ends of a token before counting.
before_or_after = '\',.(){}[]?!"*+-:<>;'
def main():
    """Count the words of every manual, then write the combined table.

    Each manual is described by (full title, short name, page count) and is
    passed to ``get_words`` in the order listed; ``print_words`` runs once
    after all manuals have been processed.
    """
    manuals = (
        ('Restoration of Erathia', 'RoE', 144),
        ("Armageddon's Blade", 'AB', 28),
        ('Shadow of Death', 'SoD', 36),
        ('Tutorial', 'Tutorial', 12),
    )
    for title, abbreviation, page_count in manuals:
        get_words(title, abbreviation, page_count)
    print_words()
def get_words(name, short_name, total_pages):
    """Read one manual's text file and feed every token to ``add_word``.

    The file is read from ``{path_to_files}/{short_name}/_{short_name}.txt``
    as UTF-8. The marker ``[[Category:{name} Manual|*]]`` is removed before
    the text is split into tokens.

    Args:
        name: Full manual title, used only to build the category marker
            that is stripped from the text.
        short_name: Folder name and file-name stem of the manual.
        total_pages: Not used by this function; kept so existing callers
            keep working.

    Raises:
        OSError: If the manual file cannot be opened or read.
    """
    path = pathlib.PurePath(f'{path_to_files}/{short_name}/_{short_name}.txt')

    # Context manager guarantees the handle is closed even if reading fails.
    with open(path, encoding='utf-8') as manual:
        text = manual.read()
    text = text.replace(f"[[Category:{name} Manual|*]]", '')

    # Tokens are separated by whitespace runs, '/', '\' or '|'. A raw string
    # keeps the pattern free of invalid string escapes such as "\s".
    for word in re.split(r"\s+|/|\\|\|", text):
        add_word(word)
def add_word(word):
    """Normalise a raw token and increment its count in ``words``.

    The token is reduced step by step until nothing more can be removed:
    a trailing ``'s`` or ``(s`` is cut off (only when the token is longer
    than two characters), otherwise any characters from ``before_or_after``
    are stripped from both ends. A token that ends up empty is ignored;
    anything else is counted exactly as spelled (case is preserved).
    """
    token = word
    while token != '':
        size = len(token)
        tail = token[size - 2:]

        # Possessive / optional-plural suffix: "hero's" -> "hero".
        if size > 2 and tail in ("'s", '(s'):
            token = token[0:size - 2]
            continue

        # Punctuation on either edge: strip both ends, then re-examine.
        if token[0] in before_or_after or token[-1] in before_or_after:
            token = token.strip(before_or_after)
            continue

        # Fully normalised: record one more occurrence and stop.
        words[token] = words.get(token, 0) + 1
        return
def print_words():
    """Write the collected counts to ``word_list.txt`` as wiki table markup.

    The table has a ``Word`` and a ``Count`` column and one row per entry of
    the module-level ``words`` dict, ordered case-insensitively by word. The
    file is created (or overwritten) in the current working directory.

    Raises:
        OSError: If ``word_list.txt`` cannot be opened or written.
    """
    # The context manager flushes and closes the file once the table is
    # complete, instead of relying on interpreter shutdown to do so.
    with open('word_list.txt', 'w', encoding='utf-8') as word_list:
        word_list.write('{| class="wikitable sortable" style="white-space:nowrap; text-align: center;"\n')
        word_list.write('! Word\n! Count\n')
        for word in sorted(words, key=str.lower):
            word_list.write(f'|-\n| style="vertical-align: center; text-align: left;" | {word}\n')
            word_list.write(f'| style="vertical-align: center; text-align: center;" | {words[word]}\n')
        word_list.write('|}')
# Run the word count only when this file is executed as a script,
# not when it is imported as a module.
if '__main__' == __name__:
    main()
# Project Structure
# /projects/manuals/AB/_AB.txt
# /projects/manuals/RoE/_RoE.txt
# /projects/manuals/SoD/_SoD.txt
# /projects/manuals/Tutorial/_Tutorial.txt