Talk:Word list from the manuals

From Heroes 3 wiki
Revision as of 22:02, 7 January 2017 by Imahero (talk | contribs) (Created page with "<pre> #!/usr/bin/env python3 import sys import re import pathlib path_to_files = '/projects/manuals' words = dict() before_or_after = '\',.(){}[]?!"*+-:<>;' def main():...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/env python3

import sys
import re
import pathlib

# Root directory that holds one sub-folder per manual.
path_to_files = '/projects/manuals'

# Running tally: maps each distinct word to the number of times it was seen.
words = {}

# Punctuation stripped from the start and end of every token before counting.
before_or_after = '\',.(){}[]?!"*+-:<>;'

def main():
    """Count the words of every manual, then write the combined word list.

    Each entry below is passed positionally to ``get_words`` as
    ``(name, short_name, total_pages)``; the manuals are processed in the
    order listed and ``print_words`` runs once at the end.
    """
    manuals = (
        ('Restoration of Erathia', 'RoE', 144),
        ("Armageddon's Blade", 'AB', 28),
        ('Shadow of Death', 'SoD', 36),
        ('Tutorial', 'Tutorial', 12),
    )
    for title, abbreviation, page_count in manuals:
        get_words(title, abbreviation, page_count)
    print_words()

def get_words(name, short_name, total_pages):
    """Read one manual's text file and feed every token to ``add_word``.

    The file is read from
    ``{path_to_files}/{short_name}/_{short_name}.txt`` as UTF-8.

    Args:
        name: Full manual title; used only to remove the
            ``[[Category:<name> Manual|*]]`` marker from the text.
        short_name: Abbreviation that names both the sub-folder and the file.
        total_pages: Accepted for call compatibility; not used by this
            function.

    Raises:
        OSError: If the manual file cannot be opened or read.
    """
    path = pathlib.Path(f'{path_to_files}/{short_name}/_{short_name}.txt')
    # read_text opens and closes the file itself, so no handle is leaked.
    text = path.read_text(encoding='utf-8')
    text = text.replace(f"[[Category:{name} Manual|*]]", '')
    # Tokens are separated by whitespace runs, '/', '\' or '|'.  A raw
    # string keeps the regex escapes from being parsed as (invalid) string
    # escapes.
    for word in re.split(r"\s+|/|\\|\|", text):
        add_word(word)

def add_word(word):
    """Normalise a raw token and increment its count in ``words``.

    The token is trimmed repeatedly until neither rule applies:

    * a token longer than two characters ending in ``'s`` or ``(s`` loses
      those two characters;
    * a token whose first or last character is in ``before_or_after`` has
      all such characters stripped from both ends.

    Whatever remains is counted. A token that is empty, or becomes empty
    while being trimmed, is ignored.

    Args:
        word: Raw token as produced by splitting the manual text.
    """
    # `words` is only mutated, never rebound, so no `global` statement is
    # needed here.
    while word:
        if len(word) > 2 and word.endswith(("'s", '(s')):
            word = word[:-2]
        elif word[0] in before_or_after or word[-1] in before_or_after:
            word = word.strip(before_or_after)
        else:
            words[word] = words.get(word, 0) + 1
            return

def print_words():
    """Write the collected word counts to ``word_list.txt`` as wiki table markup.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8. It holds a ``wikitable sortable`` table with one row
    per distinct word, ordered case-insensitively, showing the word and its
    count.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    # The context manager guarantees the file is flushed and closed, even
    # if a write fails part-way through.
    with open('word_list.txt', 'w', encoding='utf-8') as word_list:
        word_list.write('{| class="wikitable sortable" style="white-space:nowrap; text-align: center;"\n')
        word_list.write('! Word\n! Count\n')
        for word in sorted(words, key=str.lower):
            word_list.write(f'|-\n| style="vertical-align: center; text-align: left;" | {word}\n')
            word_list.write(f'| style="vertical-align: center; text-align: center;" | {words[word]}\n')
        # Table terminator; intentionally written without a trailing newline.
        word_list.write('|}')

# Run the word count only when this file is executed as a script, so that
# importing it has no side effects beyond defining the names above.
if __name__ == "__main__":
    main()

# Project Structure
# /projects/manuals/AB/_AB.txt
# /projects/manuals/RoE/_RoE.txt
# /projects/manuals/SoD/_SoD.txt
# /projects/manuals/Tutorial/_Tutorial.txt