Talk:Word list from the manuals

From Heroes 3 wiki
Revision as of 22:02, 7 January 2017 by Imahero (talk | contribs) (Created page with "<pre> #!/usr/bin/env python3 import sys import re import pathlib path_to_files = '/projects/manuals' words = dict() before_or_after = '\',.(){}[]?!"*+-:<>;' def main():...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
#!/usr/bin/env python3

import sys
import re
import pathlib

# Root directory that contains one sub-folder per manual (see get_words).
path_to_files = '/projects/manuals'
# Running tally shared by add_word/print_words: word -> occurrence count.
words = {}
# Characters stripped from both ends of a token before it is counted.
before_or_after = '\',.(){}[]?!"*+-:<>;'

def main():
    """Tally the words of every manual, then write the result table.

    Each manual is described by (full name, short name, page count); the
    tuple is forwarded positionally to get_words. Once all manuals have
    been processed, print_words writes the accumulated counts.
    """
    manuals = (
        ('Restoration of Erathia', 'RoE', 144),
        ("Armageddon's Blade", 'AB', 28),
        ('Shadow of Death', 'SoD', 36),
        ('Tutorial', 'Tutorial', 12),
    )
    for title, abbreviation, page_count in manuals:
        get_words(title, abbreviation, page_count)
    print_words()

def get_words(name, short_name, total_pages):
    """Read one manual's text file and feed every token to add_word.

    The file is expected at ``{path_to_files}/{short_name}/_{short_name}.txt``
    and is read as UTF-8.

    Args:
        name: Full manual title; used to remove the
            ``[[Category:<name> Manual|*]]`` wiki tag from the text.
        short_name: Abbreviation used for both the folder and file name.
        total_pages: Accepted for interface compatibility; not used here.

    Raises:
        OSError: If the manual's text file cannot be opened or read.
    """
    folder = f'{path_to_files}/{short_name}'
    path = pathlib.PurePath(f'{folder}/_{short_name}.txt')
    # Context manager guarantees the handle is closed even if read() fails.
    with open(path, encoding='utf-8') as file:
        text = file.read()
    text = text.replace(f"[[Category:{name} Manual|*]]", '')
    # Tokens are separated by whitespace runs, '/', '\' or '|'.
    # Raw string keeps the pattern identical while avoiding invalid
    # string escapes such as "\s" and "\|".
    for word in re.split(r"\s+|/|\\|\|", text):
        add_word(word)

def add_word(word):
    """Normalise a raw token and increment its count in ``words``.

    The token is trimmed repeatedly until none of the rules apply:
      * a trailing ``'s`` or ``(s`` (on tokens longer than two characters)
        is cut off;
      * otherwise, if either end is a character from ``before_or_after``,
        all such characters are stripped from both ends.
    A token that becomes empty is discarded; anything else is counted
    exactly as it is spelled (case-sensitive).
    """
    token = word
    while token:
        if len(token) > 2 and token.endswith(("'s", '(s')):
            # Drop the two-character suffix and re-examine the remainder.
            token = token[:-2]
        elif token[0] in before_or_after or token[-1] in before_or_after:
            token = token.strip(before_or_after)
        else:
            # Clean token: record one more occurrence and stop.
            words[token] = words.get(token, 0) + 1
            return

def print_words():
    """Write the collected word counts to ``word_list.txt`` as a wikitable.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8. Rows are ordered case-insensitively by word; each row
    holds the word and its count taken from the module-level ``words`` dict.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    # The with-block closes (and therefore flushes) the file deterministically
    # instead of relying on interpreter shutdown.
    with open('word_list.txt', 'w', encoding='utf-8') as word_list:
        word_list.write('{| class="wikitable sortable" style="white-space:nowrap; text-align: center;"\n')
        word_list.write('! Word\n! Count\n')
        for word in sorted(words, key=str.lower):
            word_list.write(f'|-\n| style="vertical-align: center; text-align: left;" | {word}\n')
            word_list.write(f'| style="vertical-align: center; text-align: center;" | {words[word]}\n')
        word_list.write('|}')

# Run the word count only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Project Structure
# /projects/manuals/AB/_AB.txt
# /projects/manuals/RoE/_RoE.txt
# /projects/manuals/SoD/_SoD.txt
# /projects/manuals/Tutorial/_Tutorial.txt