Editing Talk:AllPages
Jump to navigation
Jump to search
The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 1: | Line 1: | ||
== | == List generation == | ||
<pre> | |||
#!/usr/bin/env python3 | |||
import json
import pathlib
import urllib.parse
import urllib.request
# Create the output directory if needed. exist_ok avoids the race between a
# separate exists() check and the mkdir() call.
pathlib.Path('output').mkdir(exist_ok=True)

# Combined listing that print_titles() appends every category section to.
# UTF-8 is requested explicitly so page titles with non-ASCII characters do
# not depend on the platform's default encoding.
all_pages_output_file = open('output/AllPages.mediawiki', 'w', encoding='utf-8')

# Per-category output files, keyed by the file name main() derives from the
# category title.
category_output_files = {}

# Prefix used to build the external links written by print_titles().
base_url = 'http://heroes.thelazy.net/wiki/'

# Category title -> set of page titles that belong to that category.
page_titles_by_category = {}

# Category title -> set of category titles that belong to that category.
category_titles_by_category = {}
def main():
    """Query the wiki API and write the category-sorted page listings.

    The function:

    1. lists every non-redirect page and every non-empty category,
    2. asks the API which categories each of them belongs to, filling the
       module-level ``page_titles_by_category`` and
       ``category_titles_by_category`` maps,
    3. for every category filed under ``Category:Content``, opens
       ``output/<name>.mediawiki`` and lets ``print_titles`` write that
       category's tree into it (and into the combined listing).

    Progress and the two resulting maps are echoed to stdout.
    """
    base_page_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allpages&aplimit=500&apfilterredir=nonredirects&format=json&apfrom='
    # dict.fromkeys keeps first-seen order and drops duplicates.
    page_titles = dict.fromkeys(
        page['title']
        for page in _iter_list_entries(base_page_query, 'allpages', 'apcontinue')
    )
    page_title_queries = _batch_titles(
        title for title in page_titles if title not in ('Main Page', 'Search')
    )
    prop_page_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles='
    _collect_parent_categories(page_title_queries, prop_page_query, page_titles_by_category)

    base_category_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allcategories&aclimit=500&acprop=size&format=json&acfrom='
    category_titles = dict.fromkeys(
        'Category:' + category['*']
        for category in _iter_list_entries(base_category_query, 'allcategories', 'accontinue')
        # Skip empty categories. Compare by value: identity checks against
        # literals only work by accident of interpreter caching.
        if category['size'] not in (0, '0')
    )
    category_title_queries = _batch_titles(category_titles)
    prop_category_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles='
    _collect_parent_categories(category_title_queries, prop_category_query, category_titles_by_category)

    print('\n\n')
    print(page_titles_by_category)
    print('\n\n')
    print(category_titles_by_category)

    top_level_categories = category_titles_by_category.get('Category:Content')
    if not top_level_categories:
        # Without this guard a wiki with nothing filed under Category:Content
        # would end the run with a bare KeyError.
        print('\n\n')
        print('Category:Content has no subcategories; no category files written.')
        return

    for category_title in sorted(top_level_categories):
        category_file_name = ''.join(
            character for character in category_title if character.isalpha()
        )
        category_path = 'output/' + category_file_name + '.mediawiki'
        # The with-block closes (and flushes) each file as soon as its tree
        # has been written. The entry left in category_output_files is a
        # closed file afterwards.
        with open(category_path, 'w', encoding='utf-8') as category_file:
            category_output_files[category_file_name] = category_file
            print_titles(category_file_name, category_title)


def _fetch_json(base_query, value=''):
    """Request ``base_query`` with ``value`` appended and return the parsed JSON.

    Spaces in ``value`` become underscores and the result is percent-encoded,
    so titles containing characters such as ``&``, ``+``, ``#`` or non-ASCII
    letters cannot corrupt the query string.
    """
    url = base_query + urllib.parse.quote(value.replace(' ', '_'), safe='')
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode())


def _continuation(results, list_name, continue_key):
    """Return the value to resume a list query from, or None on the last page.

    Both response shapes are accepted: a top-level ``continue`` object and
    the ``query-continue`` object keyed by list name.
    """
    envelopes = (
        results.get('continue', {}),
        results.get('query-continue', {}).get(list_name, {}),
    )
    for envelope in envelopes:
        if continue_key in envelope:
            return envelope[continue_key]
    return None


def _iter_list_entries(base_query, list_name, continue_key):
    """Yield every entry of a paginated ``list=`` query, page after page."""
    resume_from = ''
    while True:
        results = _fetch_json(base_query, resume_from)
        yield from results['query'][list_name]
        resume_from = _continuation(results, list_name, continue_key)
        if resume_from is None:
            break


def _batch_titles(titles, max_length=50):
    """Group titles into ``|``-separated strings for the ``titles=`` parameter.

    A batch is closed as soon as its length (each title plus one separator)
    exceeds ``max_length``. The final, shorter batch is kept as well, so no
    title is dropped.
    """
    batches = []
    pending = []
    pending_length = 0
    for title in titles:
        pending.append(title)
        pending_length += len(title) + 1
        if pending_length > max_length:
            batches.append('|'.join(pending))
            pending = []
            pending_length = 0
    if pending:
        batches.append('|'.join(pending))
    return batches


def _collect_parent_categories(title_batches, prop_query, members_by_category):
    """Record, for every title in the batches, which categories contain it.

    ``members_by_category`` maps a category title to the set of member
    titles and is updated in place. One progress line per title is printed.
    """
    for title_batch in title_batches:
        results = _fetch_json(prop_query, title_batch)
        for result in results['query']['pages'].values():
            if 'categories' not in result:
                print('\n\n')
                print(result['title'] + ' has no categories.')
                print('\n\n')
                continue
            parent_titles = [category['title'] for category in result['categories']]
            for parent_title in parent_titles:
                members_by_category.setdefault(parent_title, set()).add(result['title'])
            progress = result['title'] + ' : ' + ', '.join(parent_titles)
            print(progress.rstrip(', '))
def print_titles(category_file_name, category_title, category_level='=', indent_level=''):
    """Write one category section and, recursively, its child categories.

    Each piece of text goes to the combined listing first and then to the
    file registered under ``category_file_name`` in ``category_output_files``.

    A section is a heading linking to the category, followed by one link per
    page of that category in sorted order. Child categories are then written
    in sorted order, with one more ``=`` in the heading markers and one more
    space in front of the link labels.
    """
    def emit(text):
        # Order matters: combined listing first, then the per-category file.
        all_pages_output_file.write(text)
        category_output_files[category_file_name].write(text)

    def plain_link(title, label):
        # External-link syntax; the URL uses underscores in place of spaces.
        return '<span class="plainlinks">[' + base_url + title.replace(' ', '_') + ' ' + label + ']</span>'

    # The heading label is the category title without its namespace prefix.
    heading_label = indent_level + category_title.split('Category:', 1)[-1]
    emit(category_level + ' ' + plain_link(category_title, heading_label) + ' ' + category_level + '\n\n')

    child_level = category_level + '='
    child_indent = indent_level + ' '

    for page_title in sorted(page_titles_by_category.get(category_title, ())):
        emit(plain_link(page_title, child_indent + page_title) + '\n\n')

    for child_category_title in sorted(category_titles_by_category.get(category_title, ())):
        print_titles(category_file_name, child_category_title, child_level, child_indent)
if __name__ == "__main__":
    # all_pages_output_file is opened at module level and never closed
    # elsewhere. Close it here so buffered output is flushed whether main()
    # finishes normally or raises.
    try:
        main()
    finally:
        all_pages_output_file.close()
</pre> | |||
== Discussion == | |||
Nice technical page, but I must ask: is there any use for it? At least to me it seems quite useless, more of a hindrance than a help. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 06:11, 7 September 2016 (CEST)
:I wanted to make sure there wasn't anything I was missing. When I'm browsing through the list, it's easier to click these links than to copy and paste the auto-generated titles from the API query.
:--[[User:imahero|imahero]] 03:14, 8 September 2016 (CEST) | |||
::Sure, just wondering, as the wiki already has [[Special:AllPages]], but perhaps there is a use for that. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 07:27, 8 September 2016 (CEST) | |||
:::[[Special:AllPages]] felt too clumsy for me to navigate. All I really wanted was a single list of all non-redirect links :)
::: --[[User:imahero|imahero]] 08:31, 8 September 2016 (CEST) | |||
I may want to go back to using the external link syntax if breaking the [[Special:LonelyPages|orphaned pages]] list is a big deal. For now I've added the orphaned pages here, and I'll update the list every time I generate the AllPages list.<br>
--[[User:imahero|imahero]] 15:19, 8 September 2016 (CEST) | |||
::I changed it back to use the external link syntax to make sure the [[Special:LonelyPages|orphaned pages]] list still works properly. <br> | |||
:: --[[User:imahero|imahero]] 22:48, 8 September 2016 (CEST) | |||
It's now sorted by category. <br> | |||
--[[User:imahero|imahero]] 12:26, 10 September 2016 (CEST) |