Talk:AllPages: Difference between revisions
Jump to navigation
Jump to search
m (Imahero moved page Talk:AllPages to Talk:All Pages) |
|||
Line 8: | Line 8: | ||
base_url = 'http://heroes.thelazy.net/wiki/' | base_url = 'http://heroes.thelazy.net/wiki/' | ||
base_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allpages&aplimit=500&apfilterredir=nonredirects&format=json&apfrom=' | base_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allpages&aplimit=500&apfilterredir=nonredirects&format=json&apfrom=' | ||
continue_title = '' | |||
titles = {} | titles = {} | ||
while True: | while True: | ||
request = urllib.request.urlopen(base_query + | request = urllib.request.urlopen(base_query + urllib.parse.quote(continue_title)) | ||
response = request.read() | response = request.read() | ||
results = json.loads(response.decode()) | results = json.loads(response.decode()) | ||
for page in results['query']['allpages']: | for page in results['query']['allpages']: | ||
title = page['title'] | title = page['title'] | ||
titles[title] = base_url + | titles[title] = base_url + title | ||
if 'query-continue' in results: | if 'query-continue' in results: | ||
continue_title = results['query-continue']['allpages']['apcontinue'] | |||
else: | else: | ||
break | break | ||
title_queries = [] | |||
title_query = '' | |||
for title, url in titles.items(): | |||
if title != 'All Pages' and title != 'Main Page' and title != 'Search': | |||
title_query += title + '|' | |||
if len(title_query) > 50: | |||
title_queries.append(title_query.rstrip('|')) | |||
title_query = '' | |||
prop_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles=' | |||
titles_by_category = {} | |||
for title_query in title_queries: | |||
request = urllib.request.urlopen(prop_query + urllib.parse.quote(title_query)) | |||
response = request.read() | |||
results = json.loads(response.decode()) | |||
for _, page in results['query']['pages'].items(): | |||
if 'categories' not in page: | |||
print(page) | |||
continue | |||
for category in page['categories']: | |||
if category['title'] not in titles_by_category: | |||
titles_by_category[category['title']] = set() | |||
titles_by_category[category['title']].add(page['title']) | |||
output_file = open('output.mediawiki', 'w') | output_file = open('output.mediawiki', 'w') | ||
for | for category, titles in sorted(titles_by_category.items()): | ||
output_file.write('[' + base_url + category.replace(' ', '_') + ' ' + category.split('Category:', 1).pop() + ']\n\n') | |||
output_file.write('[' + | for title in sorted(titles): | ||
output_file.write(':[' + base_url + title.replace(' ', '_') + ' ' + title + ']\n\n') | |||
</pre> | </pre> | ||
== Discussion == | |||
:Nice technical page, but I must ask is there any use for it? At least for me this seems quite useless, causing hinder rather than help. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 06:11, 7 September 2016 (CEST) | :Nice technical page, but I must ask is there any use for it? At least for me this seems quite useless, causing hinder rather than help. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 06:11, 7 September 2016 (CEST) |
Revision as of 10:22, 10 September 2016
List generation
#!/usr/bin/env python3
"""Generate a MediaWiki listing of all non-redirect wiki pages by category.

The script asks the wiki API for every non-redirect page title, looks up the
categories of those pages in small batches, and writes 'output.mediawiki':
one external link per category followed by an indented external link for
each page in that category.
"""
import json
import urllib.parse
import urllib.request

base_url = 'http://heroes.thelazy.net/wiki/'
base_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allpages&aplimit=500&apfilterredir=nonredirects&format=json&apfrom='
prop_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles='

# Pages that are deliberately left out of the generated listing.
excluded_titles = ('All Pages', 'Main Page', 'Search')


def fetch_json(url):
    """Fetch *url* and return the decoded JSON document.

    The response is closed as soon as it has been read.
    """
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode())


def next_continue_title(results):
    """Return the title to resume the allpages listing from, or None if done.

    Understands the legacy 'query-continue' block as well as the newer
    top-level 'continue' block, so paging keeps working with either format.
    """
    if 'query-continue' in results:
        return results['query-continue']['allpages']['apcontinue']
    return results.get('continue', {}).get('apcontinue')


def fetch_all_titles():
    """Return a dict mapping every non-redirect page title to its URL."""
    titles = {}
    continue_title = ''
    while continue_title is not None:
        results = fetch_json(base_query + urllib.parse.quote(continue_title))
        for page in results['query']['allpages']:
            title = page['title']
            titles[title] = base_url + title
        continue_title = next_continue_title(results)
    return titles


def batch_titles(titles, max_length=50):
    """Join *titles* into '|'-separated query strings.

    A batch is closed once its accumulated length (each title plus one
    separator) exceeds *max_length*. Titles in ``excluded_titles`` are
    skipped. The final, possibly short, batch is kept as well; without that
    the last few pages would never be looked up.
    """
    title_queries = []
    batch = []
    batch_length = 0
    for title in titles:
        if title in excluded_titles:
            continue
        batch.append(title)
        batch_length += len(title) + 1
        if batch_length > max_length:
            title_queries.append('|'.join(batch))
            batch = []
            batch_length = 0
    if batch:
        title_queries.append('|'.join(batch))
    return title_queries


def fetch_category_pages(title_queries):
    """Yield the page records returned by a categories query per batch."""
    # NOTE(review): the query sets no cllimit and continuation is not
    # followed here, so the API may truncate category lists - confirm.
    for title_query in title_queries:
        results = fetch_json(prop_query + urllib.parse.quote(title_query))
        yield from results['query']['pages'].values()


def group_titles_by_category(pages):
    """Return a dict mapping each category title to the set of page titles.

    Pages without a 'categories' entry are printed for inspection and
    otherwise ignored.
    """
    titles_by_category = {}
    for page in pages:
        if 'categories' not in page:
            print(page)
            continue
        for category in page['categories']:
            titles_by_category.setdefault(category['title'], set()).add(page['title'])
    return titles_by_category


def format_listing(titles_by_category):
    """Return the listing text: categories sorted, page titles sorted within."""
    parts = []
    for category in sorted(titles_by_category):
        label = category.split('Category:', 1).pop()
        parts.append('[' + base_url + category.replace(' ', '_') + ' ' + label + ']\n\n')
        for title in sorted(titles_by_category[category]):
            parts.append(':[' + base_url + title.replace(' ', '_') + ' ' + title + ']\n\n')
    return ''.join(parts)


def main():
    """Fetch titles and categories, then write 'output.mediawiki'."""
    titles = fetch_all_titles()
    pages = fetch_category_pages(batch_titles(titles))
    listing = format_listing(group_titles_by_category(pages))
    # Explicit encoding so non-ASCII titles are written the same everywhere;
    # the context manager guarantees the file is flushed and closed.
    with open('output.mediawiki', 'w', encoding='utf-8') as output_file:
        output_file.write(listing)


if __name__ == '__main__':
    main()
Discussion
- Nice technical page, but I must ask: is there any use for it? At least to me it seems quite useless, more of a hindrance than a help. –Kapteeni Ruoska (talk) 06:11, 7 September 2016 (CEST)
- I wanted to make sure there wasn't anything I was missing. When I'm browsing through the list it's easier to click these links than copy pasting the auto-generated titles from the api query.
- --imahero 03:14, 8 September 2016 (CEST)
- Sure, just wondering, as the wiki already has Special:AllPages, but perhaps there is a use for that. –Kapteeni Ruoska (talk) 07:27, 8 September 2016 (CEST)
- Special:AllPages felt too clumsy for me to navigate. All I really wanted was a single list of all non-redirect links :)
- --imahero 08:31, 8 September 2016 (CEST)
I may want to go back to using the external link syntax if breaking the orphaned pages is a big deal. For now I've just added the orphaned pages here and I'll just update the list every time I generate the AllPages list.
--imahero 15:19, 8 September 2016 (CEST)
- I changed it back to use the external link syntax to make sure the orphaned pages list still works properly.
- --imahero 22:48, 8 September 2016 (CEST)
- I changed it back to use the external link syntax to make sure the orphaned pages list still works properly.