|
|
(5 intermediate revisions by the same user not shown) |
Line 1: |
Line 1: |
| #!/usr/bin/env python3
| | ==Generate== |
| | | In the past I used a python script to generate this page. At some point I figured out how to do it using <nowiki>{{Special:AllPages|hideredirects=1|namespace=0|from=|to=a}}...</nowiki> instead. –[[user_talk:imahero|imahero]] 00:56, 8 December 2023 (UTC) |
| import pathlib
| |
| import urllib.request
| |
| import json
| |
| | |
| | |
# Ensure the output directory exists before any files are opened in it.
# (exist_ok=True replaces the racy `if not exists(): mkdir()` check.)
pathlib.Path('output').mkdir(exist_ok=True)

# Combined index of every page; kept open for the lifetime of the script.
# NOTE(review): handles are never closed explicitly -- the script relies on
# interpreter exit to flush them; consider context managers if this grows.
all_pages_output_file = open('output/AllPages.mediawiki', 'w')

# One output file handle per top-level content category, populated in main().
category_output_files = {}

base_url = 'http://heroes.thelazy.net/wiki/'
# category title -> set of page titles directly in that category
page_titles_by_category = {}
# category title -> set of child category titles
category_titles_by_category = {}
| | |
def main():
    """Generate the AllPages index files from the wiki API.

    1. Page through list=allpages to collect every non-redirect page title.
    2. Batch-query prop=categories for those titles, grouping results into
       the module-level page_titles_by_category mapping.
    3. Repeat for categories via list=allcategories, filling
       category_titles_by_category.
    4. Walk the category tree from Category:Content and write one
       .mediawiki file per top-level category (plus the combined file)
       through print_titles().
    """
    base_page_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allpages&aplimit=500&apfilterredir=nonredirects&format=json&apfrom='
    continue_title = ''
    page_titles = {}

    # Follow the API's query-continue cursor until every page is fetched.
    while True:
        request = urllib.request.urlopen(base_page_query + continue_title.replace(' ', '_'))
        response = request.read()
        results = json.loads(response.decode())
        for page in results['query']['allpages']:
            title = page['title']
            page_titles[title] = base_url + title
        if 'query-continue' in results:
            continue_title = results['query-continue']['allpages']['apcontinue']
        else:
            break

    # Pack titles into pipe-separated batches for the prop=categories query.
    page_title_queries = []
    page_title_query = ''

    for title in page_titles:
        if title != 'Main Page' and title != 'Search':
            page_title_query += title + '|'
            if len(page_title_query) > 50:
                page_title_queries.append(page_title_query.rstrip('|'))
                page_title_query = ''
    # BUGFIX: flush the final partial batch -- previously any titles left in
    # page_title_query after the loop were silently dropped.
    if page_title_query:
        page_title_queries.append(page_title_query.rstrip('|'))

    prop_page_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles='

    for page_title_query in page_title_queries:
        request = urllib.request.urlopen(prop_page_query + page_title_query.replace(' ', '_'))
        response = request.read()
        results = json.loads(response.decode())
        for _, result in results['query']['pages'].items():
            if 'categories' not in result:
                print('\n\n')
                print(result['title'] + ' has no categories.')
                print('\n\n')
                continue
            progress = result['title'] + ' : '
            for category in result['categories']:
                if category['title'] not in page_titles_by_category:
                    page_titles_by_category[category['title']] = set()
                page_titles_by_category[category['title']].add(result['title'])
                progress += category['title'] + ', '
            print(progress.rstrip(', '))

    base_category_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&list=allcategories&aclimit=500&acprop=size&format=json&acfrom='
    continue_category = ''
    category_titles = {}

    while True:
        request = urllib.request.urlopen(base_category_query + continue_category.replace(' ', '_'))
        response = request.read()
        results = json.loads(response.decode())
        for category in results['query']['allcategories']:
            # BUGFIX: was `is 0 or is '0'` -- identity comparison against
            # literals is implementation-defined; compare by equality.
            if category['size'] in (0, '0'):
                continue  # skip empty categories
            title = category['*']
            category_titles[title] = base_url + title
        if 'query-continue' in results:
            continue_category = results['query-continue']['allcategories']['accontinue']
        else:
            break

    category_title_queries = []
    category_title_query = ''

    for title in category_titles:
        category_title_query += 'Category:' + title + '|'
        if len(category_title_query) > 50:
            category_title_queries.append(category_title_query.rstrip('|'))
            category_title_query = ''
    # BUGFIX: flush the final partial batch here as well.
    if category_title_query:
        category_title_queries.append(category_title_query.rstrip('|'))

    prop_category_query = 'http://heroes.thelazy.net/wiki/api.php?action=query&prop=categories&format=json&titles='

    for category_title_query in category_title_queries:
        request = urllib.request.urlopen(prop_category_query + category_title_query.replace(' ', '_'))
        response = request.read()
        results = json.loads(response.decode())
        for _, result in results['query']['pages'].items():
            if 'categories' not in result:
                print('\n\n')
                print(result['title'] + ' has no categories.')
                print('\n\n')
                continue
            progress = result['title'] + ' : '
            for category in result['categories']:
                if category['title'] not in category_titles_by_category:
                    category_titles_by_category[category['title']] = set()
                category_titles_by_category[category['title']].add(result['title'])
                progress += category['title'] + ', '
            print(progress.rstrip(', '))

    print('\n\n')
    print(page_titles_by_category)
    print('\n\n')
    print(category_titles_by_category)

    # One output file per top-level content category, then emit the tree.
    for category_title in sorted(category_titles_by_category['Category:Content']):
        category_file_name = ''.join([i for i in category_title if i.isalpha()])
        category_output_files[category_file_name] = open('output/' + category_file_name + '.mediawiki', 'w')
        print_titles(category_file_name, category_title)
| | |
def print_titles(category_file_name, category_title, category_level='=', indent_level=''):
    """Recursively write a category heading, its pages, then its subcategories.

    Each line goes to both the combined AllPages file and the per-category
    file; the identical markup was previously built twice inline, which
    invited the two copies drifting apart -- build each line once instead.

    Args:
        category_file_name: key into category_output_files for the
            per-category output handle.
        category_title: full category title (e.g. 'Category:Content').
        category_level: MediaWiki heading markers; one '=' is appended per
            recursion depth.
        indent_level: leading spaces for visual nesting; grows by one space
            per recursion depth.
    """
    heading = (category_level + ' <span class="plainlinks">[' + base_url
               + category_title.replace(' ', '_') + ' ' + indent_level
               + category_title.split('Category:', 1).pop() + ']</span> '
               + category_level + '\n\n')
    all_pages_output_file.write(heading)
    category_output_files[category_file_name].write(heading)

    category_level += '='
    indent_level += ' '

    # Pages directly in this category, alphabetically.
    if category_title in page_titles_by_category:
        for page_title in sorted(page_titles_by_category[category_title]):
            link = ('<span class="plainlinks">[' + base_url
                    + page_title.replace(' ', '_') + ' ' + indent_level
                    + page_title + ']</span>\n\n')
            all_pages_output_file.write(link)
            category_output_files[category_file_name].write(link)

    # Recurse into child categories, alphabetically.
    if category_title in category_titles_by_category:
        for child_category_title in sorted(category_titles_by_category[category_title]):
            print_titles(category_file_name, child_category_title, category_level, indent_level)
| |
| | |
# Allow importing this module without triggering the crawl.
if __name__ == "__main__":
    main()
| |
| | |
| == Discussion ==
| |
| | |
| Nice technical page, but I must ask: is there any use for it? At least to me it seems quite useless, hindering rather than helping. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 06:11, 7 September 2016 (CEST)
| |
| | |
| :I wanted to make sure there wasn't anything I was missing. When I'm browsing through the list it's easier to click these links than copy pasting the auto-generated titles from the api query.
| |
| :--[[User:imahero|imahero]] 03:14, 8 September 2016 (CEST)
| |
| | |
| ::Sure, just wondering, as the wiki already has [[Special:AllPages]], but perhaps there is a use for that. –[[User:Kapteeni Ruoska|Kapteeni Ruoska]] ([[User talk:Kapteeni Ruoska|talk]]) 07:27, 8 September 2016 (CEST)
| |
| | |
| :::[[Special:AllPages]] felt too clumsy for me to navigate. All I really wanted was a single list of all nonredirect links :)
| |
| ::: --[[User:imahero|imahero]] 08:31, 8 September 2016 (CEST)
| |
| | |
| I may want to go back to using the external link syntax if breaking the [[Special:LonelyPages|orphaned pages]] is a big deal. For now I've just added the orphaned pages here and I'll just update the list every time I generate the AllPages list.<br>
| |
| --[[User:imahero|imahero]] 15:19, 8 September 2016 (CEST)
| |
| | |
| ::I changed it back to use the external link syntax to make sure the [[Special:LonelyPages|orphaned pages]] list still works properly. <br>
| |
| :: --[[User:imahero|imahero]] 22:48, 8 September 2016 (CEST)
| |
| | |
| It's now sorted by category. <br>
| |
| --[[User:imahero|imahero]] 12:26, 10 September 2016 (CEST)
| |