User:Gdr/dykbot.py
Appearance
< User:Gdr
#!/usr/bin/python
#
#
# DYKBOT.PY -- UPDATING "DID YOU KNOW"
# Gdr, 2005-05-12
#
#
# INTRODUCTION
#
# This script partially automates the procedure of placing new items on
# [[Template:Did you know]] on the English Wikipedia.
#
# You must have the Python Wikipedia Robot Framework
# (http://sourceforge.net/projects/pywikipediabot/).
#
#
# DOCUMENTATION
#
# [[User:Gdr/DYKbot]]
#
#
# LICENCE
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
import calendar
import getopt
import history
import protect
import re
import sys
import time
import wikipedia
class DYK:
# Carry out operations for real?
for_real = False
# These are the pages targeted by the script and their page links
site = wikipedia.Site('en')
target = "Template:Did you know"
target_pl = None
talk = "Template talk:Did you know"
talk_pl = None
new_image = None
new_image_pl = None
old_image = None
old_image_pl = None
# The list of suggestions, in the form of dictionaries with keys
# ('article', 'fact', 'image', 'suggester', 'creator') where some
# elements may be None.
suggestions = []
def confirm(self, query):
answer = wikipedia.input(query + u' [y|N]')
return (answer in ('Y', 'y'))
def check(self, query):
if not self.confirm(query):
print "Stopping."
sys.exit(1)
def __init__(self, for_real = False):
self.for_real = for_real
self.suggestions = []
# Format for archiving.
def format_archive(self, s):
if s['image']:
return u'*...%s ([[:%s]])<br>\n' % (s['fact'], s['image'])
else:
return u'*...%s<br>\n' % s['fact']
# Format for DYK.
def format_dyk(self, s):
return u'<li>...%s</li>\n' % s['fact']
# Generate edit comment.
def make_comment(self, comment):
print '-' * 72
wikipedia.output(u'>>> ' + comment)
return u'DYKbot - ' + comment
# Suggestion regexp. Useful groups are:
# 1. Suggestion text
# 2. Article name
# 5. Image name
# 7. Image width
# 9. Image caption
# 11. User name.
suggestion_re = re.compile(
ur"^\*?\s*(?:\.\.\.|\u2026|…|…)\s*(.*'''\[\[([^\]|]+)(\|[^\]|]*)?\]\][a-z]*'''.*?\?)"
ur"(.*\(?\[\[:(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]+))?\]\]\)?)?"
ur"(.*\[\[(User:[^\]|]+))?.*$\s*", re.M)
# String marking the start of the archive in the talk page.
archive_marker = "<!-- Please place the latest did you know lines on the top. -->"
# String marking the end of suggestions in the talk page.
suggestions_end_marker = '== Inform these users =='
# Positions in the talk page.
suggestions_end_re = re.compile(re.escape(suggestions_end_marker)
+ '|' + re.escape(archive_marker))
archive_marker_re = re.compile(re.escape(archive_marker) + r'\s*')
# String matching the line for the time which needs to be updated.
refresh_string1 = u'* Earliest time for next refreshment is'
refresh_string2 = u'Wikipedia time ([[UTC]]). <!-- This should be six hours from when new items were last added to the template. -->'
refresh_re = re.compile('^' + re.escape(refresh_string1) + '.*$', re.M)
# Acceptable licences for the new image? (This isn't a thorough test
# but it will catch typical mistakes such as no licence at all.)
licence_re = re.compile(r'{{(GFDL|CC|cc|PD)')
# Old image on target page.
old_image_re = re.compile(
r'\[\[(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]*))*\]\]')
mprotected_re = re.compile(r'\s*{{mprotected}}\s*')
# Edittime regexp.
edittime_re = re.compile(r'^([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])'
r'([0-9][0-9])([0-9][0-9])([0-9][0-9])$')
def run(self):
self.target_pl = wikipedia.Page(self.site, self.target)
self.talk_pl = wikipedia.Page(self.site, self.talk)
# Check that it's more than 6 hours since the last time
# the target page was edited.
target_orig = self.target_pl.get()
et = wikipedia.edittime[repr(self.site),
wikipedia.link2url(self.target,
site = self.site)]
m = self.edittime_re.match(et)
if m:
et_secs = calendar.timegm(map(int, m.groups()[0:6]))
if time.time() - et_secs < 6 * 60 * 60:
self.check(u'%s last updated at %s. Proceed anyway?'
% (self.target, et))
else:
self.check(u"Unrecognizable edittime '%s' in %s. Proceed anyway?"
% (et, self.target))
# Read talk page, extracting suggestions and deleting them.
talk_orig = self.talk_pl.get()
# Only look at suggestions up to the suggestions end marker, and
# in reverse order (oldest first).
m = self.suggestions_end_re.search(talk_orig)
if not m:
print u'No archive marker in %s. Stopping.' % self.talk
sys.exit(1)
sugg_end = m.end()
iter = self.suggestion_re.finditer(talk_orig[:sugg_end])
mm = list(iter)
mm.reverse()
n = 0
talk_text = talk_orig
for m in mm:
n += 1
s = {
'article': m.group(2),
'pagelink': None,
'fact': m.group(1),
'image': m.group(5),
'suggester': m.group(11),
'creator': None,
}
wikipedia.output(u"Parsed suggestion %d as:" % n)
wikipedia.output(u" Article = " + s['article'])
wikipedia.output(u" Fact = " + s['fact'])
if s['image']:
wikipedia.output(u" Image = " + s['image'])
if s['suggester']:
wikipedia.output(u" Suggester = " + s['suggester'])
if self.confirm(u"Use this suggestion?"):
s['pagelink'] = wikipedia.Page(self.site, m.group(2))
# Cut out used suggestion.
talk_text = talk_text[:m.start()] + talk_text[m.end():]
if s['image'] and self.confirm(u"Use this image?"):
self.new_image = s['image']
self.new_image_pl = wikipedia.Page(self.site, s['image'])
# Item with image needs to appear on top.
self.suggestions = [s] + self.suggestions
else:
self.suggestions.append(s)
if not self.suggestions:
print "No suggestions. Stopping."
sys.exit(1)
if not self.new_image:
print "No image! Stopping."
sys.exit(1)
# Check creation times for suggestions.
for s in self.suggestions:
h = history.historyPage(s['pagelink'])
if h:
hoursago = (time.time() - h[-1]['date']) / 3600
msg = (u'%s created %d hours ago by %s.'
% (s['article'], hoursago, h[-1]['user'] or 'anon'))
if hoursago <= 72:
wikipedia.output(msg)
else:
self.check(msg + u' OK?')
s['creator'] = h[-1]['user']
else:
wikipedia.output(u'No history for %s.' % s['article'])
# Check that the new image has a plausible license.
try:
image_orig = self.new_image_pl.get()
except:
image_orig = u''
if not self.licence_re.search(image_orig):
print '-' * 72
wikipedia.output(image_orig)
print '-' * 72
self.check(u"%s appears not to have an acceptable licence. "
u"Use it anyway?" % self.new_image)
# Update the "next refresh" time in the talk page.
if self.refresh_re.search(talk_text):
next_refresh = time.time() + 6 * 60 * 60
replacement = self.refresh_string1 \
+ time.strftime(u' %Y-%m-%d %T ',
time.gmtime(next_refresh)) \
+ self.refresh_string2
talk_text = self.refresh_re.sub(replacement, talk_text, 1)
else:
self.check(u"No refresh text in %s. Proceed anyway?" % self.talk)
# Add the new suggestions to the top of the "Archive" section of
# the talk page, formatted with *...<br>, for
# [[User:AllyUnion]]'s bot to archive.
if self.archive_marker_re.search(talk_text):
replacement = self.archive_marker + '\n\n' \
+ ''.join(map(self.format_archive,
self.suggestions)) \
+ '\n'
talk_text = self.archive_marker_re.sub(replacement, talk_text, 1)
else:
print u"No archive marker in %s. Stopping." % self.talk
sys.exit(1)
# Add {{mprotected}} to the description page for the new image.
image_text = image_orig + '\n{{mprotected}}'
comment = self.make_comment(
u'added {{mprotected}}: image is about to appear on [[Main Page]]')
print '-' * 72
wikipedia.showDiff(image_orig, image_text)
print '-' * 72
if self.confirm(u"OK to update %s?" % self.new_image) \
and self.for_real:
self.new_image_pl.put(image_text, comment)
# Protect the new image.
comment = self.make_comment(
u'image is about to appear on [[Main Page]] via [[%s]]'
% self.target)
if self.confirm(u"OK to protect %s?" % self.new_image) \
and self.for_real:
print "Protecting new image"
protect.protectPage(self.new_image_pl, comment)
# Edit target, formatting lines with <li>...</li>.
m = self.old_image_re.search(target_orig)
if m:
self.old_image = m.group(1)
self.old_image_pl = wikipedia.Page(self.site, self.old_image)
else:
print "No image found on %s" % self.target
target_text = u"{{subst:User:Gdr/Did you know header|[[%s|100px|]]}}\n" \
% self.new_image \
+ u''.join(map(self.format_dyk, self.suggestions)) \
+ u"{{subst:User:Gdr/Did you know footer}}"
comment = self.make_comment(
u'%d new entries: %s'
% (len(self.suggestions),
u', '.join(u'[[%s]]' % s['article'] for s in self.suggestions)))
print '-' * 72
wikipedia.output(target_text)
print '-' * 72
if self.confirm(u"OK to update %s?" % self.target) and self.for_real:
self.target_pl.put(target_text, comment)
# Purge the [[Main Page]] cache.
if self.for_real:
print "Purging [[Main Page]] cache"
wikipedia.getUrl(self.site.hostname(),
'/w/wiki.phtml?title=Main_Page&action=purge')
if self.old_image:
# Unprotect the old image
comment = self.make_comment(u'image no longer on [[Main Page]]')
if self.confirm(u"OK to unprotect %s?" % self.old_image) \
and self.for_real:
print "Unprotecting old image"
protect.unprotectPage(self.old_image_pl, comment)
# Remove {{mprotected}} from the description page for the
# old image.
try:
image_orig = self.old_image_pl.get()
except:
image_orig = u''
if self.mprotected_re.search(image_orig):
image_text = self.mprotected_re.sub('', image_orig, 1)
comment = self.make_comment(
u'removed {{mprotected}}: image no longer on [[Main Page]]')
print '-' * 72
wikipedia.showDiff(image_orig, image_text)
print '-' * 72
if self.confirm(u"OK to update %s?" % self.old_image) \
and self.for_real:
self.old_image_pl.put(image_text, comment)
else:
print "{{mprotected}} not found in %s" % self.old_image
self.inform_creators()
# Update [[Template talk:Did you know]]
comment = self.make_comment(u'archiving %d suggestions'
% len(self.suggestions))
print '-' * 72
wikipedia.showDiff(talk_orig, talk_text)
print '-' * 72
if self.confirm(u"OK to update %s?" % self.talk) and self.for_real:
self.talk_pl.put(talk_text, comment)
print '-' * 72
if self.for_real:
print "Did you know has been updated. Please check the results."
else:
print "Did you know has not been updated."
def inform_creators(self):
# Leave {{subst:UpdatedDYK|[[<article>]]}} message on user pages of
# creators.
for s in self.suggestions:
if s['creator']:
user_talk = re.sub('^User:', 'User talk:', s['creator'])
user_talk_pl = wikipedia.Page(self.site, user_talk)
try:
user_talk_orig = user_talk_pl.get()
except wikipedia.IsRedirectPage:
continue
except wikipedia.NoPage:
user_talk_orig = u''
user_talk_text = user_talk_orig \
+ u'\n\n== Did you know? ==\n\n' \
+ u'{{subst:UpdatedDYK|[[%s]]}}' % s['article']
comment = self.make_comment(
u'your article [[%s]] has been used on [[%s]]'
% (s['article'], self.target))
print '-' * 72
wikipedia.showDiff(user_talk_orig, user_talk_text)
print '-' * 72
if self.confirm(u"OK to update %s?" % user_talk) \
and self.for_real:
user_talk_pl.put(user_talk_text, comment)
if __name__ == '__main__':
wikipedia.username = 'DYKbot'
forreal = False
opts, args = getopt.getopt(sys.argv[1:], '', ['for-real'])
for o, a in opts:
if o == '--for-real':
forreal = True
if not forreal:
print "RUNNING IN DEBUGGING MODE: ARTICLES WILL NOT BE EDITED"
try:
DYK(forreal).run()
finally:
wikipedia.stopme()