diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9a1c3ae12362e73cff697f50cfc5bd721c0b8557 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +*.svn diff --git a/README.txt b/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..1aa6753dd2a881517dee964e719ea4508139a2ea --- /dev/null +++ b/README.txt @@ -0,0 +1,89 @@ +This file describes mwclient-0.6.5. The latest version is available in the +subversion repository <https://mwclient.svn.sourceforge.net/svnroot/mwclient> +and also browsable <http://mwclient.svn.sourceforge.net/viewvc/mwclient/>. + +Mwclient is a client to the MediaWiki API <http://mediawiki.org/wiki/API> +and allows access to almost all implemented API functions. Mwclient requires +Python 2.4. This version supports MediaWiki 1.11 and above. However, for +functions not available in the current MediaWiki, a MediaWikiVersionError +is raised. + +This framework is written by Bryan Tong Minh and serves most of his bots. +The framework and this documentation are primarily written for personal +use and may or may not work for you. In case it doesn't, Bryan can be +contacted on btongminh@users.sourceforge.net. + +This framework heavily depends on simplejson, (c) copyright Bob Ippolito. + + +== Implementation notes == +Most properties and generators accept the same parameters as the API, without +their two letter prefix. Exceptions to this rule: +* Image.imageinfo is the imageinfo of the latest image. Earlier versions can be + fetched using imagehistory() +* Site.all* : parameter [ap]from renamed to start +* categorymembers is implemented as Category.members +* deletedrevs is deletedrevisions +* usercontribs is usercontributions +* First parameters of search and usercontributions are search and user + respectively + +Properties and generators are implemented as Python generators. Their limit +parameter is only an indication of the number of items in one chunk. 
It is not +the total limit. Doing list(generator(limit = limit)) will return ALL items of +generator, and not be limited by the limit value. +Default chunk size is generally the maximum chunk size. + +== HTTPS == +To use https, specify the host as a tuple in the form of ('https', hostname). + +== Example == +## For more information, see REFERENCE.txt +# Init site object +import mwclient +site = mwclient.Site('commons.wikimedia.org') +site.login(username, password) # Optional + +# Edit page +page = site.Pages['Commons:Sandbox'] +text = page.edit() +print 'Text in sandbox:', text.encode('utf-8') +page.save(text + u'\nExtra data', summary = 'Test edit') + +# Printing imageusage +image = site.Images['Example.jpg'] +print 'Image', image.name.encode('utf-8'), 'usage:' +for page in image.imageusage(): + print 'Used:', page.name.encode('utf-8'), '; namespace', page.namespace + print 'Image info:', image.imageinfo + +# Uploading a file +site.upload(open('file.jpg'), 'destination.jpg', 'Image description') + +# Listing all categories (don't do this in reality) +for category in site.allcategories(): + print category + +== License == + Copyright (c) 2006-2011 Bryan Tong Minh + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/REFERENCE.txt b/REFERENCE.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d362d916e504d099d6b287cf734c6ea44a8410e --- /dev/null +++ b/REFERENCE.txt @@ -0,0 +1,91 @@ +This file is intended to be a reference to mwclient. The current version is +mwclient 0.6.6. + +The mwclient framework provides access to the MediaWiki API. It provides the +functions of the MediaWiki API in a Pythonic manner. + +== Sites == +The 'Site' object is the most important class. It represents a MediaWiki site. +Its constructor accepts various arguments, of which the first two, host and +path, are the most important. They represent respectively the hostname without +protocol and the root directory where api.php is located. The path parameter +should end with a slash /. Other protocols than HTTP are currently not +supported. + +site = mwclient.Site(host, path = '/w/', ...) + +=== Pages === +Sites provide access to pages via various generators and the Pages object. The +base Page object is called Page and from that derive Category and Image. When +the page is retrieved via Site.Pages or a generator, it will check +automatically which of those three specific types should be returned. To get +a page by its name, use Site.Pages as a subscriptable object: + +page = site.Pages['Template:Stub'] +image = site.Pages['Image:Wiki.png'] # This will return an Image object +image2 = site.Images['Wiki.png'] # The same image + +Alternatively Site.Images and Site.Categories are provided, which do exactly +the same as Site.Pages, except that they require the page name without its +namespace prefixed. 
+ +==== PageProperties ==== +The Page object provides many generators available in the API. In addition to +the page properties listed in the API documentation, also the lists backlinks +and embeddedin are members of the Page object. For more information about +using generators, see the section about generators below. + +Category objects provide an extra property members to list all members of the +category. The Category object can also be used itself as an iterator yielding +all its members. + +category = site.Pages['Category:Help'] +for page in category: + print page + +Image objects have additional functions imagehistory and imageusage which +represent the old images and the usage respectively. Image.download returns a +file object to the full size image. + +fr = image.download() +fw = open('Wiki.png', 'wb') +while True: + s = fr.read(4096) + if not s: break + fw.write(s) +fr.close() # Always close those file objects !!! +fw.close() + +==== Editing pages ==== +Call Page.edit() to retrieve the page content. Use Page.save(text, summary = +u'', ...) to save the page. If available, Page.save uses the API to edit, but +falls back to the old way if the write API is not available. + +== Generators == +== Exceptions == + +== Implementation notes == +Most properties and generators accept the same parameters as the API, without +their two letter prefix. Exceptions: +* Image.imageinfo is the imageinfo of the latest image. Earlier versions can be + fetched using imagehistory() +* Site.all* : parameter (ap)from renamed to start +* categorymembers is implemented as Category.members +* deletedrevs is deletedrevisions +* usercontribs is usercontributions +* First parameters of search and usercontributions are search and user + respectively + +Properties and generators are implemented as Python generators. Their limit +parameter is only an indication of the number of items in one chunk. It is not +the total limit. 
Doing list(generator(limit = limit)) will return ALL items of +generator, and not be limited by the limit value. Use list(generator( +max_items = max_items)) to limit the number of items returned. +Default chunk size is generally the maximum chunk size. + +== Links == +* SourceForge.net project: http://sourceforge.net/projects/mwclient +* Project website: http://mwclient.sourceforge.net/ +* Subversion: https://mwclient.svn.sourceforge.net/svnroot/mwclient +* Browseable repository: http://mwclient.svn.sourceforge.net/viewvc/mwclient/ +* MediaWiki API documentation: http://mediawiki.org/wiki/API \ No newline at end of file diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt new file mode 100644 index 0000000000000000000000000000000000000000..e96125671b5f1705acdb63f1f300941729e04ccf --- /dev/null +++ b/RELEASE-NOTES.txt @@ -0,0 +1,69 @@ +RELEASE NOTES FOR MWCLIENT + +This is mwclient 0.6.6. The following are the release notes for this version. + +== Changes in version 0.6.6 == +* Allow setting both the upload description and the page content separately + +== Changes in version 0.6.5 == +* Explicitly convert the Content-Length header to str, avoiding a TypeError + on some versions of Python. +* Fix for upload by URL +* Handle readapidenied error in site init +* Fix version parsing for almost any sane version string + +== Changes in version 0.6.4 == +* Added support for upload API +* Added prop=duplicatefiles +* Properly fix detection of alpha versions +* Added support for builtin json library +* Handle badtoken once +* Bug 2690034: Fix revision iteration +* Fix module conflict with simplejson-1.x by inserting mwclient path at the + beginning of sys.path instead of the end +* Supply token on login if necessary + +== Changes in version 0.6.3 == +* Added domain parameter to login. +* Applied edit fix to page_nowriteapi +* Allow arbitrary data to be passed to page.save +* Fix mwclient on WMF wikis + +== Changes in version 0.6.2 == +Mwclient 0.6.2 was released on 2 May 2009. 
+* Compatibility fixes for MediaWiki 1.13 +* Download fix for images +* Full support for editing pages via write api and split of compatibility to + another file. +* Added expandtemplates api call +* Added and fixed moving via API +* Raise an ApiDisabledError if the API is disabled +* Added support for HTTPS +* Fixed email code +* Mark edits as bots by default. +* Added action=parse. Modified patch by Brian Mingus. +* Improved general HTTP and upload handling. + +== Changes in version 0.6.1 and 0.6.0 == +Mwclient 0.6.1 was released in May 2008. No release notes were kept for +that version. + +Mwclient 0.6.0 was released in February 2008. It was the first official +release via Sourceforge. This version removed some Pywikipedia influences +added in 0.4. + +== Mwclient 0.5 == +Mwclient 0.5 was an architectural redesign which accomplished easy +extendibility and added proper support for continuations. + +== Mwclient 0.4 == +Mwclient 0.4 was somewhat the basis for future releases and shows the current +module architecture. It was influenced by Pywikipedia that was by then +discovered by the author. + +== Mwclient 0.2 and 0.3 == +Mwclient 0.2 and 0.3 were probably a bit of a generalization, and maybe +already used the API for some part, but details are unknown. + +== Mwclient 0.1 == +Mwclient 0.1 was a non-API module for accessing Wikipedia using an XML parser. 
diff --git a/client.py b/client.py index efd6a04c545982cd12642838eb67cc23d484e3bc..820d574dd67dbc9f237d858e421ce1b1fa3b3db4 100644 --- a/client.py +++ b/client.py @@ -418,9 +418,11 @@ class Site(object): self.wait(wait_token) file.seek(0, 0) - def parse(self, text, title = None): - kwargs = {'text': text} + def parse(self, text = None, title = None, page = None): + kwargs = {} + if text is not None: kwargs['text'] = text if title is not None: kwargs['title'] = title + if page is not None: kwargs['page'] = page result = self.api('parse', **kwargs) return result['parse'] @@ -528,11 +530,11 @@ class Site(object): return listing.List(self, 'exturlusage', 'eu', limit = limit, **kwargs) def logevents(self, type = None, prop = None, start = None, end = None, - dir = 'older', user = None, title = None, limit = None): - self.require(1, 9) + dir = 'older', user = None, title = None, limit = None, action = None): + self.require(1, 10) kwargs = dict(listing.List.generate_kwargs('le', prop = prop, type = type, start = start, - end = end, dir = dir, user = user, title = title)) + end = end, dir = dir, user = user, title = title, action = action)) return listing.List(self, 'logevents', 'le', limit = limit, **kwargs) # def protectedtitles requires 1.15 @@ -549,7 +551,7 @@ class Site(object): kwargs = dict(listing.List.generate_kwargs('rn', namespace = namespace)) return listing.List(self, 'random', 'rn', limit = limit, **kwargs) - + def recentchanges(self, start = None, end = None, dir = 'older', namespace = None, prop = None, show = None, limit = None, type = None): self.require(1, 9) diff --git a/listing.py b/listing.py index 3c510959b5bf0f62bc69b828cf3e15b1e41b4bac..170547175ff9505156ab75497ff981808a62f33a 100644 --- a/listing.py +++ b/listing.py @@ -169,13 +169,13 @@ class PageList(GeneratorList): return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) else: # Guessing page class - namespace = self.guess_namespace(name) - if namespace == 14: - 
return Category(self.site, name, info) - elif namespace == 6: - return page.Image(self.site, name, info) - else: - return page.Page(self.site, name, info) + if type(name) is not int: + namespace = self.guess_namespace(name) + if namespace == 14: + return Category(self.site, name, info) + elif namespace == 6: + return page.Image(self.site, name, info) + return page.Page(self.site, name, info) def guess_namespace(self, name): normal_name = page.Page.normalize_title(name) diff --git a/page.py b/page.py index 3c77acbf5f5e957b29a39f8d6821daa06d6a6308..0265eeea73bb15ee94b6caccab826c8d53136fb2 100644 --- a/page.py +++ b/page.py @@ -10,6 +10,7 @@ class Page(object): return self.__dict__.update(name.__dict__) self.site = site self.name = name + self.section = None if not info: if extra_properties: @@ -20,8 +21,12 @@ class Page(object): prop = 'info' extra_props = () - info = self.site.api('query', prop = prop, titles = name, - inprop = 'protection', *extra_props) + if type(name) is int: + info = self.site.api('query', prop = prop, pageids = name, + inprop = 'protection', *extra_props) + else: + info = self.site.api('query', prop = prop, titles = name, + inprop = 'protection', *extra_props) info = info['query']['pages'].itervalues().next() self._info = info @@ -41,7 +46,26 @@ class Page(object): self.last_rev_time = None self.edit_time = None - + + def redirects_to(self): + """ Returns the redirect target page, or None if the page is not a redirect page.""" + info = self.site.api('query', prop = 'pageprops', titles = self.name, redirects = '')['query'] + if 'redirects' in info: + for page in info['redirects']: + if page['from'] == self.name: + return Page(self.site, page['to']) + return None + else: + return None + + def resolve_redirect(self): + """ Returns the redirect target page, or the current page if it's not a redirect page.""" + target_page = self.redirects_to() + if target_page == None: + return self + else: + return target_page + def __repr__(self): return "<Page 
object '%s' for %s>" % (self.name.encode('utf-8'), self.site) @@ -104,18 +128,20 @@ class Page(object): if not self.exists: return u'' - revs = self.revisions(prop = 'content|timestamp', limit = 1) + revs = self.revisions(prop = 'content|timestamp', limit = 1, section = section) try: rev = revs.next() self.text = rev['*'] + self.section = section self.last_rev_time = rev['timestamp'] except StopIteration: self.text = u'' + self.section = None self.edit_time = None self.edit_time = time.gmtime() return self.text - def save(self, text = u'', summary = u'', minor = False, bot = True, **kwargs): + def save(self, text = u'', summary = u'', minor = False, bot = True, section = None, **kwargs): """Save text of page.""" if not self.site.logged_in and self.site.force_login: # Should we really check for this? @@ -126,6 +152,7 @@ class Page(object): raise errors.ProtectedPageError(self) if not text: text = self.text + if not section: section = self.section if not self.site.writeapi: return OldPage.save(self, text = text, summary = summary, minor = False) @@ -136,6 +163,7 @@ class Page(object): if self.last_rev_time: data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) if self.edit_time: data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) if bot: data['bot'] = '1' + if section: data['section'] = section data.update(kwargs) @@ -277,27 +305,29 @@ class Page(object): else: return listing.PageProperty(self, 'images', '', return_values = 'title') - def langlinks(self): + def langlinks(self, **kwargs): self.site.require(1, 9) - return listing.PageProperty(self, 'langlinks', 'll', return_values = ('lang', '*')) + return listing.PageProperty(self, 'langlinks', 'll', return_values = ('lang', '*'), **kwargs) - def links(self, namespace = None, generator = True): + def links(self, namespace = None, generator = True, redirects = False): self.site.require(1, 9) kwargs = dict(listing.List.generate_kwargs('pl', namespace = namespace)) + if redirects: 
kwargs['redirects'] = '1' if generator: - return listing.PagePropertyGenerator(self, 'links', 'pl') + return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) else: - return listing.PageProperty(self, 'links', 'pl', return_values = 'title') + return listing.PageProperty(self, 'links', 'pl', return_values = 'title', **kwargs) def revisions(self, startid = None, endid = None, start = None, end = None, dir = 'older', user = None, excludeuser = None, limit = 50, - prop = 'ids|timestamp|flags|comment|user', expandtemplates = False): + prop = 'ids|timestamp|flags|comment|user', expandtemplates = False, section = None): self.site.require(1, 8) kwargs = dict(listing.List.generate_kwargs('rv', startid = startid, endid = endid, start = start, end = end, user = user, excludeuser = excludeuser)) kwargs['rvdir'] = dir kwargs['rvprop'] = prop if expandtemplates: kwargs['rvexpandtemplates'] = '1' + if section: kwargs['rvsection'] = section return listing.RevisionsIterator(self, 'revisions', 'rv', limit = limit, **kwargs)