diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000000000000000000000000000000000..b45f05ec05ff5a754b6f0033c98db5623c0314d2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,17 @@ +# http://about.travis-ci.org/docs/user/languages/python/ + +language: python +python: + - "2.5" + - "2.6" + - "2.7" + +install: + - pip install -r requirements.txt --use-mirrors + #- python setup.py install + +before_script: + - pip install -r test_requirements.txt --use-mirrors + +script: + - py.test --pep8 mwclient -v --doctest-modules tests/client-test.py diff --git a/mwclient/__init__.py b/mwclient/__init__.py index afc17bfa0912cde0b2251bd59074561c844fa29e..bb32ddeb99a5a011f9c7ada974eccb54df7a6b51 100644 --- a/mwclient/__init__.py +++ b/mwclient/__init__.py @@ -1,6 +1,6 @@ """ Copyright (c) 2006-2011 Bryan Tong Minh - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without @@ -9,10 +9,10 @@ copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/mwclient/client.py b/mwclient/client.py index 820d574dd67dbc9f237d858e421ce1b1fa3b3db4..eead6d62485bb9386ef1a0336746785068c7b958 100644 --- a/mwclient/client.py +++ b/mwclient/client.py @@ -1,606 +1,621 @@ __ver__ = '0.6.6' -import urllib, urlparse -import time, random -import sys, weakref +import urllib +import urlparse +import time +import random +import sys +import weakref import socket try: - import json + import json except ImportError: - import simplejson as json + import simplejson as json import http import upload import errors -import listing, page +import listing +import page import compatibility try: - import gzip + import gzip except ImportError: - gzip = None + gzip = None try: - from cStringIO import StringIO + from cStringIO import StringIO except ImportError: - from StringIO import StringIO + from StringIO import StringIO + def parse_timestamp(t): - if t == '0000-00-00T00:00:00Z': - return (0, 0, 0, 0, 0, 0, 0, 0) - return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ') - + if t == '0000-00-00T00:00:00Z': + return (0, 0, 0, 0, 0, 0, 0, 0) + return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ') + + class WaitToken(object): - def __init__(self): - self.id = '%x' % random.randint(0, sys.maxint) - def __hash__(self): - return hash(self.id) + + def __init__(self): + self.id = '%x' % random.randint(0, sys.maxint) + + def __hash__(self): + return hash(self.id) + class Site(object): - api_limit = 500 - def __init__(self, host, path = '/w/', ext = '.php', pool = None, retry_timeout = 30, - max_retries = 25, wait_callback = lambda *x: None, clients_useragent = None, - max_lag = 3, compress = True, force_login = True, do_init = True): - # Setup member variables - self.host = host - self.path = path - self.ext = ext - self.credentials = None - self.compress = compress - - 
self.retry_timeout = retry_timeout - self.max_retries = max_retries - self.wait_callback = wait_callback - self.max_lag = str(max_lag) - self.force_login = force_login - - # The token string => token object mapping - self.wait_tokens = weakref.WeakKeyDictionary() - - # Site properties - self.blocked = False # Whether current user is blocked - self.hasmsg = False # Whether current user has new messages - self.groups = [] # Groups current user belongs to - self.rights = [] # Rights current user has - self.tokens = {} # Edit tokens of the current user - self.version = None - - self.namespaces = self.default_namespaces - self.writeapi = False - - # Setup connection - if pool is None: - self.connection = http.HTTPPool(clients_useragent) - else: - self.connection = pool - - # Page generators - self.pages = listing.PageList(self) - self.categories = listing.PageList(self, namespace = 14) - self.images = listing.PageList(self, namespace = 6) - - # Compat page generators - self.Pages = self.pages - self.Categories = self.categories - self.Images = self.images - - # Initialization status - self.initialized = False - - if do_init: - try: - self.site_init() - except errors.APIError, e: - # Private wiki, do init after login - if e[0] not in (u'unknown_action', u'readapidenied'): - raise - - - def site_init(self): - meta = self.api('query', meta = 'siteinfo|userinfo', - siprop = 'general|namespaces', uiprop = 'groups|rights') - - # Extract site info - self.site = meta['query']['general'] - self.namespaces = dict(((i['id'], i.get('*', '')) for i in meta['query']['namespaces'].itervalues())) - self.writeapi = 'writeapi' in self.site - - # Determine version - if self.site['generator'].startswith('MediaWiki '): - version = self.site['generator'][10:].split('.') - def split_num(s): - i = 0 - while i < len(s): - if s[i] < '0' or s[i] > '9': - break - i += 1 - if s[i:]: - return (int(s[:i]), s[i:], ) - else: - return (int(s[:i]), ) - self.version = sum((split_num(s) for s in version), 
()) - - if len(self.version) < 2: - raise errors.MediaWikiVersionError('Unknown MediaWiki %s' % '.'.join(version)) - else: - raise errors.MediaWikiVersionError('Unknown generator %s' % self.site['generator']) - # Require 1.11 until some compatibility issues are fixed - self.require(1, 11) - - # User info - userinfo = compatibility.userinfo(meta, self.require(1, 12, raise_error = False)) - self.username = userinfo['name'] - self.groups = userinfo.get('groups', []) - self.rights = userinfo.get('rights', []) - self.initialized = True - - - default_namespaces = {0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project', 5: u'Project talk', - 6: u'Image', 7: u'Image talk', 8: u'MediaWiki', 9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', - 12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk', -1: u'Special', -2: u'Media'} - - def __repr__(self): - return "<Site object '%s%s'>" % (self.host, self.path) - - - def api(self, action, *args, **kwargs): - """ An API call. Handles errors and returns dict object. 
""" - kwargs.update(args) - if action == 'query': - if 'meta' in kwargs: - kwargs['meta'] += '|userinfo' - else: - kwargs['meta'] = 'userinfo' - if 'uiprop' in kwargs: - kwargs['uiprop'] += '|blockinfo|hasmsg' - else: - kwargs['uiprop'] = 'blockinfo|hasmsg' - - token = self.wait_token() - while True: - info = self.raw_api(action, **kwargs) - if not info: info = {} - res = self.handle_api_result(info, token = token) - if res: - return info - - - def handle_api_result(self, info, kwargs = None, token = None): - if token is None: - token = self.wait_token() - - try: - userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error = None)) - except KeyError: - userinfo = () - if 'blockedby' in userinfo: - self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u'')) - else: - self.blocked = False - self.hasmsg = 'message' in userinfo - self.logged_in = 'anon' not in userinfo - if 'error' in info: - if info['error']['code'] in (u'internal_api_error_DBConnectionError', ): - self.wait(token) - return False - if '*' in info['error']: - raise errors.APIError(info['error']['code'], - info['error']['info'], info['error']['*']) - raise errors.APIError(info['error']['code'], - info['error']['info'], kwargs) - return True - - @staticmethod - def _to_str(data): - if type(data) is unicode: - return data.encode('utf-8') - return str(data) - @staticmethod - def _query_string(*args, **kwargs): - kwargs.update(args) - qs = urllib.urlencode([(k, Site._to_str(v)) for k, v in kwargs.iteritems() - if k != 'wpEditToken']) - if 'wpEditToken' in kwargs: - qs += '&wpEditToken=' + urllib.quote(Site._to_str(kwargs['wpEditToken'])) - return qs - - def raw_call(self, script, data): - url = self.path + script + self.ext - headers = {} - if not issubclass(data.__class__, upload.Upload): - headers['Content-Type'] = 'application/x-www-form-urlencoded' - if self.compress and gzip: - headers['Accept-Encoding'] = 'gzip' - - token = self.wait_token((script, data)) - while True: - try: 
- stream = self.connection.post(self.host, - url, data = data, headers = headers) - if stream.getheader('Content-Encoding') == 'gzip': - # BAD. - seekable_stream = StringIO(stream.read()) - stream = gzip.GzipFile(fileobj = seekable_stream) - return stream - - except errors.HTTPStatusError, e: - if e[0] == 503 and e[1].getheader('X-Database-Lag'): - self.wait(token, int(e[1].getheader('Retry-After'))) - elif e[0] < 500 or e[0] > 599: - raise - else: - self.wait(token) - except errors.HTTPRedirectError: - raise - except errors.HTTPError: - self.wait(token) - except ValueError: - self.wait(token) - - def raw_api(self, action, *args, **kwargs): - """Sends a call to the API.""" - kwargs['action'] = action - kwargs['format'] = 'json' - data = self._query_string(*args, **kwargs) - json_data = self.raw_call('api', data).read() - try: - return json.loads(json_data) - except ValueError: - if json_data.startswith('MediaWiki API is not enabled for this site.'): - raise errors.APIDisabledError - raise - - def raw_index(self, action, *args, **kwargs): - """Sends a call to index.php rather than the API.""" - kwargs['action'] = action - kwargs['maxlag'] = self.max_lag - data = self._query_string(*args, **kwargs) - return self.raw_call('index', data).read().decode('utf-8', 'ignore') - - def wait_token(self, args = None): - token = WaitToken() - self.wait_tokens[token] = (0, args) - return token - - def wait(self, token, min_wait = 0): - retry, args = self.wait_tokens[token] - self.wait_tokens[token] = (retry + 1, args) - if retry > self.max_retries and self.max_retries != -1: - raise errors.MaximumRetriesExceeded(self, token, args) - self.wait_callback(self, token, retry, args) - - timeout = self.retry_timeout * retry - if timeout < min_wait: timeout = min_wait - time.sleep(timeout) - return self.wait_tokens[token] - - def require(self, major, minor, revision = None, raise_error = True): - if self.version is None: - if raise_error is None: return - raise RuntimeError('Site %s has 
not yet been initialized' % repr(self)) - - if revision is None: - if self.version[:2] >= (major, minor): - return True - elif raise_error: - raise errors.MediaWikiVersionError('Requires version %s.%s, current version is %s.%s' - % ((major, minor) + self.version[:2])) - else: - return False - else: - raise NotImplementedError - - - # Actions - def email(self, user, text, subject, cc = False): - """Sends email to a specified user on the wiki.""" - #TODO: Use api! - postdata = {} - postdata['wpSubject'] = subject - postdata['wpText'] = text - if cc: postdata['wpCCMe'] = '1' - postdata['wpEditToken'] = self.tokens['edit'] - postdata['uselang'] = 'en' - postdata['title'] = u'Special:Emailuser/' + user - - data = self.raw_index('submit', **postdata) - if 'var wgAction = "success";' not in data: - if 'This user has not specified a valid e-mail address' in data: - # Dirty hack - raise errors.NoSpecifiedEmailError, user - raise errors.EmailError, data - - - def login(self, username = None, password = None, cookies = None, domain = None): - """Login to the wiki.""" - if self.initialized: self.require(1, 10) - - if username and password: - self.credentials = (username, password, domain) - if cookies: - if self.host not in self.conn.cookies: - self.conn.cookies[self.host] = http.CookieJar() - self.conn.cookies[self.host].update(cookies) - - if self.credentials: - wait_token = self.wait_token() - kwargs = { - 'lgname': self.credentials[0], - 'lgpassword': self.credentials[1] - } - if self.credentials[2]: - kwargs['lgdomain'] = self.credentials[2] - while True: - login = self.api('login', **kwargs) - if login['login']['result'] == 'Success': - break - elif login['login']['result'] == 'NeedToken': - kwargs['lgtoken'] = login['login']['token'] - elif login['login']['result'] == 'Throttled': - self.wait(wait_token, login['login'].get('wait', 5)) - else: - raise errors.LoginError(self, login['login']) - - if self.initialized: - info = self.api('query', meta = 'userinfo', uiprop = 
'groups|rights') - userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error = False)) - self.username = userinfo['name'] - self.groups = userinfo.get('groups', []) - self.rights = userinfo.get('rights', []) - self.tokens = {} - else: - self.site_init() - - - def upload(self, file = None, filename = None, description = '', ignore = False, file_size = None, - url = None, session_key = None, comment = None): - """Upload a file to the wiki.""" - if self.version[:2] < (1, 16): - return compatibility.old_upload(self, file = file, filename = filename, - description = description, ignore = ignore, - file_size = file_size) - - image = self.Images[filename] - if not image.can('upload'): - raise errors.InsufficientPermission(filename) - - - - predata = {} - - if comment is None: - predata['comment'] = description - else: - predata['comment'] = comment - predata['text'] = description - - if ignore: - predata['ignorewarnings'] = 'true' - predata['token'] = image.get_token('edit') - predata['action'] = 'upload' - predata['format'] = 'json' - predata['filename'] = filename - if url: - predata['url'] = url - if session_key: - predata['session_key'] = session_key - - if file is None: - postdata = self._query_string(predata) - else: - if type(file) is str: - file_size = len(file) - file = StringIO(file) - if file_size is None: - file.seek(0, 2) - file_size = file.tell() - file.seek(0, 0) - - postdata = upload.UploadFile('file', filename, file_size, file, predata) - - wait_token = self.wait_token() - while True: - try: - data = self.raw_call('api', postdata).read() - info = json.loads(data) - if not info: - info = {} - if self.handle_api_result(info, kwargs = predata): - return info.get('upload', {}) - except errors.HTTPStatusError, e: - if e[0] == 503 and e[1].getheader('X-Database-Lag'): - self.wait(wait_token, int(e[1].getheader('Retry-After'))) - elif e[0] < 500 or e[0] > 599: - raise - else: - self.wait(wait_token) - except errors.HTTPError: - 
self.wait(wait_token) - file.seek(0, 0) - - def parse(self, text = None, title = None, page = None): - kwargs = {} - if text is not None: kwargs['text'] = text - if title is not None: kwargs['title'] = title - if page is not None: kwargs['page'] = page - result = self.api('parse', **kwargs) - return result['parse'] - - # def block: requires 1.12 - # def unblock: requires 1.12 - # def patrol: requires 1.14 - # def import: requires 1.15 - - # Lists - def allpages(self, start = None, prefix = None, namespace = '0', filterredir = 'all', - minsize = None, maxsize = None, prtype = None, prlevel = None, - limit = None, dir = 'ascending', filterlanglinks = 'all', generator = True): - """Retrieve all pages on the wiki as a generator.""" - self.require(1, 9) - - pfx = listing.List.get_prefix('ap', generator) - kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), prefix = prefix, - minsize = minsize, maxsize = maxsize, prtype = prtype, prlevel = prlevel, - namespace = namespace, filterredir = filterredir, dir = dir, - filterlanglinks = filterlanglinks)) - return listing.List.get_list(generator)(self, 'allpages', 'ap', limit = limit, return_values = 'title', **kwargs) - # def allimages(self): requires 1.12 - # TODO! 
- - def alllinks(self, start = None, prefix = None, unique = False, prop = 'title', - namespace = '0', limit = None, generator = True): - """Retrieve a list of all links on the wiki as a generator.""" - self.require(1, 11) - - pfx = listing.List.get_prefix('al', generator) - kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), prefix = prefix, - prop = prop, namespace = namespace)) - if unique: kwargs[pfx + 'unique'] = '1' - return listing.List.get_list(generator)(self, 'alllinks', 'al', limit = limit, return_values = 'title', **kwargs) - - def allcategories(self, start = None, prefix = None, dir = 'ascending', limit = None, generator = True): - """Retrieve all categories on the wiki as a generator.""" - self.require(1, 12) - - pfx = listing.List.get_prefix('ac', generator) - kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), prefix = prefix, dir = dir)) - return listing.List.get_list(generator)(self, 'allcategories', 'ac', limit = limit, **kwargs) - - def allusers(self, start = None, prefix = None, group = None, prop = None, limit = None): - """Retrieve all users on the wiki as a generator.""" - self.require(1, 11) - - kwargs = dict(listing.List.generate_kwargs('au', ('from', start), prefix = prefix, - group = group, prop = prop)) - return listing.List(self, 'allusers', 'au', limit = limit, **kwargs) - - def blocks(self, start = None, end = None, dir = 'older', ids = None, users = None, limit = None, - prop = 'id|user|by|timestamp|expiry|reason|flags'): - """Retrieve blocks as a generator. 
- - Each block is a dictionary containing: - - user: the username or IP address of the user - - id: the ID of the block - - timestamp: when the block was added - - expiry: when the block runs out (infinity for indefinite blocks) - - reason: the reason they are blocked - - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page - - by: the administrator who blocked the user - - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled. - - """ - - self.require(1, 12) - # TODO: Fix. Fix what? - kwargs = dict(listing.List.generate_kwargs('bk', start = start, end = end, dir = dir, - users = users, prop = prop)) - return listing.List(self, 'blocks', 'bk', limit = limit, **kwargs) - - def deletedrevisions(self, start = None, end = None, dir = 'older', namespace = None, - limit = None, prop = 'user|comment'): - # TODO: Fix - self.require(1, 12) - - kwargs = dict(listing.List.generate_kwargs('dr', start = start, end = end, dir = dir, - namespace = namespace, prop = prop)) - return listing.List(self, 'deletedrevs', 'dr', limit = limit, **kwargs) - - def exturlusage(self, query, prop = None, protocol = 'http', namespace = None, limit = None): - """Retrieves list of pages that link to a particular domain or URL as a generator. - - This API call mirrors the Special:LinkSearch function on-wiki. - - Query can be a domain like 'bbc.co.uk'. Wildcards can be used, e.g. '*.bbc.co.uk'. - Alternatively, a query can contain a full domain name and some or all of a URL: - e.g. '*.wikipedia.org/wiki/*' - - See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details. - - The generator returns dictionaries containing three keys: - - url: the URL linked to. - - ns: namespace of the wiki page - - pageid: the ID of the wiki page - - title: the page title. 
- - """ - self.require(1, 11) - - kwargs = dict(listing.List.generate_kwargs('eu', query = query, prop = prop, - protocol = protocol, namespace = namespace)) - return listing.List(self, 'exturlusage', 'eu', limit = limit, **kwargs) - - def logevents(self, type = None, prop = None, start = None, end = None, - dir = 'older', user = None, title = None, limit = None, action = None): - self.require(1, 10) - - kwargs = dict(listing.List.generate_kwargs('le', prop = prop, type = type, start = start, - end = end, dir = dir, user = user, title = title, action = action)) - return listing.List(self, 'logevents', 'le', limit = limit, **kwargs) - - # def protectedtitles requires 1.15 - def random(self, namespace, limit = 20): - """Retrieves a generator of random page from a particular namespace. - - limit specifies the number of random articles retrieved. - namespace is a namespace identifier integer. - - Generator contains dictionary with namespace, page ID and title. - - """ - self.require(1, 12) - - kwargs = dict(listing.List.generate_kwargs('rn', namespace = namespace)) - return listing.List(self, 'random', 'rn', limit = limit, **kwargs) - - def recentchanges(self, start = None, end = None, dir = 'older', namespace = None, - prop = None, show = None, limit = None, type = None): - self.require(1, 9) - - kwargs = dict(listing.List.generate_kwargs('rc', start = start, end = end, dir = dir, - namespace = namespace, prop = prop, show = show, type = type)) - return listing.List(self, 'recentchanges', 'rc', limit = limit, **kwargs) - - def search(self, search, namespace = '0', what = 'title', redirects = False, limit = None): - self.require(1, 11) - - kwargs = dict(listing.List.generate_kwargs('sr', search = search, namespace = namespace, what = what)) - if redirects: kwargs['srredirects'] = '1' - return listing.List(self, 'search', 'sr', limit = limit, **kwargs) - - def usercontributions(self, user, start = None, end = None, dir = 'older', namespace = None, - prop = None, show = 
None, limit = None): - self.require(1, 9) - - kwargs = dict(listing.List.generate_kwargs('uc', user = user, start = start, end = end, - dir = dir, namespace = namespace, prop = prop, show = show)) - return listing.List(self, 'usercontribs', 'uc', limit = limit, **kwargs) - - def users(self, users, prop = 'blockinfo|groups|editcount'): - self.require(1, 12) - - return listing.List(self, 'users', 'us', ususers = '|'.join(users), usprop = prop) - - def watchlist(self, allrev = False, start = None, end = None, namespace = None, dir = 'older', - prop = None, show = None, limit = None): - self.require(1, 9) - - kwargs = dict(listing.List.generate_kwargs('wl', start = start, end = end, - namespace = namespace, dir = dir, prop = prop, show = show)) - if allrev: kwargs['wlallrev'] = '1' - return listing.List(self, 'watchlist', 'wl', limit = limit, **kwargs) - - def expandtemplates(self, text, title = None, generatexml = False): - """Takes wikitext (text) and expands templates.""" - self.require(1, 11) - - kwargs = {} - if title is None: kwargs['title'] = title - if generatexml: kwargs['generatexml'] = '1' - - result = self.api('expandtemplates', text = text, **kwargs) - - if generatexml: - return result['expandtemplates']['*'], result['parsetree']['*'] - else: - return result['expandtemplates']['*'] - + api_limit = 500 + + def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30, + max_retries=25, wait_callback=lambda *x: None, clients_useragent=None, + max_lag=3, compress=True, force_login=True, do_init=True): + # Setup member variables + self.host = host + self.path = path + self.ext = ext + self.credentials = None + self.compress = compress + + self.retry_timeout = retry_timeout + self.max_retries = max_retries + self.wait_callback = wait_callback + self.max_lag = str(max_lag) + self.force_login = force_login + + # The token string => token object mapping + self.wait_tokens = weakref.WeakKeyDictionary() + + # Site properties + self.blocked = False # 
Whether current user is blocked + self.hasmsg = False # Whether current user has new messages + self.groups = [] # Groups current user belongs to + self.rights = [] # Rights current user has + self.tokens = {} # Edit tokens of the current user + self.version = None + + self.namespaces = self.default_namespaces + self.writeapi = False + + # Setup connection + if pool is None: + self.connection = http.HTTPPool(clients_useragent) + else: + self.connection = pool + + # Page generators + self.pages = listing.PageList(self) + self.categories = listing.PageList(self, namespace=14) + self.images = listing.PageList(self, namespace=6) + + # Compat page generators + self.Pages = self.pages + self.Categories = self.categories + self.Images = self.images + + # Initialization status + self.initialized = False + + if do_init: + try: + self.site_init() + except errors.APIError, e: + # Private wiki, do init after login + if e[0] not in (u'unknown_action', u'readapidenied'): + raise + + def site_init(self): + meta = self.api('query', meta='siteinfo|userinfo', + siprop='general|namespaces', uiprop='groups|rights') + + # Extract site info + self.site = meta['query']['general'] + self.namespaces = dict(((i['id'], i.get('*', '')) for i in meta['query']['namespaces'].itervalues())) + self.writeapi = 'writeapi' in self.site + + # Determine version + if self.site['generator'].startswith('MediaWiki '): + version = self.site['generator'][10:].split('.') + + def split_num(s): + i = 0 + while i < len(s): + if s[i] < '0' or s[i] > '9': + break + i += 1 + if s[i:]: + return (int(s[:i]), s[i:], ) + else: + return (int(s[:i]), ) + self.version = sum((split_num(s) for s in version), ()) + + if len(self.version) < 2: + raise errors.MediaWikiVersionError('Unknown MediaWiki %s' % '.'.join(version)) + else: + raise errors.MediaWikiVersionError('Unknown generator %s' % self.site['generator']) + # Require 1.11 until some compatibility issues are fixed + self.require(1, 11) + + # User info + userinfo = 
compatibility.userinfo(meta, self.require(1, 12, raise_error=False)) + self.username = userinfo['name'] + self.groups = userinfo.get('groups', []) + self.rights = userinfo.get('rights', []) + self.initialized = True + + default_namespaces = {0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project', 5: u'Project talk', + 6: u'Image', 7: u'Image talk', 8: u'MediaWiki', 9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', + 12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk', -1: u'Special', -2: u'Media'} + + def __repr__(self): + return "<Site object '%s%s'>" % (self.host, self.path) + + def api(self, action, *args, **kwargs): + """ An API call. Handles errors and returns dict object. """ + kwargs.update(args) + if action == 'query': + if 'meta' in kwargs: + kwargs['meta'] += '|userinfo' + else: + kwargs['meta'] = 'userinfo' + if 'uiprop' in kwargs: + kwargs['uiprop'] += '|blockinfo|hasmsg' + else: + kwargs['uiprop'] = 'blockinfo|hasmsg' + + token = self.wait_token() + while True: + info = self.raw_api(action, **kwargs) + if not info: + info = {} + res = self.handle_api_result(info, token=token) + if res: + return info + + def handle_api_result(self, info, kwargs=None, token=None): + if token is None: + token = self.wait_token() + + try: + userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error=None)) + except KeyError: + userinfo = () + if 'blockedby' in userinfo: + self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u'')) + else: + self.blocked = False + self.hasmsg = 'message' in userinfo + self.logged_in = 'anon' not in userinfo + if 'error' in info: + if info['error']['code'] in (u'internal_api_error_DBConnectionError', ): + self.wait(token) + return False + if '*' in info['error']: + raise errors.APIError(info['error']['code'], + info['error']['info'], info['error']['*']) + raise errors.APIError(info['error']['code'], + info['error']['info'], kwargs) + return True + + @staticmethod + def 
_to_str(data): + if type(data) is unicode: + return data.encode('utf-8') + return str(data) + + @staticmethod + def _query_string(*args, **kwargs): + kwargs.update(args) + qs = urllib.urlencode([(k, Site._to_str(v)) for k, v in kwargs.iteritems() + if k != 'wpEditToken']) + if 'wpEditToken' in kwargs: + qs += '&wpEditToken=' + urllib.quote(Site._to_str(kwargs['wpEditToken'])) + return qs + + def raw_call(self, script, data): + url = self.path + script + self.ext + headers = {} + if not issubclass(data.__class__, upload.Upload): + headers['Content-Type'] = 'application/x-www-form-urlencoded' + if self.compress and gzip: + headers['Accept-Encoding'] = 'gzip' + + token = self.wait_token((script, data)) + while True: + try: + stream = self.connection.post(self.host, + url, data=data, headers=headers) + if stream.getheader('Content-Encoding') == 'gzip': + # BAD. + seekable_stream = StringIO(stream.read()) + stream = gzip.GzipFile(fileobj=seekable_stream) + return stream + + except errors.HTTPStatusError, e: + if e[0] == 503 and e[1].getheader('X-Database-Lag'): + self.wait(token, int(e[1].getheader('Retry-After'))) + elif e[0] < 500 or e[0] > 599: + raise + else: + self.wait(token) + except errors.HTTPRedirectError: + raise + except errors.HTTPError: + self.wait(token) + except ValueError: + self.wait(token) + + def raw_api(self, action, *args, **kwargs): + """Sends a call to the API.""" + kwargs['action'] = action + kwargs['format'] = 'json' + data = self._query_string(*args, **kwargs) + json_data = self.raw_call('api', data).read() + try: + return json.loads(json_data) + except ValueError: + if json_data.startswith('MediaWiki API is not enabled for this site.'): + raise errors.APIDisabledError + raise + + def raw_index(self, action, *args, **kwargs): + """Sends a call to index.php rather than the API.""" + kwargs['action'] = action + kwargs['maxlag'] = self.max_lag + data = self._query_string(*args, **kwargs) + return self.raw_call('index', 
data).read().decode('utf-8', 'ignore') + + def wait_token(self, args=None): + token = WaitToken() + self.wait_tokens[token] = (0, args) + return token + + def wait(self, token, min_wait=0): + retry, args = self.wait_tokens[token] + self.wait_tokens[token] = (retry + 1, args) + if retry > self.max_retries and self.max_retries != -1: + raise errors.MaximumRetriesExceeded(self, token, args) + self.wait_callback(self, token, retry, args) + + timeout = self.retry_timeout * retry + if timeout < min_wait: + timeout = min_wait + time.sleep(timeout) + return self.wait_tokens[token] + + def require(self, major, minor, revision=None, raise_error=True): + if self.version is None: + if raise_error is None: + return + raise RuntimeError('Site %s has not yet been initialized' % repr(self)) + + if revision is None: + if self.version[:2] >= (major, minor): + return True + elif raise_error: + raise errors.MediaWikiVersionError('Requires version %s.%s, current version is %s.%s' + % ((major, minor) + self.version[:2])) + else: + return False + else: + raise NotImplementedError + + # Actions + def email(self, user, text, subject, cc=False): + """Sends email to a specified user on the wiki.""" + # TODO: Use api! 
+ postdata = {} + postdata['wpSubject'] = subject + postdata['wpText'] = text + if cc: + postdata['wpCCMe'] = '1' + postdata['wpEditToken'] = self.tokens['edit'] + postdata['uselang'] = 'en' + postdata['title'] = u'Special:Emailuser/' + user + + data = self.raw_index('submit', **postdata) + if 'var wgAction = "success";' not in data: + if 'This user has not specified a valid e-mail address' in data: + # Dirty hack + raise errors.NoSpecifiedEmailError, user + raise errors.EmailError, data + + def login(self, username=None, password=None, cookies=None, domain=None): + """Login to the wiki.""" + if self.initialized: + self.require(1, 10) + + if username and password: + self.credentials = (username, password, domain) + if cookies: + if self.host not in self.conn.cookies: + self.conn.cookies[self.host] = http.CookieJar() + self.conn.cookies[self.host].update(cookies) + + if self.credentials: + wait_token = self.wait_token() + kwargs = { + 'lgname': self.credentials[0], + 'lgpassword': self.credentials[1] + } + if self.credentials[2]: + kwargs['lgdomain'] = self.credentials[2] + while True: + login = self.api('login', **kwargs) + if login['login']['result'] == 'Success': + break + elif login['login']['result'] == 'NeedToken': + kwargs['lgtoken'] = login['login']['token'] + elif login['login']['result'] == 'Throttled': + self.wait(wait_token, login['login'].get('wait', 5)) + else: + raise errors.LoginError(self, login['login']) + + if self.initialized: + info = self.api('query', meta='userinfo', uiprop='groups|rights') + userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error=False)) + self.username = userinfo['name'] + self.groups = userinfo.get('groups', []) + self.rights = userinfo.get('rights', []) + self.tokens = {} + else: + self.site_init() + + def upload(self, file=None, filename=None, description='', ignore=False, file_size=None, + url=None, session_key=None, comment=None): + """Upload a file to the wiki.""" + if self.version[:2] < (1, 16): + 
return compatibility.old_upload(self, file=file, filename=filename, + description=description, ignore=ignore, + file_size=file_size) + + image = self.Images[filename] + if not image.can('upload'): + raise errors.InsufficientPermission(filename) + + predata = {} + + if comment is None: + predata['comment'] = description + else: + predata['comment'] = comment + predata['text'] = description + + if ignore: + predata['ignorewarnings'] = 'true' + predata['token'] = image.get_token('edit') + predata['action'] = 'upload' + predata['format'] = 'json' + predata['filename'] = filename + if url: + predata['url'] = url + if session_key: + predata['session_key'] = session_key + + if file is None: + postdata = self._query_string(predata) + else: + if type(file) is str: + file_size = len(file) + file = StringIO(file) + if file_size is None: + file.seek(0, 2) + file_size = file.tell() + file.seek(0, 0) + + postdata = upload.UploadFile('file', filename, file_size, file, predata) + + wait_token = self.wait_token() + while True: + try: + data = self.raw_call('api', postdata).read() + info = json.loads(data) + if not info: + info = {} + if self.handle_api_result(info, kwargs=predata): + return info.get('upload', {}) + except errors.HTTPStatusError, e: + if e[0] == 503 and e[1].getheader('X-Database-Lag'): + self.wait(wait_token, int(e[1].getheader('Retry-After'))) + elif e[0] < 500 or e[0] > 599: + raise + else: + self.wait(wait_token) + except errors.HTTPError: + self.wait(wait_token) + file.seek(0, 0) + + def parse(self, text=None, title=None, page=None): + kwargs = {} + if text is not None: + kwargs['text'] = text + if title is not None: + kwargs['title'] = title + if page is not None: + kwargs['page'] = page + result = self.api('parse', **kwargs) + return result['parse'] + + # def block: requires 1.12 + # def unblock: requires 1.12 + # def patrol: requires 1.14 + # def import: requires 1.15 + + # Lists + def allpages(self, start=None, prefix=None, namespace='0', filterredir='all', 
def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None,
           prop='id|user|by|timestamp|expiry|reason|flags'):
    """Retrieve blocks as a generator.

    Each block is a dictionary containing:
    - user: the username or IP address of the user
    - id: the ID of the block
    - timestamp: when the block was added
    - expiry: when the block runs out (infinity for indefinite blocks)
    - reason: the reason they are blocked
    - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page
    - by: the administrator who blocked the user
    - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled.

    BUG FIX: the `ids` parameter was accepted but never forwarded to
    the API; it is now passed through as bkids so specific block IDs
    can be queried.
    """
    self.require(1, 12)

    kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end, dir=dir,
                                               ids=ids, users=users, prop=prop))
    return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)
+ + """ + self.require(1, 11) + + kwargs = dict(listing.List.generate_kwargs('eu', query=query, prop=prop, + protocol=protocol, namespace=namespace)) + return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs) + + def logevents(self, type=None, prop=None, start=None, end=None, + dir='older', user=None, title=None, limit=None, action=None): + self.require(1, 10) + + kwargs = dict(listing.List.generate_kwargs('le', prop=prop, type=type, start=start, + end=end, dir=dir, user=user, title=title, action=action)) + return listing.List(self, 'logevents', 'le', limit=limit, **kwargs) + + # def protectedtitles requires 1.15 + def random(self, namespace, limit=20): + """Retrieves a generator of random page from a particular namespace. + + limit specifies the number of random articles retrieved. + namespace is a namespace identifier integer. + + Generator contains dictionary with namespace, page ID and title. + + """ + self.require(1, 12) + + kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace)) + return listing.List(self, 'random', 'rn', limit=limit, **kwargs) + + def recentchanges(self, start=None, end=None, dir='older', namespace=None, + prop=None, show=None, limit=None, type=None): + self.require(1, 9) + + kwargs = dict(listing.List.generate_kwargs('rc', start=start, end=end, dir=dir, + namespace=namespace, prop=prop, show=show, type=type)) + return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs) + + def search(self, search, namespace='0', what='title', redirects=False, limit=None): + self.require(1, 11) + + kwargs = dict(listing.List.generate_kwargs('sr', search=search, namespace=namespace, what=what)) + if redirects: + kwargs['srredirects'] = '1' + return listing.List(self, 'search', 'sr', limit=limit, **kwargs) + + def usercontributions(self, user, start=None, end=None, dir='older', namespace=None, + prop=None, show=None, limit=None): + self.require(1, 9) + + kwargs = dict(listing.List.generate_kwargs('uc', user=user, 
start=start, end=end, + dir=dir, namespace=namespace, prop=prop, show=show)) + return listing.List(self, 'usercontribs', 'uc', limit=limit, **kwargs) + + def users(self, users, prop='blockinfo|groups|editcount'): + self.require(1, 12) + + return listing.List(self, 'users', 'us', ususers='|'.join(users), usprop=prop) + + def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older', + prop=None, show=None, limit=None): + self.require(1, 9) + + kwargs = dict(listing.List.generate_kwargs('wl', start=start, end=end, + namespace=namespace, dir=dir, prop=prop, show=show)) + if allrev: + kwargs['wlallrev'] = '1' + return listing.List(self, 'watchlist', 'wl', limit=limit, **kwargs) + + def expandtemplates(self, text, title=None, generatexml=False): + """Takes wikitext (text) and expands templates.""" + self.require(1, 11) + + kwargs = {} + if title is None: + kwargs['title'] = title + if generatexml: + kwargs['generatexml'] = '1' + + result = self.api('expandtemplates', text=text, **kwargs) + + if generatexml: + return result['expandtemplates']['*'], result['parsetree']['*'] + else: + return result['expandtemplates']['*'] diff --git a/mwclient/compatibility.py b/mwclient/compatibility.py index cbcb462b7f324296504523745d7144feb6d22a15..273a16fb8a50633fff508f4e37ab0841788176ff 100644 --- a/mwclient/compatibility.py +++ b/mwclient/compatibility.py @@ -1,91 +1,98 @@ -import upload, errors +import upload +import errors + def title(prefix, new_format): - if new_format: - return prefix + 'title' - else: - return 'titles' - -def userinfo(data, new_format = None): - if new_format is None: - # Unknown version; trying to guess - if 'userinfo' in data: - return data['userinfo'] - elif 'userinfo' in data.get('query', ()): - return data['query']['userinfo'] - else: - return {} - elif new_format: - return data['query']['userinfo'] - else: - return data['userinfo'] + if new_format: + return prefix + 'title' + else: + return 'titles' + + +def userinfo(data, 
new_format=None): + if new_format is None: + # Unknown version; trying to guess + if 'userinfo' in data: + return data['userinfo'] + elif 'userinfo' in data.get('query', ()): + return data['query']['userinfo'] + else: + return {} + elif new_format: + return data['query']['userinfo'] + else: + return data['userinfo'] + def iiprop(version): - if version[:2] >= (1, 13): - return 'timestamp|user|comment|url|size|sha1|metadata|archivename' - if version[:2] >= (1, 12): - return 'timestamp|user|comment|url|size|sha1|metadata' - else: - return 'timestamp|user|comment|url|size|sha1' - -def cmtitle(page, new_format, prefix = ''): - if new_format: - return prefix + 'title', page.name - else: - return prefix + 'category', page.strip_namespace(page.name) - + if version[:2] >= (1, 13): + return 'timestamp|user|comment|url|size|sha1|metadata|archivename' + if version[:2] >= (1, 12): + return 'timestamp|user|comment|url|size|sha1|metadata' + else: + return 'timestamp|user|comment|url|size|sha1' + + +def cmtitle(page, new_format, prefix=''): + if new_format: + return prefix + 'title', page.name + else: + return prefix + 'category', page.strip_namespace(page.name) + + def protectright(version): - if version[:2] >= (1, 13): - return 'editprotected' - else: - return 'protect' + if version[:2] >= (1, 13): + return 'editprotected' + else: + return 'protect' from cStringIO import StringIO -def old_upload(self, file, filename, description, license = '', ignore = False, file_size = None): - image = self.Images[filename] - if not image.can('upload'): - raise errors.InsufficientPermission(filename) - if image.exists and not ignore: - raise errors.FileExists(filename) - - if type(file) is str: - file_size = len(file) - file = StringIO(file) - if file_size is None: - file.seek(0, 2) - file_size = file.tell() - file.seek(0, 0) - - predata = {} - # Do this thing later so that an incomplete upload won't work - # predata['wpDestFile'] = filename - predata['wpUploadDescription'] = description - 
predata['wpLicense'] = license - if ignore: predata['wpIgnoreWarning'] = 'true' - predata['wpUpload'] = 'Upload file' - predata['wpSourceType'] = 'file' - predata['wpDestFile'] = filename - predata['wpEditToken'] = image.get_token('edit') - - postdata = upload.UploadFile('wpUploadFile', filename, file_size, file, predata) - - wait_token = self.wait_token() - while True: - try: - self.connection.post(self.host, - self.path + 'index.php?title=Special:Upload&maxlag=' - + self.max_lag, data = postdata).read() - except errors.HTTPStatusError, e: - if e[0] == 503 and e[1].getheader('X-Database-Lag'): - self.wait(wait_token, int(e[1].getheader('Retry-After'))) - elif e[0] < 500 or e[0] > 599: - raise - else: - self.wait(wait_token) - except errors.HTTPError: - self.wait(wait_token) - else: - return - file.seek(0, 0) - - \ No newline at end of file + + +def old_upload(self, file, filename, description, license='', ignore=False, file_size=None): + image = self.Images[filename] + if not image.can('upload'): + raise errors.InsufficientPermission(filename) + if image.exists and not ignore: + raise errors.FileExists(filename) + + if type(file) is str: + file_size = len(file) + file = StringIO(file) + if file_size is None: + file.seek(0, 2) + file_size = file.tell() + file.seek(0, 0) + + predata = {} + # Do this thing later so that an incomplete upload won't work + # predata['wpDestFile'] = filename + predata['wpUploadDescription'] = description + predata['wpLicense'] = license + if ignore: + predata['wpIgnoreWarning'] = 'true' + predata['wpUpload'] = 'Upload file' + predata['wpSourceType'] = 'file' + predata['wpDestFile'] = filename + predata['wpEditToken'] = image.get_token('edit') + + postdata = upload.UploadFile('wpUploadFile', filename, file_size, file, predata) + + wait_token = self.wait_token() + while True: + try: + self.connection.post(self.host, + self.path + 'index.php?title=Special:Upload&maxlag=' + + self.max_lag, data=postdata).read() + except 
errors.HTTPStatusError, e: + if e[0] == 503 and e[1].getheader('X-Database-Lag'): + self.wait(wait_token, int(e[1].getheader('Retry-After'))) + elif e[0] < 500 or e[0] > 599: + raise + else: + self.wait(wait_token) + except errors.HTTPError: + self.wait(wait_token) + else: + return + file.seek(0, 0) diff --git a/mwclient/errors.py b/mwclient/errors.py index 2a1dbd4a42dcc2da6967cf133cc3488447edbd3b..4075013acbb0b13fd0b685cd32c007b0f1c0d44e 100644 --- a/mwclient/errors.py +++ b/mwclient/errors.py @@ -1,46 +1,66 @@ class MwClientError(RuntimeError): - pass + pass + class MediaWikiVersionError(MwClientError): - pass + pass + class APIDisabledError(MwClientError): - pass + pass + class HTTPError(MwClientError): - pass + pass + + class HTTPStatusError(MwClientError): - pass + pass + + class HTTPRedirectError(HTTPError): - pass + pass + class MaximumRetriesExceeded(MwClientError): - pass - + pass + + class APIError(MwClientError): - def __init__(self, code, info, kwargs): - self.code = code - self.info = info - MwClientError.__init__(self, code, info, kwargs) - + + def __init__(self, code, info, kwargs): + self.code = code + self.info = info + MwClientError.__init__(self, code, info, kwargs) + + class InsufficientPermission(MwClientError): - pass + pass + + class UserBlocked(InsufficientPermission): - pass + pass + class EditError(MwClientError): - pass + pass + + class ProtectedPageError(EditError, InsufficientPermission): - pass + pass + + class FileExists(EditError): - pass - + pass + class LoginError(MwClientError): - pass + pass + class EmailError(MwClientError): - pass + pass + + class NoSpecifiedEmail(EmailError): - pass - + pass diff --git a/mwclient/ex.py b/mwclient/ex.py index afb77d950a9562b5b6b7ed93225162d5bcd2026a..d0ec8dae097a03a7c5f140cdcadb3c31b09bd049 100644 --- a/mwclient/ex.py +++ b/mwclient/ex.py @@ -1,76 +1,84 @@ -import client, http +import client +import http + def read_config(config_files, **predata): - cfg = {} - for config_file in config_files: - 
cfg.update(_read_config_file( - config_file, predata)) - return cfg - + cfg = {} + for config_file in config_files: + cfg.update(_read_config_file( + config_file, predata)) + return cfg + + def _read_config_file(_config_file, predata): - _file = open(_config_file) - exec _file in globals(), predata - _file.close() - - for _k, _v in predata.iteritems(): - if not _k.startswith('_'): - yield _k, _v - for _k, _v in locals().iteritems(): - if not _k.startswith('_'): - yield _k, _v + _file = open(_config_file) + exec _file in globals(), predata + _file.close() + + for _k, _v in predata.iteritems(): + if not _k.startswith('_'): + yield _k, _v + for _k, _v in locals().iteritems(): + if not _k.startswith('_'): + yield _k, _v + class SiteList(object): - def __init__(self): - self.sites = {} - def __getitem__(self, key): - if key not in self.sites: - self.sites[key] = {} - return self.sites[key] - def __iter__(self): - return self.sites.itervalues() + + def __init__(self): + self.sites = {} + + def __getitem__(self, key): + if key not in self.sites: + self.sites[key] = {} + return self.sites[key] + + def __iter__(self): + return self.sites.itervalues() + class ConfiguredSite(client.Site): - def __init__(self, *config_files, **kwargs): - self.config = read_config(config_files, sites = SiteList()) - - if 'name' in kwargs: - self.config.update(self.config['sites'][kwargs['name']]) - - do_login = 'username' in self.config and 'password' in self.config - - client.Site.__init__(self, host = self.config['host'], - path = self.config['path'], ext = self.config.get('ext', '.php'), - do_init = not do_login, - retry_timeout = self.config.get('retry_timeout', 30), - max_retries = self.config.get('max_retries', -1)) - - - if do_login: - self.login(self.config['username'], - self.config['password']) - + + def __init__(self, *config_files, **kwargs): + self.config = read_config(config_files, sites=SiteList()) + + if 'name' in kwargs: + 
self.config.update(self.config['sites'][kwargs['name']]) + + do_login = 'username' in self.config and 'password' in self.config + + client.Site.__init__(self, host=self.config['host'], + path=self.config['path'], ext=self.config.get('ext', '.php'), + do_init=not do_login, + retry_timeout=self.config.get('retry_timeout', 30), + max_retries=self.config.get('max_retries', -1)) + + if do_login: + self.login(self.config['username'], + self.config['password']) + + class ConfiguredPool(list): - def __init__(self, *config_files): - self.config = read_config(config_files, sites = SiteList()) - self.pool = http.HTTPPool() - - config = dict([(k, v) for k, v in self.config.iteritems() - if k != 'sites']) - - for site in self.config['sites']: - cfg = config.copy() - cfg.update(site) - site.update(cfg) - - do_login = 'username' in site and 'password' in site - - self.append(client.Site(host = site['host'], - path = site['path'], ext = site.get('ext', '.php'), - pool = self.pool, do_init = not do_login, - retry_timeout = site.get('retry_timeout', 30), - max_retries = site.get('max_retries', -1))) - if do_login: - self[-1].login(site['username'], site['password']) - self[-1].config = site - + def __init__(self, *config_files): + self.config = read_config(config_files, sites=SiteList()) + self.pool = http.HTTPPool() + + config = dict([(k, v) for k, v in self.config.iteritems() + if k != 'sites']) + + for site in self.config['sites']: + cfg = config.copy() + cfg.update(site) + site.update(cfg) + + do_login = 'username' in site and 'password' in site + + self.append(client.Site(host=site['host'], + path=site['path'], ext=site.get('ext', '.php'), + pool=self.pool, do_init=not do_login, + retry_timeout=site.get('retry_timeout', 30), + max_retries=site.get('max_retries', -1))) + if do_login: + self[-1].login(site['username'], site['password']) + self[-1].config = site diff --git a/mwclient/http.py b/mwclient/http.py index 
class CookieJar(dict):
    """A minimal cookie store mapping cookie names to values."""

    def __init__(self):
        dict.__init__(self, ())

    def extract_cookies(self, response):
        """Store every Set-Cookie header found on an httplib response."""
        for cookie in response.msg.getallmatchingheaders('Set-Cookie'):
            self.parse_cookie(cookie.strip())
        if response.getheader('set-cookie2', None):
            # BUG FIX: the old code was
            #     raise RuntimeError, 'Set-Cookie2', value
            # - Python-2-only three-argument raise syntax AND a
            # reference to an undefined name ('value'), so it actually
            # crashed with a NameError instead of the intended error.
            raise RuntimeError('Set-Cookie2',
                               response.getheader('set-cookie2'))

    def parse_cookie(self, cookie):
        """Parse one 'Set-Cookie: name=value; attrs' header line."""
        if not cookie:
            return
        # Take the part after 'Set-Cookie: ' and before the first ';'.
        # BUG FIX: the old tuple-unpacking split required a ';' to be
        # present and raised ValueError on a bare 'Set-Cookie: k=v'.
        value = cookie.split(': ', 1)[1].split(';', 1)[0]
        i = value.strip().split('=')
        if len(i) == 1 and i[0] in self:
            # A bare cookie name clears a previously stored cookie.
            del self[i[0]]
        else:
            # NOTE(review): values containing '=' are truncated at the
            # first '=' (original behavior, kept) - confirm acceptable.
            self[i[0]] = i[1]

    def get_cookie_header(self):
        """Render the stored cookies as a single Cookie: header value."""
        return '; '.join(('%s=%s' % i for i in self.iteritems()))

    def __iter__(self):
        for k, v in self.iteritems():
            yield Cookie(k, v)
- if (time.time() - self.last_request) > 60: - self._conn.close() - self._conn.connect() - - _headers = headers - headers = {} - - headers['Connection'] = 'Keep-Alive' - headers['User-Agent'] = self.useragent - headers['Host'] = host - if host in self.cookies: - headers['Cookie'] = self.cookies[host].get_cookie_header() - if issubclass(data.__class__, upload.Upload): - headers['Content-Type'] = data.content_type - headers['Content-Length'] = str(data.length) - elif data: - headers['Content-Length'] = str(len(data)) - - if _headers: headers.update(_headers) - - try: - self._conn.request(method, path, headers = headers) - if issubclass(data.__class__, upload.Upload): - for s in data: - self._conn.send(s) - elif data: - self._conn.send(data) - - self.last_request = time.time() - try: - res = self._conn.getresponse() - except httplib.BadStatusLine: - self._conn.close() - self._conn.connect() - self._conn.request(method, path, data, headers) - res = self._conn.getresponse() - except socket.error, e: - self._conn.close() - raise errors.HTTPError, e - #except Exception, e: - # raise errors.HTTPError, e - - if not host in self.cookies: self.cookies[host] = CookieJar() - self.cookies[host].extract_cookies(res) - - if res.status >= 300 and res.status <= 399 and auto_redirect: - res.read() - - location = urlparse.urlparse(res.getheader('Location')) - if res.status in (302, 303): - if 'Content-Type' in headers: - del headers['Content-Type'] - if 'Content-Length' in headers: - del headers['Content-Length'] - method = 'GET' - data = '' - old_path = path - path = location[2] - if location[4]: path = path + '?' 
+ location[4] - - if location[0].lower() != self.scheme_name: - raise errors.HTTPRedirectError, ('Only HTTP connections are supported', - res.getheader('Location')) - - if self.pool is None: - if location[1] != host: - raise errors.HTTPRedirectError, ('Redirecting to different hosts not supported', - res.getheader('Location')) - - return self.request(method, host, path, headers, data) - else: - if host == location[1] and path == old_path: - conn = self.__class__(location[1], self.pool) - self.pool.append(([location[1]], conn)) - return self.pool.request(method, location[1], path, - headers, data, raise_on_not_ok, auto_redirect) - - if res.status != 200 and raise_on_not_ok: - try: - raise errors.HTTPStatusError, (res.status, res) - finally: - res.close() - - return res - - def get(self, host, path, headers = None): - return self.request('GET', host, path, headers, None) - def post(self, host, path, headers = None, data = None): - return self.request('POST', host, path, headers, data) - def head(self, host, path, headers = None, auto_redirect = False): - res = self.request('HEAD', host, path, headers, - data = None, raise_on_not_ok = False, - auto_redirect = auto_redirect) - res.read() - return res.status, res.getheaders() - - def close(self): - self._conn.close() - def fileno(self): - return self._conn.sock.fileno() + http_class = httplib.HTTPConnection + scheme_name = 'http' + useragent = None + + def __init__(self, host, pool=None, clients_useragent=None): + self._conn = self.http_class(host) + self._conn.connect() + self.last_request = time.time() + self.cookies = {} + + self.pool = pool + if pool: + self.cookies = pool.cookies + + clients_useragent = clients_useragent or "" + if clients_useragent != "": + clients_useragent += " " + self.useragent = clients_useragent + 'MwClient/' + __ver__ + + def request(self, method, host, path, headers, data, + raise_on_not_ok=True, auto_redirect=True): + + # Strip scheme + if type(host) is tuple: + host = host[1] + + # Dirty 
hack... + if (time.time() - self.last_request) > 60: + self._conn.close() + self._conn.connect() + + _headers = headers + headers = {} + + headers['Connection'] = 'Keep-Alive' + headers['User-Agent'] = self.useragent + headers['Host'] = host + if host in self.cookies: + headers['Cookie'] = self.cookies[host].get_cookie_header() + if issubclass(data.__class__, upload.Upload): + headers['Content-Type'] = data.content_type + headers['Content-Length'] = str(data.length) + elif data: + headers['Content-Length'] = str(len(data)) + + if _headers: + headers.update(_headers) + + try: + self._conn.request(method, path, headers=headers) + if issubclass(data.__class__, upload.Upload): + for s in data: + self._conn.send(s) + elif data: + self._conn.send(data) + + self.last_request = time.time() + try: + res = self._conn.getresponse() + except httplib.BadStatusLine: + self._conn.close() + self._conn.connect() + self._conn.request(method, path, data, headers) + res = self._conn.getresponse() + except socket.error, e: + self._conn.close() + raise errors.HTTPError, e + # except Exception, e: + # raise errors.HTTPError, e + + if not host in self.cookies: + self.cookies[host] = CookieJar() + self.cookies[host].extract_cookies(res) + + if res.status >= 300 and res.status <= 399 and auto_redirect: + res.read() + + location = urlparse.urlparse(res.getheader('Location')) + if res.status in (302, 303): + if 'Content-Type' in headers: + del headers['Content-Type'] + if 'Content-Length' in headers: + del headers['Content-Length'] + method = 'GET' + data = '' + old_path = path + path = location[2] + if location[4]: + path = path + '?' 
+ location[4] + + if location[0].lower() != self.scheme_name: + raise errors.HTTPRedirectError, ('Only HTTP connections are supported', + res.getheader('Location')) + + if self.pool is None: + if location[1] != host: + raise errors.HTTPRedirectError, ('Redirecting to different hosts not supported', + res.getheader('Location')) + + return self.request(method, host, path, headers, data) + else: + if host == location[1] and path == old_path: + conn = self.__class__(location[1], self.pool) + self.pool.append(([location[1]], conn)) + return self.pool.request(method, location[1], path, + headers, data, raise_on_not_ok, auto_redirect) + + if res.status != 200 and raise_on_not_ok: + try: + raise errors.HTTPStatusError, (res.status, res) + finally: + res.close() + + return res + + def get(self, host, path, headers=None): + return self.request('GET', host, path, headers, None) + + def post(self, host, path, headers=None, data=None): + return self.request('POST', host, path, headers, data) + + def head(self, host, path, headers=None, auto_redirect=False): + res = self.request('HEAD', host, path, headers, + data=None, raise_on_not_ok=False, + auto_redirect=auto_redirect) + res.read() + return res.status, res.getheaders() + + def close(self): + self._conn.close() + + def fileno(self): + return self._conn.sock.fileno() + class HTTPConnection(HTTPPersistentConnection): - def request(self, method, host, path, headers, data, - raise_on_not_ok = True, auto_redirect = True): - if not headers: headers = {} - headers['Connection'] = 'Close' - res = HTTPPersistentConnection.request(self, method, host, path, headers, data, - raise_on_not_ok, auto_redirect) - return res + + def request(self, method, host, path, headers, data, + raise_on_not_ok=True, auto_redirect=True): + if not headers: + headers = {} + headers['Connection'] = 'Close' + res = HTTPPersistentConnection.request(self, method, host, path, headers, data, + raise_on_not_ok, auto_redirect) + return res + class 
class HTTPPool(list):
    """A pool of persistent HTTP(S) connections keyed by (scheme, host).

    The pool is a list of ([(scheme, host), ...], connection) pairs;
    all pooled connections share one cookie store.
    """

    def __init__(self, clients_useragent=None):
        list.__init__(self)
        self.cookies = {}
        self.clients_useragent = clients_useragent

    def find_connection(self, host, scheme='http'):
        """Return a pooled connection for host, creating one on demand."""
        if type(host) is tuple:
            scheme, host = host

        # Exact match against hosts already served by a connection.
        for hosts, conn in self:
            if (scheme, host) in hosts:
                return conn

        redirected_host = None
        # Probe existing connections: a host that answers 200, or
        # redirects back to itself, can be served by that connection.
        for hosts, conn in self:
            status, headers = conn.head(host, '/')
            if status == 200:
                hosts.append((scheme, host))
                return conn
            if 300 <= status <= 399:
                # BROKEN!  (kept from the original)
                headers = dict(headers)
                location = urlparse.urlparse(headers.get('location', ''))
                if (location[0], location[1]) == (scheme, host):
                    hosts.append((scheme, host))
                    return conn

        if scheme == 'http':
            cls = HTTPPersistentConnection
        elif scheme == 'https':
            cls = HTTPSPersistentConnection
        else:
            raise RuntimeError('Unsupported scheme', scheme)
        conn = cls(host, self, self.clients_useragent)
        self.append(([(scheme, host)], conn))
        return conn

    def get(self, host, path, headers=None):
        return self.find_connection(host).get(host, path, headers)

    def post(self, host, path, headers=None, data=None):
        return self.find_connection(host).post(host, path, headers, data)

    def head(self, host, path, headers=None, auto_redirect=False):
        return self.find_connection(host).head(host, path, headers,
                                               auto_redirect)

    def request(self, method, host, path, headers, data,
                raise_on_not_ok, auto_redirect):
        return self.find_connection(host).request(method, host, path,
                                                  headers, data,
                                                  raise_on_not_ok,
                                                  auto_redirect)

    def close(self):
        for hosts, conn in self:
            conn.close()
self.prefix = prefix - - kwargs.update(args) - self.args = kwargs - - if limit is None: limit = site.api_limit - self.args[self.prefix + 'limit'] = str(limit) - - self.count = 0 - self.max_items = max_items - - self._iter = iter(xrange(0)) - - self.last = False - self.result_member = list_name - self.return_values = return_values - - def __iter__(self): - return self - - def next(self, full = False): - if self.max_items is not None: - if self.count >= self.max_items: - raise StopIteration - try: - item = self._iter.next() - self.count += 1 - if 'timestamp' in item: - item['timestamp'] = client.parse_timestamp(item['timestamp']) - if full: return item - - if type(self.return_values) is tuple: - return tuple((item[i] for i in self.return_values)) - elif self.return_values is None: - return item - else: - return item[self.return_values] - - except StopIteration: - if self.last: raise StopIteration - self.load_chunk() - return List.next(self, full = full) - - def load_chunk(self): - data = self.site.api('query', (self.generator, self.list_name), *[(str(k), v) for k, v in self.args.iteritems()]) - if not data: - # Non existent page - raise StopIteration - self.set_iter(data) - - if self.list_name in data.get('query-continue', ()): - self.args.update(data['query-continue'][self.list_name]) - else: - self.last = True - - def set_iter(self, data): - if self.result_member not in data['query']: - self._iter = iter(xrange(0)) - elif type(data['query'][self.result_member]) is list: - self._iter = iter(data['query'][self.result_member]) - else: - self._iter = data['query'][self.result_member].itervalues() - - - def __repr__(self): - return "<List object '%s' for %s>" % (self.list_name, self.site) - - @staticmethod - def generate_kwargs(_prefix, *args, **kwargs): - kwargs.update(args) - for key, value in kwargs.iteritems(): - if value != None: - yield _prefix + key, value - - @staticmethod - def get_prefix(prefix, generator = False): - if generator: - return 'g' + prefix - else: 
- return prefix - - @staticmethod - def get_list(generator = False): - if generator: - return GeneratorList - else: - return List + + def __init__(self, site, list_name, prefix, limit=None, return_values=None, max_items=None, *args, **kwargs): + # NOTE: Fix limit + self.site = site + self.list_name = list_name + self.generator = 'list' + self.prefix = prefix + + kwargs.update(args) + self.args = kwargs + + if limit is None: + limit = site.api_limit + self.args[self.prefix + 'limit'] = str(limit) + + self.count = 0 + self.max_items = max_items + + self._iter = iter(xrange(0)) + + self.last = False + self.result_member = list_name + self.return_values = return_values + + def __iter__(self): + return self + + def next(self, full=False): + if self.max_items is not None: + if self.count >= self.max_items: + raise StopIteration + try: + item = self._iter.next() + self.count += 1 + if 'timestamp' in item: + item['timestamp'] = client.parse_timestamp(item['timestamp']) + if full: + return item + + if type(self.return_values) is tuple: + return tuple((item[i] for i in self.return_values)) + elif self.return_values is None: + return item + else: + return item[self.return_values] + + except StopIteration: + if self.last: + raise StopIteration + self.load_chunk() + return List.next(self, full=full) + + def load_chunk(self): + data = self.site.api('query', (self.generator, self.list_name), *[(str(k), v) for k, v in self.args.iteritems()]) + if not data: + # Non existent page + raise StopIteration + self.set_iter(data) + + if self.list_name in data.get('query-continue', ()): + self.args.update(data['query-continue'][self.list_name]) + else: + self.last = True + + def set_iter(self, data): + if self.result_member not in data['query']: + self._iter = iter(xrange(0)) + elif type(data['query'][self.result_member]) is list: + self._iter = iter(data['query'][self.result_member]) + else: + self._iter = data['query'][self.result_member].itervalues() + + def __repr__(self): + return 
"<List object '%s' for %s>" % (self.list_name, self.site) + + @staticmethod + def generate_kwargs(_prefix, *args, **kwargs): + kwargs.update(args) + for key, value in kwargs.iteritems(): + if value is not None: + yield _prefix + key, value + + @staticmethod + def get_prefix(prefix, generator=False): + if generator: + return 'g' + prefix + else: + return prefix + + @staticmethod + def get_list(generator=False): + if generator: + return GeneratorList + else: + return List class GeneratorList(List): - def __init__(self, site, list_name, prefix, *args, **kwargs): - List.__init__(self, site, list_name, prefix, *args, **kwargs) - - self.args['g' + self.prefix + 'limit'] = self.args[self.prefix + 'limit'] - del self.args[self.prefix + 'limit'] - self.generator = 'generator' - - self.args['prop'] = 'info|imageinfo' - self.args['inprop'] = 'protection' - - self.result_member = 'pages' - - self.page_class = page.Page - - def next(self): - info = List.next(self, full = True) - if info['ns'] == 14: - return Category(self.site, u'', info) - if info['ns'] == 6: - return page.Image(self.site, u'', info) - return page.Page(self.site, u'', info) - - def load_chunk(self): - # Put this here so that the constructor does not fail - # on uninitialized sites - self.args['iiprop'] = compatibility.iiprop(self.site.version) - return List.load_chunk(self) - - + + def __init__(self, site, list_name, prefix, *args, **kwargs): + List.__init__(self, site, list_name, prefix, *args, **kwargs) + + self.args['g' + self.prefix + 'limit'] = self.args[self.prefix + 'limit'] + del self.args[self.prefix + 'limit'] + self.generator = 'generator' + + self.args['prop'] = 'info|imageinfo' + self.args['inprop'] = 'protection' + + self.result_member = 'pages' + + self.page_class = page.Page + + def next(self): + info = List.next(self, full=True) + if info['ns'] == 14: + return Category(self.site, u'', info) + if info['ns'] == 6: + return page.Image(self.site, u'', info) + return page.Page(self.site, u'', info) 
+ + def load_chunk(self): + # Put this here so that the constructor does not fail + # on uninitialized sites + self.args['iiprop'] = compatibility.iiprop(self.site.version) + return List.load_chunk(self) + + class Category(page.Page, GeneratorList): - def __init__(self, site, name, info = None, namespace = None): - page.Page.__init__(self, site, name, info) - kwargs = {} - kwargs.update((compatibility.cmtitle(self, self.site.require( - 1, 12, raise_error = False), prefix = 'gcm'), )) - if namespace: kwargs['gcmnamespace'] = namespace - GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs) - - def __repr__(self): - return "<Category object '%s' for %s>" % (self.name.encode('utf-8'), self.site) - - def members(self, prop = 'ids|title', namespace = None, sort = 'sortkey', - dir = 'asc', start = None, end = None, generator = True): - prefix = self.get_prefix('cm', generator) - kwargs = dict(self.generate_kwargs(prefix, prop = prop, namespace = namespace, - sort = sort, dir = dir, start = start, end = end, *(compatibility.cmtitle( - self, self.site.require(1, 12, raise_error = False)), ))) - return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs) - + + def __init__(self, site, name, info=None, namespace=None): + page.Page.__init__(self, site, name, info) + kwargs = {} + kwargs.update((compatibility.cmtitle(self, self.site.require( + 1, 12, raise_error=False), prefix='gcm'), )) + if namespace: + kwargs['gcmnamespace'] = namespace + GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs) + + def __repr__(self): + return "<Category object '%s' for %s>" % (self.name.encode('utf-8'), self.site) + + def members(self, prop='ids|title', namespace=None, sort='sortkey', + dir='asc', start=None, end=None, generator=True): + prefix = self.get_prefix('cm', generator) + kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace, + sort=sort, dir=dir, start=start, end=end, *(compatibility.cmtitle( + self, 
self.site.require(1, 12, raise_error=False)), ))) + return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs) + + class PageList(GeneratorList): - def __init__(self, site, prefix = None, start = None, namespace = 0, redirects = 'all'): - self.namespace = namespace - - kwargs = {} - if prefix: kwargs['apprefix'] = prefix - if start: kwargs['apfrom'] = start - - GeneratorList.__init__(self, site, 'allpages', 'ap', - apnamespace = str(namespace), apfilterredir = redirects, **kwargs) - - def __getitem__(self, name): - return self.get(name, None) - - def get(self, name, info = ()): - if self.namespace == 14: - return Category(self.site, self.site.namespaces[14] + ':' + name, info) - elif self.namespace == 6: - return page.Image(self.site, self.site.namespaces[6] + ':' + name, info) - elif self.namespace != 0: - return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) - else: - # Guessing page class - if type(name) is not int: - namespace = self.guess_namespace(name) - if namespace == 14: - return Category(self.site, name, info) - elif namespace == 6: - return page.Image(self.site, name, info) - return page.Page(self.site, name, info) - - def guess_namespace(self, name): - normal_name = page.Page.normalize_title(name) - for ns in self.site.namespaces: - if ns == 0: continue - if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')): - return ns - elif ns in self.site.default_namespaces: - if name.startswith(u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')): - return ns - return 0 - - + + def __init__(self, site, prefix=None, start=None, namespace=0, redirects='all'): + self.namespace = namespace + + kwargs = {} + if prefix: + kwargs['apprefix'] = prefix + if start: + kwargs['apfrom'] = start + + GeneratorList.__init__(self, site, 'allpages', 'ap', + apnamespace=str(namespace), apfilterredir=redirects, **kwargs) + + def __getitem__(self, name): + return self.get(name, None) + + def get(self, name, 
info=()): + if self.namespace == 14: + return Category(self.site, self.site.namespaces[14] + ':' + name, info) + elif self.namespace == 6: + return page.Image(self.site, self.site.namespaces[6] + ':' + name, info) + elif self.namespace != 0: + return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) + else: + # Guessing page class + if type(name) is not int: + namespace = self.guess_namespace(name) + if namespace == 14: + return Category(self.site, name, info) + elif namespace == 6: + return page.Image(self.site, name, info) + return page.Page(self.site, name, info) + + def guess_namespace(self, name): + normal_name = page.Page.normalize_title(name) + for ns in self.site.namespaces: + if ns == 0: + continue + if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')): + return ns + elif ns in self.site.default_namespaces: + if name.startswith(u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')): + return ns + return 0 + + class PageProperty(List): - def __init__(self, page, prop, prefix, *args, **kwargs): - List.__init__(self, page.site, prop, prefix, titles = page.name, *args, **kwargs) - self.page = page - self.generator = 'prop' - - def set_iter(self, data): - for page in data['query']['pages'].itervalues(): - if page['title'] == self.page.name: - self._iter = iter(page.get(self.list_name, ())) - return - raise StopIteration - - + + def __init__(self, page, prop, prefix, *args, **kwargs): + List.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs) + self.page = page + self.generator = 'prop' + + def set_iter(self, data): + for page in data['query']['pages'].itervalues(): + if page['title'] == self.page.name: + self._iter = iter(page.get(self.list_name, ())) + return + raise StopIteration + + class PagePropertyGenerator(GeneratorList): - def __init__(self, page, prop, prefix, *args, **kwargs): - GeneratorList.__init__(self, page.site, prop, prefix, titles = page.name, *args, **kwargs) - 
self.page = page + + def __init__(self, page, prop, prefix, *args, **kwargs): + GeneratorList.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs) + self.page = page + class RevisionsIterator(PageProperty): - def load_chunk(self): - if 'rvstartid' in self.args and 'rvstart' in self.args: - del self.args['rvstart'] - return PageProperty.load_chunk(self) - + + def load_chunk(self): + if 'rvstartid' in self.args and 'rvstart' in self.args: + del self.args['rvstart'] + return PageProperty.load_chunk(self) diff --git a/mwclient/page.py b/mwclient/page.py index 0265eeea73bb15ee94b6caccab826c8d53136fb2..bbeeaeec0d65f3c62e437c9204445695f2c8868f 100644 --- a/mwclient/page.py +++ b/mwclient/page.py @@ -1,381 +1,408 @@ -import client, errors, listing +import client +import errors +import listing import compatibility from page_nowriteapi import OldPage -import urllib, urlparse, time - +import urllib +import urlparse +import time + + class Page(object): - def __init__(self, site, name, info = None, extra_properties = {}): - if type(name) is type(self): - return self.__dict__.update(name.__dict__) - self.site = site - self.name = name - self.section = None - - if not info: - if extra_properties: - prop = 'info|' + '|'.join(extra_properties.iterkeys()) - extra_props = [] - [extra_props.extend(extra_prop) for extra_prop in extra_properties.itervalues()] - else: - prop = 'info' - extra_props = () - - if type(name) is int: - info = self.site.api('query', prop = prop, pageids = name, - inprop = 'protection', *extra_props) - else: - info = self.site.api('query', prop = prop, titles = name, - inprop = 'protection', *extra_props) - info = info['query']['pages'].itervalues().next() - self._info = info - - self.namespace = info.get('ns', 0) - self.name = info.get('title', u'') - if self.namespace: - self.page_title = self.strip_namespace(self.name) - else: - self.page_title = self.name - - self.touched = client.parse_timestamp(info.get('touched', 
'0000-00-00T00:00:00Z')) - self.revision = info.get('lastrevid', 0) - self.exists = 'missing' not in info - self.length = info.get('length') - self.protection = dict([(i['type'], (i['level'], i['expiry'])) for i in info.get('protection', ()) if i]) - self.redirect = 'redirect' in info - - self.last_rev_time = None - self.edit_time = None - - def redirects_to(self): - """ Returns the redirect target page, or None if the page is not a redirect page.""" - info = self.site.api('query', prop = 'pageprops', titles = self.name, redirects = '')['query'] - if 'redirects' in info: - for page in info['redirects']: - if page['from'] == self.name: - return Page(self.site, page['to']) - return None - else: - return None - - def resolve_redirect(self): - """ Returns the redirect target page, or the current page if it's not a redirect page.""" - target_page = self.redirects_to() - if target_page == None: - return self - else: - return target_page - - def __repr__(self): - return "<Page object '%s' for %s>" % (self.name.encode('utf-8'), self.site) - - def __unicode__(self): - return self.name - - @staticmethod - def strip_namespace(title): - if title[0] == ':': - title = title[1:] - return title[title.find(':') + 1:] - - @staticmethod - def normalize_title(title): - # TODO: Make site dependent - title = title.strip() - if title[0] == ':': - title = title[1:] - title = title[0].upper() + title[1:] - title = title.replace(' ', '_') - return title - - - def can(self, action): - level = self.protection.get(action, (action, ))[0] - if level == 'sysop': level = compatibility.protectright(self.site.version) - - return level in self.site.rights - - def get_token(self, type, force = False): - self.site.require(1, 11) - - if type not in self.site.tokens: - self.site.tokens[type] = '0' - if self.site.tokens.get(type, '0') == '0' or force: - info = self.site.api('query', titles = self.name, - prop = 'info', intoken = type) - for i in info['query']['pages'].itervalues(): - if i['title'] == 
self.name: - self.site.tokens[type] = i['%stoken' % type] - return self.site.tokens[type] - - def get_expanded(self): - self.site.require(1, 12) - - revs = self.revisions(prop = 'content', limit = 1, expandtemplates = True) - try: - return revs.next()['*'] - except StopIteration: - return u'' - - def edit(self, section = None, readonly = False): - """Returns wikitext for a specified section or for the whole page. - - Retrieves the latest edit. - - """ - if not self.can('read'): - raise errors.InsufficientPermission(self) - if not self.exists: - return u'' - - revs = self.revisions(prop = 'content|timestamp', limit = 1, section = section) - try: - rev = revs.next() - self.text = rev['*'] - self.section = section - self.last_rev_time = rev['timestamp'] - except StopIteration: - self.text = u'' - self.section = None - self.edit_time = None - self.edit_time = time.gmtime() - return self.text - - def save(self, text = u'', summary = u'', minor = False, bot = True, section = None, **kwargs): - """Save text of page.""" - if not self.site.logged_in and self.site.force_login: - # Should we really check for this? 
- raise errors.LoginError(self.site) - if self.site.blocked: - raise errors.UserBlocked(self.site.blocked) - if not self.can('edit'): - raise errors.ProtectedPageError(self) - - if not text: text = self.text - if not section: section = self.section - - if not self.site.writeapi: - return OldPage.save(self, text = text, summary = summary, minor = False) - - data = {} - if minor: data['minor'] = '1' - if not minor: data['notminor'] = '1' - if self.last_rev_time: data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) - if self.edit_time: data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) - if bot: data['bot'] = '1' - if section: data['section'] = section - - data.update(kwargs) - - def do_edit(): - result = self.site.api('edit', title = self.name, text = text, - summary = summary, token = self.get_token('edit'), - **data) - if result['edit'].get('result').lower() == 'failure': - raise errors.EditError(self, result['edit']) - return result - try: - result = do_edit() - except errors.APIError, e: - if e.code == 'badtoken': - # Retry, but only once to avoid an infinite loop - self.get_token('edit', force = True) - try: - result = do_edit() - except errors.APIError, e: - self.handle_edit_error(e, summary) - else: - self.handle_edit_error(e, summary) - - if result['edit'] == 'Success': - self.last_rev_time = client.parse_timestamp(result['newtimestamp']) - return result['edit'] - - def handle_edit_error(self, e, summary): - if e.code == 'editconflict': - raise errors.EditError(self, summary, e.info) - elif e.code in ('protectedtitle', 'cantcreate', 'cantcreate-anon', 'noimageredirect-anon', - 'noimageredirect', 'noedit-anon', 'noedit'): - raise errors.ProtectedPageError(self, e.code, e.info) - else: - raise - - def get_expanded(self): - self.site.require(1, 12) - - revs = self.revisions(prop = 'content', limit = 1, expandtemplates = True) - try: - return revs.next()['*'] - except StopIteration: - return u'' - - def move(self, 
new_title, reason = '', move_talk = True, no_redirect = False): - """Move (rename) page to new_title. - - If user account is an administrator, specify no_direct as True to not - leave a redirect. - - If user does not have permission to move page, an InsufficientPermission - exception is raised. - - """ - if not self.can('move'): raise errors.InsufficientPermission(self) - - if not self.site.writeapi: - return OldPage.move(self, new_title = new_title, - reason = reason, move_talk = move_talk) - - data = {} - if move_talk: data['movetalk'] = '1' - if no_redirect: data['noredirect'] = '1' - result = self.site.api('move', ('from', self.name), to = new_title, - token = self.get_token('move'), reason = reason, **data) - return result['move'] - - - def delete(self, reason = '', watch = False, unwatch = False, oldimage = False): - """Delete page. - - If user does not have permission to delete page, an InsufficientPermission - exception is raised. - - """ - if not self.can('delete'): raise errors.InsufficientPermission(self) - - if not self.site.writeapi: - return OldPage.delete(self, reason = reason) - - data = {} - if watch: data['watch'] = '1' - if unwatch: data['unwatch'] = '1' - if oldimage: data['oldimage'] = oldimage - result = self.site.api('delete', title = self.name, - token = self.get_token('delete'), - reason = reason, **data) - return result['delete'] - - def purge(self): - """Purge server-side cache of page. This will re-render templates and other - dynamic content. - - """ - self.site.raw_index('purge', title = self.name) - - # def watch: requires 1.14 - - # Properties - def backlinks(self, namespace = None, filterredir = 'all', redirect = False, limit = None, generator = True): - self.site.require(1, 9) - # Fix title for < 1.11 !! 
- prefix = listing.List.get_prefix('bl', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, - namespace = namespace, filterredir = filterredir)) - if redirect: kwargs['%sredirect' % prefix] = '1' - kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error = False))] = self.name - - return listing.List.get_list(generator)(self.site, 'backlinks', 'bl', limit = limit, return_values = 'title', **kwargs) - - def categories(self, generator = True): - self.site.require(1, 11) - if generator: - return listing.PagePropertyGenerator(self, 'categories', 'cl') - else: - # TODO: return sortkey if wanted - return listing.PageProperty(self, 'categories', 'cl', return_values = 'title') - - def embeddedin(self, namespace = None, filterredir = 'all', redirect = False, limit = None, generator = True): - self.site.require(1, 9) - # Fix title for < 1.11 !! - prefix = listing.List.get_prefix('ei', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, - namespace = namespace, filterredir = filterredir)) - if redirect: kwargs['%sredirect' % prefix] = '1' - kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error = False))] = self.name - - return listing.List.get_list(generator)(self.site, 'embeddedin', 'ei', limit = limit, return_values = 'title', **kwargs) - - def extlinks(self): - self.site.require(1, 11) - return listing.PageProperty(self, 'extlinks', 'el', return_values = '*') - - def images(self, generator = True): - self.site.require(1, 9) - if generator: - return listing.PagePropertyGenerator(self, 'images', '') - else: - return listing.PageProperty(self, 'images', '', return_values = 'title') - - def langlinks(self, **kwargs): - self.site.require(1, 9) - return listing.PageProperty(self, 'langlinks', 'll', return_values = ('lang', '*'), **kwargs) - - def links(self, namespace = None, generator = True, redirects = False): - self.site.require(1, 9) - kwargs = dict(listing.List.generate_kwargs('pl', namespace = namespace)) - if 
redirects: kwargs['redirects'] = '1' - if generator: - return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) - else: - return listing.PageProperty(self, 'links', 'pl', return_values = 'title', **kwargs) - - def revisions(self, startid = None, endid = None, start = None, end = None, - dir = 'older', user = None, excludeuser = None, limit = 50, - prop = 'ids|timestamp|flags|comment|user', expandtemplates = False, section = None): - self.site.require(1, 8) - kwargs = dict(listing.List.generate_kwargs('rv', startid = startid, endid = endid, - start = start, end = end, user = user, excludeuser = excludeuser)) - kwargs['rvdir'] = dir - kwargs['rvprop'] = prop - if expandtemplates: kwargs['rvexpandtemplates'] = '1' - if section: kwargs['rvsection'] = section - - return listing.RevisionsIterator(self, 'revisions', 'rv', limit = limit, **kwargs) - - def templates(self, namespace = None, generator = True): - self.site.require(1, 8) - kwargs = dict(listing.List.generate_kwargs('tl', namespace = namespace)) - if generator: - return listing.PagePropertyGenerator(self, 'templates', 'tl') - else: - return listing.PageProperty(self, 'templates', 'tl', return_values = 'title') + + def __init__(self, site, name, info=None, extra_properties={}): + if type(name) is type(self): + return self.__dict__.update(name.__dict__) + self.site = site + self.name = name + self.section = None + + if not info: + if extra_properties: + prop = 'info|' + '|'.join(extra_properties.iterkeys()) + extra_props = [] + [extra_props.extend(extra_prop) for extra_prop in extra_properties.itervalues()] + else: + prop = 'info' + extra_props = () + + if type(name) is int: + info = self.site.api('query', prop=prop, pageids=name, + inprop='protection', *extra_props) + else: + info = self.site.api('query', prop=prop, titles=name, + inprop='protection', *extra_props) + info = info['query']['pages'].itervalues().next() + self._info = info + + self.namespace = info.get('ns', 0) + self.name = 
info.get('title', u'') + if self.namespace: + self.page_title = self.strip_namespace(self.name) + else: + self.page_title = self.name + + self.touched = client.parse_timestamp(info.get('touched', '0000-00-00T00:00:00Z')) + self.revision = info.get('lastrevid', 0) + self.exists = 'missing' not in info + self.length = info.get('length') + self.protection = dict([(i['type'], (i['level'], i['expiry'])) for i in info.get('protection', ()) if i]) + self.redirect = 'redirect' in info + + self.last_rev_time = None + self.edit_time = None + + def redirects_to(self): + """ Returns the redirect target page, or None if the page is not a redirect page.""" + info = self.site.api('query', prop='pageprops', titles=self.name, redirects='')['query'] + if 'redirects' in info: + for page in info['redirects']: + if page['from'] == self.name: + return Page(self.site, page['to']) + return None + else: + return None + + def resolve_redirect(self): + """ Returns the redirect target page, or the current page if it's not a redirect page.""" + target_page = self.redirects_to() + if target_page is None: + return self + else: + return target_page + + def __repr__(self): + return "<Page object '%s' for %s>" % (self.name.encode('utf-8'), self.site) + + def __unicode__(self): + return self.name + + @staticmethod + def strip_namespace(title): + if title[0] == ':': + title = title[1:] + return title[title.find(':') + 1:] + + @staticmethod + def normalize_title(title): + # TODO: Make site dependent + title = title.strip() + if title[0] == ':': + title = title[1:] + title = title[0].upper() + title[1:] + title = title.replace(' ', '_') + return title + + def can(self, action): + level = self.protection.get(action, (action, ))[0] + if level == 'sysop': + level = compatibility.protectright(self.site.version) + + return level in self.site.rights + + def get_token(self, type, force=False): + self.site.require(1, 11) + + if type not in self.site.tokens: + self.site.tokens[type] = '0' + if 
self.site.tokens.get(type, '0') == '0' or force: + info = self.site.api('query', titles=self.name, + prop='info', intoken=type) + for i in info['query']['pages'].itervalues(): + if i['title'] == self.name: + self.site.tokens[type] = i['%stoken' % type] + return self.site.tokens[type] + + def get_expanded(self): + self.site.require(1, 12) + + revs = self.revisions(prop='content', limit=1, expandtemplates=True) + try: + return revs.next()['*'] + except StopIteration: + return u'' + + def edit(self, section=None, readonly=False): + """Returns wikitext for a specified section or for the whole page. + + Retrieves the latest edit. + + """ + if not self.can('read'): + raise errors.InsufficientPermission(self) + if not self.exists: + return u'' + + revs = self.revisions(prop='content|timestamp', limit=1, section=section) + try: + rev = revs.next() + self.text = rev['*'] + self.section = section + self.last_rev_time = rev['timestamp'] + except StopIteration: + self.text = u'' + self.section = None + self.edit_time = None + self.edit_time = time.gmtime() + return self.text + + def save(self, text=u'', summary=u'', minor=False, bot=True, section=None, **kwargs): + """Save text of page.""" + if not self.site.logged_in and self.site.force_login: + # Should we really check for this? 
+ raise errors.LoginError(self.site) + if self.site.blocked: + raise errors.UserBlocked(self.site.blocked) + if not self.can('edit'): + raise errors.ProtectedPageError(self) + + if not text: + text = self.text + if not section: + section = self.section + + if not self.site.writeapi: + return OldPage.save(self, text=text, summary=summary, minor=False) + + data = {} + if minor: + data['minor'] = '1' + if not minor: + data['notminor'] = '1' + if self.last_rev_time: + data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) + if self.edit_time: + data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) + if bot: + data['bot'] = '1' + if section: + data['section'] = section + + data.update(kwargs) + + def do_edit(): + result = self.site.api('edit', title=self.name, text=text, + summary=summary, token=self.get_token('edit'), + **data) + if result['edit'].get('result').lower() == 'failure': + raise errors.EditError(self, result['edit']) + return result + try: + result = do_edit() + except errors.APIError, e: + if e.code == 'badtoken': + # Retry, but only once to avoid an infinite loop + self.get_token('edit', force=True) + try: + result = do_edit() + except errors.APIError, e: + self.handle_edit_error(e, summary) + else: + self.handle_edit_error(e, summary) + + if result['edit'] == 'Success': + self.last_rev_time = client.parse_timestamp(result['newtimestamp']) + return result['edit'] + + def handle_edit_error(self, e, summary): + if e.code == 'editconflict': + raise errors.EditError(self, summary, e.info) + elif e.code in ('protectedtitle', 'cantcreate', 'cantcreate-anon', 'noimageredirect-anon', + 'noimageredirect', 'noedit-anon', 'noedit'): + raise errors.ProtectedPageError(self, e.code, e.info) + else: + raise + + def get_expanded(self): + self.site.require(1, 12) + + revs = self.revisions(prop='content', limit=1, expandtemplates=True) + try: + return revs.next()['*'] + except StopIteration: + return u'' + + def move(self, new_title, 
reason='', move_talk=True, no_redirect=False): + """Move (rename) page to new_title. + + If user account is an administrator, specify no_direct as True to not + leave a redirect. + + If user does not have permission to move page, an InsufficientPermission + exception is raised. + + """ + if not self.can('move'): + raise errors.InsufficientPermission(self) + + if not self.site.writeapi: + return OldPage.move(self, new_title=new_title, + reason=reason, move_talk=move_talk) + + data = {} + if move_talk: + data['movetalk'] = '1' + if no_redirect: + data['noredirect'] = '1' + result = self.site.api('move', ('from', self.name), to=new_title, + token=self.get_token('move'), reason=reason, **data) + return result['move'] + + def delete(self, reason='', watch=False, unwatch=False, oldimage=False): + """Delete page. + + If user does not have permission to delete page, an InsufficientPermission + exception is raised. + + """ + if not self.can('delete'): + raise errors.InsufficientPermission(self) + + if not self.site.writeapi: + return OldPage.delete(self, reason=reason) + + data = {} + if watch: + data['watch'] = '1' + if unwatch: + data['unwatch'] = '1' + if oldimage: + data['oldimage'] = oldimage + result = self.site.api('delete', title=self.name, + token=self.get_token('delete'), + reason=reason, **data) + return result['delete'] + + def purge(self): + """Purge server-side cache of page. This will re-render templates and other + dynamic content. + + """ + self.site.raw_index('purge', title=self.name) + + # def watch: requires 1.14 + + # Properties + def backlinks(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True): + self.site.require(1, 9) + # Fix title for < 1.11 !! 
+ prefix = listing.List.get_prefix('bl', generator) + kwargs = dict(listing.List.generate_kwargs(prefix, + namespace=namespace, filterredir=filterredir)) + if redirect: + kwargs['%sredirect' % prefix] = '1' + kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error=False))] = self.name + + return listing.List.get_list(generator)(self.site, 'backlinks', 'bl', limit=limit, return_values='title', **kwargs) + + def categories(self, generator=True): + self.site.require(1, 11) + if generator: + return listing.PagePropertyGenerator(self, 'categories', 'cl') + else: + # TODO: return sortkey if wanted + return listing.PageProperty(self, 'categories', 'cl', return_values='title') + + def embeddedin(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True): + self.site.require(1, 9) + # Fix title for < 1.11 !! + prefix = listing.List.get_prefix('ei', generator) + kwargs = dict(listing.List.generate_kwargs(prefix, + namespace=namespace, filterredir=filterredir)) + if redirect: + kwargs['%sredirect' % prefix] = '1' + kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error=False))] = self.name + + return listing.List.get_list(generator)(self.site, 'embeddedin', 'ei', limit=limit, return_values='title', **kwargs) + + def extlinks(self): + self.site.require(1, 11) + return listing.PageProperty(self, 'extlinks', 'el', return_values='*') + + def images(self, generator=True): + self.site.require(1, 9) + if generator: + return listing.PagePropertyGenerator(self, 'images', '') + else: + return listing.PageProperty(self, 'images', '', return_values='title') + + def langlinks(self, **kwargs): + self.site.require(1, 9) + return listing.PageProperty(self, 'langlinks', 'll', return_values=('lang', '*'), **kwargs) + + def links(self, namespace=None, generator=True, redirects=False): + self.site.require(1, 9) + kwargs = dict(listing.List.generate_kwargs('pl', namespace=namespace)) + if redirects: + kwargs['redirects'] = '1' + if 
generator: + return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) + else: + return listing.PageProperty(self, 'links', 'pl', return_values='title', **kwargs) + + def revisions(self, startid=None, endid=None, start=None, end=None, + dir='older', user=None, excludeuser=None, limit=50, + prop='ids|timestamp|flags|comment|user', expandtemplates=False, section=None): + self.site.require(1, 8) + kwargs = dict(listing.List.generate_kwargs('rv', startid=startid, endid=endid, + start=start, end=end, user=user, excludeuser=excludeuser)) + kwargs['rvdir'] = dir + kwargs['rvprop'] = prop + if expandtemplates: + kwargs['rvexpandtemplates'] = '1' + if section: + kwargs['rvsection'] = section + + return listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, **kwargs) + + def templates(self, namespace=None, generator=True): + self.site.require(1, 8) + kwargs = dict(listing.List.generate_kwargs('tl', namespace=namespace)) + if generator: + return listing.PagePropertyGenerator(self, 'templates', 'tl') + else: + return listing.PageProperty(self, 'templates', 'tl', return_values='title') + class Image(Page): - def __init__(self, site, name, info = None): - site.require(1, 11) - Page.__init__(self, site, name, info, - extra_properties = {'imageinfo': (('iiprop', - compatibility.iiprop(site.version)), )}) - self.imagerepository = self._info.get('imagerepository', '') - self.imageinfo = self._info.get('imageinfo', ({}, ))[0] - - def imagehistory(self): - return listing.PageProperty(self, 'imageinfo', 'ii', - iiprop = compatibility.iiprop(self.site.version)) - - def imageusage(self, namespace = None, filterredir = 'all', redirect = False, - limit = None, generator = True): - self.site.require(1, 11) - # TODO: Fix for versions < 1.11 - prefix = listing.List.get_prefix('iu', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, title = self.name, - namespace = namespace, filterredir = filterredir)) - if redirect: kwargs['%sredirect' % prefix] = '1' - return 
listing.List.get_list(generator)(self.site, 'imageusage', 'iu', - limit = limit, return_values = 'title', **kwargs) - - def duplicatefiles(self, limit = None): - self.require(1, 14) - return listing.PageProperty(self, 'duplicatefiles', 'df', - dflimit = limit) - - def download(self): - url = self.imageinfo['url'] - if not url.startswith('http://'): - url = 'http://' + self.site.host + url - url = urlparse.urlparse(url) - # TODO: query string - return self.site.connection.get(url[1], url[2]) - - def __repr__(self): - return "<Image object '%s' for %s>" % (self.name.encode('utf-8'), self.site) - + + def __init__(self, site, name, info=None): + site.require(1, 11) + Page.__init__(self, site, name, info, + extra_properties={'imageinfo': (('iiprop', + compatibility.iiprop(site.version)), )}) + self.imagerepository = self._info.get('imagerepository', '') + self.imageinfo = self._info.get('imageinfo', ({}, ))[0] + + def imagehistory(self): + return listing.PageProperty(self, 'imageinfo', 'ii', + iiprop=compatibility.iiprop(self.site.version)) + + def imageusage(self, namespace=None, filterredir='all', redirect=False, + limit=None, generator=True): + self.site.require(1, 11) + # TODO: Fix for versions < 1.11 + prefix = listing.List.get_prefix('iu', generator) + kwargs = dict(listing.List.generate_kwargs(prefix, title=self.name, + namespace=namespace, filterredir=filterredir)) + if redirect: + kwargs['%sredirect' % prefix] = '1' + return listing.List.get_list(generator)(self.site, 'imageusage', 'iu', + limit=limit, return_values='title', **kwargs) + + def duplicatefiles(self, limit=None): + self.site.require(1, 14) + return listing.PageProperty(self, 'duplicatefiles', 'df', + dflimit=limit) + + def download(self): + url = self.imageinfo['url'] + if not url.startswith('http://'): + url = 'http://' + self.site.host + url + url = urlparse.urlparse(url) + # TODO: query string + return self.site.connection.get(url[1], url[2]) + + def __repr__(self): + return "<Image object '%s' for 
%s>" % (self.name.encode('utf-8'), self.site) diff --git a/mwclient/page_nowriteapi.py b/mwclient/page_nowriteapi.py index 60e85ca3f2314e0657bf9615aca7eba0a85060ca..d6f68cdb7a0cc7c896bd399e3ac526b1fb9a75ef 100644 --- a/mwclient/page_nowriteapi.py +++ b/mwclient/page_nowriteapi.py @@ -1,121 +1,133 @@ import time from HTMLParser import HTMLParser -from htmlentitydefs import name2codepoint +from htmlentitydefs import name2codepoint import errors + class OldPage(object): - @staticmethod - def save(self, text = u'', summary = u'', minor = False): - data = {} - data['wpTextbox1'] = text - data['wpSummary'] = summary - data['wpSave'] = 'Save page' - data['wpEditToken'] = self.get_token('edit') - if self.last_rev_time: - data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) - else: - data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - if self.edit_time: - data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) - else: - data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - - if minor: data['wpMinoredit'] = '1' - data['title'] = self.name - - page_data = self.site.raw_index('submit', **data) - - page = EditPage('editform') - page.feed(page_data) - page.close() - - if page.data: - if page.readonly: raise errors.ProtectedPageError(self) - self.get_token('edit', True) - raise errors.EditError(page.title, data) - - @staticmethod - def move(self, new_title, reason = '', move_talk = True): - postdata = {'wpNewTitle': new_title, - 'wpOldTitle': self.name, - 'wpReason': reason, - 'wpMove': '1', - 'wpEditToken': self.get_token('move')} - if move_talk: postdata['wpMovetalk'] = '1' - postdata['title'] = 'Special:Movepage' - - page_data = self.site.raw_index('submit', **data) - - page = EditPage('movepage') - page.feed(page_data.decode('utf-8', 'ignore')) - page.close() - - if 'wpEditToken' in page.data: - raise errors.EditError(page.title, postdata) - - 
@staticmethod - def delete(self, reason = ''): - postdata = {'wpReason': reason, - 'wpConfirmB': 'Delete', - 'mw-filedelete-submit': 'Delete', - 'wpEditToken': self.get_token('delete'), - 'title': self.name} - - page_data = self.site.raw_index('delete', **postdata) + + @staticmethod + def save(self, text=u'', summary=u'', minor=False): + data = {} + data['wpTextbox1'] = text + data['wpSummary'] = summary + data['wpSave'] = 'Save page' + data['wpEditToken'] = self.get_token('edit') + if self.last_rev_time: + data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) + else: + data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + if self.edit_time: + data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) + else: + data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + + if minor: + data['wpMinoredit'] = '1' + data['title'] = self.name + + page_data = self.site.raw_index('submit', **data) + + page = EditPage('editform') + page.feed(page_data) + page.close() + + if page.data: + if page.readonly: + raise errors.ProtectedPageError(self) + self.get_token('edit', True) + raise errors.EditError(page.title, data) + + @staticmethod + def move(self, new_title, reason='', move_talk=True): + postdata = {'wpNewTitle': new_title, + 'wpOldTitle': self.name, + 'wpReason': reason, + 'wpMove': '1', + 'wpEditToken': self.get_token('move')} + if move_talk: + postdata['wpMovetalk'] = '1' + postdata['title'] = 'Special:Movepage' + + page_data = self.site.raw_index('submit', **postdata) + + page = EditPage('movepage') + page.feed(page_data.decode('utf-8', 'ignore')) + page.close() + + if 'wpEditToken' in page.data: + raise errors.EditError(page.title, postdata) + + @staticmethod + def delete(self, reason=''): + postdata = {'wpReason': reason, + 'wpConfirmB': 'Delete', + 'mw-filedelete-submit': 'Delete', + 'wpEditToken': self.get_token('delete'), + 'title': self.name} + + 
page_data = self.site.raw_index('delete', **postdata) + class EditPage(HTMLParser): - def __init__(self, form): - HTMLParser.__init__(self) - - self.form = form - - self.in_form = False - self.in_text = False - self.in_title = False - - self.data = {} - self.textdata = [] - self.title = u'' - - self.readonly = True - - def handle_starttag(self, tag, attrs): - self.in_title = (tag == 'title') - - if (u'id', self.form) in attrs: - attrs = dict(attrs) - self.in_form = True - self.action = attrs['action'] - - if tag == 'input' and self.in_form and (u'type', u'submit') \ - not in attrs and (u'type', u'checkbox') not in attrs: - attrs = dict(attrs) - if u'name' in attrs: self.data[attrs[u'name']] = attrs.get(u'value', u'') - - if self.in_form and tag == 'textarea': - self.in_text = True - self.readonly = (u'readonly', u'readonly') in attrs - - - def handle_endtag(self, tag): - if self.in_title and tag == 'title': self.in_title = False - if self.in_form and tag == 'form': self.in_form = False - if self.in_text and tag == 'textarea': self.in_text = False - - def handle_data(self, data): - if self.in_text: self.textdata.append(data) - if self.in_title: self.title += data - - def handle_entityref(self, name): - if name in name2codepoint: - self.handle_data(unichr(name2codepoint[name])) - else: - self.handle_data(u'&%s;' % name) - def handle_charref(self, name): - try: - self.handle_data(unichr(int(name))) - except ValueError: - self.handle_data(u'&#$s;' % name) - + + def __init__(self, form): + HTMLParser.__init__(self) + + self.form = form + + self.in_form = False + self.in_text = False + self.in_title = False + + self.data = {} + self.textdata = [] + self.title = u'' + + self.readonly = True + + def handle_starttag(self, tag, attrs): + self.in_title = (tag == 'title') + + if (u'id', self.form) in attrs: + attrs = dict(attrs) + self.in_form = True + self.action = attrs['action'] + + if tag == 'input' and self.in_form and (u'type', u'submit') \ + not in attrs and (u'type', 
u'checkbox') not in attrs: + attrs = dict(attrs) + if u'name' in attrs: + self.data[attrs[u'name']] = attrs.get(u'value', u'') + + if self.in_form and tag == 'textarea': + self.in_text = True + self.readonly = (u'readonly', u'readonly') in attrs + + def handle_endtag(self, tag): + if self.in_title and tag == 'title': + self.in_title = False + if self.in_form and tag == 'form': + self.in_form = False + if self.in_text and tag == 'textarea': + self.in_text = False + + def handle_data(self, data): + if self.in_text: + self.textdata.append(data) + if self.in_title: + self.title += data + + def handle_entityref(self, name): + if name in name2codepoint: + self.handle_data(unichr(name2codepoint[name])) + else: + self.handle_data(u'&%s;' % name) + + def handle_charref(self, name): + try: + self.handle_data(unichr(int(name))) + except ValueError: + self.handle_data(u'&#%s;' % name) diff --git a/mwclient/upload.py b/mwclient/upload.py index 77403f5bf1422d1bad70d6ee5c388fb25981f159..4d79cc2f350b9cf142ba098a2d04b63ea202287e 100644 --- a/mwclient/upload.py +++ b/mwclient/upload.py @@ -1,123 +1,135 @@ import random from cStringIO import StringIO + class Upload(object): - """ - Base class for upload objects. This class should always be subclassed - by upload classes and its constructor always be called. - - Upload classes are file like object/iterators that have additional - variables length and content_type. - """ - - BLOCK_SIZE = 8192 - def __init__(self, length, content_type): - self.length = length - self.content_type = content_type - - def __iter__(self): - return self - def next(self): - data = self.read(self.BLOCK_SIZE) - if data == '': - raise StopIteration - return data - - @staticmethod - def encode(s): - if type(s) is str: - return s - elif type(s) is unicode: - return s.encode('utf-8') - else: - return s + + """ + Base class for upload objects. This class should always be subclassed + by upload classes and its constructor always be called. 
+ + Upload classes are file like object/iterators that have additional + variables length and content_type. + """ + + BLOCK_SIZE = 8192 + + def __init__(self, length, content_type): + self.length = length + self.content_type = content_type + + def __iter__(self): + return self + + def next(self): + data = self.read(self.BLOCK_SIZE) + if data == '': + raise StopIteration + return data + + @staticmethod + def encode(s): + if type(s) is str: + return s + elif type(s) is unicode: + return s.encode('utf-8') + else: + return s + class UploadRawData(Upload): - """ - This upload class is simply a wrapper around StringIO - """ - def __init__(self, data, content_type = 'application/x-www-form-urlencoded'): - self.fstr = StringIO(data) - Upload.__init__(self, len(data), content_type) - def read(self, length = -1): - return self.fstr.read(length) - - + + """ + This upload class is simply a wrapper around StringIO + """ + + def __init__(self, data, content_type='application/x-www-form-urlencoded'): + self.fstr = StringIO(data) + Upload.__init__(self, len(data), content_type) + + def read(self, length=-1): + return self.fstr.read(length) + + class UploadDict(UploadRawData): - """ - This class creates an x-www-form-urlencoded representation of a dict - and then passes it through its parent UploadRawData - """ - def __init__(self, data): - postdata = '&'.join('%s=%s' % (self.encode(i), self.encode(data[i])) for i in data) - UploadRawData.__init__(self, postdata) - + + """ + This class creates an x-www-form-urlencoded representation of a dict + and then passes it through its parent UploadRawData + """ + + def __init__(self, data): + postdata = '&'.join('%s=%s' % (self.encode(i), self.encode(data[i])) for i in data) + UploadRawData.__init__(self, postdata) + + class UploadFile(Upload): - """ - This class accepts a file with information and a postdata dictionary - and creates a multipart/form-data representation from it. 
- """ - STAGE_FILEHEADER = 0 - STAGE_FILE = 1 - STAGE_POSTDATA = 2 - STAGE_FOOTER = 3 - STAGE_DONE = 4 - def __init__(self, filefield, filename, filelength, file, data): - self.stage = self.STAGE_FILEHEADER; - self.boundary = self.generate_boundary() - self.postdata = self.generate_multipart_from_dict(data) - self.footer = '\r\n--%s--\r\n' % self.boundary - self.fileheader = ('--%s\r\n' % self.boundary + - 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % - (self.encode(filefield), self.encode(filename)) + - 'Content-Type: application/octet-stream\r\n\r\n') - self.file = file - self.length_left = filelength - self.str_data = None - - Upload.__init__(self, len(self.fileheader) + filelength + len(self.postdata) + len(self.footer) + 2, - 'multipart/form-data; boundary=' + self.boundary) - - def read(self, length): - if self.stage == self.STAGE_DONE: - return '' - elif self.stage != self.STAGE_FILE: - if self.str_data is None: - if self.stage == self.STAGE_FILEHEADER: - self.str_data = StringIO(self.fileheader) - elif self.stage == self.STAGE_POSTDATA: - self.str_data = StringIO(self.postdata) - elif self.stage == self.STAGE_FOOTER: - self.str_data = StringIO(self.footer) - data = self.str_data.read(length) - else: - if self.length_left: - if length > self.length_left: - length = self.length_left - data = self.file.read(length) - self.length_left -= len(data) - else: - self.stage += 1 - return '\r\n' - - if data == '': - self.stage += 1 - self.str_data = None - return self.read(length) - return data - - - @staticmethod - def generate_boundary(): - return '----%s----' % ''.join((random.choice( - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') - for i in xrange(32))) - - def generate_multipart_from_dict(self, data): - postdata = [] - for i in data: - postdata.append('--' + self.boundary) - postdata.append('Content-Disposition: form-data; name="%s"' % self.encode(i)) - postdata.append('') - postdata.append(self.encode(data[i])) - return 
'\r\n'.join(postdata) + + """ + This class accepts a file with information and a postdata dictionary + and creates a multipart/form-data representation from it. + """ + STAGE_FILEHEADER = 0 + STAGE_FILE = 1 + STAGE_POSTDATA = 2 + STAGE_FOOTER = 3 + STAGE_DONE = 4 + + def __init__(self, filefield, filename, filelength, file, data): + self.stage = self.STAGE_FILEHEADER + self.boundary = self.generate_boundary() + self.postdata = self.generate_multipart_from_dict(data) + self.footer = '\r\n--%s--\r\n' % self.boundary + self.fileheader = ('--%s\r\n' % self.boundary + + 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % + (self.encode(filefield), self.encode(filename)) + + 'Content-Type: application/octet-stream\r\n\r\n') + self.file = file + self.length_left = filelength + self.str_data = None + + Upload.__init__(self, len(self.fileheader) + filelength + len(self.postdata) + len(self.footer) + 2, + 'multipart/form-data; boundary=' + self.boundary) + + def read(self, length): + if self.stage == self.STAGE_DONE: + return '' + elif self.stage != self.STAGE_FILE: + if self.str_data is None: + if self.stage == self.STAGE_FILEHEADER: + self.str_data = StringIO(self.fileheader) + elif self.stage == self.STAGE_POSTDATA: + self.str_data = StringIO(self.postdata) + elif self.stage == self.STAGE_FOOTER: + self.str_data = StringIO(self.footer) + data = self.str_data.read(length) + else: + if self.length_left: + if length > self.length_left: + length = self.length_left + data = self.file.read(length) + self.length_left -= len(data) + else: + self.stage += 1 + return '\r\n' + + if data == '': + self.stage += 1 + self.str_data = None + return self.read(length) + return data + + @staticmethod + def generate_boundary(): + return '----%s----' % ''.join((random.choice( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') + for i in xrange(32))) + + def generate_multipart_from_dict(self, data): + postdata = [] + for i in data: + postdata.append('--' + 
self.boundary) + postdata.append('Content-Disposition: form-data; name="%s"' % self.encode(i)) + postdata.append('') + postdata.append(self.encode(data[i])) + return '\r\n'.join(postdata) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..7ad3f7d55bce5863baf11824f92864a05f0b5a39 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pep8ignore = E501 W602 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..60540ac0f64a2c48be5938f4dc8ffa3735ebfd97 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +simplejson \ No newline at end of file diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bcc079ac64d382dfb5f171a1ad51dcbf915e4615 --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,2 @@ +pytest +pytest-pep8 \ No newline at end of file diff --git a/tests/client-test.py b/tests/client-test.py new file mode 100644 index 0000000000000000000000000000000000000000..abc991fc71956111a3cb342f11a08ad9f5aa2f84 --- /dev/null +++ b/tests/client-test.py @@ -0,0 +1,22 @@ +#encoding=utf-8 +import unittest +import sys +import os +myPath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, myPath + '/../') + +from mwclient import Site + + +class TestClient(unittest.TestCase): + + def setUp(self): + pass + + def test_setup(self): + # Check that templates can be found + site = Site('commons.wikimedia.org') + self.assertTrue(site.initialized) + +if __name__ == '__main__': + unittest.main()