diff --git a/.gitignore b/.gitignore index fbce730a02ae836053f996d05deae1f792f6395f..b0494f5a028b980dd763e039817a31707a3a39e8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ dist *~ *.egg *.egg-info +.eggs +.tox diff --git a/.travis.yml b/.travis.yml index 1cada115eebe1de52860fab818174d634dced1ad..86361c8bcd246d88b7a2cb3ebb89c5f094f903e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,19 @@ # http://about.travis-ci.org/docs/user/languages/python/ +sudo: false language: python python: - "2.6" - "2.7" + - "3.3" + - "3.4" -install: +install: - python setup.py install + - pip install coveralls -script: +script: - python setup.py test + +after_success: + - coveralls diff --git a/README.rst b/README.rst index 1b67dd73f3eaa946fbf7f2c7bef358a8cad8579d..3c9c3a6b58a29fb50cf6ed8b5541dcb945bd31a2 100644 --- a/README.rst +++ b/README.rst @@ -1,27 +1,41 @@ -.. image:: http://img.shields.io/travis/mwclient/mwclient.svg?style=flat +.. image:: https://img.shields.io/travis/mwclient/mwclient.svg :target: https://travis-ci.org/mwclient/mwclient + :alt: Build status -.. image:: http://img.shields.io/pypi/v/mwclient.svg?style=flat +.. image:: https://img.shields.io/coveralls/mwclient/mwclient.svg + :target: https://coveralls.io/r/mwclient/mwclient + :alt: Test coverage + +.. image:: https://landscape.io/github/mwclient/mwclient/master/landscape.svg?style=flat + :target: https://landscape.io/github/mwclient/mwclient/master + :alt: Code health + +.. image:: https://img.shields.io/pypi/v/mwclient.svg :target: https://pypi.python.org/pypi/mwclient + :alt: Latest version -.. image:: http://img.shields.io/pypi/dw/mwclient.svg?style=flat +.. image:: https://img.shields.io/pypi/dw/mwclient.svg :target: https://pypi.python.org/pypi/mwclient + :alt: Downloads + +.. image:: https://img.shields.io/github/license/mwclient/mwclient.svg + :target: http://opensource.org/licenses/MIT + :alt: MIT license - mwclient ======== mwclient is a lightweight Python client library to the `MediaWiki API <https://mediawiki.org/wiki/API>`_ which provides access to most API functionality. -It requires Python 2.6 or 2.7 (Python 3.x support planned) and supports MediaWiki 1.16 -and above. +It requires Python 2.6 or 2.7 (Python 3.3 and 3.4 supported in the development version) +and supports MediaWiki 1.16 and above. For functions not available in the current MediaWiki, a ``MediaWikiVersionError`` is raised. This framework was written by Bryan Tong Minh, who maintained the project until version 0.6.5, released on 6 May 2011. The current stable -`version 0.7.1 <https://github.com/mwclient/mwclient/archive/v0.7.1.zip>`_ -was released on 23 November 2014, and is `available through PyPI <https://pypi.python.org/pypi/mwclient>`_: +`version 0.7.2 <https://github.com/mwclient/mwclient/archive/v0.7.2.zip>`_ +was released on 18 July 2015, and is `available through PyPI <https://pypi.python.org/pypi/mwclient>`_: .. code-block:: console @@ -108,7 +122,7 @@ following example: .. code-block:: python - useragent = 'YourBot, based on mwclient v0.6.5. Run by User:You, you@gmail.com' + useragent = 'YourBot, based on mwclient v0.7.2. Run by User:You, you@gmail.com' site = mwclient.Site(('https', 'en.wikipedia.org'), clients_useragent=useragent) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index e0ce6eb748e60f5e7fc29d70a168076630b547b9..604cd582b8266bca59051c4e848122252a8b9588 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,17 +1,27 @@ # Release Notes for mwclient -## Changes in version 0.7.2 + +## Changes in version 0.8.0 This is the development version of mwclient. +* [2015-07-18] [@Danmichaelo](https://github.com/Danmichaelo) + Add support for Python 3. + [#52](https://github.com/mwclient/mwclient/issues/52) +* [2015-07-19] [@Danmichaelo](https://github.com/Danmichaelo) + Fix broken `Image.download()` method. + +## Changes in version 0.7.2 + + * [2014-11-28] [@c-martinez](https://github.com/c-martinez): - Add 'continue' parameter to all queries. + Add `continue` parameter to all queries. [#73](https://github.com/mwclient/mwclient/issues/73). * [2014-12-18] [@AdamWill](https://github.com/AdamWill): Fix PageList API arg passing to GeneratorList. [059322e](https://github.com/mwclient/mwclient/commit/059322e). * [2015-03-15] [@itkach](https://github.com/itkach): - Add 'toponly' parameter for recentchanges. + Add `toponly` parameter for recentchanges. [#78](https://github.com/mwclient/mwclient/issues/78). * [2015-06-04] [@Gui13](https://github.com/Gui13): Configure default logger. @@ -19,6 +29,21 @@ This is the development version of mwclient. * [2015-06-20] [@lfaraone](https://github.com/lfaraone): Add support for querying the CheckUser log. [#86](https://github.com/mwclient/mwclient/pull/86). +* [2015-07-06] [@PierreSelim](https://github.com/PierreSelim) + Expose `pageid`, `contentmodel`, `pagelanguage`, `restrictiontypes` as attributes of `Page`. + [#89](https://github.com/mwclient/mwclient/pull/89) +* [2015-07-08] [@Pathoschild](https://github.com/Pathoschild) + Fix 'New messages' flag (`hasmsg`). + [#90](https://github.com/mwclient/mwclient/issues/90) +* [2015-07-17] [@Danmichaelo](https://github.com/Danmichaelo) + Don't retry on connection error during site init. + [#85](https://github.com/mwclient/mwclient/issues/85) +* [2015-07-18] [@Danmichaelo](https://github.com/Danmichaelo) + Deprecate implicit use of Page.section when saving, to prepare for + the merge of [#81](https://github.com/mwclient/mwclient/issues/81) +* [2015-07-18] [@Danmichaelo](https://github.com/Danmichaelo) + More intuitive error message when an invalid JSON response is received. + [#79](https://github.com/mwclient/mwclient/issues/79) ## Changes in version 0.7.1 @@ -84,7 +109,7 @@ Detailed changelog: [773adf9](https://github.com/btongminh/mwclient/commit/773adf9), [#16](https://github.com/btongminh/mwclient/pull/16). * [2013-03-20] [@eug48](https://github.com/eug48): - Removed unused 'Request' class. + Removed unused `Request` class. [99e786d](https://github.com/btongminh/mwclient/commit/99e786d), [#16](https://github.com/btongminh/mwclient/pull/16). * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): @@ -109,7 +134,7 @@ Detailed changelog: [223aa0](https://github.com/danmichaelo/mwclient/commit/223aa0), [#19](https://github.com/btongminh/mwclient/pull/19). * [2013-11-14] [@kyv](https://github.com/kyv): - Allow setting HTTP 'Authorization' header. + Allow setting HTTP `Authorization` header. [HTTP headers](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.8). [72fc49a](https://github.com/kyv/mwclient/commit/72fc49a). * [2013-11-15] [@kyv](https://github.com/kyv): diff --git a/mwclient/__init__.py b/mwclient/__init__.py index 3f85dc1605e90f57e7f876796454541216f1519f..b11b25d2027bd319e059cfa2f35d2c3f921d33f8 100644 --- a/mwclient/__init__.py +++ b/mwclient/__init__.py @@ -23,12 +23,15 @@ OTHER DEALINGS IN THE SOFTWARE. """ -from errors import * -from client import Site, __ver__ -import ex +from mwclient.errors import * +from mwclient.client import Site, __ver__ +import logging +import warnings + +# Show DeprecationWarning +warnings.simplefilter('always', DeprecationWarning) # Logging: Add a null handler to avoid "No handler found" warnings. -import logging try: from logging import NullHandler except ImportError: diff --git a/mwclient/client.py b/mwclient/client.py index c2e6cbc84b0ddf15ed03289e1a695b26292522b3..1c193351b312239afe58a2f11e48750fc482064d 100644 --- a/mwclient/client.py +++ b/mwclient/client.py @@ -5,6 +5,7 @@ import sys import weakref import logging from six import text_type +import six try: # Python 2.7+ @@ -20,35 +21,20 @@ except ImportError: import requests from requests.auth import HTTPBasicAuth, AuthBase -import errors -import listing -import page +import mwclient.errors as errors +import mwclient.listing as listing +from mwclient.sleep import Sleepers try: import gzip except ImportError: gzip = None -__ver__ = '0.7.2.dev1' +__ver__ = '0.8.0.dev1' log = logging.getLogger(__name__) -def parse_timestamp(t): - if t == '0000-00-00T00:00:00Z': - return (0, 0, 0, 0, 0, 0, 0, 0) - return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ') - - -class WaitToken(object): - - def __init__(self): - self.id = '%x' % random.randint(0, sys.maxint) - - def __hash__(self): - return hash(self.id) - - class Site(object): api_limit = 500 @@ -61,9 +47,6 @@ class Site(object): self.ext = ext self.credentials = None self.compress = compress - self.retry_timeout = retry_timeout - self.max_retries = max_retries - self.wait_callback = wait_callback self.max_lag = text_type(max_lag) self.force_login = force_login @@ -74,8 +57,7 @@ class Site(object): else: raise RuntimeError('Authentication is not a tuple or an instance of AuthBase') - # The token string => token object mapping - self.wait_tokens = weakref.WeakKeyDictionary() + self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback) # Site properties self.blocked = False # Whether current user is blocked @@ -121,11 +103,11 @@ class Site(object): def site_init(self): meta = self.api('query', meta='siteinfo|userinfo', - siprop='general|namespaces', uiprop='groups|rights') + siprop='general|namespaces', uiprop='groups|rights', retry_on_error=False) # Extract site info self.site = meta['query']['general'] - self.namespaces = dict(((i['id'], i.get('*', '')) for i in meta['query']['namespaces'].itervalues())) + self.namespaces = dict(((i['id'], i.get('*', '')) for i in six.itervalues(meta['query']['namespaces']))) self.writeapi = 'writeapi' in self.site # Determine version @@ -198,17 +180,18 @@ class Site(object): else: kwargs['uiprop'] = 'blockinfo|hasmsg' - token = self.wait_token() + sleeper = self.sleepers.make() + while True: info = self.raw_api(action, **kwargs) if not info: info = {} - if self.handle_api_result(info, token=token): + if self.handle_api_result(info, sleeper=sleeper): return info - def handle_api_result(self, info, kwargs=None, token=None): - if token is None: - token = self.wait_token() + def handle_api_result(self, info, kwargs=None, sleeper=None): + if sleeper is None: + sleeper = self.sleepers.make() try: userinfo = info['query']['userinfo'] @@ -218,11 +201,11 @@ class Site(object): self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u'')) else: self.blocked = False - self.hasmsg = 'message' in userinfo + self.hasmsg = 'messages' in userinfo self.logged_in = 'anon' not in userinfo if 'error' in info: if info['error']['code'] in (u'internal_api_error_DBConnectionError', u'internal_api_error_DBQueryError'): - self.wait(token) + sleeper.sleep() return False if '*' in info['error']: raise errors.APIError(info['error']['code'], @@ -234,16 +217,36 @@ class Site(object): @staticmethod def _query_string(*args, **kwargs): kwargs.update(args) - qs1 = [(k, v) for k, v in kwargs.iteritems() if k not in ('wpEditToken', 'token')] - qs2 = [(k, v) for k, v in kwargs.iteritems() if k in ('wpEditToken', 'token')] + qs1 = [(k, v) for k, v in six.iteritems(kwargs) if k not in ('wpEditToken', 'token')] + qs2 = [(k, v) for k, v in six.iteritems(kwargs) if k in ('wpEditToken', 'token')] return OrderedDict(qs1 + qs2) - def raw_call(self, script, data, files=None): + def raw_call(self, script, data, files=None, retry_on_error=True): + """ + Perform a generic API call and return the raw text. + + In the event of a network problem, or a HTTP response with status code 5XX, + we'll wait and retry the configured number of times before giving up + if `retry_on_error` is True. + + `requests.exceptions.HTTPError` is still raised directly for + HTTP responses with status codes in the 4XX range, and invalid + HTTP responses. + + Args: + script (str): Script name, usually 'api'. + data (dict): Post data + files (dict): Files to upload + retry_on_error (bool): Retry on connection error + + Returns: + The raw text response. + """ url = self.path + script + self.ext headers = {} if self.compress and gzip: headers['Accept-Encoding'] = 'gzip' - token = self.wait_token((script, data)) + sleeper = self.sleepers.make((script, data)) while True: scheme = 'http' # Should we move to 'https' as default? host = self.host @@ -256,42 +259,43 @@ class Site(object): stream = self.connection.post(fullurl, data=data, files=files, headers=headers) if stream.headers.get('x-database-lag'): wait_time = int(stream.headers.get('retry-after')) - log.warn('Database lag exceeds max lag. Waiting for %d seconds', wait_time) - self.wait(token, wait_time) + log.warning('Database lag exceeds max lag. Waiting for %d seconds', wait_time) + sleeper.sleep(wait_time) elif stream.status_code == 200: return stream.text elif stream.status_code < 500 or stream.status_code > 599: stream.raise_for_status() else: - log.warn('Received %s response: %s. Retrying in a moment.', stream.status_code, stream.text) - self.wait(token) + if not retry_on_error: + stream.raise_for_status() + log.warning('Received %s response: %s. Retrying in a moment.', stream.status_code, stream.text) + sleeper.sleep() except requests.exceptions.ConnectionError: # In the event of a network problem (e.g. DNS failure, refused connection, etc), # Requests will raise a ConnectionError exception. - log.warn('Connection error. Retrying in a moment.') - self.wait(token) - - except requests.exceptions.HTTPError as e: - log.warn('HTTP error: %s', e.message) - raise - - except requests.exceptions.TooManyRedirects: - raise + if not retry_on_error: + raise + log.warning('Connection error. Retrying in a moment.') + sleeper.sleep() def raw_api(self, action, *args, **kwargs): """Sends a call to the API.""" + try: + retry_on_error = kwargs.pop('retry_on_error') + except KeyError: + retry_on_error = True kwargs['action'] = action kwargs['format'] = 'json' data = self._query_string(*args, **kwargs) - res = self.raw_call('api', data) + res = self.raw_call('api', data, retry_on_error=retry_on_error) try: return json.loads(res) except ValueError: if res.startswith('MediaWiki API is not enabled for this site.'): raise errors.APIDisabledError - raise ValueError('Could not decode JSON: %s' % res) + raise errors.InvalidResponse(res) def raw_index(self, action, *args, **kwargs): """Sends a call to index.php rather than the API.""" @@ -300,25 +304,6 @@ class Site(object): data = self._query_string(*args, **kwargs) return self.raw_call('index', data) - def wait_token(self, args=None): - token = WaitToken() - self.wait_tokens[token] = (0, args) - return token - - def wait(self, token, min_wait=0): - retry, args = self.wait_tokens[token] - self.wait_tokens[token] = (retry + 1, args) - if retry > self.max_retries and self.max_retries != -1: - raise errors.MaximumRetriesExceeded(self, token, args) - self.wait_callback(self, token, retry, args) - - timeout = self.retry_timeout * retry - if timeout < min_wait: - timeout = min_wait - log.debug('Sleeping for %d seconds', timeout) - time.sleep(timeout) - return self.wait_tokens[token] - def require(self, major, minor, revision=None, raise_error=True): if self.version is None: if raise_error is None: @@ -378,12 +363,10 @@ class Site(object): if username and password: self.credentials = (username, password, domain) if cookies: - if self.host not in self.conn.cookies: - self.conn.cookies[self.host] = http.CookieJar() - self.conn.cookies[self.host].update(cookies) + self.connection.cookies.update(cookies) if self.credentials: - wait_token = self.wait_token() + sleeper = self.sleepers.make() kwargs = { 'lgname': self.credentials[0], 'lgpassword': self.credentials[1] @@ -397,7 +380,7 @@ class Site(object): elif login['login']['result'] == 'NeedToken': kwargs['lgtoken'] = login['login']['token'] elif login['login']['result'] == 'Throttled': - self.wait(wait_token, login['login'].get('wait', 5)) + sleeper.sleep(int(login['login'].get('wait', 5))) else: raise errors.LoginError(self, login['login']) @@ -434,7 +417,7 @@ class Site(object): title = 'Test' info = self.api('query', titles=title, prop='info', intoken=type) - for i in info['query']['pages'].itervalues(): + for i in six.itervalues(info['query']['pages']): if i['title'] == title: self.tokens[type] = i['%stoken' % type] @@ -525,13 +508,13 @@ class Site(object): files = {'file': file} - wait_token = self.wait_token() + sleeper = self.sleepers.make() while True: data = self.raw_call('api', postdata, files) info = json.loads(data) if not info: info = {} - if self.handle_api_result(info, kwargs=predata, token=wait_token): + if self.handle_api_result(info, kwargs=predata, sleeper=sleeper): return info.get('upload', {}) def parse(self, text=None, title=None, page=None): @@ -690,7 +673,28 @@ class Site(object): toponly='1' if toponly else None)) return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs) - def search(self, search, namespace='0', what='title', redirects=False, limit=None): + def search(self, search, namespace='0', what=None, redirects=False, limit=None): + """ + Perform a full text search. + API doc: https://www.mediawiki.org/wiki/API:Search + + >>> for result in site.search('prefix:Template:Citation/'): + ... print(result.get('title')) + + Args: + search (str): The query string + namespace (int): The namespace to search (default: 0) + what (str): Search scope: 'text' for fulltext, or 'title' for titles only. + Depending on the search backend, both options may not be available. + For instance + `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_ + doesn't support 'title', but instead provides an "intitle:" + query string filter. + redirects (bool): Include redirect pages in the search (option removed in MediaWiki 1.23). + + Returns: + mwclient.listings.List: Search results iterator + """ kwargs = dict(listing.List.generate_kwargs('sr', search=search, namespace=namespace, what=what)) if redirects: diff --git a/mwclient/errors.py b/mwclient/errors.py index 6540c2d73e2d69da9c16f5c4c4845da517e14124..0fbfed83fa6a63bb085824dd2ecabbb012bf7906 100644 --- a/mwclient/errors.py +++ b/mwclient/errors.py @@ -56,3 +56,16 @@ class NoSpecifiedEmail(EmailError): class NoWriteApi(MwClientError): pass + + +class InvalidResponse(MwClientError): + + def __init__(self, response_text=None): + self.message = 'Did not get a valid JSON response from the server. Check that ' + \ + 'you used the correct hostname. If you did, the server might ' + \ + 'be wrongly configured or experiencing temporary problems.' + self.response_text = response_text + MwClientError.__init__(self, self.message, response_text) + + def __str__(self): + return self.message diff --git a/mwclient/image.py b/mwclient/image.py new file mode 100644 index 0000000000000000000000000000000000000000..e072a6836feb88d0c00e7f1b22711284ec137037 --- /dev/null +++ b/mwclient/image.py @@ -0,0 +1,51 @@ +import mwclient.listing +import mwclient.page + + +class Image(mwclient.page.Page): + + def __init__(self, site, name, info=None): + mwclient.page.Page.__init__(self, site, name, info, + extra_properties={'imageinfo': (('iiprop', 'timestamp|user|comment|url|size|sha1|metadata|archivename'), )}) + self.imagerepository = self._info.get('imagerepository', '') + self.imageinfo = self._info.get('imageinfo', ({}, ))[0] + + def imagehistory(self): + return mwclient.listing.PageProperty(self, 'imageinfo', 'ii', + iiprop='timestamp|user|comment|url|size|sha1|metadata|archivename') + + def imageusage(self, namespace=None, filterredir='all', redirect=False, + limit=None, generator=True): + prefix = mwclient.listing.List.get_prefix('iu', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, title=self.name, namespace=namespace, filterredir=filterredir)) + if redirect: + kwargs['%sredirect' % prefix] = '1' + return mwclient.listing.List.get_list(generator)(self.site, 'imageusage', 'iu', limit=limit, return_values='title', **kwargs) + + def duplicatefiles(self, limit=None): + return mwclient.listing.PageProperty(self, 'duplicatefiles', 'df', dflimit=limit) + + def download(self, destination=None): + """ + Download the file. If `destination` is given, the file will be written + directly to the stream. Otherwise the file content will be stored in memory + and returned (with the risk of running out of memory for large files). + + Recommended usage: + + >>> with open(filename, 'wb') as fd: + ... image.download(fd) + + Args: + destination (file object): Destination file + """ + url = self.imageinfo['url'] + if destination is not None: + res = self.site.connection.get(url, stream=True) + for chunk in res.iter_content(1024): + destination.write(chunk) + else: + return self.site.connection.get(url).content + + def __repr__(self): + return "<Image object '%s' for %s>" % (self.name.encode('utf-8'), self.site) diff --git a/mwclient/listing.py b/mwclient/listing.py index 1c83a9ec510fe8ef47712e1b202343cabbefc3ba..ce71b9acbd456c6795842f3bc270bb5db6f2508a 100644 --- a/mwclient/listing.py +++ b/mwclient/listing.py @@ -1,6 +1,9 @@ -import client -import page +import six +import six.moves from six import text_type +from mwclient.util import parse_timestamp +import mwclient.page +import mwclient.image class List(object): @@ -22,7 +25,7 @@ class List(object): self.count = 0 self.max_items = max_items - self._iter = iter(xrange(0)) + self._iter = iter(six.moves.range(0)) self.last = False self.result_member = list_name @@ -31,15 +34,15 @@ class List(object): def __iter__(self): return self - def next(self, full=False): + def __next__(self, full=False): if self.max_items is not None: if self.count >= self.max_items: raise StopIteration try: - item = self._iter.next() + item = six.next(self._iter) self.count += 1 if 'timestamp' in item: - item['timestamp'] = client.parse_timestamp(item['timestamp']) + item['timestamp'] = parse_timestamp(item['timestamp']) if full: return item @@ -56,8 +59,12 @@ class List(object): self.load_chunk() return List.next(self, full=full) + def next(self, full=False): + """ For Python 2.x support """ + return self.__next__(full) + def load_chunk(self): - data = self.site.api('query', (self.generator, self.list_name), *[(text_type(k), v) for k, v in self.args.iteritems()]) + data = self.site.api('query', (self.generator, self.list_name), *[(text_type(k), v) for k, v in six.iteritems(self.args)]) if not data: # Non existent page raise StopIteration @@ -76,11 +83,11 @@ class List(object): def set_iter(self, data): if self.result_member not in data['query']: - self._iter = iter(xrange(0)) + self._iter = iter(six.moves.range(0)) elif type(data['query'][self.result_member]) is list: self._iter = iter(data['query'][self.result_member]) else: - self._iter = data['query'][self.result_member].itervalues() + self._iter = six.itervalues(data['query'][self.result_member]) def __repr__(self): return "<List object '%s' for %s>" % (self.list_name, self.site) @@ -88,7 +95,7 @@ class List(object): @staticmethod def generate_kwargs(_prefix, *args, **kwargs): kwargs.update(args) - for key, value in kwargs.iteritems(): + for key, value in six.iteritems(kwargs): if value is not None and value is not False: yield _prefix + key, value @@ -130,15 +137,15 @@ class GeneratorList(List): self.result_member = 'pages' - self.page_class = page.Page + self.page_class = mwclient.page.Page def next(self): info = List.next(self, full=True) if info['ns'] == 14: return Category(self.site, u'', info) if info['ns'] == 6: - return page.Image(self.site, u'', info) - return page.Page(self.site, u'', info) + return mwclient.image.Image(self.site, u'', info) + return mwclient.page.Page(self.site, u'', info) def load_chunk(self): # Put this here so that the constructor does not fail @@ -147,10 +154,10 @@ class GeneratorList(List): return List.load_chunk(self) -class Category(page.Page, GeneratorList): +class Category(mwclient.page.Page, GeneratorList): def __init__(self, site, name, info=None, namespace=None): - page.Page.__init__(self, site, name, info) + mwclient.page.Page.__init__(self, site, name, info) kwargs = {} kwargs['gcmtitle'] = self.name if namespace: @@ -189,9 +196,9 @@ class PageList(GeneratorList): if self.namespace == 14: return Category(self.site, self.site.namespaces[14] + ':' + name, info) elif self.namespace == 6: - return page.Image(self.site, self.site.namespaces[6] + ':' + name, info) + return mwclient.image.Image(self.site, self.site.namespaces[6] + ':' + name, info) elif self.namespace != 0: - return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) + return mwclient.page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) else: # Guessing page class if type(name) is not int: @@ -199,11 +206,11 @@ class PageList(GeneratorList): if namespace == 14: return Category(self.site, name, info) elif namespace == 6: - return page.Image(self.site, name, info) - return page.Page(self.site, name, info) + return mwclient.image.Image(self.site, name, info) + return mwclient.page.Page(self.site, name, info) def guess_namespace(self, name): - normal_name = page.Page.normalize_title(name) + normal_name = mwclient.page.Page.normalize_title(name) for ns in self.site.namespaces: if ns == 0: continue @@ -223,7 +230,7 @@ class PageProperty(List): self.generator = 'prop' def set_iter(self, data): - for page in data['query']['pages'].itervalues(): + for page in six.itervalues(data['query']['pages']): if page['title'] == self.page.name: self._iter = iter(page.get(self.list_name, ())) return diff --git a/mwclient/page.py b/mwclient/page.py index 9d70b6cf39329f56f6d489cb64197d40ae2481b9..ea54f3a80da1e00914c79bd6efbcfe6e14c3bde8 100644 --- a/mwclient/page.py +++ b/mwclient/page.py @@ -1,16 +1,15 @@ -import client -import errors -import listing - -from six.moves import urllib +import six from six import text_type import time import warnings +from mwclient.util import parse_timestamp +import mwclient.listing +import mwclient.errors class Page(object): - def __init__(self, site, name, info=None, extra_properties={}): + def __init__(self, site, name, info=None, extra_properties=None): if type(name) is type(self): return self.__dict__.update(name.__dict__) self.site = site @@ -19,9 +18,9 @@ class Page(object): if not info: if extra_properties: - prop = 'info|' + '|'.join(extra_properties.iterkeys()) + prop = 'info|' + '|'.join(six.iterkeys(extra_properties)) extra_props = [] - [extra_props.extend(extra_prop) for extra_prop in extra_properties.itervalues()] + [extra_props.extend(extra_prop) for extra_prop in six.itervalues(extra_properties)] else: prop = 'info' extra_props = () @@ -32,7 +31,7 @@ class Page(object): else: info = self.site.api('query', prop=prop, titles=name, inprop='protection', *extra_props) - info = info['query']['pages'].itervalues().next() + info = six.next(six.itervalues(info['query']['pages'])) self._info = info self.namespace = info.get('ns', 0) @@ -42,7 +41,7 @@ class Page(object): else: self.page_title = self.name - self.touched = client.parse_timestamp(info.get('touched', '0000-00-00T00:00:00Z')) + self.touched = parse_timestamp(info.get('touched')) self.revision = info.get('lastrevid', 0) self.exists = 'missing' not in info self.length = info.get('length') @@ -109,14 +108,14 @@ class Page(object): def get_expanded(self): """Deprecated. Use page.text(expandtemplates=True) instead""" - warnings.warn("page.get_expanded() was deprecated in mwclient 0.7.0, use page.text(expandtemplates=True) instead.", + warnings.warn("page.get_expanded() was deprecated in mwclient 0.7.0 and will be removed in 0.8.0, use page.text(expandtemplates=True) instead.", category=DeprecationWarning, stacklevel=2) return self.text(expandtemplates=True) def edit(self, *args, **kwargs): """Deprecated. Use page.text() instead""" - warnings.warn("page.edit() was deprecated in mwclient 0.7.0, please use page.text() instead.", + warnings.warn("page.edit() was deprecated in mwclient 0.7.0 and will be removed in 0.8.0, please use page.text() instead.", category=DeprecationWarning, stacklevel=2) return self.text(*args, **kwargs) @@ -131,7 +130,7 @@ class Page(object): """ if not self.can('read'): - raise errors.InsufficientPermission(self) + raise mwclient.errors.InsufficientPermission(self) if not self.exists: return u'' if section is not None: @@ -157,19 +156,25 @@ class Page(object): """ if not self.site.logged_in and self.site.force_login: # Should we really check for this? - raise errors.LoginError(self.site, 'By default, mwclient protects you from accidentally ' + - 'editing without being logged in. If you actually want to edit without ' - 'logging in, you can set force_login on the Site object to False.') + raise mwclient.errors.LoginError(self.site, 'By default, mwclient protects you from ' + + 'accidentally editing without being logged in. If you ' + + 'actually want to edit without logging in, you can set ' + + 'force_login on the Site object to False.') if self.site.blocked: - raise errors.UserBlocked(self.site.blocked) + raise mwclient.errors.UserBlocked(self.site.blocked) if not self.can('edit'): - raise errors.ProtectedPageError(self) - - if not section: + raise mwclient.errors.ProtectedPageError(self) + + if self.section is not None and section is None: + warnings.warn('From mwclient version 0.8.0, the `save()` method will no longer ' + + 'implicitly use the `section` parameter from the last `text()` or ' + + '`edit()` call. Please pass the `section` parameter explicitly to ' + + 'the save() method to save only a single section.', + category=DeprecationWarning, stacklevel=2) section = self.section if not self.site.writeapi: - raise errors.NoWriteApi(self) + raise mwclient.errors.NoWriteApi(self) data = {} if minor: @@ -192,32 +197,32 @@ class Page(object): summary=summary, token=self.get_token('edit'), **data) if result['edit'].get('result').lower() == 'failure': - raise errors.EditError(self, result['edit']) + raise mwclient.errors.EditError(self, result['edit']) return result try: result = do_edit() - except errors.APIError as e: + except mwclient.errors.APIError as e: if e.code == 'badtoken': # Retry, but only once to avoid an infinite loop self.get_token('edit', force=True) try: result = do_edit() - except errors.APIError as e: + except mwclient.errors.APIError as e: self.handle_edit_error(e, summary) else: self.handle_edit_error(e, summary) # 'newtimestamp' is not included if no change was made if 'newtimestamp' in result['edit'].keys(): - self.last_rev_time = client.parse_timestamp(result['edit'].get('newtimestamp')) + self.last_rev_time = parse_timestamp(result['edit'].get('newtimestamp')) return result['edit'] def handle_edit_error(self, e, summary): if e.code == 'editconflict': - raise errors.EditError(self, summary, e.info) + raise mwclient.errors.EditError(self, summary, e.info) elif e.code in ('protectedtitle', 'cantcreate', 'cantcreate-anon', 'noimageredirect-anon', 'noimageredirect', 'noedit-anon', 'noedit'): - raise errors.ProtectedPageError(self, e.code, e.info) + raise mwclient.errors.ProtectedPageError(self, e.code, e.info) else: raise @@ -232,10 +237,10 @@ class Page(object): """ if not self.can('move'): - raise errors.InsufficientPermission(self) + raise mwclient.errors.InsufficientPermission(self) if not self.site.writeapi: - raise errors.NoWriteApi(self) + raise mwclient.errors.NoWriteApi(self) data = {} if move_talk: @@ -254,10 +259,10 @@ class Page(object): """ if not self.can('delete'): - raise errors.InsufficientPermission(self) + raise mwclient.errors.InsufficientPermission(self) if not self.site.writeapi: - raise errors.NoWriteApi(self) + raise mwclient.errors.NoWriteApi(self) data = {} if watch: @@ -282,63 +287,61 @@ class Page(object): # Properties def backlinks(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True): - prefix = listing.List.get_prefix('bl', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, - namespace=namespace, filterredir=filterredir)) + prefix = mwclient.listing.List.get_prefix('bl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace, filterredir=filterredir)) if redirect: kwargs['%sredirect' % prefix] = '1' kwargs[prefix + 'title'] = self.name - return listing.List.get_list(generator)(self.site, 'backlinks', 'bl', limit=limit, return_values='title', **kwargs) + return mwclient.listing.List.get_list(generator)(self.site, 'backlinks', 'bl', limit=limit, return_values='title', **kwargs) def categories(self, generator=True): if generator: - return listing.PagePropertyGenerator(self, 'categories', 'cl') + return mwclient.listing.PagePropertyGenerator(self, 'categories', 'cl') else: # TODO: return sortkey if wanted - return listing.PageProperty(self, 'categories', 'cl', return_values='title') + return mwclient.listing.PageProperty(self, 'categories', 'cl', return_values='title') def embeddedin(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True): - prefix = listing.List.get_prefix('ei', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, - namespace=namespace, filterredir=filterredir)) + prefix = mwclient.listing.List.get_prefix('ei', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace, filterredir=filterredir)) if redirect: kwargs['%sredirect' % prefix] = '1' kwargs[prefix + 'title'] = self.name - return listing.List.get_list(generator)(self.site, 'embeddedin', 'ei', limit=limit, return_values='title', **kwargs) + return mwclient.listing.List.get_list(generator)(self.site, 'embeddedin', 'ei', limit=limit, return_values='title', **kwargs) def extlinks(self): - return listing.PageProperty(self, 'extlinks', 'el', return_values='*') + return mwclient.listing.PageProperty(self, 'extlinks', 'el', return_values='*') def images(self, generator=True): if generator: - return listing.PagePropertyGenerator(self, 'images', '') + return mwclient.listing.PagePropertyGenerator(self, 'images', '') else: - return listing.PageProperty(self, 'images', '', return_values='title') + return mwclient.listing.PageProperty(self, 'images', '', return_values='title') def iwlinks(self): - return listing.PageProperty(self, 'iwlinks', 'iw', return_values=('prefix', '*')) + return mwclient.listing.PageProperty(self, 'iwlinks', 'iw', return_values=('prefix', '*')) def langlinks(self, **kwargs): - return listing.PageProperty(self, 'langlinks', 'll', return_values=('lang', '*'), **kwargs) + return mwclient.listing.PageProperty(self, 'langlinks', 'll', return_values=('lang', '*'), **kwargs) def links(self, namespace=None, generator=True, redirects=False): - prefix = listing.List.get_prefix('pl', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, namespace=namespace)) + prefix = mwclient.listing.List.get_prefix('pl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace)) if redirects: kwargs['redirects'] = '1' if generator: - return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) + return mwclient.listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) else: - return listing.PageProperty(self, 'links', 'pl', return_values='title', **kwargs) + return mwclient.listing.PageProperty(self, 'links', 'pl', return_values='title', **kwargs) def revisions(self, startid=None, endid=None, start=None, end=None, dir='older', user=None, excludeuser=None, limit=50, prop='ids|timestamp|flags|comment|user', expandtemplates=False, section=None): - kwargs = dict(listing.List.generate_kwargs('rv', startid=startid, endid=endid, - start=start, end=end, user=user, excludeuser=excludeuser)) + kwargs = dict(mwclient.listing.List.generate_kwargs('rv', startid=startid, endid=endid, start=start, + end=end, user=user, excludeuser=excludeuser)) kwargs['rvdir'] = dir kwargs['rvprop'] = prop if expandtemplates: @@ -346,51 +349,12 @@ class Page(object): if section is not None: kwargs['rvsection'] = section - return listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, **kwargs) + return mwclient.listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, **kwargs) def templates(self, namespace=None, generator=True): - kwargs = dict(listing.List.generate_kwargs('tl', namespace=namespace)) + prefix = mwclient.listing.List.get_prefix('tl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace)) if generator: - return listing.PagePropertyGenerator(self, 'templates', 'tl') + return mwclient.listing.PagePropertyGenerator(self, 'templates', prefix, **kwargs) else: - return listing.PageProperty(self, 'templates', 'tl', return_values='title') - - -class Image(Page): - - def __init__(self, site, name, info=None): - Page.__init__(self, site, name, info, - extra_properties={'imageinfo': - (('iiprop', 'timestamp|user|comment|url|size|sha1|metadata|archivename'), ) - }) - self.imagerepository = self._info.get('imagerepository', '') - self.imageinfo = self._info.get('imageinfo', ({}, ))[0] - - def imagehistory(self): - return listing.PageProperty(self, 'imageinfo', 'ii', - iiprop='timestamp|user|comment|url|size|sha1|metadata|archivename') - - def imageusage(self, namespace=None, filterredir='all', redirect=False, - limit=None, generator=True): - prefix = listing.List.get_prefix('iu', generator) - kwargs = dict(listing.List.generate_kwargs(prefix, title=self.name, - namespace=namespace, filterredir=filterredir)) - if redirect: - kwargs['%sredirect' % prefix] = '1' - return listing.List.get_list(generator)(self.site, 'imageusage', 'iu', - limit=limit, return_values='title', **kwargs) - - def duplicatefiles(self, limit=None): - return listing.PageProperty(self, 'duplicatefiles', 'df', - dflimit=limit) - - def download(self): - url = self.imageinfo['url'] - if not url.startswith('http://'): - url = 'http://' + self.site.host + url - url = urllib.parse.urlparse(url) - # TODO: query string - return self.site.connection.get(url[1], url[2]) - - def __repr__(self): - return "<Image object '%s' for %s>" % (self.name.encode('utf-8'), self.site) + return mwclient.listing.PageProperty(self, 'templates', prefix, return_values='title', **kwargs) diff --git a/mwclient/sleep.py b/mwclient/sleep.py new file mode 100644 index 0000000000000000000000000000000000000000..353f558cb61c14057d21620e678fb4b297a61777 --- /dev/null +++ b/mwclient/sleep.py @@ -0,0 +1,50 @@ +import random +import time +import logging +from mwclient.errors import MaximumRetriesExceeded + +log = logging.getLogger(__name__) + + +class Sleepers(object): + + def __init__(self, max_retries, retry_timeout, callback=lambda *x: None): + self.max_retries = max_retries + self.retry_timeout = retry_timeout + self.callback = callback + + def make(self, args=None): + return Sleeper(args, self.max_retries, self.retry_timeout, self.callback) + + +class Sleeper(object): + """ + For any given operation, a `Sleeper` object keeps count of the number of + retries. For each retry, the sleep time increases until the max number of + retries is reached and a `MaximumRetriesExceeded` is raised. The sleeper + object should be discarded once the operation is successful. + """ + + def __init__(self, args, max_retries, retry_timeout, callback): + self.args = args + self.retries = 0 + self.max_retries = max_retries + self.retry_timeout = retry_timeout + self.callback = callback + + def sleep(self, min_time=0): + """ + Sleep a minimum of `min_time` seconds. + The actual sleeping time will increase with the number of retries. + """ + self.retries += 1 + if self.retries > self.max_retries: + raise MaximumRetriesExceeded(self, self.args) + + self.callback(self, self.retries, self.args) + + timeout = self.retry_timeout * (self.retries - 1) + if timeout < min_time: + timeout = min_time + log.debug('Sleeping for %d seconds', timeout) + time.sleep(timeout) diff --git a/mwclient/util.py b/mwclient/util.py new file mode 100644 index 0000000000000000000000000000000000000000..bcd7b60ca8525e85e1afe8ced67ee42f93dd9dc7 --- /dev/null +++ b/mwclient/util.py @@ -0,0 +1,7 @@ +import time + + +def parse_timestamp(t): + if t is None or t == '0000-00-00T00:00:00Z': + return (0, 0, 0, 0, 0, 0, 0, 0, 0) + return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ') diff --git a/setup.py b/setup.py index 7964c7a82504bf60a229b01810b6e96135ce801e..e9854fc72708e8ff9360171ed9355e4590ccbc1f 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ class PyTest(TestCommand): def initialize_options(self): TestCommand.initialize_options(self) - self.pytest_args = '-v --pep8 tests mwclient' + self.pytest_args = '-v --pep8 tests mwclient --cov mwclient' def finalize_options(self): TestCommand.finalize_options(self) @@ -39,13 +39,15 @@ if sys.version_info < (2, 7): requirements.append('ordereddict') setup(name='mwclient', - version='0.7.2.dev1', # Rember to also update __ver__ in client.py + version='0.8.0.dev1', # Rember to also update __ver__ in client.py description='MediaWiki API client', long_description=README, classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7' + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4' ], keywords='mediawiki wikipedia', author='Bryan Tong Minh', @@ -54,6 +56,7 @@ setup(name='mwclient', license='MIT', packages=['mwclient'], cmdclass={'test': PyTest}, - tests_require=['pytest-pep8', 'pytest-cache', 'pytest', 'responses>=0.3.0'], - install_requires=requirements + tests_require=['pytest-pep8', 'pytest-cache', 'pytest', 'pytest-cov', 'funcsigs', 'responses>=0.3.0'], + install_requires=requirements, + zip_safe=True ) diff --git a/tests/test_sleep.py b/tests/test_sleep.py new file mode 100644 index 0000000000000000000000000000000000000000..d41d83122a01290ea1fa7d909d7325c1884ae404 --- /dev/null +++ b/tests/test_sleep.py @@ -0,0 +1,58 @@ +# encoding=utf-8 +from __future__ import print_function +import unittest +import time +import mock +import pytest +from mwclient.sleep import Sleepers +from mwclient.sleep import Sleeper +from mwclient.errors import MaximumRetriesExceeded + +if __name__ == "__main__": + print() + print("Note: Running in stand-alone mode. Consult the README") + print(" (section 'Contributing') for advice on running tests.") + print() + + +class TestSleepers(unittest.TestCase): + + def setUp(self): + self.sleep = mock.patch('time.sleep').start() + self.max_retries = 10 + self.sleepers = Sleepers(self.max_retries, 30) + + def tearDown(self): + mock.patch.stopall() + + def test_make(self): + sleeper = self.sleepers.make() + assert type(sleeper) == Sleeper + assert sleeper.retries == 0 + + def test_sleep(self): + sleeper = self.sleepers.make() + sleeper.sleep() + sleeper.sleep() + self.sleep.assert_has_calls([mock.call(0), mock.call(30)]) + + def test_min_time(self): + sleeper = self.sleepers.make() + sleeper.sleep(5) + self.sleep.assert_has_calls([mock.call(5)]) + + def test_retries_count(self): + sleeper = self.sleepers.make() + sleeper.sleep() + sleeper.sleep() + assert sleeper.retries == 2 + + def test_max_retries(self): + sleeper = self.sleepers.make() + for x in range(self.max_retries): + sleeper.sleep() + with pytest.raises(MaximumRetriesExceeded): + sleeper.sleep() + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000000000000000000000000000000000000..19e7fa173fae3fc03902e3c7b73a38b8050de0b3 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,23 @@ +# encoding=utf-8 +from __future__ import print_function +import unittest +import time +from mwclient.util import parse_timestamp + +if __name__ == "__main__": + print() + print("Note: Running in stand-alone mode. Consult the README") + print(" (section 'Contributing') for advice on running tests.") + print() + + +class TestUtil(unittest.TestCase): + + def test_parse_empty_timestamp(self): + assert (0, 0, 0, 0, 0, 0, 0, 0, 0) == parse_timestamp('0000-00-00T00:00:00Z') + + def test_parse_nonempty_timestamp(self): + assert time.struct_time([2015, 1, 2, 20, 18, 36, 4, 2, -1]) == parse_timestamp('2015-01-02T20:18:36Z') + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 9bb857b25d350bef4c5562efc16aa8c6bdc756fc..17a0f7ac18150ea340e04376b759d6046f8cc98d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,7 @@ [tox] -envlist = py26,py27 +envlist = py26,py27,py34 [testenv] deps=pytest pytest-pep8 responses -commands=py.test -v --pep8 mwclient tests \ No newline at end of file +commands=py.test -v --pep8 mwclient tests