From db661be45f8b1c96c6d279b0aeaaa6361d214f94 Mon Sep 17 00:00:00 2001 From: Bryan Tong Minh <bryan.tongminh@gmail.com> Date: Mon, 6 Apr 2009 17:43:26 +0000 Subject: [PATCH] Bump version to 0.6.2: * Dropped the stream_iter argument from the HTTP functions * The data argument is now either a string or a file like object that has a length attribute * Rewrite the upload code and move it to a different file --- mwclient/client.py | 52 ++----------------- mwclient/http.py | 43 +++++++++------- mwclient/upload.py | 123 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 64 deletions(-) create mode 100644 mwclient/upload.py diff --git a/mwclient/client.py b/mwclient/client.py index 84fa24c..0a26fe4 100644 --- a/mwclient/client.py +++ b/mwclient/client.py @@ -1,4 +1,4 @@ -__ver__ = '0.6.1' +__ver__ = '0.6.2' import urllib, urlparse import time, random @@ -7,6 +7,7 @@ import socket import simplejson import http +import upload import errors import listing, page @@ -329,58 +330,15 @@ class Site(object): predata['wpUpload'] = 'Upload file' predata['wpSourceType'] = 'file' predata['wpDestFile'] = filename - - boundary = '----%s----' % ''.join((random.choice( - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') - for i in xrange(32))) - data_header = [] - for name, value in predata.iteritems(): - data_header.append('--' + boundary) - data_header.append('Content-Disposition: form-data; name="%s"' % name) - data_header.append('') - data_header.append(value.encode('utf-8')) - - data_header.append('--' + boundary) - data_header.append('Content-Disposition: form-data; name="wpUploadFile"; filename="%s"' % \ - filename.encode('utf-8')) - data_header.append('Content-Type: application/octet-stream') - data_header.append('') - data_header.append('') - - postdata = '\r\n'.join(data_header) - content_length = (len(postdata) + file_size + - 2 + # \r\n - (6 + len(boundary)) + - 49 + # wpUpload - 2 + # \r\n - 1 + # 1 - (4 + len(boundary)) + - 2) - - def iterator(): - yield postdata - while True: - chunk = file.read(32768) - if not chunk: break - yield chunk - yield '\r\n' - - yield '--%s\r\n' % boundary - yield 'Content-Disposition: form-data; name="wpUpload"\r\n' - yield '\r\n' - yield '1' - - yield '--%s--' % boundary - yield '\r\n' + + postdata = upload.UploadFile('wpUploadFile', filename, file_size, file, predata) wait_token = self.wait_token() while True: try: self.connection.post(self.host, self.path + 'index.php?title=Special:Upload&maxlag=' + self.max_lag, - headers = {'Content-Type': 'multipart/form-data; boundary=' + boundary, - 'Content-Length': str(content_length)}, - stream_iter = iterator()).read() + data = postdata).read() except errors.HTTPStatusError, e: if e[0] == 503 and e[1].getheader('X-Database-Lag'): self.wait(wait_token, int(e[1].getheader('Retry-After'))) diff --git a/mwclient/http.py b/mwclient/http.py index 79a1f60..6a3e726 100644 --- a/mwclient/http.py +++ b/mwclient/http.py @@ -4,6 +4,7 @@ import httplib import socket import time +import upload import errors from client import __ver__ @@ -61,7 +62,7 @@ class HTTPPersistentConnection(object): self._conn.connect() self.last_request = time.time() - def request(self, method, host, path, headers, data, stream_iter = None, + def request(self, method, host, path, headers, data, raise_on_not_ok = True, auto_redirect = True): # Strip scheme @@ -75,19 +76,27 @@ class HTTPPersistentConnection(object): _headers = headers headers = {} - if not data: data = '' + headers['Connection'] = 'Keep-Alive' headers['User-Agent'] = 'MwClient/' + __ver__ headers['Host'] = host if host in self.cookies: headers['Cookie'] = self.cookies[host].get_cookie_header() + if issubclass(data.__class__, upload.Upload): + headers['Content-Type'] = data.content_type + headers['Content-Length'] = data.length; + elif data: + headers['Content-Length'] = len(data) + if _headers: headers.update(_headers) try: - self._conn.request(method, path, data, headers) - if stream_iter: - for s_data in stream_iter: - self._conn.send(s_data) + self._conn.request(method, path, headers = headers) + if issubclass(data.__class__, upload.Upload): + for str in data: + self._conn.send(str) + elif data: + self._conn.send(data) self.last_request = time.time() try: @@ -100,8 +109,8 @@ class HTTPPersistentConnection(object): except socket.error, e: self._conn.close() raise errors.HTTPError, e - except Exception, e: - raise errors.HTTPError, e + #except Exception, e: + # raise errors.HTTPError, e if not host in self.cookies: self.cookies[host] = CookieJar() self.cookies[host].extract_cookies(res) @@ -136,7 +145,7 @@ class HTTPPersistentConnection(object): conn = self.__class__(location[1], self.pool) self.pool.append(([location[1]], conn)) return self.pool.request(method, location[1], path, - headers, data, stream_iter, raise_on_not_ok, auto_redirect) + headers, data, raise_on_not_ok, auto_redirect) if res.status != 200 and raise_on_not_ok: try: @@ -148,8 +157,8 @@ class HTTPPersistentConnection(object): def get(self, host, path, headers = None): return self.request('GET', host, path, headers, None) - def post(self, host, path, headers = None, data = None, stream_iter = None): - return self.request('POST', host, path, headers, data, stream_iter) + def post(self, host, path, headers = None, data = None): + return self.request('POST', host, path, headers, data) def head(self, host, path, headers = None, auto_redirect = False): res = self.request('HEAD', host, path, headers, data = None, raise_on_not_ok = False, @@ -161,12 +170,12 @@ class HTTPPersistentConnection(object): self._conn.close() class HTTPConnection(HTTPPersistentConnection): - def request(self, method, host, path, headers, data, stream_iter = None, + def request(self, method, host, path, headers, data, raise_on_not_ok = True, auto_redirect = True): if not headers: headers = {} headers['Connection'] = 'Close' res = HTTPPersistentConnection.request(self, method, host, path, headers, data, - stream_iter, raise_on_not_ok, auto_redirect) + raise_on_not_ok, auto_redirect) return res class HTTPSPersistentConnection(HTTPPersistentConnection): @@ -210,16 +219,16 @@ class HTTPPool(list): def get(self, host, path, headers = None): return self.find_connection(host).get(host, path, headers) - def post(self, host, path, headers = None, data = None, stream_iter = None): + def post(self, host, path, headers = None, data = None): return self.find_connection(host).post(host, - path, headers, data, stream_iter) + path, headers, data) def head(self, host, path, headers = None, auto_redirect = False): return self.find_connection(host).head(host, path, headers, auto_redirect) - def request(self, method, host, path, headers, data, stream_iter, + def request(self, method, host, path, headers, data, raise_on_not_ok, auto_redirect): return self.find_connection(host).request(method, host, path, - headers, data, stream_iter, raise_on_not_ok, auto_redirect) + headers, data, raise_on_not_ok, auto_redirect) def close(self): for hosts, conn in self: conn.close() diff --git a/mwclient/upload.py b/mwclient/upload.py new file mode 100644 index 0000000..eacd25b --- /dev/null +++ b/mwclient/upload.py @@ -0,0 +1,123 @@ +import random +from cStringIO import StringIO + +class Upload(object): + """ + Base class for upload objects. This class should always be subclassed + by upload classes and its constructor always be called. + + Upload classes are file like object/iterators that have additional + variables length and content_type. + """ + + BLOCK_SIZE = 8192 + def __init__(self, length, content_type): + self.length = length + self.content_type = content_type + + def __iter__(self): + return self + def next(self): + data = self.read(self.BLOCK_SIZE) + if data == '': + raise StopIteration + return data + + @staticmethod + def encode(s): + if type(s) is str: + return s + elif type(s) is unicode: + return s.encode('utf-8') + else: + return s + +class UploadRawData(Upload): + """ + This upload class is simply a wrapper around StringIO + """ + def __init__(self, data, content_type = 'application/x-www-form-urlencoded'): + self.fstr = StringIO(data) + Upload.__init__(self, len(data), content_type) + def read(self, length = -1): + return self.fstr.read(length) + + +class UploadDict(UploadRawData): + """ + This class creates an x-www-form-urlencoded representation of a dict + and then passes it through its parent UploadRawData + """ + def __init__(self, data): + postdata = '&'.join('%s=%s' % (self.encode(i), self.encode(data[i])) for i in data) + UploadRawData.__init__(self, postdata) + +class UploadFile(Upload): + """ + This class accepts a file with information and a postdata dictionary + and creates a multipart/form-data representation from it. + """ + STAGE_FILEHEADER = 0 + STAGE_FILE = 1 + STAGE_POSTDATA = 2 + STAGE_FOOTER = 3 + STAGE_DONE = 4 + def __init__(self, filefield, filename, filelength, file, data): + self.stage = self.STAGE_FILEHEADER; + self.boundary = self.generate_boundary() + self.postdata = self.generate_multipart_from_dict(data) + self.footer = '\r\n--%s--\r\n' % self.boundary + self.fileheader = ('--%s\r\n' % self.boundary + + 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % + (self.encode(filefield), self.encode(filename)) + + 'Content-Type: application/octet-stream\r\n\r\n') + self.file = file + self.length_left = filelength + self.str_data = None + + Upload.__init__(self, len(self.fileheader) + filelength + len(self.postdata) + len(self.footer) + 2, + 'multipart/form-data; boundary=' + self.boundary) + + def read(self, length): + if self.stage == self.STAGE_DONE: + return '' + elif self.stage != self.STAGE_FILE: + if self.str_data is None: + if self.stage == self.STAGE_FILEHEADER: + self.str_data = StringIO(self.fileheader) + elif self.stage == self.STAGE_POSTDATA: + self.str_data = StringIO(self.postdata) + elif self.stage == self.STAGE_FOOTER: + self.str_data = StringIO(self.footer) + data = self.str_data.read(length) + else: + if self.length_left: + if length > self.length_left: + length = self.length_left + data = self.file.read(length) + self.length_left -= len(data) + else: + self.stage += 1 + return '\r\n' + + if data == '': + self.stage += 1 + self.str_data = None + return self.read(length) + return data + + + @staticmethod + def generate_boundary(): + return '----%s----' % ''.join((random.choice( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') + for i in xrange(32))) + + def generate_multipart_from_dict(self, data): + postdata = [] + for i in data: + postdata.append('--' + self.boundary) + postdata.append('Content-Disposition: form-data; name="%s"' % self.encode(i)) + postdata.append('') + postdata.append(self.encode(data[i])) + return '\r\n'.join(postdata) -- GitLab