From db661be45f8b1c96c6d279b0aeaaa6361d214f94 Mon Sep 17 00:00:00 2001
From: Bryan Tong Minh <bryan.tongminh@gmail.com>
Date: Mon, 6 Apr 2009 17:43:26 +0000
Subject: [PATCH] Bump version to 0.6.2: * Dropped the stream_iter argument
 from the HTTP functions * The data argument is now either a string or a
 file-like object that has a length attribute * Rewrote the upload code and
 moved it to a different file

---
 mwclient/client.py |  52 ++-----------------
 mwclient/http.py   |  43 +++++++++-------
 mwclient/upload.py | 123 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 64 deletions(-)
 create mode 100644 mwclient/upload.py

diff --git a/mwclient/client.py b/mwclient/client.py
index 84fa24c..0a26fe4 100644
--- a/mwclient/client.py
+++ b/mwclient/client.py
@@ -1,4 +1,4 @@
-__ver__ = '0.6.1'
+__ver__ = '0.6.2'
 
 import urllib, urlparse
 import time, random
@@ -7,6 +7,7 @@ import socket
 
 import simplejson
 import http
+import upload
 
 import errors
 import listing, page
@@ -329,58 +330,15 @@ class Site(object):
 		predata['wpUpload'] = 'Upload file'
 		predata['wpSourceType'] = 'file'
 		predata['wpDestFile'] = filename
-	
-		boundary = '----%s----' % ''.join((random.choice(
-			'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') 
-			for i in xrange(32)))
-		data_header = []
-		for name, value in predata.iteritems():
-			data_header.append('--' + boundary) 
-			data_header.append('Content-Disposition: form-data; name="%s"' % name)
-			data_header.append('')
-			data_header.append(value.encode('utf-8'))
-			
-		data_header.append('--' + boundary) 
-		data_header.append('Content-Disposition: form-data; name="wpUploadFile"; filename="%s"' % \
-			filename.encode('utf-8'))
-		data_header.append('Content-Type: application/octet-stream')
-		data_header.append('')
-		data_header.append('')
-		
-		postdata = '\r\n'.join(data_header)
-		content_length = (len(postdata) + file_size + 
-				2 + # \r\n
-				(6 + len(boundary)) +
-				49 + # wpUpload
-				2 + # \r\n 
-				1 + # 1
-				(4 + len(boundary)) + 
-				2)
-		
-		def iterator():
-			yield postdata
-			while True:
-				chunk = file.read(32768)
-				if not chunk: break
-				yield chunk
-			yield '\r\n'
-			
-			yield '--%s\r\n' % boundary
-			yield 'Content-Disposition: form-data; name="wpUpload"\r\n'
-			yield '\r\n'
-			yield '1'
-			
-			yield '--%s--' % boundary
-			yield '\r\n'
+		
+		postdata = upload.UploadFile('wpUploadFile', filename, file_size, file, predata)
 		
 		wait_token = self.wait_token()
 		while True:
 			try:
 				self.connection.post(self.host,
 					self.path + 'index.php?title=Special:Upload&maxlag=' + self.max_lag,
-					headers = {'Content-Type': 'multipart/form-data; boundary=' + boundary,
-						'Content-Length': str(content_length)},
-					stream_iter = iterator()).read()
+					data = postdata).read()
 			except errors.HTTPStatusError, e:
 				if e[0] == 503 and e[1].getheader('X-Database-Lag'):
 					self.wait(wait_token, int(e[1].getheader('Retry-After')))
diff --git a/mwclient/http.py b/mwclient/http.py
index 79a1f60..6a3e726 100644
--- a/mwclient/http.py
+++ b/mwclient/http.py
@@ -4,6 +4,7 @@ import httplib
 import socket
 import time
 
+import upload
 import errors
 
 from client import __ver__
@@ -61,7 +62,7 @@ class HTTPPersistentConnection(object):
 		self._conn.connect()
 		self.last_request = time.time()
 		
-	def request(self, method, host, path, headers, data, stream_iter = None,
+	def request(self, method, host, path, headers, data,
 			raise_on_not_ok = True, auto_redirect = True):		
 		
 		# Strip scheme
@@ -75,19 +76,27 @@ class HTTPPersistentConnection(object):
 		
 		_headers = headers
 		headers = {}
-		if not data: data = ''
+		
 		headers['Connection'] = 'Keep-Alive'
 		headers['User-Agent'] = 'MwClient/' + __ver__
 		headers['Host'] = host
 		if host in self.cookies: 
 			headers['Cookie'] = self.cookies[host].get_cookie_header()
+		if issubclass(data.__class__, upload.Upload):
+			headers['Content-Type'] = data.content_type
+			headers['Content-Length'] = data.length;
+		elif data:
+			headers['Content-Length'] = len(data)
+			
 		if _headers: headers.update(_headers)
 		
 		try:
-			self._conn.request(method, path, data, headers)
-			if stream_iter:
-				for s_data in stream_iter:
-					self._conn.send(s_data)
+			self._conn.request(method, path, headers = headers)
+			if issubclass(data.__class__, upload.Upload):
+				for str in data:
+					self._conn.send(str)
+			elif data:
+				self._conn.send(data)
 			
 			self.last_request = time.time()
 			try:
@@ -100,8 +109,8 @@ class HTTPPersistentConnection(object):
 		except socket.error, e:
 			self._conn.close()
 			raise errors.HTTPError, e
-		except Exception, e:
-			raise errors.HTTPError, e
+		#except Exception, e:
+		#	raise errors.HTTPError, e
 				
 		if not host in self.cookies: self.cookies[host] = CookieJar()
 		self.cookies[host].extract_cookies(res)
@@ -136,7 +145,7 @@ class HTTPPersistentConnection(object):
 					conn = self.__class__(location[1], self.pool)
 					self.pool.append(([location[1]], conn))
 				return self.pool.request(method, location[1], path, 
-					headers, data, stream_iter, raise_on_not_ok, auto_redirect)
+					headers, data, raise_on_not_ok, auto_redirect)
 			
 		if res.status != 200 and raise_on_not_ok:
 			try:
@@ -148,8 +157,8 @@ class HTTPPersistentConnection(object):
 		
 	def get(self, host, path, headers = None):
 		return self.request('GET', host, path, headers, None)
-	def post(self, host, path, headers = None, data = None, stream_iter = None):
-		return self.request('POST', host, path, headers, data, stream_iter)
+	def post(self, host, path, headers = None, data = None):
+		return self.request('POST', host, path, headers, data)
 	def head(self, host, path, headers = None, auto_redirect = False):
 		res = self.request('HEAD', host, path, headers, 
 			data = None, raise_on_not_ok = False,
@@ -161,12 +170,12 @@ class HTTPPersistentConnection(object):
 		self._conn.close()
 
 class HTTPConnection(HTTPPersistentConnection):
-	def request(self, method, host, path, headers, data, stream_iter = None,
+	def request(self, method, host, path, headers, data,
 			raise_on_not_ok = True, auto_redirect = True):
 		if not headers: headers = {}
 		headers['Connection'] = 'Close'
 		res = HTTPPersistentConnection.request(self, method, host, path, headers, data, 
-			stream_iter, raise_on_not_ok, auto_redirect)
+			raise_on_not_ok, auto_redirect)
 		return res
 
 class HTTPSPersistentConnection(HTTPPersistentConnection):
@@ -210,16 +219,16 @@ class HTTPPool(list):
 	def get(self, host, path, headers = None):
 		return self.find_connection(host).get(host, 
 			path, headers)
-	def post(self, host, path, headers = None, data = None, stream_iter = None):
+	def post(self, host, path, headers = None, data = None):
 		return self.find_connection(host).post(host, 
-			path, headers, data, stream_iter)
+			path, headers, data)
 	def head(self, host, path, headers = None, auto_redirect = False):
 		return self.find_connection(host).head(host, 
 			path, headers, auto_redirect)
-	def request(self, method, host, path, headers, data, stream_iter,
+	def request(self, method, host, path, headers, data,
 			raise_on_not_ok, auto_redirect):
 		return self.find_connection(host).request(method, host, path,
-			headers, data, stream_iter, raise_on_not_ok, auto_redirect)
+			headers, data, raise_on_not_ok, auto_redirect)
 	def close(self):
 		for hosts, conn in self:
 			conn.close()
diff --git a/mwclient/upload.py b/mwclient/upload.py
new file mode 100644
index 0000000..eacd25b
--- /dev/null
+++ b/mwclient/upload.py
@@ -0,0 +1,123 @@
+import random
+from cStringIO import StringIO
+
+class Upload(object):
+	"""
+	Base class for upload objects. This class should always be subclassed 
+	by upload classes and its constructor always be called.
+	
+	Upload classes are file like object/iterators that have additional 
+	variables length and content_type.
+	"""
+	
+	BLOCK_SIZE = 8192
+	def __init__(self, length, content_type):
+		self.length = length
+		self.content_type = content_type
+		
+	def __iter__(self):
+		return self
+	def next(self):
+		data = self.read(self.BLOCK_SIZE)
+		if data == '':
+			raise StopIteration
+		return data
+		
+	@staticmethod
+	def encode(s):
+		if type(s) is str:
+			return s
+		elif type(s) is unicode:
+			return s.encode('utf-8')
+		else:
+			return s
+
+class UploadRawData(Upload):
+	"""
+	This upload class is simply a wrapper around StringIO 
+	"""
+	def __init__(self, data, content_type = 'application/x-www-form-urlencoded'):
+		self.fstr = StringIO(data)
+		Upload.__init__(self, len(data), content_type)
+	def read(self, length = -1):
+		return self.fstr.read(length)
+		
+		
+class UploadDict(UploadRawData):
+	"""
+	This class creates an x-www-form-urlencoded representation of a dict 
+	and then passes it through its parent UploadRawData 
+	"""
+	def __init__(self, data):
+		postdata = '&'.join('%s=%s' % (self.encode(i), self.encode(data[i])) for i in data)
+		UploadRawData.__init__(self, postdata)
+		
+class UploadFile(Upload):
+	"""
+	This class accepts a file with information and a postdata dictionary
+	and creates a multipart/form-data representation from it.
+	"""
+	STAGE_FILEHEADER = 0
+	STAGE_FILE = 1
+	STAGE_POSTDATA = 2
+	STAGE_FOOTER = 3
+	STAGE_DONE = 4
+	def __init__(self, filefield, filename, filelength, file, data):
+		self.stage = self.STAGE_FILEHEADER;
+		self.boundary = self.generate_boundary()
+		self.postdata = self.generate_multipart_from_dict(data)
+		self.footer = '\r\n--%s--\r\n' % self.boundary
+		self.fileheader = ('--%s\r\n' % self.boundary +
+				'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' %
+					(self.encode(filefield), self.encode(filename)) +
+				'Content-Type: application/octet-stream\r\n\r\n')
+		self.file = file
+		self.length_left = filelength
+		self.str_data = None
+		
+		Upload.__init__(self, len(self.fileheader) + filelength + len(self.postdata) + len(self.footer) + 2,
+			'multipart/form-data; boundary=' + self.boundary)
+		
+	def read(self, length):
+		if self.stage == self.STAGE_DONE:
+			return ''
+		elif self.stage != self.STAGE_FILE:	
+			if self.str_data is None:
+				if self.stage == self.STAGE_FILEHEADER:
+				  	self.str_data = StringIO(self.fileheader)
+				elif self.stage == self.STAGE_POSTDATA:
+				  	self.str_data = StringIO(self.postdata)
+			  	elif self.stage == self.STAGE_FOOTER:
+			  		self.str_data = StringIO(self.footer)
+			data = self.str_data.read(length)
+		else:
+			if self.length_left:
+				if length > self.length_left:
+					length = self.length_left
+				data = self.file.read(length)
+				self.length_left -= len(data)
+			else:
+				self.stage += 1
+				return '\r\n'
+		
+		if data == '':
+			self.stage += 1
+			self.str_data = None
+			return self.read(length)
+		return data
+
+		
+	@staticmethod
+	def generate_boundary():
+		return '----%s----' % ''.join((random.choice(
+			'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') 
+			for i in xrange(32)))
+	
+	def generate_multipart_from_dict(self, data):
+		postdata = []
+		for i in data:
+			postdata.append('--' + self.boundary) 
+			postdata.append('Content-Disposition: form-data; name="%s"' % self.encode(i))
+			postdata.append('')
+			postdata.append(self.encode(data[i]))
+		return '\r\n'.join(postdata)
-- 
GitLab