add url path normalization to httpretty

This commit is contained in:
Steve Pulec 2013-02-26 14:47:47 -05:00
parent a07ec2940d
commit 1db787e7da

View File

@ -605,6 +605,28 @@ class Entry(Py3kObject):
fk.seek(0)
def url_fix(s, charset='utf-8'):
    """Percent-encode unsafe characters in the path and query of a URL.

    Sometimes you get an URL by a user that just isn't a real
    URL because it contains unsafe characters like ' ' and so on. This
    function can fix some of the problems in a similar way browsers
    handle data entered by the user:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    Existing percent-escapes are left alone ('%' is treated as safe in the
    path), so an already-quoted URL is not quoted a second time.

    :param s: The URL (or bare path) to fix. Falsy values such as ``None``
              or ``''`` are returned unchanged.
    :param charset: The target charset for the URL if the url was
                    given as unicode string.
    :returns: The URL as a native string with its path and query quoted.
    """
    # Nothing to normalise; also avoids urlsplit() blowing up on None.
    if not s:
        return s

    # Imports stay local to the function (as in the original) but are
    # resolved for whichever major Python version is running.
    try:  # Python 3
        from urllib.parse import quote, quote_plus, urlsplit, urlunsplit
    except ImportError:  # Python 2
        from urllib import quote, quote_plus
        from urlparse import urlsplit, urlunsplit
        # Python 2's quote() works on byte strings, so encode text first.
        if isinstance(s, unicode):  # noqa: F821 -- Python 2 builtin
            s = s.encode(charset, 'ignore')
        quote_options = {}
    else:
        # Python 3's urlunsplit() cannot mix bytes and str components, so
        # work on text throughout and let quote() do the charset encoding.
        if isinstance(s, bytes):
            s = s.decode(charset, 'ignore')
        quote_options = {'encoding': charset, 'errors': 'ignore'}

    scheme, netloc, path, qs, anchor = urlsplit(s)
    # '/' keeps the path segments intact; '%' preserves existing escapes.
    path = quote(path, '/%', **quote_options)
    # ':', '&' and '=' are structural characters of a query string.
    qs = quote_plus(qs, ':&=', **quote_options)
    return urlunsplit((scheme, netloc, path, qs, anchor))
class URIInfo(Py3kObject):
def __init__(self,
username='',
@ -629,7 +651,7 @@ class URIInfo(Py3kObject):
port = 443
self.port = port or 80
self.path = path or ''
self.path = url_fix(path) or ''
self.query = query or ''
self.scheme = scheme or (self.port is 80 and "http" or "https")
self.fragment = fragment or ''