From 1db787e7da562ee3bbee507bc7c7c282a716e8fc Mon Sep 17 00:00:00 2001
From: Steve Pulec
Date: Tue, 26 Feb 2013 14:47:47 -0500
Subject: [PATCH] add url path normalization to httpretty

---
 moto/packages/httpretty.py | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/moto/packages/httpretty.py b/moto/packages/httpretty.py
index 8074f9b53..a9bbf8f79 100644
--- a/moto/packages/httpretty.py
+++ b/moto/packages/httpretty.py
@@ -605,6 +605,50 @@ class Entry(Py3kObject):
         fk.seek(0)
 
 
+def url_fix(s, charset='utf-8'):
+    """Percent-encode unsafe characters in the path and query of a URL.
+
+    Sometimes you get a URL from a user that just isn't a real URL
+    because it contains unsafe characters like ' ' and so on. This
+    function can fix some of the problems in a similar way browsers
+    handle data entered by the user:
+
+    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
+    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'
+
+    :param s: The URL (or bare path) to fix. Falsy values such as
+              ``None`` or ``''`` yield ``''``.
+    :param charset: The target charset for the URL if the url was
+                    given as unicode string.
+    :returns: The fixed URL as a native ``str``.
+    """
+    # Imported locally so the helper is self-contained and works with both
+    # the Python 2 and the Python 3 standard-library module layouts.
+    try:
+        from urllib import quote, quote_plus
+        from urlparse import urlsplit, urlunsplit
+    except ImportError:
+        from urllib.parse import quote, quote_plus, urlsplit, urlunsplit
+
+    if not s:
+        return ''
+    if str is bytes:
+        # Python 2: quote() operates on byte strings.
+        if isinstance(s, unicode):
+            s = s.encode(charset, 'ignore')
+        scheme, netloc, path, qs, anchor = urlsplit(s)
+    else:
+        # Python 3: split as text, then quote the charset-encoded bytes.
+        if isinstance(s, bytes):
+            s = s.decode(charset, 'ignore')
+        scheme, netloc, path, qs, anchor = urlsplit(s)
+        path = path.encode(charset, 'ignore')
+        qs = qs.encode(charset, 'ignore')
+    path = quote(path, '/%')
+    qs = quote_plus(qs, ':&=')
+    return urlunsplit((scheme, netloc, path, qs, anchor))
+
+
 class URIInfo(Py3kObject):
     def __init__(self,
                  username='',
@@ -629,7 +673,7 @@ class URIInfo(Py3kObject):
             port = 443
 
         self.port = port or 80
-        self.path = path or ''
+        self.path = url_fix(path) or ''
         self.query = query or ''
         self.scheme = scheme or (self.port is 80 and "http" or "https")
         self.fragment = fragment or ''