# Helper from moto/packages/httpretty.py.  URIInfo.__init__ uses it to
# normalise the request path:  self.path = url_fix(path) or ''
def url_fix(s, charset='utf-8'):
    """Percent-encode the unsafe characters in a URL (or bare path).

    Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on.  This function can
    fix some of the problems in a similar way browsers handle data entered
    by the user:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    The path is quoted with ``/`` and ``%`` kept as-is (so sequences that
    are already percent-encoded are not encoded twice); the query string
    is quoted with ``:``, ``&`` and ``=`` kept as-is and spaces turned
    into ``+``.  Scheme, netloc and fragment are passed through untouched.

    :param s: The URL or path to fix, as text or as a byte string.
        Falsy values (``None``, ``''``) are returned unchanged so that
        callers can keep writing ``url_fix(path) or ''``.
    :param charset: The target charset for the URL if the url was
        given as unicode string.  Characters that cannot be represented
        in this charset are dropped.
    :returns: The fixed URL as a native ``str``.
    """
    # Nothing to split or quote; also avoids handing None to urlsplit().
    if not s:
        return s

    # Imports are kept local, as in the original, and cover both major
    # Python versions (urllib/urlparse were merged into urllib.parse).
    try:  # Python 3
        from urllib.parse import quote, quote_plus, urlsplit, urlunsplit
    except ImportError:  # Python 2
        from urllib import quote, quote_plus
        from urlparse import urlsplit, urlunsplit

    text_type = type(u'')
    if str is bytes:
        # Python 2: quote() works on byte strings, so encode text first.
        if isinstance(s, text_type):
            s = s.encode(charset, 'ignore')
        codec = {}
    else:
        # Python 3: work on text throughout (mixing bytes and str parts
        # would make urlunsplit() fail) and let quote() do the encoding.
        if isinstance(s, bytes):
            s = s.decode(charset, 'ignore')
        codec = {'encoding': charset, 'errors': 'ignore'}

    scheme, netloc, path, qs, anchor = urlsplit(s)
    path = quote(path, '/%', **codec)
    qs = quote_plus(qs, ':&=', **codec)
    return urlunsplit((scheme, netloc, path, qs, anchor))