Package cherrypy :: Package test :: Module test_encoding
[hide private]
[frames | no frames]

Source Code for Module cherrypy.test.test_encoding

  1   
  2  import gzip 
  3  import sys 
  4   
  5  import cherrypy 
  6  from cherrypy._cpcompat import BytesIO, IncompleteRead, ntob, ntou 
  7   
  8  europoundUnicode = ntou('\x80\xa3') 
  9  sing = ntou("\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?", 'escape') 
 10  sing8 = sing.encode('utf-8') 
 11  sing16 = sing.encode('utf-16') 
 12   
 13   
 14  from cherrypy.test import helper 
 15   
 16   
class EncodingTests(helper.CPWebCase):
    """Tests for request decoding and response encoding/gzip handling.

    ``setup_server`` mounts three small handler classes (``Root``, ``GZIP``
    and ``Decode``); each test method requests one of their pages through
    ``self.getPage`` and checks the status, headers and/or body.
    """

    def setup_server():
        """Build the handler tree used by the tests and mount it."""

        class Root:
            def index(self, param):
                assert param == europoundUnicode, "%r != %r" % (param, europoundUnicode)
                yield europoundUnicode
            index.exposed = True

            def mao_zedong(self):
                return sing
            mao_zedong.exposed = True

            def utf8(self):
                return sing8
            utf8.exposed = True
            utf8._cp_config = {'tools.encode.encoding': 'utf-8'}

            def cookies_and_headers(self):
                # if the headers have non-ascii characters and a cookie has
                # any part which is unicode (even ascii), the response
                # should not fail.
                cherrypy.response.cookie['candy'] = 'bar'
                cherrypy.response.cookie['candy']['domain'] = 'cherrypy.org'
                cherrypy.response.headers['Some-Header'] = 'My d\xc3\xb6g has fleas'
                return 'Any content'
            cookies_and_headers.exposed = True

            def reqparams(self, *args, **kwargs):
                # Echo the parsed request parameters as "key: value" pairs,
                # each pair encoded as utf8 and joined with ", ".
                return ntob(', ').join([": ".join((k, v)).encode('utf8')
                                        for k, v in cherrypy.request.params.items()])
            reqparams.exposed = True

            def nontext(self, *args, **kwargs):
                cherrypy.response.headers['Content-Type'] = 'application/binary'
                return '\x00\x01\x02\x03'
            nontext.exposed = True
            nontext._cp_config = {'tools.encode.text_only': False,
                                  'tools.encode.add_charset': True,
                                  }

        class GZIP:
            def index(self):
                yield "Hello, world"
            index.exposed = True

            def noshow(self):
                # Test for ticket #147, where yield showed no exceptions (content-
                # encoding was still gzip even though traceback wasn't zipped).
                raise IndexError()
                # Unreachable on purpose: the yield makes this a generator
                # function, so the IndexError is raised on iteration.
                yield "Here be dragons"
            noshow.exposed = True
            # Turn encoding off so the gzip tool is the one doing the collapse.
            noshow._cp_config = {'tools.encode.on': False}

            def noshow_stream(self):
                # Test for ticket #147, where yield showed no exceptions (content-
                # encoding was still gzip even though traceback wasn't zipped).
                raise IndexError()
                # Unreachable on purpose (see noshow): keeps this a generator.
                yield "Here be dragons"
            noshow_stream.exposed = True
            noshow_stream._cp_config = {'response.stream': True}

        class Decode:
            def extra_charset(self, *args, **kwargs):
                return ', '.join([": ".join((k, v))
                                  for k, v in cherrypy.request.params.items()])
            extra_charset.exposed = True
            extra_charset._cp_config = {
                'tools.decode.on': True,
                'tools.decode.default_encoding': ['utf-16'],
                }

            def force_charset(self, *args, **kwargs):
                return ', '.join([": ".join((k, v))
                                  for k, v in cherrypy.request.params.items()])
            force_charset.exposed = True
            force_charset._cp_config = {
                'tools.decode.on': True,
                'tools.decode.encoding': 'utf-16',
                }

        root = Root()
        root.gzip = GZIP()
        root.decode = Decode()
        cherrypy.tree.mount(root, config={'/gzip': {'tools.gzip.on': True}})
    # Rebound as a staticmethod rather than decorated, matching the
    # decorator-free style used throughout this module.
    setup_server = staticmethod(setup_server)

    def _post_entity(self, url, body,
                     content_type="application/x-www-form-urlencoded"):
        """POST ``body`` to ``url`` via ``self.getPage``.

        Sends a Content-Type header (``content_type``) followed by a
        Content-Length header computed from ``len(body)``.
        """
        self.getPage(url, method='POST',
                     headers=[("Content-Type", content_type),
                              ("Content-Length", str(len(body)))],
                     body=body)

    def test_query_string_decoding(self):
        europoundUtf8 = europoundUnicode.encode('utf-8')
        self.getPage(ntob('/?param=') + europoundUtf8)
        self.assertBody(europoundUtf8)

        # Encoded utf8 query strings MUST be parsed correctly.
        # Here, q is the POUND SIGN U+00A3 encoded in utf8 and then %HEX
        self.getPage("/reqparams?q=%C2%A3")
        # The return value will be encoded as utf8.
        self.assertBody(ntob("q: \xc2\xa3"))

        # Query strings that are incorrectly encoded MUST raise 404.
        # Here, q is the POUND SIGN U+00A3 encoded in latin1 and then %HEX
        self.getPage("/reqparams?q=%A3")
        self.assertStatus(404)
        self.assertErrorPage(404,
            "The given query string could not be processed. Query "
            "strings for this resource must be encoded with 'utf8'.")

    def test_urlencoded_decoding(self):
        # Test the decoding of an application/x-www-form-urlencoded entity.
        europoundUtf8 = europoundUnicode.encode('utf-8')
        body = ntob("param=") + europoundUtf8
        self._post_entity('/', body)
        self.assertBody(europoundUtf8)

        # Encoded utf8 entities MUST be parsed and decoded correctly.
        # Here, q is the POUND SIGN U+00A3 encoded in utf8
        body = ntob("q=\xc2\xa3")
        self._post_entity('/reqparams', body)
        self.assertBody(ntob("q: \xc2\xa3"))

        # ...and in utf16, which is not in the default attempt_charsets list:
        body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
        self._post_entity('/reqparams', body,
                          "application/x-www-form-urlencoded;charset=utf-16")
        self.assertBody(ntob("q: \xc2\xa3"))

        # Entities that are incorrectly encoded MUST raise 400.
        # Here, q is the POUND SIGN U+00A3 encoded in utf16, but
        # the Content-Type incorrectly labels it utf-8.
        body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
        self._post_entity('/reqparams', body,
                          "application/x-www-form-urlencoded;charset=utf-8")
        self.assertStatus(400)
        self.assertErrorPage(400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['utf-8']")

    def test_decode_tool(self):
        # An extra charset should be tried first, and succeed if it matches.
        # Here, we add utf-16 as a charset and pass a utf-16 body.
        body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
        self._post_entity('/decode/extra_charset', body)
        self.assertBody(ntob("q: \xc2\xa3"))

        # An extra charset should be tried first, and continue to other default
        # charsets if it doesn't match.
        # Here, we add utf-16 as a charset but still pass a utf-8 body.
        body = ntob("q=\xc2\xa3")
        self._post_entity('/decode/extra_charset', body)
        self.assertBody(ntob("q: \xc2\xa3"))

        # An extra charset should error if force is True and it doesn't match.
        # Here, we force utf-16 as a charset but still pass a utf-8 body.
        body = ntob("q=\xc2\xa3")
        self._post_entity('/decode/force_charset', body)
        self.assertErrorPage(400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['utf-16']")

    def test_multipart_decoding(self):
        # Test the decoding of a multipart entity when the charset (utf16) is
        # explicitly given.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Type: text/plain;charset=utf-16',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xff\xfea\x00b\x00\x1c c\x00',
            '--X',
            'Content-Type: text/plain;charset=utf-16',
            'Content-Disposition: form-data; name="submit"',
            '',
            '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
            '--X--']))
        self._post_entity('/reqparams', body, "multipart/form-data;boundary=X")
        self.assertBody(ntob("text: ab\xe2\x80\x9cc, submit: Create"))

    def test_multipart_decoding_no_charset(self):
        # Test the decoding of a multipart entity when the charset (utf8) is
        # NOT explicitly given, but is in the list of charsets to attempt.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xe2\x80\x9c',
            '--X',
            'Content-Disposition: form-data; name="submit"',
            '',
            'Create',
            '--X--']))
        self._post_entity('/reqparams', body, "multipart/form-data;boundary=X")
        self.assertBody(ntob("text: \xe2\x80\x9c, submit: Create"))

    def test_multipart_decoding_no_successful_charset(self):
        # Test the decoding of a multipart entity when the charset (utf16) is
        # NOT explicitly given, and is NOT in the list of charsets to attempt.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xff\xfea\x00b\x00\x1c c\x00',
            '--X',
            'Content-Disposition: form-data; name="submit"',
            '',
            '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
            '--X--']))
        self._post_entity('/reqparams', body, "multipart/form-data;boundary=X")
        self.assertStatus(400)
        self.assertErrorPage(400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['us-ascii', 'utf-8']")

    def test_nontext(self):
        # The nontext handler enables add_charset with text_only off, so the
        # charset is expected on a non-text Content-Type as well.
        self.getPage('/nontext')
        self.assertHeader('Content-Type', 'application/binary;charset=utf-8')
        self.assertBody('\x00\x01\x02\x03')

    def testEncoding(self):
        # Default encoding should be utf-8
        self.getPage('/mao_zedong')
        self.assertBody(sing8)

        # Ask for utf-16.
        self.getPage('/mao_zedong', [('Accept-Charset', 'utf-16')])
        self.assertHeader('Content-Type', 'text/html;charset=utf-16')
        self.assertBody(sing16)

        # Ask for multiple encodings. ISO-8859-1 should fail, and utf-16
        # should be produced.
        self.getPage('/mao_zedong', [('Accept-Charset',
                                      'iso-8859-1;q=1, utf-16;q=0.5')])
        self.assertBody(sing16)

        # The "*" value should default to our default_encoding, utf-8
        self.getPage('/mao_zedong', [('Accept-Charset', '*;q=1, utf-7;q=.2')])
        self.assertBody(sing8)

        # Only allow iso-8859-1, which should fail and raise 406.
        self.getPage('/mao_zedong', [('Accept-Charset', 'iso-8859-1, *;q=0')])
        self.assertStatus("406 Not Acceptable")
        self.assertInBody("Your client sent this Accept-Charset header: "
                          "iso-8859-1, *;q=0. We tried these charsets: "
                          "iso-8859-1.")

        # Ask for x-mac-ce, which should be unknown. See ticket #569.
        self.getPage('/mao_zedong', [('Accept-Charset',
                                      'us-ascii, ISO-8859-1, x-mac-ce')])
        self.assertStatus("406 Not Acceptable")
        self.assertInBody("Your client sent this Accept-Charset header: "
                          "us-ascii, ISO-8859-1, x-mac-ce. We tried these "
                          "charsets: ISO-8859-1, us-ascii, x-mac-ce.")

        # Test the 'encoding' arg to encode.
        self.getPage('/utf8')
        self.assertBody(sing8)
        self.getPage('/utf8', [('Accept-Charset', 'us-ascii, ISO-8859-1')])
        self.assertStatus("406 Not Acceptable")

    def testGzip(self):
        # Gzip the expected payload locally so its leading bytes can be
        # looked for in the response body.
        zbuf = BytesIO()
        zfile = gzip.GzipFile(mode='wb', fileobj=zbuf, compresslevel=9)
        zfile.write(ntob("Hello, world"))
        zfile.close()

        self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip")])
        self.assertInBody(zbuf.getvalue()[:3])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertHeader("Content-Encoding", "gzip")

        # Test when gzip is denied.
        self.getPage('/gzip/', headers=[("Accept-Encoding", "identity")])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertNoHeader("Content-Encoding")
        self.assertBody("Hello, world")

        self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip;q=0")])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertNoHeader("Content-Encoding")
        self.assertBody("Hello, world")

        self.getPage('/gzip/', headers=[("Accept-Encoding", "*;q=0")])
        self.assertStatus(406)
        self.assertNoHeader("Content-Encoding")
        self.assertErrorPage(406, "identity, gzip")

        # Test for ticket #147
        self.getPage('/gzip/noshow', headers=[("Accept-Encoding", "gzip")])
        self.assertNoHeader('Content-Encoding')
        self.assertStatus(500)
        self.assertErrorPage(500, pattern="IndexError\n")

        # In this case, there's nothing we can do to deliver a
        # readable page, since 1) the gzip header is already set,
        # and 2) we may have already written some of the body.
        # The fix is to never stream yields when using gzip.
        if (cherrypy.server.protocol_version == "HTTP/1.0" or
                getattr(cherrypy.server, "using_apache", False)):
            self.getPage('/gzip/noshow_stream',
                         headers=[("Accept-Encoding", "gzip")])
            self.assertHeader('Content-Encoding', 'gzip')
            self.assertInBody('\x1f\x8b\x08\x00')
        else:
            # The wsgiserver will simply stop sending data, and the HTTP client
            # will error due to an incomplete chunk-encoded stream.
            self.assertRaises((ValueError, IncompleteRead), self.getPage,
                              '/gzip/noshow_stream',
                              headers=[("Accept-Encoding", "gzip")])

    def test_UnicodeHeaders(self):
        # The handler sets a cookie and a header containing non-ASCII bytes;
        # the response body should still come back intact.
        self.getPage('/cookies_and_headers')
        self.assertBody('Any content')
363