Utilities/__init__.py

changeset 6099
a7fecbc392d7
parent 6080
436ac20f7639
child 6105
cbd34d558bd9
equal deleted inserted replaced
6098:a1d10c6ce103 6099:a7fecbc392d7
297 return str(text, "utf-8", "ignore"), 'utf-8-ignore' 297 return str(text, "utf-8", "ignore"), 'utf-8-ignore'
298 else: 298 else:
299 return decode(text) 299 return decode(text)
300 300
301 301
302 def writeEncodedFile(filename, text, orig_coding): 302 def writeEncodedFile(filename, text, origEncoding, forcedEncoding=""):
303 """ 303 """
304 Function to write a file with properly encoded text. 304 Function to write a file with properly encoded text.
305 305
306 @param filename name of the file to read (string) 306 @param filename name of the file to read
307 @param text text to be written (string) 307 @type str
308 @param orig_coding type of the original encoding (string) 308 @param text text to be written
309 @return encoding used for writing the file (string) 309 @type str
310 """ 310 @param origEncoding type of the original encoding
311 etext, encoding = encode(text, orig_coding) 311 @type str
312 @param forcedEncoding encoding to be used for writing, if no coding
313 line is present
314 @type str
315 @return encoding used for writing the file
316 @rtype str
317 """
318 etext, encoding = encode(text, origEncoding, forcedEncoding=forcedEncoding)
312 319
313 f = open(filename, "wb") 320 f = open(filename, "wb")
314 f.write(etext) 321 f.write(etext)
315 f.close() 322 f.close()
316 323
317 return encoding 324 return encoding
318 325
319 326
320 def encode(text, orig_coding): 327 def encode(text, origEncoding, forcedEncoding=""):
321 """ 328 """
322 Function to encode text into a byte text. 329 Function to encode text into a byte text.
323 330
324 @param text text to be encoded (string) 331 @param text text to be encoded
325 @param orig_coding type of the original encoding (string) 332 @type str
326 @return tuple of encoded text and encoding used (bytes, string) 333 @param origEncoding type of the original encoding
334 @type str
335 @param forcedEncoding encoding to be used for writing, if no coding line
336 is present
337 @type str
338 @return tuple of encoded text and encoding used
339 @rtype tuple of (bytes, str)
327 @exception CodingError raised to indicate an invalid encoding 340 @exception CodingError raised to indicate an invalid encoding
328 """ 341 """
329 encoding = None 342 encoding = None
330 if orig_coding == 'utf-8-bom': 343 if origEncoding == 'utf-8-bom':
331 etext, encoding = BOM_UTF8 + text.encode("utf-8"), 'utf-8-bom' 344 etext, encoding = BOM_UTF8 + text.encode("utf-8"), 'utf-8-bom'
332 else: 345 else:
333 # Try declared coding spec 346 # Try declared coding spec
334 coding = get_coding(text) 347 coding = get_coding(text)
335 if coding: 348 if coding:
336 try: 349 try:
337 etext, encoding = text.encode(coding), coding 350 etext, encoding = text.encode(coding), coding
338 except (UnicodeError, LookupError): 351 except (UnicodeError, LookupError):
339 # Error: Declared encoding is incorrect 352 # Error: Declared encoding is incorrect
340 raise CodingError(coding) 353 raise CodingError(coding)
341 else: 354 else:
342 if orig_coding and orig_coding.endswith( 355 if forcedEncoding:
343 ('-selected', '-default', '-guessed', '-ignore')):
344 coding = orig_coding\
345 .replace("-selected", "")\
346 .replace("-default", "")\
347 .replace("-guessed", "")\
348 .replace("-ignore", "")
349 try: 356 try:
350 etext, encoding = text.encode(coding), coding 357 etext, encoding = (
358 text.encode(forcedEncoding), forcedEncoding)
351 except (UnicodeError, LookupError): 359 except (UnicodeError, LookupError):
360 # Error: Forced encoding is incorrect, ignore it
352 pass 361 pass
353 362
354 if encoding is None: 363 if encoding is None:
355 # Try configured default 364 # Try the original encoding
356 try: 365 if origEncoding and origEncoding.endswith(
357 codec = Preferences.getEditor("DefaultEncoding") 366 ('-selected', '-default', '-guessed', '-ignore')):
358 etext, encoding = text.encode(codec), codec 367 coding = origEncoding\
359 except (UnicodeError, LookupError): 368 .replace("-selected", "")\
360 pass 369 .replace("-default", "")\
370 .replace("-guessed", "")\
371 .replace("-ignore", "")
372 try:
373 etext, encoding = text.encode(coding), coding
374 except (UnicodeError, LookupError):
375 pass
361 376
362 if encoding is None: 377 if encoding is None:
363 # Try saving as ASCII 378 # Try configured default
364 try: 379 try:
365 etext, encoding = text.encode('ascii'), 'ascii' 380 codec = Preferences.getEditor("DefaultEncoding")
366 except UnicodeError: 381 etext, encoding = text.encode(codec), codec
382 except (UnicodeError, LookupError):
367 pass 383 pass
368 384
369 if encoding is None: 385 if encoding is None:
370 # Save as UTF-8 without BOM 386 # Try saving as ASCII
371 etext, encoding = text.encode('utf-8'), 'utf-8' 387 try:
388 etext, encoding = text.encode('ascii'), 'ascii'
389 except UnicodeError:
390 pass
391
392 if encoding is None:
393 # Save as UTF-8 without BOM
394 etext, encoding = text.encode('utf-8'), 'utf-8'
372 395
373 return etext, encoding 396 return etext, encoding
374 397
375 398
376 def decodeString(text): 399 def decodeString(text):

eric ide

mercurial