297 return str(text, "utf-8", "ignore"), 'utf-8-ignore' |
297 return str(text, "utf-8", "ignore"), 'utf-8-ignore' |
298 else: |
298 else: |
299 return decode(text) |
299 return decode(text) |
300 |
300 |
301 |
301 |
def writeEncodedFile(filename, text, origEncoding, forcedEncoding=""):
    """
    Function to write a file with properly encoded text.

    The text is converted to bytes by encode() and the result is written
    to the file in binary mode, replacing any existing content.

    @param filename name of the file to write
    @type str
    @param text text to be written
    @type str
    @param origEncoding type of the original encoding
    @type str
    @param forcedEncoding encoding to be used for writing, if no coding
        line is present
    @type str
    @return encoding used for writing the file
    @rtype str
    """
    etext, encoding = encode(text, origEncoding, forcedEncoding=forcedEncoding)

    # The context manager closes the file even if the write fails.
    with open(filename, "wb") as f:
        f.write(etext)

    return encoding
318 |
325 |
319 |
326 |
def encode(text, origEncoding, forcedEncoding=""):
    """
    Function to encode text into a byte text.

    The encoding is chosen in this order: UTF-8 with BOM if the original
    encoding is 'utf-8-bom'; the coding declared in the text; the forced
    encoding; the original encoding with its detection suffix removed; the
    configured default encoding; ASCII; UTF-8 without BOM.

    @param text text to be encoded
    @type str
    @param origEncoding type of the original encoding
    @type str
    @param forcedEncoding encoding to be used for writing, if no coding line
        is present
    @type str
    @return tuple of encoded text and encoding used
    @rtype tuple of (bytes, str)
    @exception CodingError raised to indicate an invalid encoding
    """
    encoding = None
    if origEncoding == 'utf-8-bom':
        etext, encoding = BOM_UTF8 + text.encode("utf-8"), 'utf-8-bom'
    else:
        # Try declared coding spec
        coding = get_coding(text)
        if coding:
            try:
                etext, encoding = text.encode(coding), coding
            except (UnicodeError, LookupError) as err:
                # Error: Declared encoding is incorrect
                raise CodingError(coding) from err
        else:
            if forcedEncoding:
                try:
                    etext, encoding = (
                        text.encode(forcedEncoding), forcedEncoding)
                except (UnicodeError, LookupError):
                    # Error: Forced encoding is incorrect, ignore it
                    pass

            if encoding is None:
                # Try the original encoding
                suffixes = ('-selected', '-default', '-guessed', '-ignore')
                if origEncoding and origEncoding.endswith(suffixes):
                    # Remove the marker describing how the encoding was
                    # determined to get the plain codec name.
                    coding = origEncoding
                    for suffix in suffixes:
                        coding = coding.replace(suffix, "")
                    try:
                        etext, encoding = text.encode(coding), coding
                    except (UnicodeError, LookupError):
                        pass

            if encoding is None:
                # Try configured default
                try:
                    codec = Preferences.getEditor("DefaultEncoding")
                    etext, encoding = text.encode(codec), codec
                except (UnicodeError, LookupError):
                    pass

            if encoding is None:
                # Try saving as ASCII
                try:
                    etext, encoding = text.encode('ascii'), 'ascii'
                except UnicodeError:
                    pass

            if encoding is None:
                # Save as UTF-8 without BOM
                etext, encoding = text.encode('utf-8'), 'utf-8'

    return etext, encoding
374 |
397 |
375 |
398 |
376 def decodeString(text): |
399 def decodeString(text): |