ThirdParty/Pygments/pygments/formatters/rtf.py

changeset 4172
4f20dba37ab6
parent 3079
0233bbe9a9c4
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
3 pygments.formatters.rtf 3 pygments.formatters.rtf
4 ~~~~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 A formatter that generates RTF files. 6 A formatter that generates RTF files.
7 7
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 from pygments.formatter import Formatter 12 from pygments.formatter import Formatter
13 from pygments.util import get_int_opt, _surrogatepair
13 14
14 15
15 __all__ = ['RtfFormatter'] 16 __all__ = ['RtfFormatter']
16 17
17 18
18 class RtfFormatter(Formatter): 19 class RtfFormatter(Formatter):
19 """ 20 """
20 Format tokens as RTF markup. This formatter automatically outputs full RTF 21 Format tokens as RTF markup. This formatter automatically outputs full RTF
21 documents with color information and other useful stuff. Perfect for Copy and 22 documents with color information and other useful stuff. Perfect for Copy and
22 Paste into Microsoft® Word® documents. 23 Paste into Microsoft(R) Word(R) documents.
23 24
24 *New in Pygments 0.6.* 25 Please note that ``encoding`` and ``outencoding`` options are ignored.
26 The RTF format is ASCII natively, but handles unicode characters correctly
27 thanks to escape sequences.
28
29 .. versionadded:: 0.6
25 30
26 Additional options accepted: 31 Additional options accepted:
27 32
28 `style` 33 `style`
29 The style to use, can be a string or a Style subclass (default: 34 The style to use, can be a string or a Style subclass (default:
30 ``'default'``). 35 ``'default'``).
31 36
32 `fontface` 37 `fontface`
33 The used font family, for example ``Bitstream Vera Sans``. Defaults to 38 The used font family, for example ``Bitstream Vera Sans``. Defaults to
34 some generic font which is supposed to have fixed width. 39 some generic font which is supposed to have fixed width.
40
41 `fontsize`
42 Size of the font used. Size is specified in half points. The
43 default is 24 half-points, giving a size 12 font.
44
45 .. versionadded:: 2.0
35 """ 46 """
36 name = 'RTF' 47 name = 'RTF'
37 aliases = ['rtf'] 48 aliases = ['rtf']
38 filenames = ['*.rtf'] 49 filenames = ['*.rtf']
39 50
40 unicodeoutput = False
41
42 def __init__(self, **options): 51 def __init__(self, **options):
43 """ 52 r"""
44 Additional options accepted: 53 Additional options accepted:
45 54
46 ``fontface`` 55 ``fontface``
47 Name of the font used. Could for example be ``'Courier New'`` 56 Name of the font used. Could for example be ``'Courier New'``
48 to further specify the default which is ``'\fmodern'``. The RTF 57 to further specify the default which is ``'\fmodern'``. The RTF
49 specification claims that ``\fmodern`` are "Fixed-pitch serif 58 specification claims that ``\fmodern`` are "Fixed-pitch serif
50 and sans serif fonts". Hope every RTF implementation thinks 59 and sans serif fonts". Hope every RTF implementation thinks
51 the same about modern... 60 the same about modern...
61
52 """ 62 """
53 Formatter.__init__(self, **options) 63 Formatter.__init__(self, **options)
54 self.fontface = options.get('fontface') or '' 64 self.fontface = options.get('fontface') or ''
65 self.fontsize = get_int_opt(options, 'fontsize', 0)
55 66
56 def _escape(self, text): 67 def _escape(self, text):
57 return text.replace('\\', '\\\\') \ 68 return text.replace(u'\\', u'\\\\') \
58 .replace('{', '\\{') \ 69 .replace(u'{', u'\\{') \
59 .replace('}', '\\}') 70 .replace(u'}', u'\\}')
60 71
61 def _escape_text(self, text): 72 def _escape_text(self, text):
62 # empty strings, should give a small performance improvement 73 # empty strings, should give a small performance improvement
63 if not text: 74 if not text:
64 return '' 75 return u''
65 76
66 # escape text 77 # escape text
67 text = self._escape(text) 78 text = self._escape(text)
68 if self.encoding in ('utf-8', 'utf-16', 'utf-32'):
69 encoding = 'iso-8859-15'
70 else:
71 encoding = self.encoding or 'iso-8859-15'
72 79
73 buf = [] 80 buf = []
74 for c in text: 81 for c in text:
75 if ord(c) > 128: 82 cn = ord(c)
76 ansic = c.encode(encoding, 'ignore') or '?' 83 if cn < (2**7):
77 if ord(ansic) > 128: 84 # ASCII character
78 ansic = '\\\'%x' % ord(ansic)
79 else:
80 ansic = c
81 buf.append(r'\ud{\u%d%s}' % (ord(c), ansic))
82 else:
83 buf.append(str(c)) 85 buf.append(str(c))
86 elif (2**7) <= cn < (2**16):
87 # single unicode escape sequence
88 buf.append(u'{\\u%d}' % cn)
89 elif (2**16) <= cn:
90 # RTF limits unicode to 16 bits.
91 # Force surrogate pairs
92 buf.append(u'{\\u%d}{\\u%d}' % _surrogatepair(cn))
84 93
85 return ''.join(buf).replace('\n', '\\par\n') 94 return u''.join(buf).replace(u'\n', u'\\par\n')
86 95
87 def format_unencoded(self, tokensource, outfile): 96 def format_unencoded(self, tokensource, outfile):
88 # rtf 1.8 header 97 # rtf 1.8 header
89 outfile.write(r'{\rtf1\ansi\deff0' 98 outfile.write(u'{\\rtf1\\ansi\\uc0\\deff0'
90 r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}' 99 u'{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
91 r'{\colortbl;' % (self.fontface and 100 u'{\\colortbl;' % (self.fontface and
92 ' ' + self._escape(self.fontface) or 101 u' ' + self._escape(self.fontface) or
93 '')) 102 u''))
94 103
95 # convert colors and save them in a mapping to access them later. 104 # convert colors and save them in a mapping to access them later.
96 color_mapping = {} 105 color_mapping = {}
97 offset = 1 106 offset = 1
98 for _, style in self.style: 107 for _, style in self.style:
99 for color in style['color'], style['bgcolor'], style['border']: 108 for color in style['color'], style['bgcolor'], style['border']:
100 if color and color not in color_mapping: 109 if color and color not in color_mapping:
101 color_mapping[color] = offset 110 color_mapping[color] = offset
102 outfile.write(r'\red%d\green%d\blue%d;' % ( 111 outfile.write(u'\\red%d\\green%d\\blue%d;' % (
103 int(color[0:2], 16), 112 int(color[0:2], 16),
104 int(color[2:4], 16), 113 int(color[2:4], 16),
105 int(color[4:6], 16) 114 int(color[4:6], 16)
106 )) 115 ))
107 offset += 1 116 offset += 1
108 outfile.write(r'}\f0') 117 outfile.write(u'}\\f0 ')
118 if self.fontsize:
119 outfile.write(u'\\fs%d' % (self.fontsize))
109 120
110 # highlight stream 121 # highlight stream
111 for ttype, value in tokensource: 122 for ttype, value in tokensource:
112 while not self.style.styles_token(ttype) and ttype.parent: 123 while not self.style.styles_token(ttype) and ttype.parent:
113 ttype = ttype.parent 124 ttype = ttype.parent
114 style = self.style.style_for_token(ttype) 125 style = self.style.style_for_token(ttype)
115 buf = [] 126 buf = []
116 if style['bgcolor']: 127 if style['bgcolor']:
117 buf.append(r'\cb%d' % color_mapping[style['bgcolor']]) 128 buf.append(u'\\cb%d' % color_mapping[style['bgcolor']])
118 if style['color']: 129 if style['color']:
119 buf.append(r'\cf%d' % color_mapping[style['color']]) 130 buf.append(u'\\cf%d' % color_mapping[style['color']])
120 if style['bold']: 131 if style['bold']:
121 buf.append(r'\b') 132 buf.append(u'\\b')
122 if style['italic']: 133 if style['italic']:
123 buf.append(r'\i') 134 buf.append(u'\\i')
124 if style['underline']: 135 if style['underline']:
125 buf.append(r'\ul') 136 buf.append(u'\\ul')
126 if style['border']: 137 if style['border']:
127 buf.append(r'\chbrdr\chcfpat%d' % 138 buf.append(u'\\chbrdr\\chcfpat%d' %
128 color_mapping[style['border']]) 139 color_mapping[style['border']])
129 start = ''.join(buf) 140 start = u''.join(buf)
130 if start: 141 if start:
131 outfile.write('{%s ' % start) 142 outfile.write(u'{%s ' % start)
132 outfile.write(self._escape_text(value)) 143 outfile.write(self._escape_text(value))
133 if start: 144 if start:
134 outfile.write('}') 145 outfile.write(u'}')
135 146
136 outfile.write('}') 147 outfile.write(u'}')

eric ide

mercurial