|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2020 - 2022 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing utility functions used by the security checks. |
|
8 """ |
|
9 |
|
10 import ast |
|
11 import os |
|
12 import contextlib |
|
13 |
|
14 import AstUtilities |
|
15 |
|
16 |
|
class InvalidModulePath(Exception):
    """
    Exception class used to signal that a module path is not usable.
    """
|
22 |
|
23 |
|
def getModuleQualnameFromPath(path):
    """
    Function to derive the qualified name of a module from its file path.

    The path is split into a directory part and a file name. The file name
    without its extension becomes the last component of the qualified name.
    Starting with the directory containing the file, every directory that
    holds an __init__.py file contributes its name as a package component.
    The walk to the left ends at the first directory without __init__.py
    or as soon as the remaining directory part is '/', '.' or empty.

    @param path path of the module to be analyzed
    @type str
    @return qualified name of the module
    @rtype str
    @exception InvalidModulePath raised to indicate an invalid module path
        (i.e. the directory part or the file name is missing)
    """
    directory, fileName = os.path.split(path)
    if directory == '' or fileName == '':
        raise InvalidModulePath('Invalid python file path: "{0}"'
                                ' Missing path or file name'.format(path))

    # components are collected innermost first and reversed at the end
    components = [os.path.splitext(fileName)[0]]
    while (
        directory not in ['/', '.', ''] and
        os.path.isfile(os.path.join(directory, '__init__.py'))
    ):
        directory, package = os.path.split(directory)
        components.append(package)

    return '.'.join(reversed(components))
|
59 |
|
60 |
|
def namespacePathJoin(namespace, name):
    """
    Function to append a node name to a namespace path.

    The result consists of the namespace and the name separated by a
    single dot.

    @param namespace namespace to be extended
    @type str
    @param name node name to be appended
    @type str
    @return extended namespace
    @rtype str
    """
    template = "{0}.{1}"
    return template.format(namespace, name)
|
73 |
|
74 |
|
def namespacePathSplit(path):
    """
    Function to split a namespace path into a head and tail.

    Tail will be the last namespace path component and head will
    be everything leading up to that in the path. This is similar to
    os.path.split. A path without any dot yields an empty head and the
    complete path as the tail, so the result always has two elements.

    @param path namespace path to be split
    @type str
    @return tuple containing the namespace path head and tail
    @rtype tuple of (str, str)
    """
    # rpartition always delivers three parts; for a path without a dot the
    # head is '' and the tail is the path itself (rsplit would have given
    # a one element sequence in that case).
    head, _separator, tail = path.rpartition('.')
    return (head, tail)
|
89 |
|
90 |
|
def getAttrQualName(node, aliases):
    """
    Function to build the dotted name an attribute node was written as.

    ast.Name nodes deliver their identifier and ast.Attribute nodes deliver
    the name of their value followed by a dot and the attribute name. At
    each level the name built so far is replaced by its entry of the
    aliases dictionary, if there is one. This reflects how the code
    referenced the name, not what the name actually refers to. Any other
    node type yields an empty string, which is also used as the prefix
    if such a node is the value of an attribute (e.g. 'f().a' results
    in '.a').

    @param node attribute node to be treated
    @type ast.Attribute
    @param aliases dictionary of import aliases
    @type dict
    @return qualified name of the attribute
    @rtype str
    """
    if isinstance(node, ast.Name):
        return aliases.get(node.id, node.id)

    if isinstance(node, ast.Attribute):
        parentName = getAttrQualName(node.value, aliases)
        dottedName = "{0}.{1}".format(parentName, node.attr)
        return aliases.get(dottedName, dottedName)

    # node without a static name
    return ""
|
122 |
|
123 |
|
def getCallName(node, aliases):
    """
    Function to determine the name called by an ast.Call node.

    If the called object is a plain name, this name (or its alias, if it
    is contained in the aliases dictionary) is returned. If it is an
    attribute, the name is built by getAttrQualName(). All other kinds of
    called objects result in an empty string.

    @param node node to extract information from
    @type ast.Call
    @param aliases dictionary of import aliases
    @type dict
    @return name of the ast.Call node
    @rtype str
    """
    callee = node.func
    if isinstance(callee, ast.Attribute):
        return getAttrQualName(callee, aliases)

    if not isinstance(callee, ast.Name):
        return ""

    funcName = deepgetattr(node, 'func.id')
    return aliases.get(funcName, funcName)
|
143 |
|
144 |
|
def getQualAttr(node, aliases):
    """
    Function to build a qualified name for an ast.Attribute node.

    The result is composed of a prefix, a dot and the attribute name. The
    prefix is the identifier of the attribute's value (or its alias, if it
    is contained in the aliases dictionary). If this identifier cannot be
    determined, the prefix stays empty and the result starts with a dot.
    Nodes other than ast.Attribute result in an empty string.

    @param node node to extract information from
    @type ast.Attribute
    @param aliases dictionary of import aliases
    @type dict
    @return qualified attribute name
    @rtype str
    """
    if not isinstance(node, ast.Attribute):
        return ""

    qualifier = ""
    # Best effort only: if we can't get the fully qualified name for an
    # attribute, any error is ignored and the qualifier is left empty.
    with contextlib.suppress(Exception):
        base = deepgetattr(node, 'value.id')
        if base in aliases:
            qualifier = aliases[base]
        else:
            qualifier = deepgetattr(node, 'value.id')

    return "{0}.{1}".format(qualifier, node.attr)
|
170 |
|
171 |
|
def deepgetattr(obj, attr):
    """
    Function to follow a dotted attribute chain down to its last value.

    The chain is processed from left to right with each component being
    looked up via getattr() on the result of the previous lookup.

    @param obj reference to the object the chain starts at
    @type ast.Name or ast.Attribute
    @param attr attribute chain with the components separated by dots
        (e.g. 'func.id')
    @type str
    @return ultimate value
    @rtype ast.AST
    """
    target = obj
    remaining = attr
    while True:
        name, dot, remaining = remaining.partition('.')
        target = getattr(target, name)
        if not dot:
            # no separator left, 'name' was the last component
            return target
|
186 |
|
187 |
|
def linerange(node):
    """
    Function to get line number range from a node.

    The nested statement blocks of the node ('body', 'orelse', 'handlers'
    and 'finalbody') are detached while the line numbers are collected, so
    that they do not contribute to the range. They are put back before the
    function returns, even if collecting the line numbers fails.

    @param node node to extract a line range from
    @type ast.AST
    @return list containing the line number range ([0, 1] if no line
        number could be determined)
    @rtype list of int
    """
    # Detach the nested blocks. setattr() is required here because the
    # attribute name is held in the variable 'key'; assigning 'node.key'
    # would just create an unrelated attribute literally named 'key'.
    detached = {}
    for key in ("body", "orelse", "handlers", "finalbody"):
        if hasattr(node, key):
            detached[key] = getattr(node, key)
            setattr(node, key, [])

    try:
        lineNumbers = [
            n.lineno for n in ast.walk(node) if hasattr(n, 'lineno')
        ]
    finally:
        # always leave the node as it was handed in
        for key, value in detached.items():
            setattr(node, key, value)

    if lineNumbers:
        return list(range(min(lineNumbers), max(lineNumbers) + 1))

    return [0, 1]
|
223 |
|
224 |
|
def linerange_fix(node):
    """
    Function to get a line number range working around a known Python bug
    with multi-line strings.

    The range determined by linerange() is used unless the node has a
    '_securitySibling' attribute with a line number that is more than one
    line after the start of this range. In that case the range reaches
    from the start line up to the line before the sibling.

    @param node node to extract a line range from
    @type ast.AST
    @return list containing the line number range
    @rtype list of int
    """
    lines = linerange(node)

    # deal with multiline strings lineno behavior (Python issue #16806)
    sibling = getattr(node, '_securitySibling', None)
    if sibling is not None and hasattr(sibling, 'lineno'):
        first = min(lines)
        if sibling.lineno - first > 1:
            return list(range(first, sibling.lineno))

    return lines
|
247 |
|
248 |
|
def escapedBytesRepresentation(b):
    """
    Function to escape bytes for comparison with other strings.

    The bytes are decoded with the 'unicode_escape' codec and the resulting
    string is encoded with the same codec again. This turns unprintable
    bytes (e.g. control characters) into printable escape sequences.

    @param b bytes object to be escaped
    @type bytes
    @return escaped bytes object
    @rtype bytes
    """
    text = b.decode('unicode_escape')
    return text.encode('unicode_escape')
|
266 |
|
267 |
|
def concatString(node, stop=None):
    """
    Function to build a string from an ast.BinOp chain.

    Starting at the given node the '_securityParent' references are
    followed upwards as long as the parent is an ast.BinOp. If the node
    reached this way is an ast.BinOp, its operands are collected depth
    first (left before right). The values of all collected nodes accepted
    by AstUtilities.isString() are joined with a single space. This covers
    expressions like "a" + "b" + "c" or "a %s" % val. The provided node
    can be any participant in the BinOp chain.

    @param node node to be processed
    @type ast.BinOp or ast.Str/ast.Constant
    @param stop base node to stop at
    @type ast.BinOp or ast.Str/ast.Constant
    @return tuple containing the root node of the expression and the string
        value
    @rtype tuple of (ast.AST, str)
    """
    def _collect(binOp, parts, stop=None):
        # A nested BinOp is expanded by the recursive call; its None result
        # is appended as well and is presumably rejected by the string
        # filter applied to the collected parts.
        if binOp != stop:
            for operand in (binOp.left, binOp.right):
                if isinstance(operand, ast.BinOp):
                    parts.append(_collect(operand, parts, stop))
                else:
                    parts.append(operand)

    # the start node is always the first collected part
    parts = [node]
    top = node
    while isinstance(top._securityParent, ast.BinOp):
        top = top._securityParent
    if isinstance(top, ast.BinOp):
        _collect(top, parts, stop)

    text = " ".join(
        [part.s for part in parts if AstUtilities.isString(part)]
    )
    return (top, text)
|
307 |
|
308 |
|
def getCalledName(node):
    """
    Function to get the unqualified function name from an ast.Call node.

    A method call (thing.call()) has an ast.Attribute as the called object
    while a plain function call (call()) has a name. The attribute name is
    returned in the first case and the identifier in the second. If the
    called object does not offer the respective attribute, an empty string
    is returned.

    @param node reference to the call node
    @type ast.Call
    @return function name of the node
    @rtype str
    """
    callee = node.func
    if isinstance(callee, ast.Attribute):
        nameAttribute = 'attr'
    else:
        nameAttribute = 'id'
    return getattr(callee, nameAttribute, "")
|
327 |
|
328 # |
|
329 # eflag: noqa = M601 |