|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2020 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing utility functions used by the security checks. |
|
8 """ |
|
9 |
|
10 import ast |
|
11 import os |
|
12 |
|
13 |
|
class InvalidModulePath(Exception):
    """
    Exception signalling that a module path is not valid.
    """
|
19 |
|
20 |
|
def getModuleQualnameFromPath(path):
    """
    Function to derive the qualified name of a module from its file path.

    The file name without its extension forms the last component of the
    name. The directory part is then examined from right to left: each
    directory containing an __init__.py file contributes its name as a
    package component. The search stops at the first directory without an
    __init__.py file or once the directory part is reduced to '/', '.' or
    the empty string.

    Note: The path is used as given; it is neither converted to an absolute
    path nor are symbolic links resolved.

    @param path path of the module to be analyzed
    @type str
    @return qualified name of the module
    @rtype str
    @exception InvalidModulePath raised if the given path lacks a directory
        part or a file name
    """
    directory, filename = os.path.split(path)
    if '' in (directory, filename):
        raise InvalidModulePath('Invalid python file path: "{0}"'
                                ' Missing path or file name'.format(path))

    components = [os.path.splitext(filename)[0]]
    # climb the directory tree as long as we are inside a package
    while (
        directory not in ['/', '.', ''] and
        os.path.isfile(os.path.join(directory, '__init__.py'))
    ):
        directory, package = os.path.split(directory)
        components.insert(0, package)

    return '.'.join(components)
|
55 |
|
56 |
|
def namespacePathJoin(namespace, name):
    """
    Function to append a node name to a namespace path.

    The namespace and the name are joined by a single dot.

    @param namespace namespace to be extended
    @type str
    @param name node name to be appended
    @type str
    @return extended namespace
    @rtype str
    """
    template = "{parent}.{child}"
    return template.format(parent=namespace, child=name)
|
69 |
|
70 |
|
def namespacePathSplit(path):
    """
    Function to split a namespace path at its last dot.

    The tail is the last component of the namespace path and the head is
    everything in front of it (comparable to os.path.split). If the path
    does not contain a dot, the result is a one-element tuple containing
    just the given path.

    @param path namespace path to be split
    @type str
    @return tuple containing the namespace path head and tail
    @rtype tuple of (str, str)
    """
    pieces = path.rsplit('.', 1)
    return tuple(pieces)
|
85 |
|
86 |
|
def getAttrQualName(node, aliases):
    """
    Function to build the dotted name an attribute node was referenced by.

    The name is assembled recursively from the chain of ast.Name and
    ast.Attribute nodes rooted at the given node. At each level the name
    assembled so far is replaced by its entry in the aliases dictionary,
    if there is one. The result reflects how the code spelled the name,
    not what the name actually refers to. Any other node type (e.g. the
    call and subscript in foo.mylist[0](a,b)) contributes an empty string.

    @param node attribute node to be treated
    @type ast.Attribute
    @param aliases dictionary of import aliases
    @type dict
    @return qualified name of the attribute
    @rtype str
    """
    if isinstance(node, ast.Name):
        identifier = node.id
        return aliases[identifier] if identifier in aliases else identifier

    if not isinstance(node, ast.Attribute):
        # no static name available for this kind of node
        return ""

    parent = getAttrQualName(node.value, aliases)
    dotted = "{0}.{1}".format(parent, node.attr)
    return aliases[dotted] if dotted in aliases else dotted
|
118 |
|
119 |
|
def getCallName(node, aliases):
    """
    Function to determine the name of the callable of an ast.Call node.

    For a call of a plain name, the name (or its alias, if it is contained
    in the aliases dictionary) is returned. For a call of an attribute, the
    qualified attribute name is returned. For every other kind of callee an
    empty string is returned.

    @param node node to extract information from
    @type ast.Call
    @param aliases dictionary of import aliases
    @type dict
    @return name of the ast.Call node
    @rtype str
    """
    callee = node.func

    if isinstance(callee, ast.Attribute):
        return getAttrQualName(callee, aliases)

    if isinstance(callee, ast.Name):
        identifier = callee.id
        return aliases[identifier] if identifier in aliases else identifier

    return ""
|
139 |
|
140 |
|
def getQualAttr(node, aliases):
    """
    Function to build the qualified name of an ast.Attribute node.

    The result is composed of the name of the attribute's base (or its
    alias, if it is contained in the aliases dictionary), a dot and the
    attribute name. If the base has no simple name, the prefix in front of
    the dot is left empty. Nodes that are not an ast.Attribute yield an
    empty string.

    @param node node to extract information from
    @type ast.Attribute
    @param aliases dictionary of import aliases
    @type dict
    @return qualified attribute name
    @rtype str
    """
    if not isinstance(node, ast.Attribute):
        return ""

    qualifier = ""
    try:
        base = node.value.id
        qualifier = aliases[base] if base in aliases else base
    except Exception:
        # The base of the attribute cannot be named; keep the empty
        # prefix so that just the attribute name follows the dot.
        pass

    return "{0}.{1}".format(qualifier, node.attr)
|
168 |
|
169 |
|
def deepgetattr(obj, attr):
    """
    Function to follow a dotted attribute chain to its final value.

    The chain is split at the dots and each component is looked up with
    getattr() on the result of the previous lookup, starting with the
    given object.

    @param obj object to start the lookup at
    @type object
    @param attr dotted attribute chain to be followed (e.g. 'func.id')
    @type str
    @return ultimate value
    @rtype ast.AST
    @exception AttributeError raised if a component of the chain does not
        exist
    """
    target = obj
    pending = attr.split('.')
    while pending:
        target = getattr(target, pending.pop(0))
    return target
|
182 |
|
183 |
|
def linerange(node):
    """
    Function to get line number range from a node.

    Only the node itself and its non-block children are considered: the
    nested blocks stored in the attributes "body", "orelse", "handlers"
    and "finalbody" are detached temporarily while the node is scanned and
    are re-attached afterwards. For a compound statement this yields the
    lines of the statement header only.

    @param node node to extract a line range from
    @type ast.AST
    @return list containing the line number range; [0, 1] if no line
        number could be determined
    @rtype list of int
    """
    # Detach the nested blocks so that ast.walk() does not descend into
    # them. setattr() is needed here because an assignment to 'node.key'
    # would just create an unrelated attribute named "key" and leave the
    # blocks in place.
    detached = {}
    for key in ("body", "orelse", "handlers", "finalbody"):
        if hasattr(node, key):
            detached[key] = getattr(node, key)
            setattr(node, key, [])

    try:
        lineNumbers = [
            n.lineno for n in ast.walk(node) if hasattr(n, 'lineno')
        ]
    finally:
        # always hand the node back in its original shape
        for key, value in detached.items():
            setattr(node, key, value)

    if lineNumbers:
        return list(range(min(lineNumbers), max(lineNumbers) + 1))

    return [0, 1]
|
215 |
|
216 |
|
def linerange_fix(node):
    """
    Function to get a line number range working around a known Python bug
    with multi-line strings.

    The range is determined by linerange() first. If the node carries a
    '_securitySibling' attribute that has a line number and this line
    number is more than one line after the first line of the range, the
    range from the first line up to (but not including) the sibling's line
    is returned instead.

    @param node node to extract a line range from
    @type ast.AST
    @return list containing the line number range
    @rtype list of int
    """
    lines = linerange(node)

    # deal with multiline strings lineno behavior (Python issue #16806)
    if not hasattr(node, '_securitySibling'):
        return lines

    sibling = node._securitySibling
    if not hasattr(sibling, 'lineno'):
        return lines

    first = min(lines)
    if sibling.lineno - first > 1:
        return list(range(first, sibling.lineno))

    return lines
|
239 |
|
240 |
|
def escapedBytesRepresentation(b):
    """
    Function to escape bytes for comparison with other strings.

    The bytes are decoded with the 'unicode_escape' codec and the resulting
    string is encoded with the same codec again. Escape sequences contained
    in the input are interpreted by the decoding step, and the encoding
    step writes unprintable characters as printable escape sequences.

    @param b bytes object to be escaped
    @type bytes
    @return escaped bytes object
    @rtype bytes
    """
    codec = 'unicode_escape'
    text = b.decode(codec)
    return text.encode(codec)