|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2015 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a class to store and parse diff output. |
|
8 """ |
|
9 |
|
10 import re |
|
11 import os |
|
12 |
|
13 |
|
class GitDiffParser:
    """
    Class implementing a class to store and parse diff output.

    The diff is split into its header lines (all lines preceding the first
    hunk header) and a list of hunks. The line indexes accepted by the
    public methods are relative to the first line following the diff header,
    i.e. index 0 denotes the first hunk header line (see headerLength()).
    """
    HunkHeaderRegexp = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)',
                                  re.DOTALL)

    # first character of the different kinds of hunk lines
    _ADDITION = "+"
    _DELETION = "-"
    _CONTEXT = " "
    _NONEWLINE = "\\"

    def __init__(self, diff):
        """
        Constructor

        @param diff output of the diff command (list of string)
        """
        # work on a private copy so later changes by the caller don't matter
        self.__diff = list(diff)

        self.__headerLines = []
        self.__hunks = []
        self.__parsed = False
        # diff parsing is done on demand

    def __initHunk(self):
        """
        Private method to initialize a hunk data structure.

        @return hunk data structure (dictionary)
        """
        return {
            "start": -1,
            "end": -1,
            "lines": [],
            "oldStart": -1,
            "oldCount": -1,
            "newStart": -1,
            "newCount": -1,
            "heading": "",
        }

    def __parseRange(self, headerRange):
        """
        Private method to parse the hunk header range part.

        @param headerRange hunk header range (string)
        @return tuple of hunk start and hunk length (integer, integer)
        """
        if ',' in headerRange:
            start, count = headerRange.split(',', 1)
            return int(start), int(count)

        # a range without an explicit length covers exactly one line
        return int(headerRange), 1

    def __parseDiff(self):
        """
        Private method to parse the diff output.

        The diff is parsed only once; the result is stored in the instance
        once parsing succeeded completely.

        @exception AssertionError raised when a malformed hunk header is
            encountered
        """
        if self.__parsed:
            return

        # step 1: extract the diff header
        headerLines = []
        for line in self.__diff:
            if line.startswith("@@ "):
                break
            headerLines.append(line)

        # step 2: break the rest into diff hunks
        hunks = []
        for lineIdx, line in enumerate(self.__diff[len(headerLines):]):
            # dissect the hunk header line
            m = self.HunkHeaderRegexp.match(line)
            if m:
                hunk = self.__initHunk()
                hunk["start"] = lineIdx
                hunk["oldStart"], hunk["oldCount"] = self.__parseRange(
                    m.group(1))
                hunk["newStart"], hunk["newCount"] = self.__parseRange(
                    m.group(2))
                hunk["heading"] = m.group(3)
                hunks.append(hunk)
            elif not hunks:
                raise AssertionError("Malformed hunk header: '{0}'"
                                     .format(line))
            # the hunk header itself is the first line of the hunk
            hunks[-1]["lines"].append(line)

        # step 3: calculate hunk end lines
        for hunk in hunks:
            hunk["end"] = hunk["start"] + len(hunk["lines"]) - 1

        # commit the result only after everything was parsed successfully
        # and remember that, so repeated calls don't duplicate the data
        self.__headerLines = headerLines
        self.__hunks = hunks
        self.__parsed = True

    def __generateRange(self, start, count):
        """
        Private method to generate a hunk header range.

        @param start start line (integer)
        @param count line count (integer)
        @return hunk header range (string)
        """
        if count == 1:
            return "{0}".format(start)

        return "{0},{1}".format(start, count)

    def __generateHunkHeader(self, oldStart, oldCount, newStart, newCount,
                             heading=os.linesep):
        """
        Private method to generate a hunk header line.

        @param oldStart start line of the old part (integer)
        @param oldCount line count of the old part (integer)
        @param newStart start line of the new part (integer)
        @param newCount line count of the new part (integer)
        @param heading hunk heading (string)
        @return hunk header (string)
        """
        return "@@ -{0} +{1} @@{2}".format(
            self.__generateRange(oldStart, oldCount),
            self.__generateRange(newStart, newCount),
            heading)

    def __filterHunkLines(self, hunk, startIndex, endIndex, reverse):
        """
        Private method to reduce a hunk to the selected lines.

        @param hunk hunk data structure (dictionary)
        @param startIndex start line number (integer)
        @param endIndex end line number (integer)
        @param reverse flag indicating a reverse patch (boolean)
        @return tuple containing the lines of the reduced hunk (without the
            hunk header line) and the number of addition, deletion and
            context lines contained therein (list of string, dictionary)
        """
        # unselected changes of 'dropType' are removed from the patch,
        # unselected changes of 'neutralType' are turned into context
        if reverse:
            dropType, neutralType = self._DELETION, self._ADDITION
        else:
            dropType, neutralType = self._ADDITION, self._DELETION

        counts = {
            self._ADDITION: 0,
            self._DELETION: 0,
            self._CONTEXT: 0,
        }
        previousLineSkipped = False
        processedLines = []

        # the first line of a hunk is the hunk header; it is regenerated
        for lineIndex, line in enumerate(hunk["lines"][1:],
                                         start=hunk["start"] + 1):
            # slicing instead of indexing tolerates an empty line
            lineType = line[:1]
            lineContent = line[1:]

            if not (startIndex <= lineIndex <= endIndex):
                if lineType == dropType:
                    previousLineSkipped = True
                    continue

                if lineType == neutralType:
                    lineType = self._CONTEXT

            # a no-newline marker belongs to the line preceding it and
            # must go if that line was dropped
            if lineType == self._NONEWLINE and previousLineSkipped:
                continue

            processedLines.append(lineType + lineContent)
            # no-newline markers (and any unknown line type) are kept in
            # the patch but don't count towards the hunk ranges
            if lineType in counts:
                counts[lineType] += 1
            previousLineSkipped = False

        return processedLines, counts

    def headerLength(self):
        """
        Public method to get the header length.

        @return length of the header (integer)
        """
        self.__parseDiff()
        return len(self.__headerLines)

    def createHunkPatch(self, lineIndex):
        """
        Public method to create a hunk based patch.

        The patch consists of the diff header followed by the lines of the
        first hunk containing the given line. If no hunk contains the line,
        just the diff header is returned.

        @param lineIndex line number of the hunk (integer)
        @return diff lines of the patch (string)
        """
        self.__parseDiff()

        patch = self.__headerLines[:]
        for hunk in self.__hunks:
            if hunk["start"] <= lineIndex <= hunk["end"]:
                patch.extend(hunk["lines"])
                break

        return "".join(patch)

    def createLinesPatch(self, startIndex, endIndex, reverse=False):
        """
        Public method to create a selected lines based patch.

        @param startIndex start line number (integer)
        @param endIndex end line number (integer)
        @param reverse flag indicating a reverse patch (boolean)
        @return diff lines of the patch; an empty string, if the selection
            doesn't contain any change (string)
        """
        self.__parseDiff()

        patch = []
        # accumulated line shift caused by the hunks already added
        startOffset = 0

        for hunk in self.__hunks:
            if hunk["end"] < startIndex:
                # skip hunks before the selected lines
                continue

            if hunk["start"] > endIndex:
                # done, exit the loop
                break

            processedLines, counts = self.__filterHunkLines(
                hunk, startIndex, endIndex, reverse)
            additions = counts[self._ADDITION]
            deletions = counts[self._DELETION]

            # hunks consisting of pure context lines are excluded
            if additions == 0 and deletions == 0:
                continue

            oldCount = counts[self._CONTEXT] + deletions
            newCount = counts[self._CONTEXT] + additions
            oldStart = hunk["oldStart"]
            newStart = oldStart + startOffset
            # start line adjustment applied for empty ranges
            if oldCount == 0:
                newStart += 1
            if newCount == 0:
                newStart -= 1

            startOffset += additions - deletions

            patch.append(self.__generateHunkHeader(
                oldStart, oldCount, newStart, newCount, hunk["heading"]))
            patch.extend(processedLines)

        if not patch:
            return ""

        return "".join(self.__headerLines + patch)