|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2015 - 2017 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a class to store and parse diff output. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 import re |
|
13 import os |
|
14 |
|
15 |
|
class GitDiffParser(object):
    """
    Class to store and parse the output of a diff command.

    The diff is split into its header lines (everything before the first
    hunk header) and a list of hunks. Parsing is done lazily on the first
    call of a public method and the result is cached.

    All line indexes accepted by the public methods are relative to the
    first hunk header line, i.e. index 0 is the first line after the diff
    header (see headerLength()).
    """
    HunkHeaderRegexp = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)',
                                  re.DOTALL)

    def __init__(self, diff):
        """
        Constructor

        @param diff output of the diff command (list of string)
        """
        # keep a private copy so later changes by the caller don't matter
        self.__diff = diff[:]

        self.__headerLines = []
        self.__hunks = []
        self.__parsed = False
        # diff parsing is done on demand

    def __initHunk(self):
        """
        Private method to initialize a hunk data structure.

        @return hunk data structure (dictionary)
        """
        return {
            "start": -1,        # index of the hunk header line
            "end": -1,          # index of the last line of the hunk
            "lines": [],        # hunk lines including the hunk header
            "oldStart": -1,
            "oldCount": -1,
            "newStart": -1,
            "newCount": -1,
            "heading": "",      # text following the closing '@@'
        }

    def __parseRange(self, headerRange):
        """
        Private method to parse the hunk header range part.

        @param headerRange hunk header range (string)
        @return tuple of hunk start and hunk length (integer, integer)
        @exception ValueError raised when a part of the range is not an
            integer
        """
        if ',' in headerRange:
            begin, count = headerRange.split(',', 1)
            return int(begin), int(count)
        else:
            # a range without a count denotes exactly one line
            return int(headerRange), 1

    def __parseDiff(self):
        """
        Private method to parse the diff output.

        The result is built in local variables and only stored once
        parsing succeeded. This keeps the object consistent if an
        exception is raised and makes repeated calls a no-op.

        @exception AssertionError raised when a malformed hunk header is
            encountered
        """
        if self.__parsed:
            return

        # step 1: extract the diff header
        headerLines = []
        for line in self.__diff:
            if line.startswith("@@ "):
                break
            headerLines.append(line)

        # step 2: break the rest into diff hunks
        hunks = []
        for lineIdx, line in enumerate(self.__diff[len(headerLines):]):
            # dissect the hunk header line
            m = self.HunkHeaderRegexp.match(line)
            if m:
                hunk = self.__initHunk()
                hunk["start"] = lineIdx
                hunk["oldStart"], hunk["oldCount"] = \
                    self.__parseRange(m.group(1))
                hunk["newStart"], hunk["newCount"] = \
                    self.__parseRange(m.group(2))
                hunk["heading"] = m.group(3)
                hunks.append(hunk)
            elif not hunks:
                # the first line after the header must be a hunk header
                raise AssertionError("Malformed hunk header: '{0}'"
                                     .format(line))
            hunks[-1]["lines"].append(line)

        # step 3: calculate hunk end lines
        for hunk in hunks:
            hunk["end"] = hunk["start"] + len(hunk["lines"]) - 1

        # commit the result and remember that parsing is done
        self.__headerLines = headerLines
        self.__hunks = hunks
        self.__parsed = True

    def __generateRange(self, start, count):
        """
        Private method to generate a hunk header range.

        @param start start line (integer)
        @param count line count (integer)
        @return hunk header range (string)
        """
        if count == 1:
            # the count is omitted for single line ranges
            return "{0}".format(start)
        else:
            return "{0},{1}".format(start, count)

    def __generateHunkHeader(self, oldStart, oldCount, newStart, newCount,
                             heading=os.linesep):
        """
        Private method to generate a hunk header line.

        @param oldStart start line of the old part (integer)
        @param oldCount line count of the old part (integer)
        @param newStart start line of the new part (integer)
        @param newCount line count of the new part (integer)
        @param heading hunk heading including the line terminator (string)
        @return hunk header (string)
        """
        return "@@ -{0} +{1} @@{2}".format(
            self.__generateRange(oldStart, oldCount),
            self.__generateRange(newStart, newCount),
            heading)

    def headerLength(self):
        """
        Public method to get the header length.

        @return length of the header (integer)
        """
        self.__parseDiff()
        return len(self.__headerLines)

    def createHunkPatch(self, lineIndex):
        """
        Public method to create a hunk based patch.

        The patch consists of the diff header followed by the lines of
        the hunk containing the given line. If no hunk contains the line,
        only the diff header is returned.

        @param lineIndex line number of the hunk (integer)
        @return diff lines of the patch (string)
        """
        self.__parseDiff()

        patch = self.__headerLines[:]
        for hunk in self.__hunks:
            if hunk["start"] <= lineIndex <= hunk["end"]:
                patch.extend(hunk["lines"])
                break

        return "".join(patch)

    def createLinesPatch(self, startIndex, endIndex, reverse=False):
        """
        Public method to create a selected lines based patch.

        Changes outside the selected line range are neutralized: for a
        forward patch unselected additions are dropped and unselected
        deletions become context lines; for a reverse patch it is the
        other way round. Hunks left without any change are omitted.

        @param startIndex start line number (integer)
        @param endIndex end line number (integer)
        @param reverse flag indicating a reverse patch (boolean)
        @return diff lines of the patch (string); empty if the selection
            does not contain any change
        """
        self.__parseDiff()

        ADDITION = "+"
        DELETION = "-"
        CONTEXT = " "
        NONEWLINE = "\\"

        patch = []
        # accumulated line shift caused by the hunks emitted so far
        startOffset = 0

        for hunk in self.__hunks:
            if hunk["end"] < startIndex:
                # skip hunks before the selected lines
                continue

            if hunk["start"] > endIndex:
                # done, exit the loop
                break

            counts = {
                ADDITION: 0,
                DELETION: 0,
                CONTEXT: 0,
            }
            previousLineSkipped = False
            processedLines = []

            # the first hunk line is the hunk header; it is regenerated
            for lineIndex, line in enumerate(hunk["lines"][1:],
                                             start=hunk["start"] + 1):
                # slicing (instead of indexing) tolerates an empty line
                lineType = line[:1]
                lineContent = line[1:]

                if not (startIndex <= lineIndex <= endIndex):
                    if (not reverse and lineType == ADDITION) or \
                       (reverse and lineType == DELETION):
                        previousLineSkipped = True
                        continue

                    elif (not reverse and lineType == DELETION) or \
                         (reverse and lineType == ADDITION):
                        lineType = CONTEXT

                if lineType == NONEWLINE and previousLineSkipped:
                    # the marker belongs to the line that was dropped
                    continue

                processedLines.append(lineType + lineContent)
                # "no newline" markers are kept but are not part of the
                # line counts of the hunk header
                if lineType in counts:
                    counts[lineType] += 1
                previousLineSkipped = False

            # hunks consisting of pure context lines are excluded
            if counts[ADDITION] == 0 and counts[DELETION] == 0:
                continue

            oldCount = counts[CONTEXT] + counts[DELETION]
            newCount = counts[CONTEXT] + counts[ADDITION]
            oldStart = hunk["oldStart"]
            newStart = oldStart + startOffset
            if oldCount == 0:
                newStart += 1
            if newCount == 0:
                newStart -= 1

            startOffset += counts[ADDITION] - counts[DELETION]

            patch.append(self.__generateHunkHeader(oldStart, oldCount,
                                                   newStart, newCount,
                                                   hunk["heading"]))
            patch.extend(processedLines)

        if not patch:
            return ""
        else:
            return "".join(self.__headerLines[:] + patch)