Fri, 09 Apr 2021 18:38:01 +0200
Applied some more code simplifications suggested by the new Simplify checker.
# -*- coding: utf-8 -*- # Copyright (c) 2015 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> # """ Module implementing a class to store and parse diff output. """ import re import os class GitDiffParser: """ Class implementing a class to store and parse diff output. """ HunkHeaderRegexp = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)', re.DOTALL) def __init__(self, diff): """ Constructor @param diff output of the diff command (list of string) """ self.__diff = diff[:] self.__headerLines = [] self.__hunks = [] self.__parsed = False # diff parsing is done on demand def __initHunk(self): """ Private method to initialize a hunk data structure. @return hunk data structure (dictionary) """ hunk = { "start": -1, "end": -1, "lines": [], "oldStart": -1, "oldCount": -1, "newStart": -1, "newCount": -1, "heading": "", } return hunk def __parseRange(self, headerRange): """ Private method to parse the hunk header range part. @param headerRange hunk header range (string) @return tuple of hunk start and hunk length (integer, integer) """ if ',' in headerRange: begin, end = headerRange.split(',', 1) return int(begin), int(end) else: return int(headerRange), 1 def __parseDiff(self): """ Private method to parse the diff output. @exception AssertionError raised when a malformed hunk header is encountered """ if not self.__parsed: # step 1: extract the diff header for line in self.__diff: if not line.startswith("@@ "): self.__headerLines.append(line) else: break # step 2: break the rest into diff hunks for lineIdx, line in enumerate( self.__diff[len(self.__headerLines):]): # disect the hunk header line m = self.HunkHeaderRegexp.match(line) if m: self.__hunks.append(self.__initHunk()) self.__hunks[-1]["start"] = lineIdx (self.__hunks[-1]["oldStart"], self.__hunks[-1]["oldCount"]) = self.__parseRange( m.group(1)) (self.__hunks[-1]["newStart"], self.__hunks[-1]["newCount"]) = self.__parseRange( m.group(2)) self.__hunks[-1]["heading"] = m.group(3) elif not self.__hunks: raise AssertionError("Malformed hunk header: '{0}'" .format(line)) self.__hunks[-1]["lines"].append(line) # step 3: calculate hunk end lines for hunk in self.__hunks: hunk["end"] = hunk["start"] + len(hunk["lines"]) - 1 def __generateRange(self, start, count): """ Private method to generate a hunk header range. @param start start line (integer) @param count line count (integer) @return hunk header range (string) """ if count == 1: return "{0}".format(start) else: return "{0},{1}".format(start, count) def __generateHunkHeader(self, oldStart, oldCount, newStart, newCount, heading=os.linesep): """ Private method to generate a hunk header line. @param oldStart start line of the old part (integer) @param oldCount line count of the old part (integer) @param newStart start line of the new part (integer) @param newCount line count of the new part (integer) @param heading hunk heading (string) @return hunk header (string) """ return "@@ -{0} +{1} @@{2}".format( self.__generateRange(oldStart, oldCount), self.__generateRange(newStart, newCount), heading) def headerLength(self): """ Public method to get the header length. @return length of the header (integer) """ self.__parseDiff() return len(self.__headerLines) def createHunkPatch(self, lineIndex): """ Public method to create a hunk based patch. @param lineIndex line number of the hunk (integer) @return diff lines of the patch (string) """ self.__parseDiff() patch = self.__headerLines[:] for hunk in self.__hunks: if hunk["start"] <= lineIndex <= hunk["end"]: patch.extend(hunk["lines"]) break return "".join(patch) def createLinesPatch(self, startIndex, endIndex, reverse=False): """ Public method to create a selected lines based patch. @param startIndex start line number (integer) @param endIndex end line number (integer) @param reverse flag indicating a reverse patch (boolean) @return diff lines of the patch (string) """ self.__parseDiff() ADDITION = "+" DELETION = "-" CONTEXT = " " NONEWLINE = "\\" patch = [] startOffset = 0 for hunk in self.__hunks: if hunk["end"] < startIndex: # skip hunks before the selected lines continue if hunk["start"] > endIndex: # done, exit the loop break counts = { ADDITION: 0, DELETION: 0, CONTEXT: 0, } previousLineSkipped = False processedLines = [] for lineIndex, line in enumerate(hunk["lines"][1:], start=hunk["start"] + 1): lineType = line[0] lineContent = line[1:] if not (startIndex <= lineIndex <= endIndex): if ( (not reverse and lineType == ADDITION) or (reverse and lineType == DELETION) ): previousLineSkipped = True continue elif ( (not reverse and lineType == DELETION) or (reverse and lineType == ADDITION) ): lineType = CONTEXT if lineType == NONEWLINE and previousLineSkipped: continue processedLines.append(lineType + lineContent) counts[lineType] += 1 previousLineSkipped = False # hunks consisting of pure context lines are excluded if counts[ADDITION] == 0 and counts[DELETION] == 0: continue oldCount = counts[CONTEXT] + counts[DELETION] newCount = counts[CONTEXT] + counts[ADDITION] oldStart = hunk["oldStart"] newStart = oldStart + startOffset if oldCount == 0: newStart += 1 if newCount == 0: newStart -= 1 startOffset += counts[ADDITION] - counts[DELETION] patch.append(self.__generateHunkHeader(oldStart, oldCount, newStart, newCount, hunk["heading"])) patch.extend(processedLines) if not patch: return "" else: return "".join(self.__headerLines[:] + patch)