OllamaInterface/OllamaClient.py

changeset 5:6e8af43d537d
parent 4:7dd1b9cd3150
child 7:eb1dec15b2f0
--- a/OllamaInterface/OllamaClient.py	Mon Aug 05 18:37:16 2024 +0200
+++ b/OllamaInterface/OllamaClient.py	Tue Aug 06 18:18:39 2024 +0200
@@ -41,8 +41,8 @@
     """
     Class implementing the 'ollama' client.
 
-    @signal replyReceived(content:str, role:str) emitted after a response from the
-        'ollama' server was received
+    @signal replyReceived(content:str, role:str, done:bool) emitted after a response
+        from the 'ollama' server was received
     @signal modelsList(modelNames:list[str]) emitted after the list of model
         names was obtained from the 'ollama' server
     @signal detailedModelsList(models:list[dict]) emitted after the list of
@@ -61,7 +61,7 @@
         responsiveness
     """
 
-    replyReceived = pyqtSignal(str, str)
+    replyReceived = pyqtSignal(str, str, bool)
     modelsList = pyqtSignal(list)
     detailedModelsList = pyqtSignal(list)
     runningModelsList = pyqtSignal(list)
@@ -101,7 +101,7 @@
         self.__plugin.preferencesChanged.connect(self.__setHeartbeatTimer)
         self.__setHeartbeatTimer()
 
-    def chat(self, model, messages):
+    def chat(self, model, messages, streaming=True):
         """
         Public method to request a chat completion from the 'ollama' server.
 
@@ -109,11 +109,13 @@
         @type str
         @param messages list of message objects
         @type list of dict
+        @param streaming flag indicating to receive a streaming response
+        @type bool
         """
-        # TODO: not implemented yet
         ollamaRequest = {
             "model": model,
             "messages": messages,
+            "stream": streaming,
         }
         self.__sendRequest(
             "chat", data=ollamaRequest, processResponse=self.__processChatResponse
@@ -128,8 +130,9 @@
         """
         with contextlib.suppress(KeyError):
             message = response["message"]
+            done = response["done"]
             if message:
-                self.replyReceived.emit(message["content"], message["role"])
+                self.replyReceived.emit(message["content"], message["role"], done)
 
     def generate(self, model, prompt, suffix=None):
         """
@@ -142,7 +145,6 @@
         @param suffix text after the model response (defaults to None)
         @type str (optional)
         """
-        # TODO: not implemented yet
         ollamaRequest = {
             "model": model,
             "prompt": prompt,
@@ -163,7 +165,7 @@
         @type dict
         """
         with contextlib.suppress(KeyError):
-            self.replyReceived.emit(response["response"], "")
+            self.replyReceived.emit(response["response"], "", response["done"])
 
     def pull(self, model):
         """
