OllamaInterface/OllamaClient.py

changeset 8:3118d16e526e
parent    7:eb1dec15b2f0
child     9:c471738b75b3
diff -r eb1dec15b2f0 -r 3118d16e526e OllamaInterface/OllamaClient.py
--- a/OllamaInterface/OllamaClient.py
+++ b/OllamaInterface/OllamaClient.py
@@ -43,15 +43,10 @@
 
     @signal replyReceived(content:str, role:str, done:bool) emitted after a response
         from the 'ollama' server was received
     @signal modelsList(modelNames:list[str]) emitted after the list of model
         names was obtained from the 'ollama' server
-    @signal detailedModelsList(models:list[dict]) emitted after the list of
-        models was obtained from the 'ollama' server giving some model details
-    @signal runningModelsList(models:list[dict]) emitted after the list of
-        running models was obtained from the 'ollama' server giving some model
-        execution details
     @signal pullStatus(msg:str, id:str, total:int, completed:int) emitted to indicate
         the status of a pull request as reported by the 'ollama' server
     @signal serverVersion(version:str) emitted after the server version was obtained
         from the 'ollama' server
     @signal finished() emitted to indicate the completion of a request
@@ -61,12 +56,10 @@
         responsiveness
     """
 
     replyReceived = pyqtSignal(str, str, bool)
     modelsList = pyqtSignal(list)
-    detailedModelsList = pyqtSignal(list)
-    runningModelsList = pyqtSignal(list)
     pullStatus = pyqtSignal(str, str, int, int)
     serverVersion = pyqtSignal(str)
     finished = pyqtSignal()
     errorOccurred = pyqtSignal(str)
     serverStateChanged = pyqtSignal(bool)
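With the two signals gone, the calling convention for these two requests changes from asynchronous delivery to a direct return value. A minimal sketch of the difference for a hypothetical caller (the slot name __populateModels is illustrative, not part of this changeset):

    # before: the result arrived via a signal, some time after the request
    client.detailedModelsList.connect(self.__populateModels)
    client.listDetails()

    # after: the call blocks (while pumping the event loop) and returns
    # the parsed list directly
    self.__populateModels(client.listDetails())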
@@ -245,33 +238,26 @@
     def listDetails(self):
         """
         Public method to request a list of models available locally from the 'ollama'
         server with some model details.
         """
-        # TODO: not implemented yet
-        self.__sendRequest("tags", processResponse=self.__processDetailedModelsList)
-
-    def __processDetailedModelsList(self, response):
-        """
-        Private method to process the tags response of the 'ollama' server extracting
-        some model details.
-
-        @param response dictionary containing the tags response
-        @type dict
-        """
+        response = self.__sendSyncRequest("tags")
+
         models = []
-        with contextlib.suppress(KeyError):
-            for model in response["models"]:
-                name = model["name"]
-                if name:
-                    models.append(
-                        {
-                            "name": name,
-                            "id": model["digest"][:20],  # first 20 characters only
-                            "size": model["size"],
-                            "modified": datetime.datetime.fromisoformat(
-                                model["modified_at"]
-                            ),
-                        }
-                    )
-        self.detailedModelsList.emit(models)
+        if response is not None:
+            with contextlib.suppress(KeyError):
+                for model in response["models"]:
+                    name = model["name"]
+                    if name:
+                        models.append(
+                            {
+                                "name": name,
+                                "id": model["digest"][:20],  # first 20 characters only
+                                "size": model["size"],
+                                "modified": datetime.datetime.fromisoformat(
+                                    model["modified_at"]
+                                ),
+                            }
+                        )
+
+        return models
 
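The parsing loop assumes a 'tags' response of roughly the following shape. Only the keys (models, name, digest, size, modified_at) are taken from the code; the values are made-up examples:

    {
        "models": [
            {
                "name": "llama3:latest",
                "digest": "365c0bd3c117205a89bd163adea748fd9eb3b1c36175a8e4d9a4b4a4e1f8e9ab",
                "size": 4661224676,
                "modified_at": "2024-05-04T14:56:49.277302+02:00"
            }
        ]
    }

Entries with an empty name are skipped, and only the first 20 characters of the digest are kept as the model id.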
@@ -278,46 +264,39 @@
     def listRunning(self):
         """
         Public method to request a list of running models from the 'ollama' server.
         """
-        # TODO: not implemented yet
-        self.__sendRequest("ps", processResponse=self.__processRunningModelsList)
-
-    def __processRunningModelsList(self, response):
-        """
-        Private method to process the ps response of the 'ollama' server extracting
-        some model execution details.
-
-        @param response dictionary containing the ps response
-        @type dict
-        """
+        response = self.__sendSyncRequest("ps")
+
         models = []
-        with contextlib.suppress(KeyError):
-            for model in response["models"]:
-                name = model["name"]
-                if name:
-                    if model["size_vram"] == 0:
-                        processor = self.tr("100% CPU")
-                    elif model["size_vram"] == model["size"]:
-                        processor = self.tr("100% GPU")
-                    elif model["size_vram"] > model["size_"] or model["size"] == 0:
-                        processor = self.tr("unknown")
-                    else:
-                        sizeCpu = model["size"] - model["size_vram"]
-                        cpuPercent = round(sizeCpu / model["size_vram"] * 100)
-                        processor = self.tr("{0}% / {1}% CPU / GPU").format(
-                            cpuPercent, 100 - cpuPercent
-                        )
-                    models.append(
-                        {
-                            "name": name,
-                            "id": model["digest"][:20],  # first 20 characters only
-                            "size": model["size"],
-                            "size_vram": model["size_vram"],
-                            "processor": processor,
-                            "expires": datetime.datetime.fromisoformat(
-                                model["expires_at"]
-                            ),
-                        }
-                    )
-        self.runningModelsList.emit(models)
+        if response is not None:
+            with contextlib.suppress(KeyError):
+                for model in response["models"]:
+                    name = model["name"]
+                    if name:
+                        if model["size_vram"] == 0:
+                            processor = self.tr("100% CPU")
+                        elif model["size_vram"] == model["size"]:
+                            processor = self.tr("100% GPU")
+                        elif model["size_vram"] > model["size"] or model["size"] == 0:
+                            processor = self.tr("unknown")
+                        else:
+                            sizeCpu = model["size"] - model["size_vram"]
+                            cpuPercent = round(sizeCpu / model["size_vram"] * 100)
+                            processor = self.tr("{0}% / {1}% CPU / GPU").format(
+                                cpuPercent, 100 - cpuPercent
+                            )
+                        models.append(
+                            {
+                                "name": name,
+                                "id": model["digest"][:20],  # first 20 characters only
+                                "size": model["size"],
+                                "size_vram": model["size_vram"],
+                                "processor": processor,
+                                "expires": datetime.datetime.fromisoformat(
+                                    model["expires_at"]
+                                ),
+                            }
+                        )
+
+        return models
 
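In the mixed case the CPU/GPU split is derived from the part of the model that does not fit into VRAM; note that the code divides by size_vram, not by the total model size. A worked example with assumed sizes:

    size, size_vram = 5_000_000_000, 4_000_000_000  # assumed: 5 GB model, 4 GB in VRAM
    sizeCpu = size - size_vram                      # 1_000_000_000
    cpuPercent = round(sizeCpu / size_vram * 100)   # round(25.0) -> 25
    # processor is then shown as "25% / 75% CPU / GPU"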
@@ -324,4 +303,4 @@
     def version(self):
         """
         Public method to request the version from the 'ollama' server.
         """
@@ -344,26 +323,24 @@
         @return current client state
         @rtype OllamaClientState
         """
         return self.__state
 
-    def __sendRequest(self, endpoint, data=None, processResponse=None):
+    def __getServerReply(self, endpoint, data=None):
         """
-        Private method to send a request to the 'ollama' server and handle its
-        responses.
+        Private method to send a request to the 'ollama' server and return a reply
+        object.
 
         @param endpoint 'ollama' API endpoint to be contacted
         @type str
         @param data dictionary containing the data to send to the server
             (defaults to None)
         @type dict (optional)
-        @param processResponse function handling the received data (defaults to None)
-        @type function (optional)
+        @return 'ollama' server reply
+        @rtype QNetworkReply
         """
-        self.__state = OllamaClientState.Requesting
-
         ollamaUrl = QUrl(
             "{0}://{1}:{2}/api/{3}".format(
                 self.__plugin.getPreferences("OllamaScheme"),
                 (
                     "127.0.0.1"
                     if self.__localServer
@@ -384,13 +361,57 @@
             )
             jsonData = json.dumps(data).encode("utf-8")
             reply = self.__networkManager.post(request, jsonData)
         else:
             reply = self.__networkManager.get(request)
-
+        reply.errorOccurred.connect(lambda error: self.__errorOccurred(error, reply))
+        return reply
+
+    def __sendRequest(self, endpoint, data=None, processResponse=None):
+        """
+        Private method to send a request to the 'ollama' server and handle its
+        responses.
+
+        @param endpoint 'ollama' API endpoint to be contacted
+        @type str
+        @param data dictionary containing the data to send to the server
+            (defaults to None)
+        @type dict (optional)
+        @param processResponse function handling the received data (defaults to None)
+        @type function (optional)
+        """
+        self.__state = OllamaClientState.Requesting
+
+        ##ollamaUrl = QUrl(
+        ##"{0}://{1}:{2}/api/{3}".format(
+        ##self.__plugin.getPreferences("OllamaScheme"),
+        ##(
+        ##"127.0.0.1"
+        ##if self.__localServer
+        ##else self.__plugin.getPreferences("OllamaHost")
+        ##),
+        ##(
+        ##self.__plugin.getPreferences("OllamaLocalPort")
+        ##if self.__localServer
+        ##else self.__plugin.getPreferences("OllamaPort")
+        ##),
+        ##endpoint,
+        ##)
+        ##)
+        ##request = QNetworkRequest(ollamaUrl)
+        ##if data is not None:
+        ##request.setHeader(
+        ##QNetworkRequest.KnownHeaders.ContentTypeHeader, "application/json"
+        ##)
+        ##jsonData = json.dumps(data).encode("utf-8")
+        ##reply = self.__networkManager.post(request, jsonData)
+        ##else:
+        ##reply = self.__networkManager.get(request)
+        ##
+        reply = self.__getServerReply(endpoint=endpoint, data=data)
         reply.finished.connect(lambda: self.__replyFinished(reply))
-        reply.errorOccurred.connect(lambda error: self.__errorOccurred(error, reply))
+        ##reply.errorOccurred.connect(lambda error: self.__errorOccurred(error, reply))
         reply.readyRead.connect(lambda: self.__processData(reply, processResponse))
         self.__replies.append(reply)
 
     def __replyFinished(self, reply):
         """
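The new __getServerReply() concentrates the request plumbing shared by the asynchronous path and the new synchronous path: it builds the endpoint URL from the plugin preferences, sends a JSON-encoded POST when a data payload is given and a plain GET otherwise, and connects the error handler. For a default local setup the resulting URL has the form shown below; 11434 is ollama's conventional default port, the actual value comes from the OllamaLocalPort preference:

    http://127.0.0.1:11434/api/tags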
@@ -438,10 +459,40 @@
         if buffer:
             with contextlib.suppress(json.JSONDecodeError):
                 data = json.loads(buffer)
                 if data and processResponse:
                     processResponse(data)
+
+    def __sendSyncRequest(self, endpoint, data=None):
+        """
+        Private method to send a request to the 'ollama' server and handle its
+        responses.
+
+        @param endpoint 'ollama' API endpoint to be contacted
+        @type str
+        @param data dictionary containing the data to send to the server
+            (defaults to None)
+        @type dict (optional)
+        """
+        self.__state = OllamaClientState.Requesting
+
+        reply = self.__getServerReply(endpoint=endpoint, data=data)
+        while not reply.isFinished():
+            QCoreApplication.processEvents()
+            QThread.msleep(100)
+
+        reply.deleteLater()
+
+        self.__state = OllamaClientState.Finished
+
+        if reply.error() == QNetworkReply.NetworkError.NoError:
+            buffer = bytes(reply.readAll())
+            with contextlib.suppress(json.JSONDecodeError):
+                data = json.loads(buffer)
+                return data
+
+        return None
 
     def heartbeat(self):
         """
         Public method to check, if the 'ollama' server has started and is responsive.
 
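__sendSyncRequest() waits for the reply by polling isFinished() and calling QCoreApplication.processEvents() between 100 ms sleeps, so the GUI stays responsive while the caller blocks; on success it returns the decoded JSON body, otherwise None. A hypothetical in-class use, mirroring listDetails() and listRunning() above (the 'version' endpoint returns an object with a 'version' key):

    response = self.__sendSyncRequest("version")  # GET /api/version
    if response is not None:
        serverVersion = response["version"]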
