43 |
43 |
44 @signal replyReceived(content:str, role:str, done:bool) emitted after a response |
44 @signal replyReceived(content:str, role:str, done:bool) emitted after a response |
45 from the 'ollama' server was received |
45 from the 'ollama' server was received |
46 @signal modelsList(modelNames:list[str]) emitted after the list of model |
46 @signal modelsList(modelNames:list[str]) emitted after the list of model |
47 names was obtained from the 'ollama' server |
47 names was obtained from the 'ollama' server |
48 @signal detailedModelsList(models:list[dict]) emitted after the list of |
|
49 models was obtained from the 'ollama' server giving some model details |
|
50 @signal runningModelsList(models:list[dict]) emitted after the list of |
|
51 running models was obtained from the 'ollama' server giving some model |
|
52 execution details |
|
53 @signal pullStatus(msg:str, id:str, total:int, completed:int) emitted to indicate |
48 @signal pullStatus(msg:str, id:str, total:int, completed:int) emitted to indicate |
54 the status of a pull request as reported by the 'ollama' server |
49 the status of a pull request as reported by the 'ollama' server |
55 @signal serverVersion(version:str) emitted after the server version was obtained |
50 @signal serverVersion(version:str) emitted after the server version was obtained |
56 from the 'ollama' server |
51 from the 'ollama' server |
57 @signal finished() emitted to indicate the completion of a request |
52 @signal finished() emitted to indicate the completion of a request |
61 responsiveness |
56 responsiveness |
62 """ |
57 """ |
63 |
58 |
# Qt signals of the client; the emission semantics are described in the
# class docstring ("@signal" entries).
replyReceived = pyqtSignal(str, str, bool)      # content, role, done
modelsList = pyqtSignal(list)                   # list of model names
detailedModelsList = pyqtSignal(list)           # list of model detail dicts
runningModelsList = pyqtSignal(list)            # list of running-model dicts
pullStatus = pyqtSignal(str, str, int, int)     # msg, id, total, completed
serverVersion = pyqtSignal(str)                 # server version string
finished = pyqtSignal()                         # request completed
errorOccurred = pyqtSignal(str)                 # error message text
serverStateChanged = pyqtSignal(bool)           # server responsive state
def listDetails(self):
    """
    Public method to request a list of models available locally from the
    'ollama' server with some model details.

    The result is delivered asynchronously through the 'detailedModelsList'
    signal (emitted by the response processing method).
    """
    # NOTE(review): a stale "# TODO: not implemented yet" marker was removed
    # here; the request below is the implementation.
    self.__sendRequest("tags", processResponse=self.__processDetailedModelsList)
def __processDetailedModelsList(self, response):
    """
    Private method to process the tags response of the 'ollama' server
    extracting some model details.

    Emits the 'detailedModelsList' signal with the extracted list (empty,
    if the response did not contain a usable 'models' entry).

    @param response dictionary containing the tags response
    @type dict
    """
    models = []
    for model in response.get("models", []):
        # Suppress per entry so one malformed model record does not
        # silently drop all of the remaining ones.
        with contextlib.suppress(KeyError):
            name = model["name"]
            if name:
                models.append(
                    {
                        "name": name,
                        "id": model["digest"][:20],  # first 20 characters only
                        "size": model["size"],
                        "modified": datetime.datetime.fromisoformat(
                            model["modified_at"]
                        ),
                    }
                )
    self.detailedModelsList.emit(models)
def listRunning(self):
    """
    Public method to request a list of running models from the 'ollama'
    server.

    The result is delivered asynchronously through the 'runningModelsList'
    signal (emitted by the response processing method).
    """
    # NOTE(review): a stale "# TODO: not implemented yet" marker was removed
    # here; the request below is the implementation.
    self.__sendRequest("ps", processResponse=self.__processRunningModelsList)
def __processRunningModelsList(self, response):
    """
    Private method to process the ps response of the 'ollama' server
    extracting some model execution details.

    Emits the 'runningModelsList' signal with the extracted list (empty,
    if the response did not contain a usable 'models' entry).

    @param response dictionary containing the ps response
    @type dict
    """
    models = []
    for model in response.get("models", []):
        # Suppress per entry so one malformed model record does not
        # silently drop all of the remaining ones. (A previous revision
        # accessed model["size_"] here; the KeyError was swallowed by the
        # suppress and truncated the whole list.)
        with contextlib.suppress(KeyError):
            name = model["name"]
            if not name:
                continue
            if model["size_vram"] == 0:
                processor = self.tr("100% CPU")
            elif model["size_vram"] == model["size"]:
                processor = self.tr("100% GPU")
            elif model["size_vram"] > model["size"] or model["size"] == 0:
                processor = self.tr("unknown")
            else:
                # Portion of the model held in ordinary RAM; the percentage
                # is relative to the total model size so that the CPU and
                # GPU shares add up to 100%.
                sizeCpu = model["size"] - model["size_vram"]
                cpuPercent = round(sizeCpu / model["size"] * 100)
                processor = self.tr("{0}% / {1}% CPU / GPU").format(
                    cpuPercent, 100 - cpuPercent
                )
            models.append(
                {
                    "name": name,
                    "id": model["digest"][:20],  # first 20 characters only
                    "size": model["size"],
                    "size_vram": model["size_vram"],
                    "processor": processor,
                    "expires": datetime.datetime.fromisoformat(
                        model["expires_at"]
                    ),
                }
            )
    self.runningModelsList.emit(models)
324 def version(self): |
303 def version(self): |
325 """ |
304 """ |
326 Public method to request the version from the 'ollama' server. |
305 Public method to request the version from the 'ollama' server. |
327 """ |
306 """ |
344 @return current client state |
323 @return current client state |
345 @rtype OllamaClientState |
324 @rtype OllamaClientState |
346 """ |
325 """ |
347 return self.__state |
326 return self.__state |
348 |
327 |
def __getServerReply(self, endpoint, data=None):
    """
    Private method to send a request to the 'ollama' server and return the
    network reply object.

    A POST request carrying the JSON encoded 'data' is sent, if data was
    given; a plain GET request otherwise. The reply's 'errorOccurred'
    signal is connected here, so callers must not connect it again.

    @param endpoint 'ollama' API endpoint to be contacted
    @type str
    @param data dictionary containing the data to send to the server
        (defaults to None)
    @type dict (optional)
    @return 'ollama' server reply
    @rtype QNetworkReply
    """
    ollamaUrl = QUrl(
        "{0}://{1}:{2}/api/{3}".format(
            self.__plugin.getPreferences("OllamaScheme"),
            (
                "127.0.0.1"
                if self.__localServer
                else self.__plugin.getPreferences("OllamaHost")
            ),
            (
                self.__plugin.getPreferences("OllamaLocalPort")
                if self.__localServer
                else self.__plugin.getPreferences("OllamaPort")
            ),
            endpoint,
        )
    )
    request = QNetworkRequest(ollamaUrl)
    if data is not None:
        request.setHeader(
            QNetworkRequest.KnownHeaders.ContentTypeHeader, "application/json"
        )
        jsonData = json.dumps(data).encode("utf-8")
        reply = self.__networkManager.post(request, jsonData)
    else:
        reply = self.__networkManager.get(request)
    reply.errorOccurred.connect(lambda error: self.__errorOccurred(error, reply))
    return reply

def __sendRequest(self, endpoint, data=None, processResponse=None):
    """
    Private method to send an asynchronous request to the 'ollama' server
    and arrange for its responses to be handled.

    The reply object is kept alive in the '__replies' list until
    '__replyFinished()' disposes of it.

    @param endpoint 'ollama' API endpoint to be contacted
    @type str
    @param data dictionary containing the data to send to the server
        (defaults to None)
    @type dict (optional)
    @param processResponse function handling the received data
        (defaults to None)
    @type function (optional)
    """
    self.__state = OllamaClientState.Requesting

    # NOTE(review): a commented-out duplicate of the URL/request construction
    # was removed here; __getServerReply() is the single place building
    # requests and it already connects the reply's 'errorOccurred' signal.
    reply = self.__getServerReply(endpoint=endpoint, data=data)
    reply.finished.connect(lambda: self.__replyFinished(reply))
    reply.readyRead.connect(lambda: self.__processData(reply, processResponse))
    self.__replies.append(reply)
395 def __replyFinished(self, reply): |
416 def __replyFinished(self, reply): |
396 """ |
417 """ |
438 if buffer: |
459 if buffer: |
439 with contextlib.suppress(json.JSONDecodeError): |
460 with contextlib.suppress(json.JSONDecodeError): |
440 data = json.loads(buffer) |
461 data = json.loads(buffer) |
441 if data and processResponse: |
462 if data and processResponse: |
442 processResponse(data) |
463 processResponse(data) |
|
464 |
|
def __sendSyncRequest(self, endpoint, data=None):
    """
    Private method to send a request to the 'ollama' server and wait for
    its response.

    @param endpoint 'ollama' API endpoint to be contacted
    @type str
    @param data dictionary containing the data to send to the server
        (defaults to None)
    @type dict (optional)
    @return dictionary containing the decoded JSON response or None, if
        the request failed or the response could not be decoded
    @rtype dict or None
    """
    self.__state = OllamaClientState.Requesting

    reply = self.__getServerReply(endpoint=endpoint, data=data)
    # Busy wait with event processing because this method must not return
    # before the server reply is complete.
    while not reply.isFinished():
        QCoreApplication.processEvents()
        QThread.msleep(100)

    reply.deleteLater()

    self.__state = OllamaClientState.Finished

    if reply.error() == QNetworkReply.NetworkError.NoError:
        buffer = bytes(reply.readAll())
        with contextlib.suppress(json.JSONDecodeError):
            # Use a distinct name; reusing 'data' here would shadow the
            # request payload parameter.
            response = json.loads(buffer)
            return response
    return None
443 |
494 |
444 def heartbeat(self): |
495 def heartbeat(self): |
445 """ |
496 """ |
446 Public method to check, if the 'ollama' server has started and is responsive. |
497 Public method to check, if the 'ollama' server has started and is responsive. |
447 |
498 |