% Conference paper (pp. 68--71) in the WMSCI 2024 proceedings.
% The citation key is kept as exported so that existing citations still resolve.
@inproceedings{091d8c42d1b849c89455df449a579575,
  author    = {Jenq, John},
  title     = {Improving Performance of Local Chatbot with Caching},
  booktitle = {{WMSCI} 2024 - 28th World Multi-Conference on Systemics, Cybernetics and Informatics, Proceedings},
  editor    = {Callaos, Nagib C. and Gaile-Sarkane, Elina and Lace, Natalja and Sanchez, Belkis and Savoie, Michael},
  series    = {Proceedings of World Multi-Conference on Systemics, Cybernetics and Informatics, {WMSCI}},
  publisher = {International Institute of Informatics and Cybernetics},
  year      = {2024},
  pages     = {68--71},
  doi       = {10.54808/WMSCI2024.01.68},
  language  = {English},
  keywords  = {Cache, Chatbot, Embeddings, LLM, RAG, Similarity Search},
  abstract  = {Chatbots and the technology behind them are widely used in many places and in various ways. Retrieval Augmented Generation AI framework has gained its popularity by its linking of large language model with private dataset. It enables one to run AI locally and privately with the most updated information and knowledge. In this report, we aim to improve the local private chatbot response time by using a cache. From our experimental results, the majority of time spent during the query process is in the generation of the response. The response time can be significantly improved when there is a hit on the cache system which enables us to return the response to the user immediately without going through the generation step. In this report, we focus our efforts on improving the turnaround time of the generation step. The cache is organized into categories which can be used for efficient searching. User{\textquoteright}s query information such as query string, embedding information, and its response are recorded and stored in the cache. Experiment results are presented and the issues of speed up of request response turnaround time is addressed.},
  note      = {Publisher Copyright: {\textcopyright} 2024 by the International Institute of Informatics and Systemics. All rights reserved.; 28th World Multi-Conference on Systemics, Cybernetics and Informatics, WMSCI 2024 ; Conference date: 10-09-2024 Through 13-09-2024},
}