diff --git a/gen_ai_configs/gpt_4_all.mdx b/gen_ai_configs/gpt_4_all.mdx new file mode 100644 index 0000000..84721ef --- /dev/null +++ b/gen_ai_configs/gpt_4_all.mdx @@ -0,0 +1,26 @@ +--- +title: GPT4All +description: 'Configure Danswer to use GPT4All models in memory' +--- + +Refer to [Model Configs](https://docs.danswer.dev/gen_ai_configs/overview#model-configs) for how to set the +environment variables for your particular deployment. + +## What is GPT4All +GPT4All provides a way to run the latest LLMs (closed and open source) by calling APIs or running in memory. +For self-hosted models, GPT4All offers models that are quantized or run with reduced floating-point precision. +Both of these are ways to compress models to run on weaker hardware at a slight cost in model capabilities. + +GPT4All provides a Python wrapper which Danswer uses to run the models in the same container as the Danswer API Server. + +**Note**: Despite GPT4All offering quantized models, it is still significantly slower than models fully hosted on GPUs. +If you're running the models purely on CPU, there may be a significant delay in processing the context documents and in +generating answers. 
+ +## Set Danswer to use GPT4All via next-token generation prompting +- INTERNAL_MODEL_VERSION=gpt4all-completion +- GEN_AI_MODEL_VERSION=ggml-model-gpt4all-falcon-q4_0.bin # Or any other GPT4All model + +## Set Danswer to use GPT4All via chat (conversational) prompting +- INTERNAL_MODEL_VERSION=gpt4all-chat-completion +- GEN_AI_MODEL_VERSION=ggml-model-gpt4all-falcon-q4_0.bin # Or any other GPT4All model diff --git a/gen_ai_configs/transformers.mdx b/gen_ai_configs/transformers.mdx new file mode 100644 index 0000000..8dc3434 --- /dev/null +++ b/gen_ai_configs/transformers.mdx @@ -0,0 +1,24 @@ +--- +title: Q&A Transformers +description: 'Configure Danswer to use last-generation Transformers trained for Q&A' +--- + +Refer to [Model Configs](https://docs.danswer.dev/gen_ai_configs/overview#model-configs) for how to set the +environment variables for your particular deployment. + +## What are Q&A Transformers +Before the billion+ parameter Generative AI models became possible/popular, there was a class of models trained +specifically to answer questions based on provided context. These models are not general purpose and are much weaker at +generalizing than the latest LLMs. They mostly function by extracting answers from the passage and presenting a +confidence score, and are not capable of combining this with internal knowledge. + +However, these models can be run on CPU for inference without further compression techniques. + +Also, they are less capable of making up reasonable-sounding answers that are actually hallucinations. + +## Set Danswer to use Q&A Transformers +- INTERNAL_MODEL_VERSION=transformers +- GEN_AI_MODEL_VERSION=deepset/deberta-v3-large-squad2 + +Credits to [deepset.ai](https://huggingface.co/deepset/deberta-v3-large-squad2) for the `deberta-v3-large-squad2` model. +This model is provided under the `cc-by-4.0` license and is used in Danswer without alterations. 
diff --git a/mint.json b/mint.json index 5b660d1..a8eb3fd 100644 --- a/mint.json +++ b/mint.json @@ -42,8 +42,10 @@ "gen_ai_configs/overview", "gen_ai_configs/open_ai", "gen_ai_configs/azure", + "gen_ai_configs/rest_api", + "gen_ai_configs/gpt_4_all", "gen_ai_configs/huggingface", - "gen_ai_configs/rest_api" + "gen_ai_configs/transformers" ] }, "contact_us"