diff --git a/apps/ollama-nvidia/config.json b/apps/ollama-nvidia/config.json
new file mode 100755
index 00000000..baf4214b
--- /dev/null
+++ b/apps/ollama-nvidia/config.json
@@ -0,0 +1,18 @@
+{
+  "$schema": "../schema.json",
+  "name": "Ollama - Nvidia",
+  "available": true,
+  "exposable": true,
+  "port": 11435,
+  "id": "ollama-nvidia",
+  "tipi_version": 1,
+  "version": "0.1.32",
+  "categories": ["ai"],
+  "description": "Get up and running with Llama 3, Mistral, Gemma, and other large language models.",
+  "short_desc": "LLMs inference server with OpenAI compatible API",
+  "author": "ollama",
+  "source": "https://github.com/ollama/ollama",
+  "website": "https://ollama.com",
+  "form_fields": [],
+  "supported_architectures": ["arm64", "amd64"]
+}
diff --git a/apps/ollama-nvidia/docker-compose.yml b/apps/ollama-nvidia/docker-compose.yml
new file mode 100755
index 00000000..153fdcbf
--- /dev/null
+++ b/apps/ollama-nvidia/docker-compose.yml
@@ -0,0 +1,54 @@
+# Ollama with NVIDIA GPU access, routed through Traefik via the labels below.
+version: '3.7'
+
+services:
+  ollama-nvidia:
+    # Pinned to the release declared as "version" in config.json so installs are reproducible.
+    image: ollama/ollama:0.1.32
+    restart: unless-stopped
+    container_name: ollama-nvidia
+    environment:
+      # Ollama takes its bind address from OLLAMA_HOST (it does not read PORT);
+      # without this the server stays on its default 11434 and the 11435 mapping is dead.
+      - OLLAMA_HOST=0.0.0.0:11435
+    ports:
+      - '${APP_PORT}:11435'
+    networks:
+      - tipi_main_network
+    # Reserve every NVIDIA GPU on the host for this container.
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+    # Keep Ollama's data directory on the host so it survives container recreation.
+    volumes:
+      - ${APP_DATA_DIR}/.ollama:/root/.ollama
+    labels:
+      # Main
+      traefik.enable: true
+      traefik.http.middlewares.ollama-nvidia-web-redirect.redirectscheme.scheme: https
+      traefik.http.services.ollama-nvidia.loadbalancer.server.port: 11435
+      # Web
+      traefik.http.routers.ollama-nvidia-insecure.rule: Host(`${APP_DOMAIN}`)
+      traefik.http.routers.ollama-nvidia-insecure.entrypoints: web
+      traefik.http.routers.ollama-nvidia-insecure.service: ollama-nvidia
+      traefik.http.routers.ollama-nvidia-insecure.middlewares: ollama-nvidia-web-redirect
+      # Websecure
+      traefik.http.routers.ollama-nvidia.rule: Host(`${APP_DOMAIN}`)
+      traefik.http.routers.ollama-nvidia.entrypoints: websecure
+      traefik.http.routers.ollama-nvidia.service: ollama-nvidia
+      traefik.http.routers.ollama-nvidia.tls.certresolver: myresolver
+      # Local domain
+      traefik.http.routers.ollama-nvidia-local-insecure.rule: Host(`ollama-nvidia.${LOCAL_DOMAIN}`)
+      traefik.http.routers.ollama-nvidia-local-insecure.entrypoints: web
+      traefik.http.routers.ollama-nvidia-local-insecure.service: ollama-nvidia
+      traefik.http.routers.ollama-nvidia-local-insecure.middlewares: ollama-nvidia-web-redirect
+      # Local domain secure
+      traefik.http.routers.ollama-nvidia-local.rule: Host(`ollama-nvidia.${LOCAL_DOMAIN}`)
+      traefik.http.routers.ollama-nvidia-local.entrypoints: websecure
+      traefik.http.routers.ollama-nvidia-local.service: ollama-nvidia
+      traefik.http.routers.ollama-nvidia-local.tls: true
diff --git a/apps/ollama-nvidia/metadata/description.md b/apps/ollama-nvidia/metadata/description.md
new file mode 100755
index 00000000..369a19cf
--- /dev/null
+++ b/apps/ollama-nvidia/metadata/description.md
@@ -0,0 +1,108 @@
+# Ollama - Nvidia
+[Ollama](https://github.com/ollama/ollama) allows you to run open-source large language models, such as Llama 3, locally. Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile.
+
+---
+
+## Nvidia Instructions
+To enable your Nvidia GPU in Docker:
+- You need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
+
+- And configure Docker to use Nvidia driver
+```sh
+sudo nvidia-ctk runtime configure --runtime=docker
+sudo systemctl restart docker
+```
+---
+
+## Usage
+
+### Use with a frontend
+- [LobeChat](https://github.com/lobehub/lobe-chat)
+- [LibreChat](https://github.com/danny-avila/LibreChat)
+- [OpenWebUI](https://github.com/open-webui/open-webui)
+- [And more ...](https://github.com/ollama/ollama)
+
+---
+
+### Try the REST API
+Ollama has a REST API for running and managing models.
+ +**Generate a response** +```sh +curl http://localhost:11435/api/generate -d '{ + "model": "llama3", + "prompt":"Why is the sky blue?" +}' +``` + +**Chat with a model** +```sh +curl http://localhost:11435/api/chat -d '{ + "model": "llama3", + "messages": [ + { "role": "user", "content": "why is the sky blue?" } + ] +}' +``` +--- + +### Try in terminal +```sh +docker exec -it ollama-nvidia ollama run llama3 --verbose +``` + +--- + +## Compatible GPUs +Ollama supports Nvidia GPUs with compute capability 5.0+. + +Check your compute compatibility to see if your card is supported: +[https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) + +| Compute Capability | Family | Cards | +| ------------------ | ------------------- | ----------------------------------------------------------------------------------------------------------- | +| 9.0 | NVIDIA | `H100` | +| 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080` `RTX 4070 Ti` `RTX 4060 Ti` | +| | NVIDIA Professional | `L4` `L40` `RTX 6000` | +| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` | +| | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` | +| 8.0 | NVIDIA | `A100` `A30` | +| 7.5 | GeForce GTX/RTX | `GTX 1650 Ti` `TITAN RTX` `RTX 2080 Ti` `RTX 2080` `RTX 2070` `RTX 2060` | +| | NVIDIA Professional | `T4` `RTX 5000` `RTX 4000` `RTX 3000` `T2000` `T1200` `T1000` `T600` `T500` | +| | Quadro | `RTX 8000` `RTX 6000` `RTX 5000` `RTX 4000` | +| 7.0 | NVIDIA | `TITAN V` `V100` `Quadro GV100` | +| 6.1 | NVIDIA TITAN | `TITAN Xp` `TITAN X` | +| | GeForce GTX | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050` | +| | Quadro | `P6000` `P5200` `P4200` `P3200` `P5000` `P4000` `P3000` `P2200` `P2000` `P1000` `P620` `P600` `P500` `P520` | +| | Tesla | `P40` `P4` | +| 6.0 | NVIDIA | `Tesla P100` `Quadro GP100` | +| 5.2 | GeForce GTX | `GTX TITAN X` `GTX 
980 Ti` `GTX 980` `GTX 970` `GTX 960` `GTX 950` | +| | Quadro | `M6000 24GB` `M6000` `M5000` `M5500M` `M4000` `M2200` `M2000` `M620` | +| | Tesla | `M60` `M40` | +| 5.0 | GeForce GTX | `GTX 750 Ti` `GTX 750` `NVS 810` | +| | Quadro | `K2200` `K1200` `K620` `M1200` `M520` `M5000M` `M4000M` `M3000M` `M2000M` `M1000M` `K620M` `M600M` `M500M` | + + +--- + +## Model library +Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library') + +Here are some example models that can be downloaded: + +| Model | Parameters | Size | Download | +| ------------------ | ---------- | ----- | ------------------------------ | +| Llama 3 | 8B | 4.7GB | `ollama run llama3` | +| Llama 3 | 70B | 40GB | `ollama run llama3:70b` | +| Phi-3 | 3.8B | 2.3GB | `ollama run phi3` | +| Mistral | 7B | 4.1GB | `ollama run mistral` | +| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` | +| Starling | 7B | 4.1GB | `ollama run starling-lm` | +| Code Llama | 7B | 3.8GB | `ollama run codellama` | +| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` | +| LLaVA | 7B | 4.5GB | `ollama run llava` | +| Gemma | 2B | 1.4GB | `ollama run gemma:2b` | +| Gemma | 7B | 4.8GB | `ollama run gemma:7b` | +| Solar | 10.7B | 6.1GB | `ollama run solar` | + +> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models. \ No newline at end of file diff --git a/apps/ollama-nvidia/metadata/logo.jpg b/apps/ollama-nvidia/metadata/logo.jpg new file mode 100755 index 00000000..5e8051d8 Binary files /dev/null and b/apps/ollama-nvidia/metadata/logo.jpg differ