# Docker Compose service for running Open-LLaMA with NVIDIA GPU access.
# The host working directory is bind-mounted into /app so code edits are
# visible inside the container without rebuilding.
services:
  app:
    build: .
    image: open-llama-image
    container_name: open-llama-container
    volumes:
      - .:/app
    # runtime: nvidia  # legacy GPU hookup; superseded by deploy.resources below
    deploy:
      resources:
        limits:
          # Hard cap on container RAM.
          memory: 20G
        reservations:
          # Request all available NVIDIA GPUs for this service.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
          # Guaranteed RAM reservation (matches the limit above).
          memory: 20G
    # Total memory + swap allowed (i.e. up to 40G of swap on top of the 20G limit).
    memswap_limit: 60G
    environment:
      # NVIDIA container toolkit: expose all GPUs with compute/utility capabilities.
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # Make CUDA device numbering follow PCI bus order, then pin to device 0.
      - CUDA_DEVICE_ORDER=PCI_BUS_ID
      - CUDA_VISIBLE_DEVICES=0
      # Debug aids: synchronous kernel launches and device-side assertions.
      - CUDA_LAUNCH_BLOCKING=1
      - TORCH_USE_CUDA_DSA=1
      # Limit allocator split size to reduce CUDA memory fragmentation.
      - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:256