5 changes: 5 additions & 0 deletions .claude/commands/blog-meta-image/assets/logos/alibaba.svg
390 changes: 390 additions & 0 deletions content/blog/self-host-qwen-llama-cpp-k8s-tailscale-pulumi/index.md

13 changes: 9 additions & 4 deletions data/related.yaml
@@ -368,10 +368,10 @@ tags:
- 2022-03-10-hierarchical-config

ai:
- codegen-learnings
- pulumi-copilot-rest
- copilot-lessons
- future-cloud-infrastructure-10-trends-shaping-2024-and-beyond
- run-deepseek-on-aws-ec2-using-pulumi
- policy-next-gen
- low-code-llm-apps-with-local-ai-flowise-and-pulumi
- easy-ai-apps-with-langserve-and-pulumi

ml:
- devops-ai-developer-future--pulumi-user-group-tech-talks
@@ -1266,6 +1266,11 @@ fargate-vs-ec2:
- easy-ai-apps-with-langserve-and-pulumi
- advanced-aws-networking-part-2

self-host-qwen-llama-cpp-k8s-tailscale-pulumi:
- deploy-openclaw-aws-hetzner
- low-code-llm-apps-with-local-ai-flowise-and-pulumi
- mlops-huggingface-llm-aws-sagemaker-python

when-to-use-azure-cosmos-db:
- azure-deployment-environments
- sam-cogan-testing-best-practices
1 change: 1 addition & 0 deletions scripts/programs/ignore.txt
@@ -33,6 +33,7 @@ awsx-apigateway-custom-domain-.*
kubernetes-.*
k8s-.*
helm-.*
self-host-qwen-llm-python

# Skip broken programs to get back to green
# https://github.com/pulumi/docs/issues/14505
62 changes: 62 additions & 0 deletions static/programs/self-host-qwen-llm-python/Pulumi.yaml
@@ -0,0 +1,62 @@
name: self-host-qwen-llm-python
description: Self-hosted llama-server (llama.cpp) with Open WebUI and Tailscale
runtime:
  name: python
  options:
    toolchain: pip
config:
  pulumi:tags:
    value:
      pulumi:template: python
  model:
    type: string
    default: unsloth/Qwen3.5-35B-A3B-GGUF
    description: HuggingFace model repository
  modelFile:
    type: string
    default: Qwen3.5-35B-A3B-Q4_K_M.gguf
    description: GGUF filename to download from the model repo
  gpuVendor:
    type: string
    default: nvidia
    description: GPU vendor ("nvidia" or "amd")
  gpuCount:
    type: integer
    default: 1
    description: Number of GPUs to allocate
  contextSize:
    type: integer
    default: 65536
    description: Context window size in tokens
  fitTarget:
    type: integer
    default: 2048
    description: VRAM fit target in MB for llama.cpp layer placement
  threads:
    type: integer
    default: 5
    description: Number of CPU threads for inference
  jinja:
    type: boolean
    default: true
    description: Enable Jinja template processing for chat templates
  parallel:
    type: integer
    default: 1
    description: Number of parallel request slots
  llmPort:
    type: integer
    default: 8080
    description: LLM service port
  llmNodePort:
    type: integer
    default: 30080
    description: NodePort for external LLM access
  webuiPort:
    type: integer
    default: 30000
    description: Open WebUI NodePort
  hostname:
    type: string
    default: llm-server
    description: Tailscale hostname
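Most of these config keys end up on the llama-server command line inside the container. As a rough sketch of that mapping (the `/models` mount path and the exact flag set are assumptions, not taken from this PR; check them against the llama.cpp version you deploy):

```python
def llama_server_args(cfg: dict) -> list[str]:
    """Translate the Pulumi config values above into llama-server CLI args.

    Flag names follow common llama.cpp llama-server conventions and are
    assumptions here; fitTarget and GPU options are omitted because their
    flags vary by llama.cpp version.
    """
    args = [
        "-m", f"/models/{cfg['modelFile']}",   # assumed container mount path
        "--ctx-size", str(cfg["contextSize"]),
        "--threads", str(cfg["threads"]),
        "--parallel", str(cfg["parallel"]),
        "--port", str(cfg["llmPort"]),
        "--host", "0.0.0.0",                   # listen on all interfaces in-cluster
    ]
    if cfg.get("jinja", True):
        args.append("--jinja")
    return args

# Defaults mirroring the Pulumi.yaml config block above.
defaults = {
    "modelFile": "Qwen3.5-35B-A3B-Q4_K_M.gguf",
    "contextSize": 65536,
    "threads": 5,
    "parallel": 1,
    "llmPort": 8080,
    "jinja": True,
}

print(" ".join(llama_server_args(defaults)))
```

In the actual Pulumi program these values would come from `pulumi.Config()` rather than a literal dict; the dict here just makes the defaults visible in one place.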