From 1dd92c076c376a044c701be9ebe9e52c32390275 Mon Sep 17 00:00:00 2001 From: James Dumay Date: Mon, 13 Apr 2026 11:39:22 +1000 Subject: [PATCH 1/2] Move recommended models catalog to dataset-backed schema --- mesh-llm/src/cli/commands/models/mod.rs | 394 ++++++++++++++++++- mesh-llm/src/cli/models.rs | 28 +- mesh-llm/src/models/catalog.json | 497 ------------------------ mesh-llm/src/models/catalog.rs | 444 ++++++++++++++++++++- mesh-llm/src/models/resolve.rs | 5 +- 5 files changed, 849 insertions(+), 519 deletions(-) delete mode 100644 mesh-llm/src/models/catalog.json diff --git a/mesh-llm/src/cli/commands/models/mod.rs b/mesh-llm/src/cli/commands/models/mod.rs index fd4c50f6..83089ac3 100644 --- a/mesh-llm/src/cli/commands/models/mod.rs +++ b/mesh-llm/src/cli/commands/models/mod.rs @@ -2,14 +2,20 @@ mod formatters; mod formatters_console; mod formatters_json; -use crate::cli::models::ModelsCommand; +use crate::cli::models::{ModelsCommand, RecommendedCommand}; use crate::models::{ catalog, download_exact_ref, find_catalog_model_exact, installed_model_capabilities, - scan_installed_models, search_catalog_models, search_huggingface, show_exact_model, - show_model_variants_with_progress, SearchArtifactFilter, SearchProgress, ShowVariantsProgress, + resolve_huggingface_model_identity, scan_installed_models, search_catalog_models, + search_huggingface, show_exact_model, show_model_variants_with_progress, SearchArtifactFilter, + SearchProgress, ShowVariantsProgress, }; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use base64::Engine; +use reqwest::StatusCode; +use serde::Deserialize; +use serde_json::json; use std::io::{IsTerminal, Write}; +use std::path::Path; use std::time::Instant; use formatters::{ @@ -87,11 +93,335 @@ pub async fn run_model_search( } pub fn run_model_recommended(json_output: bool) -> Result<()> { + if !json_output { + eprintln!( + "🔎 Fetching recommended models from Hugging Face dataset {}...", + catalog::DEFAULT_RECOMMENDED_MODELS_DATASET_REPO + ); + catalog::preload_catalog_dataset_with_progress( + catalog::DEFAULT_RECOMMENDED_MODELS_DATASET_REPO, + |progress| match progress { + catalog::CatalogLoadProgress::ListingEntries => {} + catalog::CatalogLoadProgress::LoadingEntry { completed, total } => { + if total == 0 { + return; + } + eprint!("\r Loaded {completed}/{total} recommended entries..."); + let _ = std::io::stderr().flush(); + if completed == total { + eprintln!(); + } + } + }, + )?; + } let formatter = models_formatter(json_output); let models: Vec<_> = catalog::MODEL_CATALOG.iter().collect(); formatter.render_recommended(&models) } +async fn run_model_recommended_share( + model: &str, + description: &str, + name: Option<&str>, + draft: Option<&str>, + dataset_repo: &str, + json_output: bool, +) -> Result<()> { + let details = show_exact_model(model).await?; + let identity = resolve_huggingface_model_identity(model) + .await? + .ok_or_else(|| anyhow!("Recommended models must resolve to a Hugging Face-backed model"))?; + + let catalog_model = build_recommended_catalog_model( + model, + description, + name, + draft, + &identity, + &details, + fetch_recommended_repo_manifest(&identity).await?, + ); + let metadata_path = catalog::dataset_metadata_path_for_model_id(model); + let metadata_body = catalog::serialize_recommended_model_metadata(&catalog_model)?; + let mut index_entries = catalog::load_catalog_index(dataset_repo).unwrap_or_default(); + if index_entries.iter().any(|entry| entry.id == model) { + if json_output { + formatters::print_json(json!({ + "status": "already_published", + "dataset_repo": dataset_repo, + "path": metadata_path, + "id": model, + }))?; + } else { + println!("✅ Already published"); + println!(" repo: {dataset_repo}"); + println!(" id: {model}"); + } + return Ok(()); + } + index_entries.push(catalog::build_catalog_index_entry(&catalog_model)); + let index_body = catalog::serialize_recommended_catalog_index(&index_entries)?; + + let api = crate::models::build_hf_api(false).context("Build Hugging Face client")?; + let dataset = api.repo(hf_hub::Repo::with_revision( + dataset_repo.to_string(), + hf_hub::RepoType::Dataset, + "main".to_string(), + )); + let info = dataset + .info() + .with_context(|| format!("Fetch dataset info for {}", dataset_repo))?; + if info + .siblings + .iter() + .any(|entry| entry.rfilename == metadata_path) + { + if json_output { + formatters::print_json(json!({ + "status": "already_published", + "dataset_repo": dataset_repo, + "path": metadata_path, + "id": model, + }))?; + } else { + println!("✅ Already published"); + println!(" repo: {dataset_repo}"); + println!(" path: {metadata_path}"); + } + return Ok(()); + } + + let token = crate::models::hf_token_override().ok_or_else(|| { + anyhow!( + "Missing Hugging Face token. Set HF_TOKEN or HUGGING_FACE_HUB_TOKEN before running `mesh-llm models recommended share`." + ) + })?; + let endpoint = std::env::var("HF_ENDPOINT") + .ok() + .filter(|value| !value.trim().is_empty()) + .unwrap_or_else(|| "https://huggingface.co".to_string()); + let commit_url = format!( + "{}/api/datasets/{}/commit/main", + endpoint.trim_end_matches('/'), + dataset_repo + ); + let commit_message = format!("Add recommended model {}", catalog_model.id); + let commit_description = format!( + "Publish recommended model entry for {} ({})", + catalog_model.name, catalog_model.id + ); + let body = vec![ + ndjson_header(&commit_message, &commit_description), + ndjson_file_op( + catalog::recommended_models_index_path(), + index_body.as_bytes(), + ), + ndjson_file_op(&metadata_path, metadata_body.as_bytes()), + ] + .into_iter() + .map(|value| serde_json::to_string(&value)) + .collect::, _>>()? + .join("\n") + + "\n"; + + if !json_output { + println!("📤 Recommended model share"); + println!("📦 {}", catalog_model.name); + println!(" id: {}", catalog_model.id); + println!(" source: {}", details.exact_ref); + println!("☁️ Dataset contribution"); + println!(" repo: {dataset_repo}"); + println!(" path: {metadata_path}"); + println!("⬆️ Opening contribution PR..."); + } + + let response = reqwest::Client::new() + .post(&commit_url) + .bearer_auth(token) + .query(&[("create_pr", "1")]) + .header("Content-Type", "application/x-ndjson") + .body(body) + .send() + .await + .with_context(|| format!("POST {}", commit_url))?; + if response.status() != StatusCode::OK { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(anyhow!( + "Dataset contribution failed: {}: {}", + status, + body.trim() + )); + } + let commit: HfCommitResponse = response + .json() + .await + .context("Decode Hugging Face response")?; + if json_output { + formatters::print_json(json!({ + "status": "opened_pr", + "dataset_repo": dataset_repo, + "path": metadata_path, + "id": model, + "commit_oid": commit.commit_oid, + "commit_url": commit.commit_url, + "pull_request_url": commit.pull_request_url, + }))?; + } else { + println!("✅ Opened recommended model contribution"); + println!(" commit: {}", commit.commit_oid); + println!(" url: {}", commit.commit_url); + if let Some(pr_url) = commit.pull_request_url.as_deref() { + println!(" pr: {}", pr_url); + } + } + Ok(()) +} + +fn build_recommended_catalog_model( + input_id: &str, + description: &str, + name: Option<&str>, + draft: Option<&str>, + identity: &crate::models::local::HuggingFaceModelIdentity, + details: &crate::models::ModelDetails, + manifest: RecommendedRepoManifest, +) -> catalog::CatalogModel { + let file = identity + .file + .rsplit('/') + .next() + .unwrap_or(&identity.file) + .to_string(); + catalog::CatalogModel { + id: input_id.to_string(), + name: name + .map(str::to_string) + .unwrap_or_else(|| recommended_model_name_from_file(&file)), + file: file.clone(), + url: format!( + "https://huggingface.co/{}/resolve/{}/{}", + identity.repo_id, identity.revision, identity.file + ), + primary_size_bytes: manifest.primary_size_bytes, + size: details + .size_label + .clone() + .unwrap_or_else(|| "unknown".to_string()), + description: description.to_string(), + draft: draft.map(str::to_string), + moe: details.moe.clone(), + extra_files: split_gguf_extra_files(identity, &manifest), + mmproj: manifest.mmproj, + } +} + +#[derive(Clone, Debug, Default)] +struct RecommendedRepoManifest { + primary_size_bytes: Option, + mmproj: Option, + split_sizes: std::collections::HashMap>, +} + +async fn fetch_recommended_repo_manifest( + identity: &crate::models::local::HuggingFaceModelIdentity, +) -> Result { + let api = crate::models::build_hf_tokio_api(false)?; + let info = api + .repo(hf_hub::Repo::with_revision( + identity.repo_id.clone(), + hf_hub::RepoType::Model, + identity.revision.clone(), + )) + .info() + .await + .with_context(|| { + format!( + "Fetch Hugging Face repo {}@{}", + identity.repo_id, identity.revision + ) + })?; + let primary_size_bytes = info + .siblings + .iter() + .find(|entry| entry.rfilename == identity.file) + .and_then(|entry| entry.size); + let mmproj = info.siblings.iter().find_map(|entry| { + let basename = entry + .rfilename + .rsplit('/') + .next() + .unwrap_or(&entry.rfilename); + let lower = basename.to_ascii_lowercase(); + if lower.starts_with("mmproj") && lower.ends_with(".gguf") { + Some(catalog::CatalogAsset { + file: basename.to_string(), + url: format!( + "https://huggingface.co/{}/resolve/{}/{}", + identity.repo_id, identity.revision, entry.rfilename + ), + size_bytes: entry.size, + }) + } else { + None + } + }); + let split_sizes = info + .siblings + .iter() + .map(|entry| (entry.rfilename.clone(), entry.size)) + .collect(); + Ok(RecommendedRepoManifest { + primary_size_bytes, + mmproj, + split_sizes, + }) +} + +fn recommended_model_name_from_file(file: &str) -> String { + let basename = Path::new(file) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(file); + let mut stem = basename + .trim_end_matches(".gguf") + .trim_end_matches(".safetensors") + .trim_end_matches(".json") + .to_string(); + let split_re = regex_lite::Regex::new(r"-00001-of-\d{5}$").unwrap(); + stem = split_re.replace(&stem, "").into_owned(); + stem +} + +fn split_gguf_extra_files( + identity: &crate::models::local::HuggingFaceModelIdentity, + manifest: &RecommendedRepoManifest, +) -> Vec { + let re = regex_lite::Regex::new(r"-00001-of-(\d{5})\.gguf$").unwrap(); + let Some(caps) = re.captures(&identity.file) else { + return Vec::new(); + }; + let Ok(count) = caps[1].parse::() else { + return Vec::new(); + }; + (2..=count) + .map(|index| { + let file = identity + .file + .replace("-00001-of-", &format!("-{index:05}-of-")); + catalog::CatalogAsset { + file: file.rsplit('/').next().unwrap_or(&file).to_string(), + url: format!( + "https://huggingface.co/{}/resolve/{}/{}", + identity.repo_id, identity.revision, file + ), + size_bytes: manifest.split_sizes.get(&file).copied().flatten(), + } + }) + .collect() +} + pub fn run_model_installed(json_output: bool) -> Result<()> { let formatter = models_formatter(json_output); let rows: Vec = scan_installed_models() @@ -213,9 +543,28 @@ pub async fn run_model_download( pub async fn dispatch_models_command(command: &ModelsCommand) -> Result<()> { match command { - ModelsCommand::Recommended { json } | ModelsCommand::List { json } => { - run_model_recommended(*json)? - } + ModelsCommand::Recommended { command, json } => match command { + Some(RecommendedCommand::Share { + model, + description, + name, + draft, + dataset_repo, + json: share_json, + }) => { + run_model_recommended_share( + model, + description, + name.as_deref(), + draft.as_deref(), + dataset_repo, + *share_json, + ) + .await? + } + None => run_model_recommended(*json)?, + }, + ModelsCommand::List { json } => run_model_recommended(*json)?, ModelsCommand::Installed { json } => run_model_installed(*json)?, ModelsCommand::Search { query, @@ -244,3 +593,34 @@ pub async fn dispatch_models_command(command: &ModelsCommand) -> Result<()> { } Ok(()) } + +#[derive(Deserialize)] +struct HfCommitResponse { + #[serde(rename = "commitOid")] + commit_oid: String, + #[serde(rename = "commitUrl")] + commit_url: String, + #[serde(rename = "pullRequestUrl")] + pull_request_url: Option, +} + +fn ndjson_header(summary: &str, description: &str) -> serde_json::Value { + json!({ + "key": "header", + "value": { + "summary": summary, + "description": description, + } + }) +} + +fn ndjson_file_op(path_in_repo: &str, content: &[u8]) -> serde_json::Value { + json!({ + "key": "file", + "value": { + "content": base64::engine::general_purpose::STANDARD.encode(content), + "path": path_in_repo, + "encoding": "base64", + } + }) +} diff --git a/mesh-llm/src/cli/models.rs b/mesh-llm/src/cli/models.rs index 274fc4a8..84bbad64 100644 --- a/mesh-llm/src/cli/models.rs +++ b/mesh-llm/src/cli/models.rs @@ -1,9 +1,35 @@ use clap::Subcommand; +#[derive(Subcommand, Debug)] +pub enum RecommendedCommand { + /// Share one recommended model entry to the Hugging Face dataset. + Share { + /// Model spec: catalog id, HF repo selector, HF exact ref, or HF URL. + model: String, + /// Short human description shown in `mesh-llm models recommended`. + #[arg(long)] + description: String, + /// Override the display name stored in the recommendation entry. + #[arg(long)] + name: Option, + /// Recommended draft model id for speculative decoding. + #[arg(long)] + draft: Option, + /// Hugging Face dataset repo that stores recommended models. + #[arg(long, default_value = crate::models::catalog::DEFAULT_RECOMMENDED_MODELS_DATASET_REPO)] + dataset_repo: String, + /// Emit JSON output. + #[arg(long)] + json: bool, + }, +} + #[derive(Subcommand, Debug)] pub enum ModelsCommand { - /// List built-in recommended models. + /// List or share recommended models from the Hugging Face dataset. Recommended { + #[command(subcommand)] + command: Option, /// Emit JSON output. #[arg(long)] json: bool, diff --git a/mesh-llm/src/models/catalog.json b/mesh-llm/src/models/catalog.json deleted file mode 100644 index b82cf894..00000000 --- a/mesh-llm/src/models/catalog.json +++ /dev/null @@ -1,497 +0,0 @@ -[ - { - "name": "Qwen3-4B-Q4_K_M", - "file": "Qwen3-4B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf", - "size": "2.5GB", - "description": "Qwen3 starter, thinking/non-thinking modes", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-3B-Instruct-Q4_K_M", - "file": "Qwen2.5-3B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q4_k_m.gguf", - "size": "2.1GB", - "description": "Small & fast general chat", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Llama-3.2-3B-Instruct-Q4_K_M", - "file": "Llama-3.2-3B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf", - "size": "2.0GB", - "description": "Meta Llama 3.2, goose default, good tool calling", - "draft": "Llama-3.2-1B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-8B-Q4_K_M", - "file": "Qwen3-8B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-8B-GGUF/resolve/main/Qwen3-8B-Q4_K_M.gguf", - "size": "5.0GB", - "description": "Qwen3 mid-tier, strong for its size", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-Coder-7B-Instruct-Q4_K_M", - "file": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf", - "size": "4.4GB", - "description": "Code generation & completion", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Gemma-3-12B-it-Q4_K_M", - "file": "Gemma-3-12B-it-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/gemma-3-12b-it-GGUF/resolve/main/gemma-3-12b-it-Q4_K_M.gguf", - "size": "7.3GB", - "description": "Google Gemma 3 12B, punches above weight", - "draft": "Gemma-3-1B-it-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Hermes-2-Pro-Mistral-7B-Q4_K_M", - "file": "Hermes-2-Pro-Mistral-7B-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B-Q4_K_M.gguf", - "size": "4.4GB", - "description": "Goose default, strong tool calling for agents", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-14B-Q4_K_M", - "file": "Qwen3-14B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-14B-GGUF/resolve/main/Qwen3-14B-Q4_K_M.gguf", - "size": "9.0GB", - "description": "Qwen3 strong chat, thinking modes", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-14B-Instruct-Q4_K_M", - "file": "Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "size": "9.0GB", - "description": "Solid general chat", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-Coder-14B-Instruct-Q4_K_M", - "file": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", - "size": "9.0GB", - "description": "Strong code gen, fills gap between 7B and 32B", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M", - "file": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "size": "9.0GB", - "description": "DeepSeek R1 reasoning distilled into Qwen 14B", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Devstral-Small-2505-Q4_K_M", - "file": "Devstral-Small-2505-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Devstral-Small-2505-GGUF/resolve/main/Devstral-Small-2505-Q4_K_M.gguf", - "size": "14.3GB", - "description": "Mistral agentic coding, tool use", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Mistral-Small-3.1-24B-Instruct-Q4_K_M", - "file": "Mistral-Small-3.1-24B-Instruct-2503-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF/resolve/main/Mistral-Small-3.1-24B-Instruct-2503-Q4_K_M.gguf", - "size": "14.3GB", - "description": "Mistral general chat, good tool calling", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "GLM-4.7-Flash-Q4_K_M", - "file": "GLM-4.7-Flash-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF/resolve/main/GLM-4.7-Flash-Q4_K_M.gguf", - "size": "18GB", - "description": "MoE 30B/3B active, 64 experts top-4, fast inference, tool calling", - "draft": null, - "moe": { - "n_expert": 64, - "n_expert_used": 4, - "min_experts_per_node": 24 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-30B-A3B-Q4_K_M", - "file": "Qwen3-30B-A3B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf", - "size": "17.3GB", - "description": "MoE general chat, 128 experts top-8, thinking/non-thinking", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": { - "n_expert": 128, - "n_expert_used": 8, - "min_experts_per_node": 46 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "file": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/resolve/main/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf", - "size": "18.6GB", - "description": "MoE agentic coding, tool use, 128 experts top-8", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": { - "n_expert": 128, - "n_expert_used": 8, - "min_experts_per_node": 46 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "GLM-4-32B-0414-Q4_K_M", - "file": "GLM-4-32B-0414-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/GLM-4-32B-0414-GGUF/resolve/main/GLM-4-32B-0414-Q4_K_M.gguf", - "size": "19.7GB", - "description": "Strong 32B, good tool calling", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-32B-Q4_K_M", - "file": "Qwen3-32B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-32B-GGUF/resolve/main/Qwen3-32B-Q4_K_M.gguf", - "size": "19.8GB", - "description": "Best Qwen3 dense, thinking/non-thinking modes", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M", - "file": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", - "size": "19.9GB", - "description": "DeepSeek R1 reasoning distilled into Qwen 32B", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-32B-Instruct-Q4_K_M", - "file": "Qwen2.5-32B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF/resolve/main/Qwen2.5-32B-Instruct-Q4_K_M.gguf", - "size": "20GB", - "description": "Proven general chat", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-Coder-32B-Instruct-Q4_K_M", - "file": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf", - "size": "20GB", - "description": "Top-tier code gen, matches GPT-4o on code", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Llama-4-Scout-Q4_K_M", - "file": "Llama-4-Scout-4bit-Q4_K_M.gguf", - "url": "https://huggingface.co/glogwa68/Llama-4-scout-GGUF/resolve/main/Llama-4-Scout-4bit-Q4_K_M.gguf", - "size": "22.5GB", - "description": "MoE 109B/17B active, 16 experts top-1, Meta latest, tool calling", - "draft": null, - "moe": { - "n_expert": 16, - "n_expert_used": 1, - "min_experts_per_node": 6 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "Gemma-3-27B-it-Q4_K_M", - "file": "Gemma-3-27B-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/google_gemma-3-27b-it-GGUF/resolve/main/google_gemma-3-27b-it-Q4_K_M.gguf", - "size": "17GB", - "description": "Google Gemma 3 27B, strong reasoning", - "draft": "Gemma-3-1B-it-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3.5-27B-Q4_K_M", - "file": "Qwen3.5-27B-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/qwen3.5/blobs/sha256:d4b8b4f4c350f5d322dc8235175eeae02d32c6f3fd70bdb9ea481e3abb7d7fc4", - "size": "17GB", - "description": "Qwen3.5 27B, vision + text, strong reasoning and coding", - "draft": "Qwen3-0.6B-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": { - "file": "Qwen3.5-27B-mmproj-BF16.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/mmproj-BF16.gguf" - } - }, - { - "name": "Qwen3-Coder-Next-Q4_K_M", - "file": "Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf", - "url": "https://huggingface.co/Qwen/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M/Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf", - "size": "48GB", - "description": "Qwen3 Coder Next ~85B dense, frontier coding model", - "draft": null, - "moe": null, - "extra_files": [ - { - "file": "Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf", - "url": "https://huggingface.co/Qwen/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M/Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf" - }, - { - "file": "Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf", - "url": "https://huggingface.co/Qwen/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M/Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf" - }, - { - "file": "Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf", - "url": "https://huggingface.co/Qwen/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M/Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf" - } - ], - "mmproj": null - }, - { - "name": "Llama-3.3-70B-Instruct-Q4_K_M", - "file": "Llama-3.3-70B-Instruct-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/llama3.3/blobs/sha256:4824460d29f2058aaf6e1118a63a7a197a09bed509f0e7d4e2efb1ee273b447d", - "size": "43GB", - "description": "Meta Llama 3.3 70B, strong all-around", - "draft": "Llama-3.2-1B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen2.5-72B-Instruct-Q4_K_M", - "file": "Qwen2.5-72B-Instruct-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/qwen2.5/blobs/sha256:6e7fdda508e91cb0f63de5c15ff79ac63a1584ccafd751c07ca12b7f442101b8", - "size": "47GB", - "description": "Flagship Qwen2.5, great tensor split showcase", - "draft": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "DeepSeek-R1-Distill-70B-Q4_K_M", - "file": "DeepSeek-R1-Distill-70B-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/deepseek-r1/blobs/sha256:4cd576d9aa16961244012223abf01445567b061f1814b57dfef699e4cf8df339", - "size": "43GB", - "description": "DeepSeek R1 distilled to 70B (Qwen2.5-based), strong reasoning", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Mixtral-8x22B-Instruct-Q4_K_M", - "file": "Mixtral-8x22B-Instruct-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/mixtral/blobs/sha256:f3329ad0c787f4f73cab99e8c877bb76403060561dd0caa318127683c87bbcb4", - "size": "86GB", - "description": "Mixtral 8x22B MoE, 8 experts top-2", - "draft": null, - "moe": { - "n_expert": 8, - "n_expert_used": 2, - "min_experts_per_node": 4 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-235B-A22B-Q4_K_M", - "file": "Qwen3-235B-A22B-Q4_K_M.gguf", - "url": "https://registry.ollama.ai/v2/library/qwen3/blobs/sha256:aeacdadecbed8a07e42026d1a1d3cd30715bb2994ebe4e4ca4009e1a4abe8d5d", - "size": "142GB", - "description": "Qwen3 235B MoE A22B active, 128 experts top-8", - "draft": null, - "moe": { - "n_expert": 128, - "n_expert_used": 8, - "min_experts_per_node": 46 - }, - "extra_files": [], - "mmproj": null - }, - { - "name": "Llama-3.1-405B-Instruct-Q2_K", - "file": "Llama-3.1-405B-Instruct-Q2_K.gguf", - "url": "https://registry.ollama.ai/v2/library/llama3.1/blobs/sha256:e7e1972e5b13caead8a8dd9c94f4a0dec59ac2d9dd52e0cd1c067e6077eb4677", - "size": "149GB", - "description": "Llama 3.1 405B Instruct Q2_K, largest dense model", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "MiniMax-M2.5-Q4_K_M", - "file": "MiniMax-M2.5-Q4_K_M-00001-of-00004.gguf", - "url": "https://huggingface.co/unsloth/MiniMax-M2.5-GGUF/resolve/main/Q4_K_M/MiniMax-M2.5-Q4_K_M-00001-of-00004.gguf", - "size": "138GB", - "description": "MiniMax-M2.5 MoE 456B/46B active, 256 experts top-8, Q4_K_M", - "draft": null, - "moe": { - "n_expert": 256, - "n_expert_used": 8, - "min_experts_per_node": 96 - }, - "extra_files": [ - { - "file": "MiniMax-M2.5-Q4_K_M-00002-of-00004.gguf", - "url": "https://huggingface.co/unsloth/MiniMax-M2.5-GGUF/resolve/main/Q4_K_M/MiniMax-M2.5-Q4_K_M-00002-of-00004.gguf" - }, - { - "file": "MiniMax-M2.5-Q4_K_M-00003-of-00004.gguf", - "url": "https://huggingface.co/unsloth/MiniMax-M2.5-GGUF/resolve/main/Q4_K_M/MiniMax-M2.5-Q4_K_M-00003-of-00004.gguf" - }, - { - "file": "MiniMax-M2.5-Q4_K_M-00004-of-00004.gguf", - "url": "https://huggingface.co/unsloth/MiniMax-M2.5-GGUF/resolve/main/Q4_K_M/MiniMax-M2.5-Q4_K_M-00004-of-00004.gguf" - } - ], - "mmproj": null - }, - { - "name": "Qwen3.5-0.8B-Vision-Q4_K_M", - "file": "Qwen3.5-0.8B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q4_K_M.gguf", - "size": "508MB", - "description": "Tiny vision model, OCR, screenshots, runs anywhere", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": { - "file": "Qwen3.5-0.8B-mmproj-BF16.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/mmproj-BF16.gguf" - } - }, - { - "name": "Qwen3.5-4B-Vision-Q4_K_M", - "file": "Qwen3.5-4B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf", - "size": "2.7GB", - "description": "Small vision model, good quality/size balance", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": { - "file": "Qwen3.5-4B-mmproj-BF16.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/mmproj-BF16.gguf" - } - }, - { - "name": "Qwen3.5-9B-Vision-Q4_K_M", - "file": "Qwen3.5-9B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf", - "size": "5.8GB", - "description": "Vision + text, replaces Qwen3-8B with image understanding", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": { - "file": "Qwen3.5-9B-mmproj-BF16.gguf", - "url": "https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-BF16.gguf" - } - }, - { - "name": "Qwen2.5-0.5B-Instruct-Q4_K_M", - "file": "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf", - "size": "491MB", - "description": "Draft for Qwen2.5 and DeepSeek-R1-Distill models", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Qwen3-0.6B-Q4_K_M", - "file": "Qwen3-0.6B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf", - "size": "397MB", - "description": "Draft for Qwen3 models", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Llama-3.2-1B-Instruct-Q4_K_M", - "file": "Llama-3.2-1B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf", - "size": "760MB", - "description": "Draft for Llama 3.x and Llama 4 models", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - }, - { - "name": "Gemma-3-1B-it-Q4_K_M", - "file": "Gemma-3-1B-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/google_gemma-3-1b-it-GGUF/resolve/main/google_gemma-3-1b-it-Q4_K_M.gguf", - "size": "780MB", - "description": "Draft for Gemma 3 models", - "draft": null, - "moe": null, - "extra_files": [], - "mmproj": null - } -] diff --git a/mesh-llm/src/models/catalog.rs b/mesh-llm/src/models/catalog.rs index 32bb711c..1911e73f 100644 --- a/mesh-llm/src/models/catalog.rs +++ b/mesh-llm/src/models/catalog.rs @@ -1,9 +1,10 @@ -//! Built-in model catalog plus managed acquisition helpers. +//! Dataset-backed recommended model catalog plus managed acquisition helpers. use anyhow::{Context, Result}; use hf_hub::api::Progress as HfProgress; use hf_hub::{Repo, RepoType}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; #[cfg(test)] use std::collections::HashMap; use std::io::Write; @@ -15,19 +16,33 @@ use std::sync::LazyLock; use std::sync::Mutex; use std::time::{Duration, Instant}; use tokio::io::AsyncWriteExt; -#[derive(Clone, Debug, Deserialize)] + +pub const DEFAULT_RECOMMENDED_MODELS_DATASET_REPO: &str = "meshllm/recommended-models"; +const RECOMMENDED_MODELS_DATASET_REVISION: &str = "main"; +const RECOMMENDED_MODELS_PREFIX: &str = "models/"; +const RECOMMENDED_MODELS_INDEX_FILE: &str = "index.json"; +const RECOMMENDED_MODELS_METADATA_FILE: &str = "metadata.json"; + +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct CatalogAsset { pub file: String, pub url: String, + #[serde(default)] + pub size_bytes: Option, } #[derive(Clone, Debug)] pub struct CatalogModel { + /// Raw model identifier supplied when this recommendation was curated. + /// This is the dataset entry identity, e.g. `org/repo`, `org/repo:Q4_K_M`, + /// or `org/repo/file.gguf`. + pub id: String, pub name: String, pub file: String, /// Legacy transport field. Prefer `source_repo()`, `source_revision()`, /// and `source_file()` for curated model identity. pub url: String, + pub primary_size_bytes: Option, pub size: String, pub description: String, /// If set, this model has a recommended draft model for speculative decoding. @@ -73,8 +88,9 @@ pub struct MoeConfig { pub ranking: Vec, } -#[derive(Debug, Deserialize)] -struct CatalogModelJson { +#[derive(Debug, Deserialize, Serialize)] +struct LegacyCatalogModelJson { + id: String, name: String, file: String, url: String, @@ -87,27 +103,258 @@ struct CatalogModelJson { mmproj: Option, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] struct MoeConfigJson { n_expert: u32, n_expert_used: u32, min_experts_per_node: u32, } +#[derive(Debug, Deserialize, Serialize)] +struct CatalogMetadataV1 { + schema_version: u32, + id: String, + name: String, + source: CatalogSourceV1, + artifacts: Vec, + curation: CatalogCurationV1, + moe: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +struct CatalogSourceV1 { + repo: String, + #[serde(default)] + revision: Option, + #[serde(default)] + commit: Option, + #[serde(default)] + selector: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +struct CatalogArtifactV1 { + role: String, + path: String, + url: String, + #[serde(default)] + size_bytes: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +struct CatalogCurationV1 { + description: String, + #[serde(default)] + draft_model_id: Option, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum CatalogLoadProgress { + ListingEntries, + LoadingEntry { completed: usize, total: usize }, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CatalogIndexEntry { + pub id: String, + pub name: String, + pub path: String, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +struct CatalogIndexJson { + schema_version: u32, + entries: Vec, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +struct CatalogIndexEntryJson { + id: String, + name: String, + path: String, +} + pub static MODEL_CATALOG: LazyLock> = LazyLock::new(load_catalog); fn load_catalog() -> Vec { - let raw: Vec = - serde_json::from_str(include_str!("catalog.json")).expect("parse bundled catalog.json"); - raw.into_iter().map(CatalogModel::from_json).collect() + load_catalog_from_dataset(DEFAULT_RECOMMENDED_MODELS_DATASET_REPO, None).unwrap_or_else(|err| { + eprintln!( + "⚠️ Failed to load recommended model dataset {}: {err:#}", + DEFAULT_RECOMMENDED_MODELS_DATASET_REPO + ); + Vec::new() + }) +} + +pub fn preload_catalog_dataset_with_progress(dataset_repo: &str, mut progress: F) -> Result<()> +where + F: FnMut(CatalogLoadProgress), +{ + load_catalog_from_dataset(dataset_repo, Some(&mut progress)).map(|_| ()) +} + +fn load_catalog_from_dataset( + dataset_repo: &str, + mut progress: Option<&mut dyn FnMut(CatalogLoadProgress)>, +) -> Result> { + let api = super::build_hf_api(false)?; + let repo = api.repo(Repo::with_revision( + dataset_repo.to_string(), + RepoType::Dataset, + RECOMMENDED_MODELS_DATASET_REVISION.to_string(), + )); + if let Some(progress) = progress.as_mut() { + progress(CatalogLoadProgress::ListingEntries); + } + let metadata_paths = match load_catalog_index_json(&repo) { + Ok(index) => index + .entries + .into_iter() + .map(|entry| entry.path) + .collect::>(), + Err(_) => { + let info = repo.info().with_context(|| { + format!( + "fetch recommended model dataset info {}@{}", + dataset_repo, RECOMMENDED_MODELS_DATASET_REVISION + ) + })?; + let mut metadata_paths: Vec<_> = info + .siblings + .into_iter() + .map(|entry| entry.rfilename) + .filter(|path| is_recommended_model_metadata_path(path)) + .collect(); + metadata_paths.sort(); + metadata_paths + } + }; + + let total = metadata_paths.len(); + let mut out = Vec::with_capacity(total); + for (index, metadata_path) in metadata_paths.into_iter().enumerate() { + if let Some(progress) = progress.as_mut() { + progress(CatalogLoadProgress::LoadingEntry { + completed: index, + total, + }); + } + let path = repo.download(&metadata_path).with_context(|| { + format!( + "download {} from dataset {}@{}", + metadata_path, dataset_repo, RECOMMENDED_MODELS_DATASET_REVISION + ) + })?; + let text = + std::fs::read_to_string(&path).with_context(|| format!("read {}", path.display()))?; + out.push(parse_catalog_metadata(&text, &metadata_path, &path)?); + } + if let Some(progress) = progress.as_mut() { + progress(CatalogLoadProgress::LoadingEntry { + completed: total, + total, + }); + } + Ok(out) +} + +fn is_recommended_model_metadata_path(path: &str) -> bool { + path.starts_with(RECOMMENDED_MODELS_PREFIX) && path.ends_with(RECOMMENDED_MODELS_METADATA_FILE) +} + +pub fn recommended_models_index_path() -> &'static str { + RECOMMENDED_MODELS_INDEX_FILE +} + +pub fn dataset_entry_prefix_for_model_id(model_id: &str) -> String { + let digest = Sha256::digest(model_id.as_bytes()); + format!("{RECOMMENDED_MODELS_PREFIX}sha256-{digest:x}") +} + +pub fn dataset_metadata_path_for_model_id(model_id: &str) -> String { + format!( + "{}/{}", + dataset_entry_prefix_for_model_id(model_id), + RECOMMENDED_MODELS_METADATA_FILE + ) +} + +pub fn dataset_entry_id_from_metadata_path(path: &str) -> Option { + let _ = path.strip_prefix(RECOMMENDED_MODELS_PREFIX)?; + None +} + +pub fn build_catalog_index_entry(model: &CatalogModel) -> CatalogIndexEntry { + CatalogIndexEntry { + id: model.id.clone(), + name: model.name.clone(), + path: dataset_metadata_path_for_model_id(&model.id), + } +} + +pub fn serialize_recommended_catalog_index(entries: &[CatalogIndexEntry]) -> Result { + let mut entries = entries.to_vec(); + entries.sort_by(|left, right| left.id.cmp(&right.id)); + let raw = CatalogIndexJson { + schema_version: 1, + entries: entries + .into_iter() + .map(|entry| CatalogIndexEntryJson { + id: entry.id, + name: entry.name, + path: entry.path, + }) + .collect(), + }; + Ok(serde_json::to_string_pretty(&raw)? + "\n") +} + +pub fn load_catalog_index(dataset_repo: &str) -> Result> { + let api = super::build_hf_api(false)?; + let repo = api.repo(Repo::with_revision( + dataset_repo.to_string(), + RepoType::Dataset, + RECOMMENDED_MODELS_DATASET_REVISION.to_string(), + )); + let index = load_catalog_index_json(&repo)?; + Ok(index + .entries + .into_iter() + .map(|entry| CatalogIndexEntry { + id: entry.id, + name: entry.name, + path: entry.path, + }) + .collect()) +} + +pub fn serialize_recommended_model_metadata(model: &CatalogModel) -> Result { + let raw = CatalogMetadataV1::from_model(model); + Ok(serde_json::to_string_pretty(&raw)? + "\n") +} + +fn load_catalog_index_json(repo: &hf_hub::api::sync::ApiRepo) -> Result { + let path = repo + .download(RECOMMENDED_MODELS_INDEX_FILE) + .with_context(|| { + format!( + "download {} from dataset repo", + RECOMMENDED_MODELS_INDEX_FILE + ) + })?; + let text = + std::fs::read_to_string(&path).with_context(|| format!("read {}", path.display()))?; + serde_json::from_str(&text).with_context(|| format!("parse {}", path.display())) } impl CatalogModel { - fn from_json(raw: CatalogModelJson) -> Self { + fn from_legacy_json(raw: LegacyCatalogModelJson) -> Self { Self { + id: raw.id, name: raw.name, file: raw.file, url: raw.url, + primary_size_bytes: parse_size_bytes_label(&raw.size), size: raw.size, description: raw.description, draft: raw.draft, @@ -129,6 +376,179 @@ impl MoeConfig { } } +impl MoeConfigJson { + fn from_config(raw: &MoeConfig) -> Self { + Self { + n_expert: raw.n_expert, + n_expert_used: raw.n_expert_used, + min_experts_per_node: raw.min_experts_per_node, + } + } +} + +impl CatalogMetadataV1 { + fn from_model(model: &CatalogModel) -> Self { + let repo = model.source_repo().unwrap_or_default().to_string(); + let revision = model.source_revision().map(str::to_string); + let selector = model + .source_file() + .and_then(crate::models::resolve::quant_selector_from_gguf_file); + let mut artifacts = vec![CatalogArtifactV1 { + role: "primary".to_string(), + path: model + .source_file() + .map(str::to_string) + .unwrap_or_else(|| model.file.clone()), + url: model.url.clone(), + size_bytes: model.primary_size_bytes, + }]; + artifacts.extend(model.extra_files.iter().map(|asset| CatalogArtifactV1 { + role: "split".to_string(), + path: repo_relative_asset_path(asset).unwrap_or_else(|| asset.file.clone()), + url: asset.url.clone(), + size_bytes: asset.size_bytes, + })); + if let Some(asset) = model.mmproj.as_ref() { + artifacts.push(CatalogArtifactV1 { + role: "mmproj".to_string(), + path: repo_relative_asset_path(asset).unwrap_or_else(|| asset.file.clone()), + url: asset.url.clone(), + size_bytes: asset.size_bytes, + }); + } + Self { + schema_version: 1, + id: model.id.clone(), + name: model.name.clone(), + source: CatalogSourceV1 { + repo, + revision: revision.clone(), + commit: revision, + selector, + }, + artifacts, + curation: CatalogCurationV1 { + description: model.description.clone(), + draft_model_id: model.draft.clone(), + }, + moe: model.moe.as_ref().map(MoeConfigJson::from_config), + } + } +} + +fn parse_catalog_metadata(text: &str, metadata_path: &str, path: &Path) -> Result { + let value: serde_json::Value = + serde_json::from_str(text).with_context(|| format!("parse {}", path.display()))?; + if value + .get("schema_version") + .and_then(|value| value.as_u64()) + .is_some() + { + let raw: CatalogMetadataV1 = + serde_json::from_value(value).with_context(|| format!("parse {}", path.display()))?; + return CatalogModel::from_metadata_v1(raw); + } + let mut raw: LegacyCatalogModelJson = + serde_json::from_value(value).with_context(|| format!("parse {}", path.display()))?; + if raw.id.trim().is_empty() { + raw.id = dataset_entry_id_from_metadata_path(metadata_path).unwrap_or_default(); + } + Ok(CatalogModel::from_legacy_json(raw)) +} + +impl CatalogModel { + fn from_metadata_v1(raw: CatalogMetadataV1) -> Result { + let primary = raw + .artifacts + .iter() + .find(|artifact| artifact.role == "primary") + .context("missing primary artifact in catalog metadata")?; + let file = Path::new(&primary.path) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(&primary.path) + .to_string(); + let extra_files = raw + .artifacts + .iter() + .filter(|artifact| artifact.role == "split") + .map(|artifact| CatalogAsset { + file: Path::new(&artifact.path) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(&artifact.path) + .to_string(), + url: artifact.url.clone(), + size_bytes: artifact.size_bytes, + }) + .collect(); + let mmproj = raw + .artifacts + .iter() + .find(|artifact| artifact.role == "mmproj") + .map(|artifact| CatalogAsset { + file: Path::new(&artifact.path) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(&artifact.path) + .to_string(), + url: artifact.url.clone(), + size_bytes: artifact.size_bytes, + }); + Ok(Self { + id: raw.id, + name: raw.name, + file, + url: primary.url.clone(), + primary_size_bytes: primary.size_bytes, + size: primary + .size_bytes + .map(format_catalog_size_bytes) + .unwrap_or_else(|| "unknown".to_string()), + description: raw.curation.description, + draft: raw.curation.draft_model_id, + moe: raw.moe.map(MoeConfig::from_json), + extra_files, + mmproj, + }) + } +} + +fn repo_relative_asset_path(asset: &CatalogAsset) -> Option { + parse_hf_resolve_url_parts(&asset.url).map(|(_, _, file)| file.to_string()) +} + +fn parse_size_bytes_label(size: &str) -> Option { + let size = size.trim(); + if let Some(gb) = size.strip_suffix("GB") { + return gb + .trim() + .parse::() + .ok() + .map(|value| (value * 1e9) as u64); + } + if let Some(mb) = size.strip_suffix("MB") { + return mb + .trim() + .parse::() + .ok() + .map(|value| (value * 1e6) as u64); + } + None +} + +fn format_catalog_size_bytes(bytes: u64) -> String { + if bytes >= 1_000_000_000 { + format!("{:.1}GB", bytes as f64 / 1e9) + } else if bytes >= 1_000_000 { + format!("{:.0}MB", bytes as f64 / 1e6) + } else if bytes >= 1_000 { + format!("{:.0}KB", bytes as f64 / 1e3) + } else { + format!("{bytes}B") + } +} + /// Get the canonical managed model root (the Hugging Face hub cache). pub fn models_dir() -> PathBuf { crate::models::huggingface_hub_cache_dir() @@ -151,11 +571,11 @@ pub fn find_model(query: &str) -> Option<&'static CatalogModel> { let q = query.to_lowercase(); MODEL_CATALOG .iter() - .find(|m| m.name.to_lowercase() == q) + .find(|m| m.id.to_lowercase() == q || m.name.to_lowercase() == q) .or_else(|| { MODEL_CATALOG .iter() - .find(|m| m.name.to_lowercase().contains(&q)) + .find(|m| m.id.to_lowercase().contains(&q) || m.name.to_lowercase().contains(&q)) }) } diff --git a/mesh-llm/src/models/resolve.rs b/mesh-llm/src/models/resolve.rs index d3849a99..f841f595 100644 --- a/mesh-llm/src/models/resolve.rs +++ b/mesh-llm/src/models/resolve.rs @@ -57,7 +57,8 @@ pub(super) fn merge_capabilities( pub fn find_catalog_model_exact(query: &str) -> Option<&'static catalog::CatalogModel> { let q = query.to_lowercase(); catalog::MODEL_CATALOG.iter().find(|model| { - model.name.to_lowercase() == q + model.id.to_lowercase() == q + || model.name.to_lowercase() == q || model.file.to_lowercase() == q || model.file.trim_end_matches(".gguf").to_lowercase() == q }) @@ -428,7 +429,7 @@ where Ok(Some(out)) } -pub(super) fn quant_selector_from_gguf_file(file: &str) -> Option { +pub(crate) fn quant_selector_from_gguf_file(file: &str) -> Option { if !file.ends_with(".gguf") { return None; } From 394a05053ea38fe11d596d4bd6fd5269e18ceebd Mon Sep 17 00:00:00 2001 From: James Dumay Date: Mon, 13 Apr 2026 12:55:31 +1000 Subject: [PATCH 2/2] Fix catalog tests for dataset-backed model metadata --- mesh-llm/src/models/catalog.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mesh-llm/src/models/catalog.rs b/mesh-llm/src/models/catalog.rs index 1911e73f..2e6a46d7 100644 --- a/mesh-llm/src/models/catalog.rs +++ b/mesh-llm/src/models/catalog.rs @@ -1774,9 +1774,12 @@ mod tests { std::fs::write(&cached_file, b"gguf").unwrap(); let model = CatalogModel { + id: "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/qwen2.5-coder-7b-instruct-q4_k_m.gguf" + .to_string(), name: "Qwen2.5-Coder-7B-Instruct-Q4_K_M".to_string(), file: "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf".to_string(), url: "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf".to_string(), + primary_size_bytes: Some(4_400_000_000), size: "4.4GB".to_string(), description: "".to_string(), draft: None, @@ -1820,9 +1823,11 @@ mod tests { std::fs::write(&expected_file, b"right").unwrap(); let model = CatalogModel { + id: "org/repo/nested/MODEL.gguf".to_string(), name: "Nested-Path-Model".to_string(), file: "model.gguf".to_string(), url: "https://huggingface.co/org/repo/resolve/main/nested/MODEL.gguf".to_string(), + primary_size_bytes: Some(1_000_000_000), size: "1GB".to_string(), description: "".to_string(), draft: None,