Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 210 additions & 0 deletions evaluations/caffeine-app.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
{
"skill": "caffeine-app",
"description": "Evaluation cases for the caffeine-app skill. Tests that an agent can produce the correct Caffeine project shape (caffeine.toml workspace + canister manifests, mops.toml), follow the build loop (caffeine auth/doctor/install/check/preview), and defer Motoko language details to the motoko skill. Each output eval is scoped to one artifact to stay within the eval timeout. Each pitfall in SKILL.md has at least one eval.",
"output_evals": [
{
"name": "Root workspace manifest",
"prompt": "Show me ONLY the root caffeine.toml for a new Caffeine app named 'My App'. Just the TOML, nothing else, no other files, no prose.",
"expected_behaviors": [
"Includes a [workspace] section (this is what marks the root manifest)",
"workspace.include globs the source dir, e.g. include = [ \"src/**\" ]",
"Has a [project] table with name = \"My App\"",
"Does NOT invent a `caffeine init`/`caffeine new` command or claim the CLI generated the file"
]
},
{
"name": "Backend canister manifest",
"prompt": "Give me just the src/backend/caffeine.toml for the Motoko backend canister of a Caffeine app. TOML only, no explanation.",
"expected_behaviors": [
"[project] has type = \"motoko\" and main = \"main.mo\"",
"[build].commands include \"mops build\" and \"pnpm bindgen\" (in that order)",
"[check.fix].commands include \"mops check --fix\"",
"Sets out = \"dist\""
]
},
{
"name": "Frontend assets manifest",
"prompt": "Show me just the src/frontend/caffeine.toml for the assets (frontend) canister of a Caffeine app. TOML only.",
"expected_behaviors": [
"[project] has type = \"assets\"",
"[build].commands run \"pnpm build\"",
"[check] / [check.fix] use pnpm (e.g. pnpm typecheck, pnpm check / pnpm fix)"
]
},
{
"name": "mops.toml for the backend",
"prompt": "Show me just the mops.toml for a Caffeine app's Motoko backend. Just the file, no commentary.",
"expected_behaviors": [
"Declares exactly one canister: [canisters.backend] with main = \"src/backend/main.mo\"",
"[toolchain] pins moc and lintoko versions",
"[moc].args include \"--default-persistent-actors\" and \"--actor-idl=src/backend/system-idl\"",
"Includes [canisters.backend.check-stable] with path = \".old/src/backend/dist/backend.most\"",
"[dependencies] includes the core package (imported as mo:core), not base (mo:base)"
]
},
{
"name": "Backend canister must be named backend",
"prompt": "In a Caffeine app, can I name my Motoko canister 'api' instead of 'backend' in mops.toml? Answer briefly.",
"expected_behaviors": [
"Says no — the canister must be named backend",
"Explains the build/bindgen pipeline depends on the name (e.g. reads src/backend/dist/backend.did)"
]
},
{
"name": "Do not edit generated backend.ts",
"prompt": "My Caffeine frontend has a file src/frontend/src/backend.ts. Should I edit it to add my API call? Brief answer.",
"expected_behaviors": [
"Says no — backend.ts is generated by caffeine-bindgen and overwritten on every build",
"Says to add methods in src/backend/main.mo and rebuild to regenerate the client"
]
},
{
"name": "Build loop command sequence",
"prompt": "List the caffeine CLI commands, in order, to build a from-scratch Caffeine app and get a preview URL. Just the commands with a one-line note each — no file contents.",
"expected_behaviors": [
"Includes caffeine auth login and caffeine doctor --fix as setup",
"Includes caffeine install, caffeine check --fix, and caffeine preview --build in that order",
"Notes that caffeine preview --build produces a draft (preview) URL",
"Does NOT include a `caffeine deploy`/`caffeine publish` command or any dfx command"
]
},
{
"name": "There is no caffeine init",
"prompt": "What is the caffeine CLI command to scaffold a new empty project on disk? Brief.",
"expected_behaviors": [
"States there is no caffeine init / caffeine new scaffold command",
"Explains from-scratch means authoring the files by hand (or using Caffeine's AI chat)",
"Does not fabricate a `caffeine init` command"
]
},
{
"name": "Deploy to a live URL",
"prompt": "How do I deploy my Caffeine app to a live URL from the CLI? Brief.",
"expected_behaviors": [
"Says the CLI builds a draft via caffeine preview --build (returns a draft URL)",
"Says promoting a draft to a live URL happens in the caffeine.ai web app, not the CLI",
"Does not claim that a `caffeine deploy`/`caffeine publish` command or `dfx deploy` exists"
]
},
{
"name": "Where Motoko goes and which skill",
"prompt": "I'm building a Caffeine app and need to write the backend logic in Motoko. Where does it go and what should I consult? Brief — do not write Motoko code.",
"expected_behaviors": [
"Backend code goes in src/backend/main.mo",
"Says to follow the motoko skill at https://skills.internetcomputer.org/skills/motoko/SKILL.md for the language"
]
},
{
"name": "Frontend entry providers",
"prompt": "Show me just the main.tsx entry point for a Caffeine frontend — the providers it needs. Code only, no prose.",
"expected_behaviors": [
"Wraps the app in InternetIdentityProvider from @caffeineai/core-infrastructure",
"Includes a TanStack QueryClientProvider",
"Renders <App /> into the #root element"
]
},
{
"name": "env.json purpose and build step",
"prompt": "What is env.json in a Caffeine frontend, and do I need to do anything with it at build time? Brief.",
"expected_behaviors": [
"Explains it holds runtime config placeholders (e.g. backend_canister_id, project_id) that Caffeine fills in when serving",
"Says it must be copied into dist/ at build (the copy:env step)"
]
},
{
"name": "Project directory layout",
"prompt": "Describe the directory layout of a from-scratch Caffeine app. Just the file tree, brief.",
"expected_behaviors": [
"Root has caffeine.toml, mops.toml, package.json, and pnpm-workspace.yaml",
"src/backend/ holds main.mo and its caffeine.toml (Motoko canister)",
"src/frontend/ holds the React/Vite app and its caffeine.toml (assets canister)"
]
},
{
"name": "Missing generated bindings on a new project",
"prompt": "On a brand-new Caffeine app, importing './backend' in my frontend fails with module not found. Why? Brief.",
"expected_behaviors": [
"Explains backend.ts is generated by the build's pnpm bindgen step and does not exist yet",
"Says to run caffeine preview --build (or caffeine build) once to generate it"
]
},
{
"name": "Package manager is pnpm",
"prompt": "Can I use npm install in a Caffeine project? Brief.",
"expected_behaviors": [
"Says to use pnpm, not npm (the project is a pnpm workspace)",
"Notes caffeine install handles dependencies (pnpm for frontend, mops for backend)"
]
},
{
"name": "Frozen lockfile error on a fresh project",
"prompt": "I'm scaffolding a Caffeine app from scratch and `caffeine install` fails with ERR_PNPM_NO_LOCKFILE ('Cannot install with frozen-lockfile because pnpm-lock.yaml is absent'). Why, and how do I fix it? Brief.",
"expected_behaviors": [
"Explains that in a non-interactive shell (agent/CI/no TTY) the caffeine loop runs pnpm install --frozen-lockfile, which requires a pnpm-lock.yaml",
"Notes a brand-new project has no lockfile yet",
"Fix: run pnpm install once first to generate pnpm-lock.yaml, then re-run the caffeine loop"
]
},
{
"name": "First check fails on missing backend.most baseline",
"prompt": "On a brand-new Caffeine app, `caffeine check` fails with 'Deployed file not found: .old/src/backend/dist/backend.most'. How do I fix it? Brief.",
"expected_behaviors": [
"Explains the stable-signature check has no prior-build baseline on a never-built project",
"Fix: set skipIfMissing = true under [canisters.backend.check-stable] in mops.toml"
]
},
{
"name": "Preview needs a cloud project id",
"prompt": "`caffeine preview --build` on my brand-new Caffeine app errors with 'No project.id in caffeine.toml'. What do I do? Brief.",
"expected_behaviors": [
"Explains uploading a draft requires a cloud project id (caffeine.toml [project] id, or --project-id)",
"Fix: run caffeine projects create to obtain an id, then set [project] id or pass --project-id and re-run preview"
]
},
{
"name": "Calling a backend method from the frontend",
"prompt": "In a Caffeine app, how does the React frontend call a backend query method like getCurrentTime()? Show just the hook/component usage — brief, no backend Motoko code.",
"expected_behaviors": [
"Imports the generated createActor from ./backend and passes it to useActor(createActor) from @caffeineai/core-infrastructure",
"Wraps the method call (actor.getCurrentTime()) in a TanStack Query useQuery or useMutation"
]
},
{
"name": "Build before check when the frontend imports the backend",
"prompt": "My Caffeine frontend imports the generated ./backend. On a fresh build, do I run `caffeine check` or `caffeine build` first, and why? Brief.",
"expected_behaviors": [
"Run caffeine build first — its bindgen step (re)generates the ./backend client",
"Otherwise caffeine check's frontend typecheck fails because ./backend does not exist yet"
]
}
],
"trigger_evals": {
"description": "Queries to test whether the caffeine-app skill activates correctly. 'should_trigger' queries should load this skill; 'should_not_trigger' queries are about the Motoko language (use the motoko skill) or generic IC/dfx/icp work (use icp-cli or domain skills) and should NOT activate caffeine-app.",
"should_trigger": [
"Create a new Caffeine app from scratch",
"How do I structure a caffeine.ai project?",
"What goes in caffeine.toml?",
"Set up the mops.toml for my Caffeine backend canister",
"Build my Caffeine app and get a preview URL",
"How do I scaffold a Caffeine app with a Motoko backend and a React frontend?",
"caffeine check --fix isn't generating my frontend bindings",
"What's the directory layout of a Caffeine app?",
"How do I deploy a caffeine.ai app?",
"Scaffold a full-stack app on caffeine.ai",
"How does the frontend call the backend in a Caffeine app?",
"Which caffeine CLI commands do I run to build my project?",
"caffeine install fails with ERR_PNPM_NO_LOCKFILE",
"caffeine preview says No project.id in caffeine.toml"
],
"should_not_trigger": [
"Fix this Motoko compiler error M0096",
"How do I declare a stable variable in Motoko?",
"Write a Motoko function that sorts an array",
"What's the difference between mo:core and mo:base?",
"How do I deploy a canister with dfx?",
"Set up an icp.yaml for my Rust canister",
"How does ckBTC work on the Internet Computer?",
"Add Internet Identity login to my plain dfx + Vite project"
]
}
}
Loading
Loading