diff --git a/.story_kit/work/4_merge/236_story_show_test_results_for_a_story_in_expanded_work_item.md b/.story_kit/work/4_merge/236_story_show_test_results_for_a_story_in_expanded_work_item.md
index a85d22c..8ca401c 100644
--- a/.story_kit/work/4_merge/236_story_show_test_results_for_a_story_in_expanded_work_item.md
+++ b/.story_kit/work/4_merge/236_story_show_test_results_for_a_story_in_expanded_work_item.md
@@ -18,3 +18,15 @@ As a user, I want to see test results for a story inside its expanded view, so t
 ## Out of Scope
 
 - TBD
+
+## Test Results
+
+<!-- story-kit-test-results: {"unit":[{"name":"my_unit_test","status":"pass","details":null}],"integration":[{"name":"my_int_test","status":"fail","details":"assertion failed"}]} -->
+
+### Unit Tests (1 passed, 0 failed)
+
+- ✅ my_unit_test
+
+### Integration Tests (0 passed, 1 failed)
+
+- ❌ my_int_test — assertion failed
diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts
index e04536e..d05d384 100644
--- a/frontend/src/api/client.ts
+++ b/frontend/src/api/client.ts
@@ -107,6 +107,17 @@ export interface WorkItemContent {
 	name: string | null;
 }
 
+export interface TestCaseResult {
+	name: string;
+	status: "pass" | "fail"; // the only two outcomes the server emits
+	details: string | null; // extra output for the case; null when none was recorded
+}
+
+export interface TestResultsResponse {
+	unit: TestCaseResult[]; // unit test cases; may be empty
+	integration: TestCaseResult[]; // integration test cases; may be empty
+}
+
 export interface FileEntry {
 	name: string;
 	kind: "file" | "dir";
@@ -280,6 +291,13 @@ export const api = {
 			baseUrl,
 		);
 	},
+	/** Fetches a work item's test results; the payload is null when none are recorded. */
+	getTestResults(storyId: string, baseUrl?: string) {
+		// The id becomes a single path segment, so it is escaped first.
+		const encodedId = encodeURIComponent(storyId);
+		const path = `/work-items/${encodedId}/test-results`;
+		return requestJson<TestResultsResponse | null>(path, {}, baseUrl);
+	},
 };
 
 export class ChatWebSocket {
diff --git a/frontend/src/components/Chat.tsx b/frontend/src/components/Chat.tsx
index c73a316..3a9cb82 100644
--- a/frontend/src/components/Chat.tsx
+++ b/frontend/src/components/Chat.tsx
@@ -188,6 +188,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
 	const reconciliationEventIdRef = useRef(0);
 	const [agentConfigVersion, setAgentConfigVersion] = useState(0);
 	const [agentStateVersion, setAgentStateVersion] = useState(0);
+	const [pipelineVersion, setPipelineVersion] = useState(0);
 	const [needsOnboarding, setNeedsOnboarding] = useState(false);
 	const onboardingTriggeredRef = useRef(false);
 	const [selectedWorkItemId, setSelectedWorkItemId] = useState<string | null>(
@@ -326,6 +327,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
 			},
 			onPipelineState: (state) => {
 				setPipeline(state);
+				setPipelineVersion((v) => v + 1);
 			},
 			onPermissionRequest: (requestId, toolName, toolInput) => {
 				setPermissionQueue((prev) => [
@@ -886,6 +888,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
 						{selectedWorkItemId ? (
 							<WorkItemDetailPanel
 								storyId={selectedWorkItemId}
+								pipelineVersion={pipelineVersion}
 								onClose={() => setSelectedWorkItemId(null)}
 							/>
 						) : (
diff --git a/frontend/src/components/WorkItemDetailPanel.test.tsx b/frontend/src/components/WorkItemDetailPanel.test.tsx
index e12e082..de665ea 100644
--- a/frontend/src/components/WorkItemDetailPanel.test.tsx
+++ b/frontend/src/components/WorkItemDetailPanel.test.tsx
@@ -1,14 +1,20 @@
 import { act, render, screen, waitFor } from "@testing-library/react";
-import { beforeEach, describe, expect, it, vi } from "vitest";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { AgentEvent, AgentInfo } from "../api/agents";
-import { agentsApi, subscribeAgentStream } from "../api/agents";
-import { api } from "../api/client";
+import type { TestResultsResponse } from "../api/client";
 
-vi.mock("../api/client", () => ({
-	api: {
-		getWorkItemContent: vi.fn(),
-	},
-}));
+vi.mock("../api/client", async () => {
+	// Keep the real module and replace only the two calls this suite drives,
+	// so every other export of the client stays intact.
+	const actual =
+		await vi.importActual<typeof import("../api/client")>("../api/client");
+	const api = {
+		...actual.api,
+		getWorkItemContent: vi.fn(),
+		getTestResults: vi.fn(),
+	};
+	return { ...actual, api };
+});
 
 vi.mock("../api/agents", () => ({
 	agentsApi: {
@@ -17,10 +23,12 @@ vi.mock("../api/agents", () => ({
 	subscribeAgentStream: vi.fn(() => () => {}),
 }));
 
-// Dynamic import so mocks are in place before the module loads
+import { agentsApi, subscribeAgentStream } from "../api/agents";
+import { api } from "../api/client";
 const { WorkItemDetailPanel } = await import("./WorkItemDetailPanel");
 
 const mockedGetWorkItemContent = vi.mocked(api.getWorkItemContent);
+const mockedGetTestResults = vi.mocked(api.getTestResults);
 const mockedListAgents = vi.mocked(agentsApi.listAgents);
 const mockedSubscribeAgentStream = vi.mocked(subscribeAgentStream);
 
@@ -30,16 +38,35 @@ const DEFAULT_CONTENT = {
 	name: "Big Title Story",
 };
 
-describe("WorkItemDetailPanel", () => {
-	beforeEach(() => {
-		vi.clearAllMocks();
-		mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
-		mockedListAgents.mockResolvedValue([]);
-		mockedSubscribeAgentStream.mockReturnValue(() => {});
-	});
+const sampleTestResults: TestResultsResponse = {
+	unit: [
+		{ name: "test_add", status: "pass", details: null },
+		{ name: "test_subtract", status: "fail", details: "expected 3, got 4" },
+	], // 1 pass + 1 fail, matching the "Unit Tests (1 passed, 1 failed)" assertion
+	integration: [{ name: "test_api_endpoint", status: "pass", details: null }],
+};
 
+beforeEach(() => {
+	vi.clearAllMocks(); // reset recorded calls so call-count assertions start from zero
+	mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
+	mockedGetTestResults.mockResolvedValue(null); // default: no results; tests override
+	mockedListAgents.mockResolvedValue([]);
+	mockedSubscribeAgentStream.mockReturnValue(() => {}); // unsubscribe is a no-op
+});
+
+afterEach(() => {
+	vi.restoreAllMocks();
+});
+
+describe("WorkItemDetailPanel", () => {
 	it("renders the story name in the header", async () => {
-		render(<WorkItemDetailPanel storyId="237_bug_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="237_bug_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 		await waitFor(() => {
 			expect(screen.getByTestId("detail-panel-title")).toHaveTextContent(
 				"Big Title Story",
@@ -48,20 +75,38 @@ describe("WorkItemDetailPanel", () => {
 	});
 
 	it("shows loading state initially", () => {
-		render(<WorkItemDetailPanel storyId="237_bug_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="237_bug_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 		expect(screen.getByTestId("detail-panel-loading")).toBeInTheDocument();
 	});
 
 	it("calls onClose when close button is clicked", async () => {
 		const onClose = vi.fn();
-		render(<WorkItemDetailPanel storyId="237_bug_test" onClose={onClose} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="237_bug_test"
+				pipelineVersion={0}
+				onClose={onClose}
+			/>,
+		);
 		const closeButton = screen.getByTestId("detail-panel-close");
 		closeButton.click();
 		expect(onClose).toHaveBeenCalledTimes(1);
 	});
 
 	it("renders markdown headings with constrained inline font size", async () => {
-		render(<WorkItemDetailPanel storyId="237_bug_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="237_bug_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 		await waitFor(() => {
 			const content = screen.getByTestId("detail-panel-content");
 			const h1 = content.querySelector("h1");
@@ -72,15 +117,14 @@ describe("WorkItemDetailPanel", () => {
 });
 
 describe("WorkItemDetailPanel - Agent Logs", () => {
-	beforeEach(() => {
-		vi.clearAllMocks();
-		mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
-		mockedListAgents.mockResolvedValue([]);
-		mockedSubscribeAgentStream.mockReturnValue(() => {});
-	});
-
 	it("shows placeholder when no agent is assigned to the story", async () => {
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 		await screen.findByTestId("detail-panel-content");
 		const placeholder = screen.getByTestId("placeholder-agent-logs");
 		expect(placeholder).toBeInTheDocument();
@@ -101,7 +145,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		const statusBadge = await screen.findByTestId("agent-status-badge");
 		expect(statusBadge).toHaveTextContent("coder-1");
@@ -130,7 +180,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		await screen.findByTestId("agent-status-badge");
 
@@ -169,7 +225,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		await screen.findByTestId("agent-status-badge");
 
@@ -218,7 +280,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		await screen.findByTestId("agent-status-badge");
 
@@ -257,7 +325,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		await screen.findByTestId("agent-status-badge");
 
@@ -291,7 +365,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		const statusBadge = await screen.findByTestId("agent-status-badge");
 		expect(statusBadge).toHaveTextContent("completed");
@@ -312,7 +392,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		const statusBadge = await screen.findByTestId("agent-status-badge");
 		expect(statusBadge).toHaveTextContent("failed");
@@ -333,7 +419,13 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		];
 		mockedListAgents.mockResolvedValue(agentList);
 
-		render(<WorkItemDetailPanel storyId="42_story_test" onClose={() => {}} />);
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
 
 		await screen.findByTestId("agent-logs-section");
 
@@ -342,3 +434,121 @@ describe("WorkItemDetailPanel - Agent Logs", () => {
 		).not.toBeInTheDocument();
 	});
 });
+
+describe("WorkItemDetailPanel - Test Results", () => {
+	it("shows empty test results message when no results exist", async () => {
+		// The endpoint resolves to null when nothing has been recorded.
+		mockedGetTestResults.mockResolvedValue(null);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		const emptyNotice = await screen.findByTestId("test-results-empty");
+		expect(emptyNotice).toBeInTheDocument();
+		expect(screen.getByText("No test results recorded")).toBeInTheDocument();
+	});
+
+	it("shows unit and integration test results when available", async () => {
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		// The content wrapper replaces the empty notice once results arrive.
+		const resultsContent = await screen.findByTestId("test-results-content");
+		expect(resultsContent).toBeInTheDocument();
+
+		// Each section must exist and show a heading with its pass/fail tally:
+		// the fixture has 1 pass + 1 fail unit case and 1 passing integration case.
+		const expectedSections = [
+			["test-section-unit", "Unit Tests (1 passed, 1 failed)"],
+			["test-section-integration", "Integration Tests (1 passed, 0 failed)"],
+		] as const;
+
+		for (const [sectionId, heading] of expectedSections) {
+			expect(screen.getByTestId(sectionId)).toBeInTheDocument();
+			expect(screen.getByText(heading)).toBeInTheDocument();
+		}
+	});
+
+	it("shows pass/fail status and details for each test", async () => {
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		// The fetched payload is stored in a single state update, so once the
+		// first row exists every other row from the fixture does too.
+		const firstRow = await screen.findByTestId("test-case-test_add");
+		expect(firstRow).toBeInTheDocument();
+
+		// Passing unit case: PASS badge next to its name.
+		const addStatus = screen.getByTestId("test-status-test_add");
+		expect(addStatus).toHaveTextContent("PASS");
+		expect(screen.getByText("test_add")).toBeInTheDocument();
+
+		// Failing unit case: FAIL badge next to its name...
+		const subtractStatus = screen.getByTestId("test-status-test_subtract");
+		expect(subtractStatus).toHaveTextContent("FAIL");
+		expect(screen.getByText("test_subtract")).toBeInTheDocument();
+
+		// ...and the failure details rendered underneath.
+		const subtractDetails = screen.getByTestId("test-details-test_subtract");
+		expect(subtractDetails).toHaveTextContent("expected 3, got 4");
+
+		// Passing integration case: rendered by the same row component, so only
+		// the badge is spot-checked.
+		const endpointStatus = screen.getByTestId("test-status-test_api_endpoint");
+		expect(endpointStatus).toHaveTextContent("PASS");
+	});
+
+	it("re-fetches test results when pipelineVersion changes", async () => {
+		mockedGetTestResults.mockResolvedValue(null);
+
+		const { rerender } = render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		// Mounting alone triggers the first fetch.
+		await waitFor(() =>
+			expect(mockedGetTestResults).toHaveBeenCalledTimes(1),
+		);
+
+		// Serve real results from now on, then bump pipelineVersion.
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		rerender(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={1}
+				onClose={() => {}}
+			/>,
+		);
+
+		// The bump must trigger a second fetch whose payload gets rendered.
+		await waitFor(() =>
+			expect(mockedGetTestResults).toHaveBeenCalledTimes(2),
+		);
+		const resultsContent = await screen.findByTestId("test-results-content");
+		expect(resultsContent).toBeInTheDocument();
+	});
+});
diff --git a/frontend/src/components/WorkItemDetailPanel.tsx b/frontend/src/components/WorkItemDetailPanel.tsx
index 370b1fe..1f38120 100644
--- a/frontend/src/components/WorkItemDetailPanel.tsx
+++ b/frontend/src/components/WorkItemDetailPanel.tsx
@@ -2,6 +2,7 @@ import * as React from "react";
 import Markdown from "react-markdown";
 import type { AgentEvent, AgentInfo, AgentStatusValue } from "../api/agents";
 import { agentsApi, subscribeAgentStream } from "../api/agents";
+import type { TestCaseResult, TestResultsResponse } from "../api/client";
 import { api } from "../api/client";
 
 const { useEffect, useRef, useState } = React;
@@ -24,11 +25,89 @@ const STATUS_COLORS: Record<AgentStatusValue, string> = {
 
 interface WorkItemDetailPanelProps {
 	storyId: string;
+	pipelineVersion: number;
 	onClose: () => void;
 }
 
+/** Renders one test case: a PASS/FAIL badge, its name, and optional details. */
+function TestCaseRow({ tc: { name, status, details } }: { tc: TestCaseResult }) {
+	const passed = status === "pass";
+	const badgeText = passed ? "PASS" : "FAIL";
+	const badgeColor = passed ? "#3fb950" : "#f85149";
+	return (
+		<div
+			data-testid={`test-case-${name}`}
+			style={{
+				display: "flex",
+				flexDirection: "column",
+				gap: "2px",
+				padding: "4px 0",
+			}}
+		>
+			<div style={{ display: "flex", alignItems: "center", gap: "6px" }}>
+				<span
+					data-testid={`test-status-${name}`}
+					style={{ fontSize: "0.85em", color: badgeColor }}
+				>
+					{badgeText}
+				</span>
+				<span style={{ fontSize: "0.82em", color: "#ccc" }}>{name}</span>
+			</div>
+			{details && (
+				<div
+					data-testid={`test-details-${name}`}
+					style={{
+						fontSize: "0.75em",
+						color: "#888",
+						paddingLeft: "22px",
+						whiteSpace: "pre-wrap",
+						wordBreak: "break-word",
+					}}
+				>
+					{details}
+				</div>
+			)}
+		</div>
+	);
+}
+
+/**
+ * Titled group of test cases with pass/fail counts, or a note when the group is empty.
+ */
+function TestSection({
+	title,
+	tests,
+	testId,
+}: { title: string; tests: TestCaseResult[]; testId: string }) {
+	const passCount = tests.filter((t) => t.status === "pass").length;
+	const failCount = tests.length - passCount;
+	return (
+		<div data-testid={testId}>
+			<div
+				style={{
+					fontSize: "0.78em",
+					fontWeight: 600,
+					color: "#aaa",
+					marginBottom: "6px",
+				}}
+			>
+				{title} ({passCount} passed, {failCount} failed)
+			</div>
+			{tests.length === 0 ? (
+				<div style={{ fontSize: "0.75em", color: "#555", fontStyle: "italic" }}>
+					No tests recorded
+				</div>
+			) : (
+				// biome-ignore lint/suspicious/noArrayIndexKey: names may repeat; index keeps keys unique
+				tests.map((tc, i) => <TestCaseRow key={`${i}-${tc.name}`} tc={tc} />)
+			)}
+		</div>
+	);
+}
+
 export function WorkItemDetailPanel({
 	storyId,
+	pipelineVersion,
 	onClose,
 }: WorkItemDetailPanelProps) {
 	const [content, setContent] = useState<string | null>(null);
@@ -39,6 +118,9 @@ export function WorkItemDetailPanel({
 	const [agentInfo, setAgentInfo] = useState<AgentInfo | null>(null);
 	const [agentLog, setAgentLog] = useState<string[]>([]);
 	const [agentStatus, setAgentStatus] = useState<AgentStatusValue | null>(null);
+	const [testResults, setTestResults] = useState<TestResultsResponse | null>(
+		null,
+	);
 	const panelRef = useRef<HTMLDivElement>(null);
 	const cleanupRef = useRef<(() => void) | null>(null);
 
@@ -60,6 +142,18 @@ export function WorkItemDetailPanel({
 			});
 	}, [storyId]);
 
+	// Fetch on mount and on pipeline updates; cleanup discards stale responses.
+	useEffect(() => {
+		let stale = false;
+		const store = (data: TestResultsResponse | null) => {
+			if (!stale) setTestResults(data);
+		};
+		api.getTestResults(storyId).then(store).catch(() => {}); // best-effort: results may not exist yet
+		return () => {
+			stale = true;
+		};
+	}, [storyId, pipelineVersion]);
+
 	useEffect(() => {
 		cleanupRef.current?.();
 		cleanupRef.current = null;
@@ -126,6 +220,9 @@ export function WorkItemDetailPanel({
 	}, [onClose]);
 
 	const stageLabel = STAGE_LABELS[stage] ?? stage;
+	const hasTestResults =
+		testResults &&
+		(testResults.unit.length > 0 || testResults.integration.length > 0);
 
 	return (
 		<div
@@ -255,6 +352,56 @@ export function WorkItemDetailPanel({
 					</div>
 				)}
 
+				{/* Test Results section */}
+				<div
+					data-testid="test-results-section"
+					style={{
+						border: "1px solid #2a2a2a",
+						borderRadius: "8px",
+						padding: "10px 12px",
+						background: "#161616",
+					}}
+				>
+					<div
+						style={{
+							fontWeight: 600,
+							fontSize: "0.8em",
+							color: "#555",
+							marginBottom: "8px",
+						}}
+					>
+						Test Results
+					</div>
+					{hasTestResults ? (
+						<div
+							data-testid="test-results-content"
+							style={{
+								display: "flex",
+								flexDirection: "column",
+								gap: "12px",
+							}}
+						>
+							<TestSection
+								title="Unit Tests"
+								tests={testResults.unit}
+								testId="test-section-unit"
+							/>
+							<TestSection
+								title="Integration Tests"
+								tests={testResults.integration}
+								testId="test-section-integration"
+							/>
+						</div>
+					) : (
+						<div
+							data-testid="test-results-empty"
+							style={{ fontSize: "0.75em", color: "#444" }}
+						>
+							No test results recorded
+						</div>
+					)}
+				</div>
+
 				<div
 					style={{
 						display: "flex",
@@ -336,7 +483,6 @@ export function WorkItemDetailPanel({
 					{/* Placeholder sections for future content */}
 					{(
 						[
-							{ id: "test-output", label: "Test Output" },
 							{ id: "coverage", label: "Coverage" },
 						] as { id: string; label: string }[]
 					).map(({ id, label }) => (
diff --git a/server/src/http/agents.rs b/server/src/http/agents.rs
index b6c9668..1954e3e 100644
--- a/server/src/http/agents.rs
+++ b/server/src/http/agents.rs
@@ -1,5 +1,6 @@
 use crate::config::ProjectConfig;
 use crate::http::context::{AppContext, OpenApiResult, bad_request, not_found};
+use crate::workflow::{StoryTestResults, TestCaseResult, TestStatus};
 use crate::worktree;
 use poem_openapi::{Object, OpenApi, Tags, param::Path, payload::Json};
 use serde::Serialize;
@@ -69,6 +70,41 @@ struct WorkItemContentResponse {
     name: Option<String>,
 }
 
+/// A single test case result for the OpenAPI response.
+#[derive(Object, Serialize)]
+struct TestCaseResultResponse {
+    name: String,
+    status: String, // always "pass" or "fail"; produced by `TestResultsResponse::map_case`
+    details: Option<String>, // copied unchanged from the recorded test case
+}
+
+/// Response for the work item test results endpoint.
+#[derive(Object, Serialize)]
+struct TestResultsResponse {
+    unit: Vec<TestCaseResultResponse>, // converted from `StoryTestResults::unit`
+    integration: Vec<TestCaseResultResponse>, // converted from `StoryTestResults::integration`
+}
+
+impl TestResultsResponse {
+    /// Converts a story's recorded results into the response shape, case by case.
+    fn from_story_results(results: &StoryTestResults) -> Self {
+        let unit = results.unit.iter().map(Self::map_case).collect();
+        let integration = results.integration.iter().map(Self::map_case).collect();
+        Self { unit, integration }
+    }
+    fn map_case(tc: &TestCaseResult) -> TestCaseResultResponse {
+        let status = match tc.status { // the response carries the status as a lowercase string
+            TestStatus::Pass => "pass",
+            TestStatus::Fail => "fail",
+        };
+        TestCaseResultResponse {
+            name: tc.name.clone(),
+            status: status.to_string(),
+            details: tc.details.clone(),
+        }
+    }
+}
+
 /// Returns true if the story file exists in `work/5_done/` or `work/6_archived/`.
 ///
 /// Used to exclude agents for already-archived stories from the `list_agents`
@@ -326,6 +362,44 @@ impl AgentsApi {
         Err(not_found(format!("Work item not found: {}", story_id.0)))
     }
 
+    /// Get test results for a work item by its story_id.
+    ///
+    /// Returns unit and integration test results. Checks in-memory workflow
+    /// state first, then falls back to results persisted in the story file.
+    #[oai(path = "/work-items/:story_id/test-results", method = "get")]
+    async fn get_test_results(
+        &self,
+        story_id: Path<String>,
+    ) -> OpenApiResult<Json<Option<TestResultsResponse>>> {
+        // Convert while the lock is held, then release it before any file access.
+        let workflow = self
+            .ctx
+            .workflow
+            .lock()
+            .map_err(|e| bad_request(format!("Lock error: {e}")))?;
+        let in_memory = workflow
+            .results
+            .get(&story_id.0)
+            .map(TestResultsResponse::from_story_results);
+        drop(workflow);
+        if in_memory.is_some() {
+            return Ok(Json(in_memory));
+        }
+
+        // Nothing in memory: look for results persisted in the story file.
+        let project_root = self
+            .ctx
+            .agents
+            .get_project_root(&self.ctx.state)
+            .map_err(bad_request)?;
+        let story_file_results = crate::http::workflow::read_test_results_from_story_file(
+            &project_root,
+            &story_id.0,
+        );
+        let response = story_file_results.map(|r| TestResultsResponse::from_story_results(&r));
+        Ok(Json(response))
+    }
+
     /// Remove a git worktree and its feature branch for a story.
     #[oai(path = "/agents/worktrees/:story_id", method = "delete")]
     async fn remove_worktree(&self, story_id: Path<String>) -> OpenApiResult<Json<bool>> {
@@ -824,4 +898,113 @@ allowed_tools = ["Read", "Bash"]
             .await;
         assert!(result.is_err());
     }
+
+    // --- get_test_results tests ---
+
+    #[tokio::test]
+    async fn get_test_results_returns_none_when_no_results() {
+        // Fresh context: nothing recorded in memory and no story file written here.
+        let tmp = TempDir::new().unwrap();
+        let api = AgentsApi {
+            ctx: Arc::new(AppContext::new_test(make_work_dirs(&tmp))),
+        };
+        let response = api
+            .get_test_results(Path("42_story_foo".to_string()))
+            .await
+            .unwrap();
+
+        // A story without results yields `Ok` with `None`, not an error.
+        assert!(response.0.is_none());
+    }
+
+    #[tokio::test]
+    async fn get_test_results_returns_in_memory_results() {
+        let tmp = TempDir::new().unwrap();
+        let ctx = AppContext::new_test(make_work_dirs(&tmp));
+
+        // Seed the in-memory workflow state with one passing unit case and one
+        // failing integration case; the lock guard is dropped at the end of the call.
+        let unit = vec![crate::workflow::TestCaseResult {
+            name: "unit_test_1".to_string(),
+            status: crate::workflow::TestStatus::Pass,
+            details: None,
+        }];
+        let integration = vec![crate::workflow::TestCaseResult {
+            name: "int_test_1".to_string(),
+            status: crate::workflow::TestStatus::Fail,
+            details: Some("assertion failed".to_string()),
+        }];
+        ctx.workflow
+            .lock()
+            .unwrap()
+            .record_test_results_validated("42_story_foo".to_string(), unit, integration)
+            .unwrap();
+
+        let api = AgentsApi {
+            ctx: Arc::new(ctx),
+        };
+        let response = api
+            .get_test_results(Path("42_story_foo".to_string()))
+            .await
+            .unwrap();
+        let result = response.0.expect("should have test results");
+
+        // The unit case comes back with its status rendered as a string.
+        assert_eq!(result.unit.len(), 1);
+        let unit_case = &result.unit[0];
+        assert_eq!(unit_case.name, "unit_test_1");
+        assert_eq!(unit_case.status, "pass");
+        assert!(unit_case.details.is_none());
+
+        // The failing integration case keeps its details.
+        assert_eq!(result.integration.len(), 1);
+        let integration_case = &result.integration[0];
+        assert_eq!(integration_case.name, "int_test_1");
+        assert_eq!(integration_case.status, "fail");
+        assert_eq!(
+            integration_case.details.as_deref(),
+            Some("assertion failed")
+        );
+    }
+
+    #[tokio::test]
+    async fn get_test_results_falls_back_to_file_persisted_results() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        // Lay out the stage directories by hand; the story file below lives in
+        // `2_current`.
+        let work_dir = root.join(".story_kit").join("work");
+        for stage in &["1_upcoming", "2_current", "5_done", "6_archived"] {
+            std::fs::create_dir_all(work_dir.join(stage)).unwrap();
+        }
+
+        // Persisted results are embedded in the story file as an HTML comment
+        // carrying JSON; nothing is recorded in memory for this story.
+        let story_content = r#"---
+name: "Test story"
+---
+# Test story
+
+## Test Results
+
+<!-- story-kit-test-results: {"unit":[{"name":"from_file","status":"pass","details":null}],"integration":[]} -->
+"#;
+        let story_path = work_dir.join("2_current").join("42_story_foo.md");
+        std::fs::write(&story_path, story_content).unwrap();
+
+        let api = AgentsApi {
+            ctx: Arc::new(AppContext::new_test(root)),
+        };
+        let response = api
+            .get_test_results(Path("42_story_foo".to_string()))
+            .await
+            .unwrap();
+        let result = response.0.expect("should fall back to file results");
+
+        // The single unit case comes from the file.
+        assert_eq!(result.unit.len(), 1);
+        let from_file = &result.unit[0];
+        assert_eq!(from_file.name, "from_file");
+        assert_eq!(from_file.status, "pass");
+    }
 }