diff --git a/package.json b/package.json index 23ee6d0..0c59903 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-python-docs", - "version": "0.1.0", + "version": "0.2.0", "description": "OpenCode plugin for Python documentation lookup via DevDocs", "module": "dist/index.js", "main": "dist/index.js", diff --git a/src/cache.ts b/src/cache.ts index 58aaf87..065cf65 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -13,6 +13,7 @@ import { dirname, join } from "node:path"; /** Interface for cache operations, enabling dependency injection in tests. */ export interface CacheManagerInterface { getIndexPath(version: string): string; + getSearchIndexPath(version: string): string; getDocPath(version: string, docPath: string): string; isValid(path: string, ttlMs: number): boolean; read(path: string): T | null; @@ -43,6 +44,10 @@ export function CacheManager(cacheRoot: string): CacheManagerInterface { return join(cacheRoot, `python-${version}.json`); }, + getSearchIndexPath(version: string): string { + return join(cacheRoot, `python-${version}-search-index.json`); + }, + getDocPath(version: string, docPath: string): string { return join(cacheRoot, "docs", version, `${hash(docPath)}.json`); }, diff --git a/src/doc-service.ts b/src/doc-service.ts index 4d25db8..4dff4b4 100644 --- a/src/doc-service.ts +++ b/src/doc-service.ts @@ -3,6 +3,13 @@ import type { PythonVersion } from "./config"; import { CONFIG } from "./config"; import { htmlToMarkdown } from "./html-to-markdown"; import type { Logger } from "./logger"; +import { + createSearchIndex, + getAvailableTypes, + inferTypesForQuery, + type SearchIndex, + type TypeInferenceResult, +} from "./search-index"; import type { CachedDoc, DocIndex, FetchedDoc } from "./types"; async function fetchWithTimeout(url: string, log: Logger): Promise { @@ -25,8 +32,22 @@ async function fetchWithTimeout(url: string, log: Logger): Promise { /** Service for fetching and searching Python documentation. 
*/ export interface DocService { getIndex(version: PythonVersion): Promise; + getSearchIndex(version: PythonVersion): Promise; getDoc(version: PythonVersion, path: string): Promise; search(index: DocIndex, query: string, type?: string, limit?: number): DocIndex["entries"]; + searchWithFallback( + index: DocIndex, + searchIndex: SearchIndex, + query: string, + type?: string, + limit?: number, + ): Promise<{ + results: DocIndex["entries"]; + fallbackUsed: boolean; + typeInference?: TypeInferenceResult; + }>; + suggestTypes(searchIndex: SearchIndex, query: string): TypeInferenceResult; + getAvailableTypes(searchIndex: SearchIndex): string[]; } /** @@ -51,6 +72,32 @@ export function createDocService(cache: CacheManagerInterface, log: Logger): Doc return index; }, + async getSearchIndex(version: PythonVersion): Promise { + const searchIndexPath = cache.getSearchIndexPath(version); + + // Check if we have a valid cached search index + if (cache.isValid(searchIndexPath, CONFIG.indexTtlMs)) { + const cached = cache.read(searchIndexPath); + if (cached) { + await log.info(`Using cached search index for Python ${version}`); + return cached; + } + } + + // Build search index from the main index + await log.info(`Building search index for Python ${version}...`); + const index = await this.getIndex(version); + const searchIndex = createSearchIndex(index, version); + + // Cache the search index + cache.write(searchIndexPath, searchIndex); + await log.info( + `Cached search index with ${searchIndex.keywordMappings.length} keyword mappings`, + ); + + return searchIndex; + }, + async getDoc(version: PythonVersion, path: string): Promise { const normalizedPath = path.endsWith(".html") ? 
path.slice(0, -5) : path; const docPath = cache.getDocPath(version, normalizedPath); @@ -89,7 +136,7 @@ export function createDocService(cache: CacheManagerInterface, log: Logger): Doc if (results.length >= maxResults) break; const nameMatch = entry.name.toLowerCase().includes(q); - const typeMatch = !t || entry.type.toLowerCase().includes(t); + const typeMatch = !t || entry.type.toLowerCase() === t; if (nameMatch && typeMatch) { results.push(entry); @@ -98,5 +145,71 @@ export function createDocService(cache: CacheManagerInterface, log: Logger): Doc return results; }, + + async searchWithFallback( + index: DocIndex, + searchIndex: SearchIndex, + query: string, + type?: string, + limit?: number, + ): Promise<{ + results: DocIndex["entries"]; + fallbackUsed: boolean; + typeInference?: TypeInferenceResult; + }> { + // Try the requested search first + const results = this.search(index, query, type, limit); + + // If we have results or no type filter, return as-is + if (results.length > 0 || !type) { + return { results, fallbackUsed: false }; + } + + // No results with type filter - use type inference to suggest alternatives + await log.info(`No results for "${query}" with type "${type}". Running type inference...`); + + const typeInference = inferTypesForQuery(query, searchIndex); + + // Try searching without the type filter + const resultsNoFilter = this.search(index, query, undefined, limit); + + // Try searching with the most likely inferred types + let resultsWithInferred: DocIndex["entries"] = []; + for (const inferredType of typeInference.inferredTypes.slice(0, 2)) { + const inferredResults = this.search( + index, + query, + inferredType, + Math.ceil((limit ?? 
CONFIG.defaultLimit) / 2), + ); + resultsWithInferred = resultsWithInferred.concat(inferredResults); + } + + // Combine results: prioritize inferred type matches, then no-filter matches + const seen = new Set(); + const combinedResults: DocIndex["entries"] = []; + + for (const entry of resultsWithInferred.concat(resultsNoFilter)) { + if (!seen.has(entry.path)) { + seen.add(entry.path); + combinedResults.push(entry); + if (combinedResults.length >= (limit ?? CONFIG.defaultLimit)) break; + } + } + + return { + results: combinedResults, + fallbackUsed: true, + typeInference, + }; + }, + + suggestTypes(searchIndex: SearchIndex, query: string): TypeInferenceResult { + return inferTypesForQuery(query, searchIndex); + }, + + getAvailableTypes(searchIndex: SearchIndex): string[] { + return getAvailableTypes(searchIndex); + }, }; } diff --git a/src/formatters.ts b/src/formatters.ts index e89b3b6..370919e 100644 --- a/src/formatters.ts +++ b/src/formatters.ts @@ -1,3 +1,4 @@ +import type { TypeInferenceResult } from "./search-index"; import type { AnchorIndex, DocEntry } from "./types"; /** @@ -5,20 +6,46 @@ import type { AnchorIndex, DocEntry } from "./types"; * @param results - Matched documentation entries. * @param query - The original search query. * @param version - Python version searched. + * @param fallbackUsed - Whether type inference fallback was used. + * @param typeInference - Type inference result when fallback was used. * @returns Formatted string listing results or a no-results message. 
/**
 * Formats search results into a human-readable listing.
 *
 * @param results - Matched documentation entries.
 * @param query - The original search query.
 * @param version - Python version searched.
 * @param fallbackUsed - Whether the results come from a fallback search that
 *   ran after the originally requested search returned nothing.
 * @param typeInference - Type inference result produced during the fallback.
 * @returns Formatted string listing results or a no-results message. When
 *   there are no results and types were inferred, the inferred (and any
 *   alternative) types are appended as suggestions.
 */
export function formatSearchResults(
  results: DocEntry[],
  query: string,
  version: string,
  fallbackUsed?: boolean,
  typeInference?: TypeInferenceResult,
): string {
  const inferredTypes = typeInference?.inferredTypes ?? [];
  const alternativeTypes = typeInference?.alternativeTypes ?? [];

  if (results.length === 0) {
    let message = `No results found for "${query}" in Python ${version} docs.`;
    if (inferredTypes.length > 0) {
      message += `\n\nSuggested types based on your query:\n`;
      message += inferredTypes.map((t) => ` - ${t}`).join("\n");
      if (alternativeTypes.length > 0) {
        message += `\n\nAlternative types:\n`;
        message += alternativeTypes.map((t) => ` - ${t}`).join("\n");
      }
    }
    return message;
  }

  const lines: string[] = [
    `Found ${results.length} result(s) for "${query}" in Python ${version} docs.`,
  ];

  if (fallbackUsed) {
    lines.push("");
    if (inferredTypes.length > 0) {
      lines.push("⚠️ Original search returned no results. Used type inference to find matches.");
      lines.push(` Inferred types: ${inferredTypes.join(", ")}`);
    } else {
      // Nothing was inferred, so the matches can only come from the search
      // without a type filter; do not claim inference found them and do not
      // print an empty "Inferred types:" line.
      lines.push(
        "⚠️ Original search returned no results. Showing matches from all documentation types instead.",
      );
    }
  }

  lines.push("");
  lines.push(...results.map((r) => `- ${r.name} [${r.type}] -> ${r.path}`));
  lines.push("");
  lines.push("Use fetch_python_doc with the path to get the full documentation.");

  return lines.join("\n");
}
/**
 * Mapping from a keyword to relevant documentation types and sample entries.
 */
export interface KeywordMapping {
  /** Lower-cased keyword extracted from entry names. */
  keyword: string;
  /** Types the keyword appears in, most frequent first (at most five). */
  types: string[];
  /** Up to three `"name (type)"` samples of entries containing the keyword. */
  sampleEntries: string[];
  /** Number of index entries whose name yields this keyword. */
  score: number;
}

/**
 * Enhanced search index with keyword-to-types mappings for type inference.
 */
export interface SearchIndex {
  version: string;
  generatedAt: string;
  totalEntries: number;
  /** Number of entries per documentation type. */
  typeStats: Record<string, number>;
  keywordMappings: KeywordMapping[];
}

/**
 * Result of type inference for a query.
 */
export interface TypeInferenceResult {
  query: string;
  inferredTypes: string[];
  confidence: number;
  matchingKeywords: string[];
  alternativeTypes: string[];
}

/** Words too generic to be useful as keywords. */
const NOISE_WORDS: ReadonlySet<string> = new Set([
  "and",
  "the",
  "for",
  "with",
  "from",
  "using",
  "objects",
  "object",
]);

/**
 * Extract meaningful keywords from an entry name.
 * Filters out short words (fewer than 3 characters), purely numeric tokens,
 * and common noise words. The result is lower-cased and de-duplicated, in
 * order of first appearance.
 * @internal Exported for testing only.
 */
export function extractKeywords(name: string): string[] {
  // Remove a leading section number like "1.", "2.1.", etc.
  const withoutSectionNumber = name.toLowerCase().replace(/^\d+(\.\d+)*\.?\s*/, "");

  const keywords = withoutSectionNumber
    .split(/[\s\-_.()<>,:]+/)
    .filter((k) => k.length > 2)
    // Numbers carry no type signal; the length check alone lets "3107" through.
    .filter((k) => !/^\d+$/.test(k))
    .filter((k) => !NOISE_WORDS.has(k));

  return [...new Set(keywords)];
}

/**
 * Calculate statistics about each documentation type.
 * @returns Record mapping each type name to the number of entries of that type.
 */
function calculateTypeStats(index: DocIndex): Record<string, number> {
  // Count in a Map so type names that collide with Object.prototype members
  // (e.g. "constructor") are not mistaken for existing counters.
  const counts = new Map<string, number>();
  for (const entry of index.entries) {
    counts.set(entry.type, (counts.get(entry.type) ?? 0) + 1);
  }
  return Object.fromEntries(counts);
}

/**
 * Build keyword mappings from index entries.
 * Maps each keyword to the types it appears in, ranked by frequency.
 *
 * For each keyword the types with at least two occurrences are kept (top
 * five). If no type reaches two occurrences, all of its types are kept
 * instead, so rare keywords still produce a mapping.
 *
 * @returns Mappings sorted by score, highest first.
 */
function buildKeywordMappings(index: DocIndex): KeywordMapping[] {
  const typeCountsByKeyword = new Map<string, Map<string, number>>();
  const samplesByKeyword = new Map<string, string[]>();

  for (const entry of index.entries) {
    for (const keyword of extractKeywords(entry.name)) {
      let typeCounts = typeCountsByKeyword.get(keyword);
      if (!typeCounts) {
        typeCounts = new Map<string, number>();
        typeCountsByKeyword.set(keyword, typeCounts);
      }
      typeCounts.set(entry.type, (typeCounts.get(entry.type) ?? 0) + 1);

      let samples = samplesByKeyword.get(keyword);
      if (!samples) {
        samples = [];
        samplesByKeyword.set(keyword, samples);
      }
      if (samples.length < 3) {
        samples.push(`${entry.name} (${entry.type})`);
      }
    }
  }

  const mappings: KeywordMapping[] = [];

  for (const [keyword, typeCounts] of typeCountsByKeyword) {
    const ranked = [...typeCounts.entries()].sort((a, b) => b[1] - a[1]);
    const score = ranked.reduce((sum, [, count]) => sum + count, 0);

    // Prefer types seen at least twice; fall back to every type when the
    // keyword is rare everywhere instead of discarding the keyword.
    const frequent = ranked.filter(([, count]) => count >= 2);
    const types = (frequent.length > 0 ? frequent : ranked).slice(0, 5).map(([type]) => type);

    mappings.push({
      keyword,
      types,
      sampleEntries: samplesByKeyword.get(keyword) ?? [],
      score,
    });
  }

  // Highest-scoring keywords first.
  return mappings.sort((a, b) => b.score - a.score);
}
+ * @param index - The documentation index to analyze + * @param version - The Python version + * @returns SearchIndex with keyword mappings for type inference + */ +export function createSearchIndex(index: DocIndex, version: string): SearchIndex { + const typeStats = calculateTypeStats(index); + const keywordMappings = buildKeywordMappings(index); + + return { + version, + generatedAt: new Date().toISOString(), + totalEntries: index.entries.length, + typeStats, + keywordMappings, + }; +} + +/** + * Infer the most relevant documentation types for a search query. + * @param query - The search query + * @param searchIndex - The search index with keyword mappings + * @returns Type inference result with suggested types and confidence + */ +export function inferTypesForQuery(query: string, searchIndex: SearchIndex): TypeInferenceResult { + const queryLower = query.toLowerCase(); + const queryKeywords = extractKeywords(query); + + // Find matching keywords in our index + const matchingMappings = searchIndex.keywordMappings.filter( + (m) => queryLower.includes(m.keyword) || queryKeywords.includes(m.keyword), + ); + + // Score and rank types + const typeScores = new Map(); + const matchingKeywords: string[] = []; + + for (const mapping of matchingMappings) { + matchingKeywords.push(mapping.keyword); + for (const type of mapping.types) { + const currentScore = typeScores.get(type) || 0; + // Weight by keyword frequency in the documentation + typeScores.set(type, currentScore + mapping.score); + } + } + + // Sort types by score + const sortedTypes = Array.from(typeScores.entries()).sort((a, b) => b[1] - a[1]); + + // Top types are the primary recommendations + const inferredTypes = sortedTypes.slice(0, 3).map(([type]) => type); + + // Alternative types are the next tier + const alternativeTypes = sortedTypes.slice(3, 6).map(([type]) => type); + + // Calculate confidence based on keyword matches and scores + const totalScore = sortedTypes.reduce((sum, [_, score]) => sum + 
score, 0); + const confidence = + matchingMappings.length > 0 + ? Math.min(100, matchingMappings.length * 10 + totalScore / 100) + : 0; + + return { + query, + inferredTypes, + confidence, + matchingKeywords: matchingKeywords.slice(0, 5), + alternativeTypes, + }; +} + +/** + * Get all available types from a search index. + * @param searchIndex - The search index + * @returns Array of types sorted by frequency + */ +export function getAvailableTypes(searchIndex: SearchIndex): string[] { + return Object.entries(searchIndex.typeStats) + .sort((a, b) => b[1] - a[1]) + .map(([type]) => type); +} diff --git a/src/tools.ts b/src/tools.ts index 046a5d0..e15c2bc 100644 --- a/src/tools.ts +++ b/src/tools.ts @@ -11,7 +11,7 @@ import { formatDocument, formatSearchResults } from "./formatters"; export function createTools(docService: DocService) { return { python_docs: tool({ - description: "Search Python docs (DevDocs).", + description: `Search Python docs with automatic type inference. If no results match your query, the tool automatically infers the best documentation types and retries. Returns: path [type] -> document_path. To get full documentation content for a path, use fetch_python_doc - do NOT use WebFetch. Available types include: "Language Reference", "Basics", "Python/C API", "Built-in Functions", "Library", "Tutorial", etc. You do not need to specify a type - the tool handles type inference automatically.`, args: { query: tool.schema.string(), version: tool.schema.enum(SUPPORTED_VERSIONS).optional(), @@ -21,13 +21,23 @@ export function createTools(docService: DocService) { async execute(args) { const version = args.version ?? 
DEFAULT_VERSION; const index = await docService.getIndex(version); - const results = docService.search(index, args.query, args.type, args.limit); - return formatSearchResults(results, args.query, version); + const searchIndex = await docService.getSearchIndex(version); + + // Use the enhanced search with fallback + const { results, fallbackUsed, typeInference } = await docService.searchWithFallback( + index, + searchIndex, + args.query, + args.type, + args.limit, + ); + + return formatSearchResults(results, args.query, version, fallbackUsed, typeInference); }, }), fetch_python_doc: tool({ - description: "Get Python doc page content as Markdown.", + description: `Fetch Python documentation content from DevDocs (mirrors official Python docs). Use this to retrieve full documentation for paths returned by python_docs. Do NOT use WebFetch - this tool provides the same content already converted to Markdown with proper formatting and anchor navigation support.`, args: { path: tool.schema.string(), version: tool.schema.enum(SUPPORTED_VERSIONS).optional(), @@ -52,5 +62,53 @@ export function createTools(docService: DocService) { ); }, }), + + suggest_python_doc_types: tool({ + description: `Preview which documentation types would be searched for a query. This is an optional debugging/exploration tool - python_docs already performs automatic type inference. Use this only if you want to see type inference details before searching, or if you want to understand why certain results were returned.`, + args: { + query: tool.schema.string(), + version: tool.schema.enum(SUPPORTED_VERSIONS).optional(), + }, + async execute(args) { + const version = args.version ?? 
DEFAULT_VERSION; + const searchIndex = await docService.getSearchIndex(version); + const inference = docService.suggestTypes(searchIndex, args.query); + + const lines: string[] = [`Type suggestions for "${args.query}" in Python ${version}:`, ""]; + + if (inference.inferredTypes.length > 0) { + lines.push("Recommended types (highest confidence):"); + lines.push( + ...inference.inferredTypes.map( + (t) => ` - ${t} (confidence: ${Math.round(inference.confidence)}%)`, + ), + ); + } + + if (inference.alternativeTypes.length > 0) { + lines.push(""); + lines.push("Alternative types:"); + lines.push(...inference.alternativeTypes.map((t) => ` - ${t}`)); + } + + if (inference.matchingKeywords.length > 0) { + lines.push(""); + lines.push(`Matching keywords: ${inference.matchingKeywords.join(", ")}`); + } + + if (inference.inferredTypes.length === 0) { + lines.push("No type suggestions available for this query."); + lines.push("Try searching without a type filter."); + } + + lines.push(""); + lines.push("Example usage:"); + lines.push( + `python_docs(query="${args.query}", type="${inference.inferredTypes[0] || "Language Reference"}")`, + ); + + return lines.join("\n"); + }, + }), }; } diff --git a/tests/doc-service.test.ts b/tests/doc-service.test.ts index 4f7e190..ca5246d 100644 --- a/tests/doc-service.test.ts +++ b/tests/doc-service.test.ts @@ -5,6 +5,7 @@ import { createDocService } from "../src/testing"; describe("DocService", () => { const mockCache: CacheManagerInterface = { getIndexPath: mock(() => "/mock/index.json"), + getSearchIndexPath: mock(() => "/mock/search-index.json"), getDocPath: mock(() => "/mock/doc.json"), isValid: mock(), read: mock(), @@ -14,6 +15,7 @@ describe("DocService", () => { const mockLogger: Logger = { info: mock(), + warn: mock(), error: mock(), }; diff --git a/tests/search-index.test.ts b/tests/search-index.test.ts new file mode 100644 index 0000000..b235356 --- /dev/null +++ b/tests/search-index.test.ts @@ -0,0 +1,333 @@ +import { describe, 
expect, it } from "bun:test"; +import { + createSearchIndex, + extractKeywords, + getAvailableTypes, + inferTypesForQuery, + type SearchIndex, +} from "../src/search-index"; +import type { DocIndex } from "../src/types"; + +describe("createSearchIndex", () => { + it("should handle empty index", () => { + const emptyIndex: DocIndex = { entries: [] }; + const result = createSearchIndex(emptyIndex, "3.12"); + + expect(result.version).toBe("3.12"); + expect(result.totalEntries).toBe(0); + expect(Object.keys(result.typeStats)).toHaveLength(0); + expect(result.keywordMappings).toHaveLength(0); + expect(result.generatedAt).toBeDefined(); + }); + + it("should calculate type statistics correctly for single entry", () => { + const singleEntryIndex: DocIndex = { + entries: [{ name: "asyncio", path: "library/asyncio.html", type: "Library" }], + }; + const result = createSearchIndex(singleEntryIndex, "3.12"); + + expect(result.typeStats).toEqual({ Library: 1 }); + expect(result.totalEntries).toBe(1); + }); + + it("should aggregate multiple entries with same type", () => { + const index: DocIndex = { + entries: [ + { name: "json", path: "library/json.html", type: "Library" }, + { name: "csv", path: "library/csv.html", type: "Library" }, + { name: "pickle", path: "library/pickle.html", type: "Library" }, + ], + }; + const result = createSearchIndex(index, "3.12"); + + expect(result.typeStats).toEqual({ Library: 3 }); + }); + + it("should track multiple different types", () => { + const index: DocIndex = { + entries: [ + { name: "asyncio", path: "library/asyncio.html", type: "Library" }, + { name: "os.path", path: "library/os.path.html", type: "File" }, + { name: "print()", path: "library/functions.html", type: "Built-in Functions" }, + ], + }; + const result = createSearchIndex(index, "3.12"); + + expect(result.typeStats).toEqual({ + Library: 1, + File: 1, + "Built-in Functions": 1, + }); + }); + + it("should extract keywords from entry names", () => { + const index: DocIndex = { 
+ entries: [ + { name: "asyncio.create_task", path: "asyncio.html", type: "Library" }, + { name: "json.loads()", path: "json.html", type: "Library" }, + ], + }; + const result = createSearchIndex(index, "3.12"); + + expect(result.keywordMappings.length).toBeGreaterThan(0); + const asyncioMapping = result.keywordMappings.find((m) => m.keyword === "asyncio"); + expect(asyncioMapping).toBeDefined(); + expect(asyncioMapping?.types).toContain("Library"); + }); + + it("should handle complex entry names with section numbers", () => { + const index: DocIndex = { + entries: [ + { name: "1. Introduction", path: "intro.html", type: "Tutorial" }, + { name: "2.1. Getting Started", path: "start.html", type: "Tutorial" }, + ], + }; + const result = createSearchIndex(index, "3.12"); + + // Keywords should not include section numbers + const keywords = result.keywordMappings.map((m) => m.keyword); + expect(keywords).not.toContain("1"); + expect(keywords).not.toContain("2"); + expect(keywords).toContain("introduction"); + expect(keywords).toContain("getting"); + expect(keywords).toContain("started"); + }); +}); + +describe("inferTypesForQuery", () => { + const mockSearchIndex: SearchIndex = { + version: "3.12", + generatedAt: new Date().toISOString(), + totalEntries: 10, + typeStats: { + Library: 5, + "Built-in Functions": 3, + Tutorial: 2, + }, + keywordMappings: [ + { + keyword: "asyncio", + types: ["Library", "Built-in Functions"], + sampleEntries: ["asyncio (Library)"], + score: 10, + }, + { + keyword: "json", + types: ["Library"], + sampleEntries: ["json (Library)"], + score: 5, + }, + { + keyword: "introduction", + types: ["Tutorial"], + sampleEntries: ["Introduction (Tutorial)"], + score: 8, + }, + { + keyword: "path", + types: ["Library", "File"], + sampleEntries: ["os.path (Library)"], + score: 7, + }, + ], + }; + + it("should return zero confidence for empty query", () => { + const result = inferTypesForQuery("", mockSearchIndex); + + expect(result.confidence).toBe(0); + 
expect(result.inferredTypes).toHaveLength(0); + expect(result.alternativeTypes).toHaveLength(0); + expect(result.matchingKeywords).toHaveLength(0); + }); + + it("should return zero confidence for non-matching query", () => { + const result = inferTypesForQuery("xyznonexistent", mockSearchIndex); + + expect(result.confidence).toBe(0); + expect(result.inferredTypes).toHaveLength(0); + }); + + it("should infer types from single keyword match", () => { + const result = inferTypesForQuery("asyncio", mockSearchIndex); + + expect(result.inferredTypes).toContain("Library"); + expect(result.inferredTypes).toContain("Built-in Functions"); + expect(result.matchingKeywords).toContain("asyncio"); + expect(result.confidence).toBeGreaterThan(0); + }); + + it("should return top 3 types as inferredTypes", () => { + const result = inferTypesForQuery("asyncio", mockSearchIndex); + + expect(result.inferredTypes.length).toBeLessThanOrEqual(3); + expect(result.inferredTypes[0]).toBe("Library"); // Highest score + }); + + it("should return types 4-6 as alternativeTypes when available", () => { + const result = inferTypesForQuery("path", mockSearchIndex); + + // path matches "Library" and "File" types + expect(result.inferredTypes.length).toBeGreaterThan(0); + }); + + it("should be case insensitive", () => { + const result1 = inferTypesForQuery("ASYNCIO", mockSearchIndex); + const result2 = inferTypesForQuery("asyncio", mockSearchIndex); + + expect(result1.inferredTypes).toEqual(result2.inferredTypes); + expect(result1.matchingKeywords).toContain("asyncio"); + }); + + it("should calculate confidence based on match count and scores", () => { + const result = inferTypesForQuery("asyncio tutorial", mockSearchIndex); + + // Should match both "asyncio" and "tutorial" keywords + expect(result.matchingKeywords.length).toBeGreaterThanOrEqual(1); + expect(result.confidence).toBeGreaterThan(0); + }); + + it("should limit matching keywords to max 5", () => { + const longQuery = "asyncio json 
introduction path tutorial"; + const result = inferTypesForQuery(longQuery, mockSearchIndex); + + expect(result.matchingKeywords.length).toBeLessThanOrEqual(5); + }); + + it("should include query in result", () => { + const query = "test query"; + const result = inferTypesForQuery(query, mockSearchIndex); + + expect(result.query).toBe(query); + }); +}); + +describe("getAvailableTypes", () => { + it("should return empty array for empty stats", () => { + const emptyIndex: SearchIndex = { + version: "3.12", + generatedAt: new Date().toISOString(), + totalEntries: 0, + typeStats: {}, + keywordMappings: [], + }; + const result = getAvailableTypes(emptyIndex); + + expect(result).toEqual([]); + }); + + it("should return single type for single entry", () => { + const index: SearchIndex = { + version: "3.12", + generatedAt: new Date().toISOString(), + totalEntries: 1, + typeStats: { Library: 1 }, + keywordMappings: [], + }; + const result = getAvailableTypes(index); + + expect(result).toEqual(["Library"]); + }); + + it("should sort types by frequency descending", () => { + const index: SearchIndex = { + version: "3.12", + generatedAt: new Date().toISOString(), + totalEntries: 10, + typeStats: { + Library: 5, + Tutorial: 2, + "Built-in Functions": 3, + }, + keywordMappings: [], + }; + const result = getAvailableTypes(index); + + expect(result[0]).toBe("Library"); // Most frequent (5) + expect(result[1]).toBe("Built-in Functions"); // Second (3) + expect(result[2]).toBe("Tutorial"); // Least frequent (2) + }); +}); + +describe("extractKeywords", () => { + it("should return empty array for empty string", () => { + const result = extractKeywords(""); + expect(result).toEqual([]); + }); + + it("should filter out short words (< 3 chars)", () => { + const result = extractKeywords("a ab abc"); + expect(result).toEqual(["abc"]); + }); + + it("should filter out common noise words", () => { + const result = extractKeywords("the and for with from using objects object"); + 
expect(result).toEqual([]); + }); + + it("should remove leading section numbers", () => { + const result1 = extractKeywords("1. Introduction"); + expect(result1).toEqual(["introduction"]); + + const result2 = extractKeywords("2.1. Getting Started"); + expect(result2).toEqual(["getting", "started"]); + + const result3 = extractKeywords("10.20.30. Section Name"); + expect(result3).toEqual(["section", "name"]); + }); + + it("should split on various separators", () => { + const result = extractKeywords("test_test.test test-test test.test()"); + expect(result).toContain("test"); + }); + + it("should handle angle brackets", () => { + const result = extractKeywords("JSON "); + expect(result).toEqual(["json", "string"]); + }); + + it("should handle parentheses", () => { + const result = extractKeywords("asyncio.create_task()"); + expect(result).toEqual(["asyncio", "create", "task"]); + }); + + it("should deduplicate keywords", () => { + const result = extractKeywords("test test test"); + expect(result).toEqual(["test"]); + }); + + it("should normalize to lowercase", () => { + const result = extractKeywords("ASYNC JSON Test"); + expect(result).toEqual(["async", "json", "test"]); + }); + + it("should handle complex dot notation", () => { + const result = extractKeywords("asyncio.create_task"); + expect(result).toEqual(["asyncio", "create", "task"]); + }); + + it("should handle hyphens", () => { + const result = extractKeywords("built-in functions"); + expect(result).toEqual(["built", "functions"]); + }); + + it("should handle colons", () => { + const result = extractKeywords("module: function"); + expect(result).toEqual(["module", "function"]); + }); + + it("should handle commas", () => { + const result = extractKeywords("a, b, c"); + expect(result).toEqual([]); // All are < 3 chars + }); + + it("should handle mixed case with numbers", () => { + const result = extractKeywords("Python 3.12 Documentation"); + expect(result).toEqual(["python", "documentation"]); + }); + + 
it("should preserve meaningful long words", () => { + const result = extractKeywords("Multiprocessing Shared Memory Concurrent Execution"); + expect(result).toEqual(["multiprocessing", "shared", "memory", "concurrent", "execution"]); + }); +});