From b8973d6d284938f3ba534014b17bc3b1ebe2a0d2 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 17:43:36 +0900 Subject: [PATCH 1/3] enter_scope --- compiler/codegen/src/compile.rs | 212 +++++++++++++++++++++++++--- compiler/codegen/src/symboltable.rs | 8 ++ 2 files changed, 202 insertions(+), 18 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 6b8007c151..cbfb8139a5 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -176,7 +176,7 @@ pub fn compile_program( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program(ast, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation completed: {code:?}"); Ok(code) } @@ -191,7 +191,7 @@ pub fn compile_program_single( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program_single(&ast.body, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation completed: {code:?}"); Ok(code) } @@ -205,7 +205,7 @@ pub fn compile_block_expression( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_block_expr(&ast.body, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation completed: {code:?}"); Ok(code) } @@ -219,7 +219,7 @@ pub fn compile_expression( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_eval(ast, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); Ok(code) } @@ -404,6 +404,179 @@ impl Compiler<'_> { self.symbol_table_stack.pop().expect("compiler bug") } + /// Enter a new scope + // = compiler_enter_scope + fn enter_scope( + &mut self, + name: &str, + scope_type: SymbolTableType, + key: usize, // In RustPython, we use the index in symbol_table_stack as key + lineno: u32, + ) -> CompileResult<()> { + // Create location + let location = ruff_source_file::SourceLocation { + row: OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN), + column: OneIndexed::new(1).unwrap(), + }; + + // Allocate a new compiler unit + + // In Rust, we'll create the structure directly + let source_path = self.source_code.path.to_owned(); + + // Lookup symbol table entry using key (_PySymtable_Lookup) + let ste = if key < self.symbol_table_stack.len() { + &self.symbol_table_stack[key] + } else { + return Err(self.error(CodegenErrorType::SyntaxError( + "unknown symbol table entry".to_owned(), + ))); + }; + + // Use varnames from symbol table (already collected in definition order) + let varname_cache: IndexSet = ste.varnames.iter().cloned().collect(); + + // Build cellvars using dictbytype (CELL scope, sorted) + let mut cellvar_cache = IndexSet::default(); + let mut cell_names: Vec<_> = ste + .symbols + .iter() + .filter(|(_, s)| s.scope == SymbolScope::Cell) + .map(|(name, _)| name.clone()) + .collect(); + cell_names.sort(); + for name in cell_names { + cellvar_cache.insert(name); + } + + // Handle implicit __class__ cell if needed + if ste.needs_class_closure { + // Cook up an implicit __class__ cell + debug_assert_eq!(scope_type, SymbolTableType::Class); + cellvar_cache.insert("__class__".to_string()); + } + + // Handle implicit __classdict__ cell if needed + if ste.needs_classdict { + // Cook up an implicit __classdict__ cell + debug_assert_eq!(scope_type, SymbolTableType::Class); + cellvar_cache.insert("__classdict__".to_string()); + } + + // Build freevars using dictbytype (FREE scope, offset by cellvars size) + let mut freevar_cache = IndexSet::default(); + let mut free_names: Vec<_> = ste + .symbols + .iter() + .filter(|(_, s)| { + s.scope == SymbolScope::Free || s.flags.contains(SymbolFlags::FREE_CLASS) + }) + .map(|(name, _)| name.clone()) + .collect(); + free_names.sort(); + for name in free_names { + freevar_cache.insert(name); + } + + // Initialize u_metadata fields + let (flags, posonlyarg_count, arg_count, kwonlyarg_count) = match scope_type { + SymbolTableType::Module => (bytecode::CodeFlags::empty(), 0, 0, 0), + SymbolTableType::Class => (bytecode::CodeFlags::empty(), 0, 0, 0), + SymbolTableType::Function | SymbolTableType::Lambda => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, // Will be set later in enter_function + 0, // Will be set later in enter_function + 0, // Will be set later in enter_function + ), + SymbolTableType::Comprehension => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, + 1, // comprehensions take one argument (.0) + 0, + ), + SymbolTableType::TypeParams => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, + 0, + 0, + ), + }; + + // Get private name from parent scope + let private = if !self.code_stack.is_empty() { + self.code_stack.last().unwrap().private.clone() + } else { + None + }; + + // Create the new compilation unit + let code_info = ir::CodeInfo { + flags, + source_path: source_path.clone(), + private: private, + blocks: vec![ir::Block::default()], + current_block: BlockIdx(0), + metadata: ir::CodeUnitMetadata { + name: name.to_owned(), + qualname: None, // Will be set below + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: IndexMap::default(), + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN), + }, + static_attributes: if scope_type == SymbolTableType::Class { + Some(IndexSet::default()) + } else { + None + }, + in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), + }; + + // Push the old compiler unit on the stack (like PyCapsule) + // This happens before setting qualname + self.code_stack.push(code_info); + + // Set qualname after pushing (uses compiler_set_qualname logic) + if scope_type != SymbolTableType::Module { + self.set_qualname(); + } + + // Emit RESUME instruction + let _resume_loc = if scope_type == SymbolTableType::Module { + // Module scope starts with lineno 0 + ruff_source_file::SourceLocation { + row: OneIndexed::MIN, + column: OneIndexed::MIN, + } + } else { + location + }; + + // Set the source range for the RESUME instruction + // For now, just use an empty range at the beginning + self.current_source_range = TextRange::default(); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AtFuncStart as u32 + } + ); + + if scope_type == SymbolTableType::Module { + // This would be loc.lineno = -1 in CPython + // We handle this differently in RustPython + } + + Ok(()) + } + fn push_output( &mut self, flags: bytecode::CodeFlags, @@ -473,9 +646,18 @@ impl Compiler<'_> { fblock: Vec::with_capacity(MAXBLOCKS), }; self.code_stack.push(info); + + // Add RESUME instruction just like CPython's compiler_enter_scope + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AtFuncStart as u32 + } + ); } - fn pop_code_object(&mut self) -> CodeObject { + // compiler_exit_scope + fn exit_scope(&mut self) -> CodeObject { let table = self.pop_symbol_table(); assert!(table.sub_tables.is_empty()); let pop = self.code_stack.pop(); @@ -755,7 +937,7 @@ impl Compiler<'_> { } fn mangle<'a>(&self, name: &'a str) -> Cow<'a, str> { - // Use u_private from current code unit for name mangling + // Use private from current code unit for name mangling let private = self .code_stack .last() @@ -1758,13 +1940,7 @@ impl Compiler<'_> { .consts .insert_full(ConstantData::None); - // Emit RESUME instruction at function start - emit!( - self, - Instruction::Resume { - arg: bytecode::ResumeType::AtFuncStart as u32 - } - ); + // RESUME instruction is already emitted in push_output self.compile_statements(body)?; @@ -1778,7 +1954,7 @@ impl Compiler<'_> { } } - let code = self.pop_code_object(); + let code = self.exit_scope(); self.ctx = prev_ctx; // Prepare generic type parameters: @@ -2030,7 +2206,7 @@ impl Compiler<'_> { self.emit_return_value(); - let code = self.pop_code_object(); + let code = self.exit_scope(); self.ctx = prev_ctx; emit!(self, Instruction::LoadBuildClass); @@ -3820,7 +3996,7 @@ impl Compiler<'_> { self.compile_expression(body)?; self.emit_return_value(); - let code = self.pop_code_object(); + let code = self.exit_scope(); if self.build_closure(&code) { func_flags |= bytecode::MakeFunctionFlags::CLOSURE; } @@ -4369,7 +4545,7 @@ impl Compiler<'_> { self.emit_return_value(); // Fetch code for listcomp function: - let code = self.pop_code_object(); + let code = self.exit_scope(); self.ctx = prev_ctx; @@ -5076,7 +5252,7 @@ mod tests { .unwrap(); let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program(&ast, symbol_table).unwrap(); - compiler.pop_code_object() + compiler.exit_scope() } macro_rules! assert_dis_snapshot { diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 52b6bae644..3e222a7b70 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -48,6 +48,12 @@ pub struct SymbolTable { /// Variable names in definition order (parameters first, then locals) pub varnames: Vec, + + /// Whether this class scope needs an implicit __class__ cell + pub needs_class_closure: bool, + + /// Whether this class scope needs an implicit __classdict__ cell + pub needs_classdict: bool, } impl SymbolTable { @@ -60,6 +66,8 @@ impl SymbolTable { symbols: IndexMap::default(), sub_tables: vec![], varnames: Vec::new(), + needs_class_closure: false, + needs_classdict: false, } } From 8ade52a35fab9d96ff0a1f8ae742f00ea8ada60c Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 22:45:04 +0900 Subject: [PATCH 2/3] drop_class_free --- compiler/codegen/src/compile.rs | 28 +++++++++++++++++++++++---- compiler/codegen/src/symboltable.rs | 30 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index cbfb8139a5..63d9ea5e86 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -593,20 +593,37 @@ impl Compiler<'_> { let table = self.push_symbol_table(); - let cellvar_cache = table + // Build cellvars in sorted order (like CPython's dictbytype) + let mut cell_names: Vec<_> = table .symbols .iter() .filter(|(_, s)| s.scope == SymbolScope::Cell) - .map(|(var, _)| var.clone()) + .map(|(name, _)| name.clone()) .collect(); - let freevar_cache = table + cell_names.sort(); // Sort for deterministic order + let mut cellvar_cache: IndexSet = cell_names.into_iter().collect(); + + // Handle implicit __class__ cell if needed (like CPython) + if table.needs_class_closure { + cellvar_cache.insert("__class__".to_string()); + } + + // Handle implicit __classdict__ cell if needed (like CPython) + if table.needs_classdict { + cellvar_cache.insert("__classdict__".to_string()); + } + + // Build freevars in sorted order (like CPython's dictbytype) + let mut free_names: Vec<_> = table .symbols .iter() .filter(|(_, s)| { s.scope == SymbolScope::Free || s.flags.contains(SymbolFlags::FREE_CLASS) }) - .map(|(var, _)| var.clone()) + .map(|(name, _)| name.clone()) .collect(); + free_names.sort(); // Sort for deterministic order + let freevar_cache: IndexSet = free_names.into_iter().collect(); // Initialize varname_cache from SymbolTable::varnames let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); @@ -647,6 +664,9 @@ impl Compiler<'_> { }; self.code_stack.push(info); + // We just pushed a code object, and need the qualname + self.set_qualname(); + // Add RESUME instruction just like CPython's compiler_enter_scope emit!( self, diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 3e222a7b70..16a65bca11 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -236,6 +236,30 @@ fn analyze_symbol_table(symbol_table: &mut SymbolTable) -> SymbolTableResult { analyzer.analyze_symbol_table(symbol_table) } +/* Drop __class__ and __classdict__ from free variables in class scope + and set the appropriate flags. Equivalent to CPython's drop_class_free(). + See: https://github.com/python/cpython/blob/main/Python/symtable.c#L884 +*/ +fn drop_class_free(symbol_table: &mut SymbolTable) { + // Check if __class__ is used as a free variable + if let Some(class_symbol) = symbol_table.symbols.get("__class__") { + if class_symbol.scope == SymbolScope::Free { + symbol_table.needs_class_closure = true; + // Note: In CPython, the symbol is removed from the free set, + // but in RustPython we handle this differently during code generation + } + } + + // Check if __classdict__ is used as a free variable + if let Some(classdict_symbol) = symbol_table.symbols.get("__classdict__") { + if classdict_symbol.scope == SymbolScope::Free { + symbol_table.needs_classdict = true; + // Note: In CPython, the symbol is removed from the free set, + // but in RustPython we handle this differently during code generation + } + } +} + type SymbolMap = IndexMap; mod stack { @@ -322,6 +346,12 @@ impl SymbolTableAnalyzer { for symbol in symbol_table.symbols.values_mut() { self.analyze_symbol(symbol, symbol_table.typ, sub_tables)?; } + + // Handle class-specific implicit cells (like CPython) + if symbol_table.typ == SymbolTableType::Class { + drop_class_free(symbol_table); + } + Ok(()) } From 4db33c56008f705aaa698aa76af59f128bc9062e Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 22:30:45 +0900 Subject: [PATCH 3/3] push_output based on enter_scope --- compiler/codegen/src/compile.rs | 107 +++++++------------------------- 1 file changed, 21 insertions(+), 86 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 63d9ea5e86..14495499c5 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -513,7 +513,7 @@ impl Compiler<'_> { let code_info = ir::CodeInfo { flags, source_path: source_path.clone(), - private: private, + private, blocks: vec![ir::Block::default()], current_block: BlockIdx(0), metadata: ir::CodeUnitMetadata { @@ -585,95 +585,32 @@ impl Compiler<'_> { kwonlyarg_count: u32, obj_name: String, ) { - let source_path = self.source_code.path.to_owned(); - let first_line_number = self.get_source_line_number(); - - // Get the private name from current scope if exists - let private = self.code_stack.last().and_then(|info| info.private.clone()); - + // First push the symbol table let table = self.push_symbol_table(); + let scope_type = table.typ; - // Build cellvars in sorted order (like CPython's dictbytype) - let mut cell_names: Vec<_> = table - .symbols - .iter() - .filter(|(_, s)| s.scope == SymbolScope::Cell) - .map(|(name, _)| name.clone()) - .collect(); - cell_names.sort(); // Sort for deterministic order - let mut cellvar_cache: IndexSet = cell_names.into_iter().collect(); + // The key is the current position in the symbol table stack + let key = self.symbol_table_stack.len() - 1; - // Handle implicit __class__ cell if needed (like CPython) - if table.needs_class_closure { - cellvar_cache.insert("__class__".to_string()); - } + // Get the line number + let lineno = self.get_source_line_number().get(); - // Handle implicit __classdict__ cell if needed (like CPython) - if table.needs_classdict { - cellvar_cache.insert("__classdict__".to_string()); + // Call enter_scope which does most of the work + if let Err(e) = self.enter_scope(&obj_name, scope_type, key, lineno.to_u32()) { + // In the current implementation, push_output doesn't return an error, + // so we panic here. This maintains the same behavior. + panic!("enter_scope failed: {e:?}"); } - // Build freevars in sorted order (like CPython's dictbytype) - let mut free_names: Vec<_> = table - .symbols - .iter() - .filter(|(_, s)| { - s.scope == SymbolScope::Free || s.flags.contains(SymbolFlags::FREE_CLASS) - }) - .map(|(name, _)| name.clone()) - .collect(); - free_names.sort(); // Sort for deterministic order - let freevar_cache: IndexSet = free_names.into_iter().collect(); - - // Initialize varname_cache from SymbolTable::varnames - let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); - - // Qualname will be set later by set_qualname - let qualname = None; - - // Check if this is a class scope - let is_class_scope = table.typ == SymbolTableType::Class; - - let info = ir::CodeInfo { - flags, - source_path, - private, - blocks: vec![ir::Block::default()], - current_block: ir::BlockIdx(0), - metadata: ir::CodeUnitMetadata { - name: obj_name, - qualname, - consts: IndexSet::default(), - names: IndexSet::default(), - varnames: varname_cache, - cellvars: cellvar_cache, - freevars: freevar_cache, - fast_hidden: IndexMap::default(), - argcount: arg_count, - posonlyargcount: posonlyarg_count, - kwonlyargcount: kwonlyarg_count, - firstlineno: first_line_number, - }, - static_attributes: if is_class_scope { - Some(IndexSet::default()) - } else { - None - }, - in_inlined_comp: false, - fblock: Vec::with_capacity(MAXBLOCKS), - }; - self.code_stack.push(info); - - // We just pushed a code object, and need the qualname - self.set_qualname(); - - // Add RESUME instruction just like CPython's compiler_enter_scope - emit!( - self, - Instruction::Resume { - arg: bytecode::ResumeType::AtFuncStart as u32 - } - ); + // Override the values that push_output sets explicitly + // enter_scope sets default values based on scope_type, but push_output + // allows callers to specify exact values + if let Some(info) = self.code_stack.last_mut() { + info.flags = flags; + info.metadata.argcount = arg_count; + info.metadata.posonlyargcount = posonlyarg_count; + info.metadata.kwonlyargcount = kwonlyarg_count; + } } // compiler_exit_scope @@ -1960,8 +1897,6 @@ impl Compiler<'_> { .consts .insert_full(ConstantData::None); - // RESUME instruction is already emitted in push_output - self.compile_statements(body)?; // Emit None at end: