3333from numba .extending import (typeof_impl , unbox , register_model , models ,
3434 NativeValue , box , intrinsic )
3535from numba import types
36- from numba .core import cgutils
36+ from numba .core import cgutils , typing
3737from numba .np import numpy_support
3838from numba .core .typing import signature
3939from numba .core .boxing import box_array , unbox_array , box_list
4848from sdc .hiframes .pd_series_ext import SeriesType
4949from sdc .hiframes .pd_series_type import _get_series_array_type
5050
51+ from sdc .hiframes .pd_dataframe_ext import get_structure_maps
52+
5153from .. import hstr_ext
5254import llvmlite .binding as ll
5355from llvmlite import ir as lir
5860
5961@typeof_impl .register (pd .DataFrame )
6062def typeof_pd_dataframe (val , c ):
63+
6164 col_names = tuple (val .columns .tolist ())
6265 # TODO: support other types like string and timestamp
6366 col_types = get_hiframes_dtypes (val )
6467 index_type = _infer_index_type (val .index )
68+ column_loc , _ , _ = get_structure_maps (col_types , col_names )
6569
66- return DataFrameType (col_types , index_type , col_names , True )
70+ return DataFrameType (col_types , index_type , col_names , True , column_loc = column_loc )
6771
6872
6973# register series types for import
@@ -86,21 +90,55 @@ def unbox_dataframe(typ, val, c):
8690 # create dataframe struct and store values
8791 dataframe = cgutils .create_struct_proxy (typ )(c .context , c .builder )
8892
89- column_tup = c .context .make_tuple (
90- c .builder , types .UniTuple (string_type , n_cols ), column_strs )
93+ errorptr = cgutils .alloca_once_value (c .builder , cgutils .false_bit )
9194
92- # this unboxes all DF columns so that no column unboxing occurs later
93- for col_ind in range (n_cols ):
94- series_obj = c .pyapi .object_getattr_string (val , typ .columns [col_ind ])
95- arr_obj = c .pyapi .object_getattr_string (series_obj , "values" )
96- ty_series = typ .data [col_ind ]
97- if isinstance (ty_series , types .Array ):
98- native_val = unbox_array (typ .data [col_ind ], arr_obj , c )
99- elif ty_series == string_array_type :
100- native_val = unbox_str_series (string_array_type , series_obj , c )
95+ col_list_type = types .List (string_type )
96+ ok , inst = listobj .ListInstance .allocate_ex (c .context , c .builder , col_list_type , n_cols )
10197
102- dataframe .data = c .builder .insert_value (
103- dataframe .data , native_val .value , col_ind )
98+ with c .builder .if_else (ok , likely = True ) as (if_ok , if_not_ok ):
99+ with if_ok :
100+ inst .size = c .context .get_constant (types .intp , n_cols )
101+ for i , column_str in enumerate (column_strs ):
102+ inst .setitem (c .context .get_constant (types .intp , i ), column_str , incref = False )
103+ dataframe .columns = inst .value
104+
105+ with if_not_ok :
106+ c .builder .store (cgutils .true_bit , errorptr )
107+
108+ # If an error occurred, drop the whole native list
109+ with c .builder .if_then (c .builder .load (errorptr )):
110+ c .context .nrt .decref (c .builder , col_list_type , inst .value )
111+
112+ _ , data_typs_map , types_order = get_structure_maps (typ .data , typ .columns )
113+
114+ for col_typ in types_order :
115+ type_id , col_indices = data_typs_map [col_typ ]
116+ n_type_cols = len (col_indices )
117+ list_type = types .List (col_typ )
118+ ok , inst = listobj .ListInstance .allocate_ex (c .context , c .builder , list_type , n_type_cols )
119+
120+ with c .builder .if_else (ok , likely = True ) as (if_ok , if_not_ok ):
121+ with if_ok :
122+ inst .size = c .context .get_constant (types .intp , n_type_cols )
123+ for i , col_idx in enumerate (col_indices ):
124+ series_obj = c .pyapi .object_getattr_string (val , typ .columns [col_idx ])
125+ arr_obj = c .pyapi .object_getattr_string (series_obj , "values" )
126+ ty_series = typ .data [col_idx ]
127+ if isinstance (ty_series , types .Array ):
128+ native_val = unbox_array (typ .data [col_idx ], arr_obj , c )
129+ elif ty_series == string_array_type :
130+ native_val = unbox_str_series (string_array_type , series_obj , c )
131+
132+ inst .setitem (c .context .get_constant (types .intp , i ), native_val .value , incref = False )
133+
134+ dataframe .data = c .builder .insert_value (dataframe .data , inst .value , type_id )
135+
136+ with if_not_ok :
137+ c .builder .store (cgutils .true_bit , errorptr )
138+
139+ # If an error occurred, drop the whole native list
140+ with c .builder .if_then (c .builder .load (errorptr )):
141+ c .context .nrt .decref (c .builder , list_type , inst .value )
104142
105143 # TODO: support unboxing index
106144 if typ .index == types .none :
@@ -113,7 +151,6 @@ def unbox_dataframe(typ, val, c):
113151 index_data = c .pyapi .object_getattr_string (index_obj , "_data" )
114152 dataframe .index = unbox_array (typ .index , index_data , c ).value
115153
116- dataframe .columns = column_tup
117154 dataframe .parent = val
118155
119156 # increase refcount of stored values
@@ -122,7 +159,7 @@ def unbox_dataframe(typ, val, c):
122159 for var in column_strs :
123160 c .context .nrt .incref (c .builder , string_type , var )
124161
125- return NativeValue (dataframe ._getvalue ())
162+ return NativeValue (dataframe ._getvalue (), is_error = c . builder . load ( errorptr ) )
126163
127164
128165def get_hiframes_dtypes (df ):
0 commit comments