@@ -172,14 +172,19 @@ def from_iceberg(source_url, target_url, progress: bool = False):
172172 logger .info (f"Running Iceberg copy with chunksize={ chunksize } " )
173173 engine = sa .create_engine (str (cratedb_url ))
174174
175- # This conversion to pandas is zero-copy,
176- # so we can utilize their SQL utils for free.
177- # https://github.com/pola-rs/polars/issues/7852
175+ # Note: The conversion to pandas is zero-copy,
176+ # so we can utilize their SQL utils for free.
177+ # https://github.com/pola-rs/polars/issues/7852
178178 # Note: This code also uses the most efficient `insert_bulk` method with CrateDB.
179- # https://cratedb.com/docs/sqlalchemy-cratedb/dataframe.html#efficient-insert-operations-with-pandas
179+ # https://cratedb.com/docs/sqlalchemy-cratedb/dataframe.html#efficient-insert-operations-with-pandas
180180 # Note: `collect_batches()` is marked as unstable and slower than native sinks;
181- # consider native Polars sinks (e.g., write_database) as a faster alternative if available.
182- # https://github.com/crate/cratedb-toolkit/pull/444#discussion_r2825382887
181+ # consider native Polars sinks (e.g., write_database) as a faster alternative if available.
182+ # https://github.com/crate/cratedb-toolkit/pull/444#discussion_r2825382887
183+ # Note: This variant appeared to be much slower. TODO: Revisit and investigate why.
184+ # table.to_polars().collect(streaming=True).write_database(
185+ # table_name=cratedb_table.fullname, connection=engine, if_table_exists="replace" # noqa: ERA001
186+ # Note: When `collect_batches` yields more than one batch, the first batch must use the
187+ # user-specified `if_exists`, but subsequent batches must use "append".
183188 with pl .Config (streaming_chunk_size = chunksize ):
184189 table = iceberg_address .load_table ()
185190 for batch in table .collect_batches (engine = "streaming" , chunk_size = chunksize ):
@@ -192,14 +197,6 @@ def from_iceberg(source_url, target_url, progress: bool = False):
192197 chunksize = chunksize ,
193198 method = insert_bulk ,
194199 )
195-
196- # Note: This variant was much slower.
197- """
198- table.to_polars().collect(streaming=True).write_database(
199- table_name=cratedb_table.fullname, connection=engine, if_table_exists="replace"
200- )
201- """
202-
203200 return True
204201
205202
0 commit comments