
Commit a8d4077
Add debug print statements to write_parquet function for better traceability
kosiew committed Jan 13, 2025
1 parent 3cc3fca commit a8d4077
Showing 2 changed files with 22 additions and 2 deletions.
23 changes: 21 additions & 2 deletions datafusion/core/src/dataframe/parquet.rs
@@ -59,29 +59,41 @@ impl DataFrame {
         options: DataFrameWriteOptions,
         writer_options: Option<TableParquetOptions>,
     ) -> Result<Vec<RecordBatch>, DataFusionError> {
+        println!("==> Entered write_parquet function");
+
         if options.insert_op != InsertOp::Append {
             return not_impl_err!(
                 "{} is not implemented for DataFrame::write_parquet.",
                 options.insert_op
             );
         }
 
+        println!("==> Checked insert_op");
+
         let format = if let Some(parquet_opts) = writer_options {
+            println!("==> Using provided Parquet options");
             Arc::new(ParquetFormatFactory::new_with_options(parquet_opts))
         } else {
+            println!("==> Using default Parquet options");
             Arc::new(ParquetFormatFactory::new())
         };
 
         let file_type = format_as_file_type(format);
 
+        println!("==> Determined file type");
+
         let plan = if options.sort_by.is_empty() {
+            println!("==> No sort options provided");
             self.plan
         } else {
+            println!("==> Sorting by provided options");
             LogicalPlanBuilder::from(self.plan)
                 .sort(options.sort_by)?
                 .build()?
         };
 
+        println!("==> Built logical plan");
+
         let plan = LogicalPlanBuilder::copy_to(
             plan,
             path.into(),
@@ -90,12 +102,19 @@ impl DataFrame {
             options.partition_by,
         )?
         .build()?;
-        DataFrame {
+
+        println!("==> Built copy_to logical plan");
+
+        let result = DataFrame {
             session_state: self.session_state,
             plan,
         }
         .collect()
-        .await
+        .await;
+
+        println!("==> Collected results");
+
+        result
     }
 }
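With these prints in place, an ordinary write_parquet call surfaces the new trace on stdout. The snippet below is only an illustration, not part of the commit: it assumes a hypothetical example.csv input, the default DataFrameWriteOptions, and no TableParquetOptions, so the default-options and no-sort branches are the ones expected to print.

use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Hypothetical input; any registered table works the same way.
    let df = ctx.read_csv("example.csv", CsvReadOptions::new()).await?;

    // With default options the trace added above should read roughly:
    //   ==> Entered write_parquet function
    //   ==> Checked insert_op
    //   ==> Using default Parquet options
    //   ==> Determined file type
    //   ==> No sort options provided
    //   ==> Built logical plan
    //   ==> Built copy_to logical plan
    //   ==> execution_plan.collect
    //   ==> Collected results
    df.write_parquet("out/example.parquet", DataFrameWriteOptions::new(), None)
        .await?;

    Ok(())
}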
1 change: 1 addition & 0 deletions datafusion/physical-plan/src/execution_plan.rs
@@ -797,6 +797,7 @@ pub async fn collect(
     plan: Arc<dyn ExecutionPlan>,
     context: Arc<TaskContext>,
 ) -> Result<Vec<RecordBatch>> {
+    println!("==> execution_plan.collect");
     let stream = execute_stream(plan, context)?;
     crate::common::collect(stream).await
 }
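The same marker can also be observed when the physical plan is driven by hand, since DataFrame::collect funnels into this function. A minimal sketch, assuming the public datafusion::physical_plan::collect re-export and SessionContext::task_ctx():

use datafusion::error::Result;
use datafusion::physical_plan::collect;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    let df = ctx.sql("SELECT 1 AS x").await?;

    // Build the physical plan explicitly and collect it; the added
    // "==> execution_plan.collect" line prints once per collect call.
    let plan = df.create_physical_plan().await?;
    let batches = collect(plan, ctx.task_ctx()).await?;
    println!("collected {} batch(es)", batches.len());
    Ok(())
}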
