From 4b216a429999e5218643b85944669e62f35b9a2b Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Fri, 8 Mar 2024 14:53:57 -0500 Subject: [PATCH] Update README.md --- README.md | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index cde8c79..d61b10b 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,35 @@ customFormat = type("", (), dict({'SEGMENT_DELIM': '~', 'ELEMENT_DELIM': '*', 'S ``` +#### EDI as a Table for SQL + +```python +""" +Look at all data reference -> https://justransform.com/edi-essentials/edi-structure/ + (1) Including control header / ISA & IEA segments +""" +from pyspark.sql.functions import input_file_name + +( df.withColumn("filename", input_file_name()).rdd + .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) + .map(lambda x: (x[0], EDI(x[1]))) + .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()]) + .flatMap(lambda x: x) + .toDF()).show() + +""" ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char|filename| ++--------------------+----------+--------------------------+--------------+------------+-----------------------------+--------+ +|ISA*00* ...| 0| *| 17| ISA| :|file:///| +|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :|file:///| +|ST*837*000000001*...| 2| *| 4| ST| :|file:///| +|BHT*0019*00*73490...| 3| *| 7| BHT| :|file:///| +|NM1*41*2*CLEARING...| 4| *| 10| NM1| :|file:///| +|PER*IC*CLEARINGHO...| 5| *| 7| PER| :|file:///| +|NM1*40*2*12345678...| 6| *| 10| NM1| :|file:///| +``` + ```python from databricksx12.edi import * @@ -103,30 +132,8 @@ ediDF.show() +-----------------+ """ -"""" -Look at all data refernce -> https://justransform.com/edi-essentials/edi-structure/ - (1) Including control header / ISA & IEA segments -""" -( 
df.withColumn("filename", input_file_name()).rdd - .map(lambda x: (x.asDict().get("filename"),x.asDict().get("value"))) - .map(lambda x: (x[0], EDI(x[1]))) - .map(lambda x: [{**{"filename": x[0]}, **y} for y in x[1].toRows()]) - .flatMap(lambda x: x) - .toDF()).show() """ -Includes filename column but not shown below -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+ -| row_data|row_number|segment_element_delim_char|segment_length|segment_name|segment_subelement_delim_char| -+--------------------+----------+--------------------------+--------------+------------+-----------------------------+ -|ISA*00* ...| 0| *| 17| ISA| :| -|GS*HC*CLEARINGHOU...| 1| *| 9| GS| :| -|ST*837*000000001*...| 2| *| 4| ST| :| -|BHT*0019*00*73490...| 3| *| 7| BHT| :| -|NM1*41*2*CLEARING...| 4| *| 10| NM1| :| -|PER*IC*CLEARINGHO...| 5| *| 7| PER| :| -|NM1*40*2*12345678...| 6| *| 10| NM1| :| -""" # (2) Individual Transactions (Functional header) / ST & SE segments trxDF = ( df.withColumn("filename", input_file_name()).rdd