-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Invalid datatype error on HPE Cray #708
Comments
It would be interesting to know whether that occurs during the run or at the end; ideally, a stack trace would also help. I see two possible places:
|
|
While I do not think this will fix it, here is some cleanup and a check before calling `MPI_Type_free`:
diff --git i/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h w/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h
index d146865c9..1a66ea3f4 100644 dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h
--- i/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h
+++ w/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h
@@ -75,8 +75,6 @@ dart_ret_t dart__mpi__op_fini();
*/
#define MAX_CONTIG_ELEMENTS (INT_MAX)
-#define DART_MPI_TYPE_UNDEFINED (MPI_Datatype)MPI_UNDEFINED
-
typedef enum {
DART_KIND_BASIC = 0,
DART_KIND_STRIDED,
@@ -190,7 +188,7 @@ MPI_Datatype dart__mpi__datatype_maxtype(dart_datatype_t dart_type) {
dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type);
MPI_Datatype res;
if (dart__mpi__datatype_iscontiguous(dart_type)) {
- if (dts->contiguous.max_type == DART_MPI_TYPE_UNDEFINED) {
+ if (dts->contiguous.max_type == MPI_DATATYPE_NULL) {
dts->contiguous.max_type = dart__mpi__datatype_create_max_datatype(
dts->contiguous.mpi_type);
}
diff --git i/dart-impl/mpi/src/dart_communication.c w/dart-impl/mpi/src/dart_communication.c
index b4da40d73..7f3330360 100644 dart-impl/mpi/src/dart_communication.c
--- i/dart-impl/mpi/src/dart_communication.c
+++ w/dart-impl/mpi/src/dart_communication.c
@@ -391,11 +391,11 @@ dart__mpi__put_basic(
CHECK_MPI_RET(
dart__mpi__put(src_ptr,
nchunks,
- dart__mpi__datatype_struct(dtype)->contiguous.max_type,
+ dart__mpi__datatype_maxtype(dtype),
team_unit_id.id,
offset,
nchunks,
- dart__mpi__datatype_struct(dtype)->contiguous.max_type,
+ dart__mpi__datatype_maxtype(dtype),
win,
reqs, num_reqs),
"MPI_Put");
diff --git i/dart-impl/mpi/src/dart_mpi_types.c w/dart-impl/mpi/src/dart_mpi_types.c
index e90c380f3..2ab8a21c9 100644 dart-impl/mpi/src/dart_mpi_types.c
--- i/dart-impl/mpi/src/dart_mpi_types.c
+++ w/dart-impl/mpi/src/dart_mpi_types.c
@@ -312,7 +312,7 @@ dart_type_create_custom(
new_struct->contiguous.size = num_bytes;
new_struct->contiguous.mpi_type = new_mpi_dtype;
// max_type will be created on-demand for custom types
- new_struct->contiguous.max_type = DART_MPI_TYPE_UNDEFINED;
+ new_struct->contiguous.max_type = MPI_DATATYPE_NULL;
*newtype = (dart_datatype_t)new_struct;
DART_LOG_TRACE("Created new custom data type %p with %zu bytes`",
@@ -343,7 +343,7 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr)
MPI_Type_free(&dart_type->indexed.mpi_type);
} else if (dart_type->kind == DART_KIND_CUSTOM) {
MPI_Type_free(&dart_type->contiguous.mpi_type);
- if (dart_type->contiguous.max_type != DART_MPI_TYPE_UNDEFINED) {
+ if (dart_type->contiguous.max_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&dart_type->contiguous.max_type);
}
}
@@ -357,7 +357,8 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr)
static void destroy_basic_type(dart_datatype_t dart_type_id)
{
dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(dart_type_id);
- MPI_Type_free(&dart_type->contiguous.max_type);
+ if (dart_type->contiguous.max_type != MPI_DATATYPE_NULL)
+ MPI_Type_free(&dart_type->contiguous.max_type);
dart_type->contiguous.max_type = MPI_DATATYPE_NULL;
} |
Can you please post a PR for this? 👍 |
Sure. Do you think it fixes anything related to my problem here? |
Hard to say, the confusion of |
Actually, the last two lines of the patch might be the culprit (https://github.com/dash-project/dash/pull/709/files#diff-f99e41ced414d50f5b467c4e54685f19R360). |
But this should be |
I will push the changes to SPEC and kindly ask whether this is fixed on HPE Cray.
Got this error from a SPEC reporter:
Any ideas where this comes from, or what I could request from the reporter?
The text was updated successfully, but these errors were encountered: