Skip to content

Commit

Permalink
Update the FedSQL container to handle sensitive column names with a prefix.
Browse files Browse the repository at this point in the history

Device uploads include a <query name>/ prefix for all columns.

Change-Id: I2a0c6883daecb21ad277df08fbb2a96b430111b8
  • Loading branch information
zpgong committed Dec 26, 2024
1 parent aaa3b6e commit 04e5989
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
7 changes: 5 additions & 2 deletions containers/fed_sql/sensitive_columns.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/status/status.h"
#include "absl/log/log.h"
#include "absl/status/statusor.h"
#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
Expand Down Expand Up @@ -60,7 +60,10 @@ absl::Status HashColumn(TensorColumn& column, absl::string_view key) {
absl::Status HashSensitiveColumns(std::vector<TensorColumn>& contents,
absl::string_view key) {
for (TensorColumn& column : contents) {
if (absl::StartsWith(column.column_schema_.name(), "SENSITIVE_")) {
// Client upload columns are prefixed by <query_name>/ while server-side
// data isn't.
if (absl::StartsWith(column.column_schema_.name(), "SENSITIVE_") ||
absl::StrContains(column.column_schema_.name(), "/SENSITIVE_")) {
if (column.column_schema_.type() !=
ExampleQuerySpec_OutputVectorSpec_DataType_STRING &&
column.column_schema_.type() !=
Expand Down
26 changes: 25 additions & 1 deletion containers/fed_sql/sensitive_columns_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,30 @@ TEST(SensitiveColumnsTest, SensitiveColumnWithBytesType) {
EXPECT_THAT(column_span, UnorderedElementsAre(*hash));
}

// Checks that a column whose name has a "<query name>/" prefix in front of
// "SENSITIVE_" is still hashed: after HashSensitiveColumns succeeds, the
// column is expected to hold the keyed hash of each original value.
TEST(SensitiveColumnsTest, SensitiveColumnWithPrefix) {
  std::string first_value = "sensitive_value1";
  std::string second_value = "sensitive_value2";

  // Single input column named the way a prefixed client upload would be.
  absl::StatusOr<TensorColumn> prefixed_column = CreateStringTensorColumn(
      "query-name/SENSITIVE_str_col", {first_value, second_value});
  CHECK_OK(prefixed_column);
  std::vector<TensorColumn> input_columns;
  input_columns.push_back(std::move(*prefixed_column));

  // Expected post-hash contents, computed with the same key that is handed to
  // HashSensitiveColumns below.
  std::string hash_key = "test_key";
  absl::StatusOr<std::string> first_hash = KeyedHash(first_value, hash_key);
  CHECK_OK(first_hash);
  absl::StatusOr<std::string> second_hash = KeyedHash(second_value, hash_key);
  CHECK_OK(second_hash);

  ASSERT_TRUE(HashSensitiveColumns(input_columns, hash_key).ok());
  ASSERT_EQ(input_columns.size(), 1);

  // Element order is not asserted; only the set of hashed values is checked.
  absl::Span<const absl::string_view> hashed_values =
      input_columns[0].tensor_.AsSpan<absl::string_view>();
  EXPECT_THAT(hashed_values, UnorderedElementsAre(*first_hash, *second_hash));
}

TEST(SensitiveColumnsTest, SensitiveColumnWithInvalidType) {
absl::StatusOr<TensorColumn> tensor_column =
CreateStringTensorColumn("SENSITIVE_invalid_col", {"unused"});
Expand Down Expand Up @@ -219,7 +243,7 @@ TEST(SensitiveColumnsTest, MultipleSensitiveColumns) {
CreateStringTensorColumn("SENSITIVE_col_1", {sensitive_value1});
CHECK_OK(tensor_column1);
absl::StatusOr<TensorColumn> tensor_column2 =
CreateStringTensorColumn("SENSITIVE_col_2", {sensitive_value2});
CreateStringTensorColumn("prefix/SENSITIVE_col_2", {sensitive_value2});
CHECK_OK(tensor_column2);

std::vector<TensorColumn> columns;
Expand Down

0 comments on commit 04e5989

Please sign in to comment.