Skip to content

Commit

Permalink
Editions: Embed resolved features of descriptor.proto for pure python.
Browse files Browse the repository at this point in the history
Because pure python builds all descriptors at runtime via reflection, it's unable to parse options during the build of descriptor.proto (i.e. before we've built the options schemas).  We always lazily parse these options to avoid this, but that still means options can't be *used* during this build.  Since the current build process makes heavy use of features (which previously just relied on syntax), this poses a problem for editions.

To get around this, we just embed the resolved features directly into the gencode for this one file.  This will allow us to skip feature resolution for these descriptors and still consider features in their build.

PiperOrigin-RevId: 577495949
  • Loading branch information
mkruskal-google authored and copybara-github committed Oct 28, 2023
1 parent 57bb1e5 commit 63f4c50
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 4 deletions.
67 changes: 63 additions & 4 deletions python/google/protobuf/internal/descriptor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
import unittest
import warnings

from google.protobuf import descriptor_pb2
from google.protobuf.internal import api_implementation
from google.protobuf.internal import test_util
from google.protobuf import descriptor
from google.protobuf import descriptor_pb2
from google.protobuf import descriptor_pool
from google.protobuf import symbol_database
from google.protobuf import text_format
from google.protobuf.internal import api_implementation
from google.protobuf.internal import test_util

from google.protobuf.internal import _parameterized
from google.protobuf import unittest_custom_options_pb2
from google.protobuf import unittest_import_pb2
from google.protobuf import unittest_pb2
Expand Down Expand Up @@ -1169,7 +1171,6 @@ def testMakeDescriptorWithUnsignedIntField(self):
self.assertEqual(result.fields[0].cpp_type,
descriptor.FieldDescriptor.CPPTYPE_UINT64)


def testMakeDescriptorWithOptions(self):
descriptor_proto = descriptor_pb2.DescriptorProto()
aggregate_message = unittest_custom_options_pb2.AggregateMessage
Expand Down Expand Up @@ -1214,5 +1215,63 @@ def testJsonName(self):
json_names[index])


class FeaturesTest(_parameterized.TestCase):
  """Checks the resolved features embedded on descriptor_pb2's descriptors."""

  def _assertResolvedFeatures(self, resolved, presence, enum_type, encoding):
    """Asserts the three feature values carried by a `_features` object."""
    self.assertEqual(resolved.field_presence, presence)
    self.assertEqual(resolved.enum_type, enum_type)
    self.assertEqual(resolved.repeated_field_encoding, encoding)

  # TODO Add _features for upb and C++.
  @_parameterized.named_parameters([
      ('File', lambda: descriptor_pb2.DESCRIPTOR),
      ('Message', lambda: descriptor_pb2.FeatureSet.DESCRIPTOR),
      (
          'Enum',
          lambda: descriptor_pb2.FeatureSet.FieldPresence.DESCRIPTOR,
      ),
      (
          'Field',
          lambda: descriptor_pb2.FeatureSet.DESCRIPTOR.fields_by_name[
              'enum_type'
          ],
      ),
  ])
  @unittest.skipIf(
      api_implementation.Type() != 'python',
      'Features field is only available with the pure python implementation',
  )
  def testDescriptorProtoDefaultFeatures(self, desc):
    """Descriptors without overrides carry the default feature values."""
    feature_set = descriptor_pb2.FeatureSet
    # `desc` is a thunk so the descriptor lookup is deferred until the test
    # actually runs (i.e. after the skip decision has been made).
    self._assertResolvedFeatures(
        desc()._features,
        feature_set.FieldPresence.EXPLICIT,
        feature_set.EnumType.CLOSED,
        feature_set.RepeatedFieldEncoding.EXPANDED,
    )

  # TODO Add _features for upb and C++.
  @unittest.skipIf(
      api_implementation.Type() != 'python',
      'Features field is only available with the pure python implementation',
  )
  def testDescriptorProtoOverrideFeatures(self):
    """`SourceCodeInfo.Location.path` is expected to resolve to PACKED."""
    feature_set = descriptor_pb2.FeatureSet
    location = descriptor_pb2.SourceCodeInfo.Location.DESCRIPTOR
    path_field = location.fields_by_name['path']
    self._assertResolvedFeatures(
        path_field._features,
        feature_set.FieldPresence.EXPLICIT,
        feature_set.EnumType.CLOSED,
        feature_set.RepeatedFieldEncoding.PACKED,
    )


if __name__ == '__main__':
unittest.main()
109 changes: 109 additions & 0 deletions src/google/protobuf/compiler/python/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "absl/container/flat_hash_map.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/memory/memory.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
Expand All @@ -50,6 +51,8 @@
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/descriptor_legacy.h"
#include "google/protobuf/descriptor_visitor.h"
#include "google/protobuf/dynamic_message.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/io/strtod.h"
#include "google/protobuf/io/zero_copy_stream.h"
Expand Down Expand Up @@ -285,6 +288,7 @@ bool Generator::Generate(const FileDescriptor* file,
PrintAllEnumsInFile();
PrintMessageDescriptors();
FixForeignFieldsInDescriptors();
PrintResolvedFeatures();
printer_->Outdent();
printer_->Print("else:\n");
printer_->Indent();
Expand Down Expand Up @@ -416,6 +420,111 @@ void Generator::PrintImports() const {
printer_->Print("\n");
}

template <typename DescriptorT>
std::string Generator::GetResolvedFeatures(
const DescriptorT& descriptor) const {
if (!GeneratingDescriptorProto()) {
// Everything but descriptor.proto can handle proper feature resolution.
return "None";
}

// Load the resolved features from our pool.
const Descriptor* feature_set = file_->pool()->FindMessageTypeByName(
FeatureSet::GetDescriptor()->full_name());
auto message_factory = absl::make_unique<DynamicMessageFactory>();
auto features =
absl::WrapUnique(message_factory->GetPrototype(feature_set)->New());
features->ParseFromString(
GetResolvedSourceFeatures(descriptor).SerializeAsString());

// Collect all of the resolved features.
std::vector<std::string> feature_args;
const Reflection* reflection = features->GetReflection();
std::vector<const FieldDescriptor*> fields;
reflection->ListFields(*features, &fields);
for (const auto* field : fields) {
// Assume these are all enums. If we add non-enum global features or any
// python-specific features, we will need to come back and improve this
// logic.
ABSL_CHECK(field->enum_type() != nullptr)
<< "Unexpected non-enum field found!";
if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
// Skip any source-retention features.
continue;
}
const EnumDescriptor* enm = field->enum_type();
const EnumValueDescriptor* value =
enm->FindValueByNumber(reflection->GetEnumValue(*features, field));

feature_args.emplace_back(absl::StrCat(
field->name(), "=",
absl::StrFormat("%s.values_by_name[\"%s\"].number",
ModuleLevelDescriptorName(*enm), value->name())));
}
return absl::StrCat("_ResolvedFeatures(", absl::StrJoin(feature_args, ","),
")");
}

// Emits Python that defines the `_ResolvedFeatures` helper class and assigns a
// `_features` attribute to the file descriptor, to each visited message and
// enum descriptor, and to their fields, oneofs, extensions and enum values.
// Aborts via ABSL_CHECK unless descriptor.proto is the file being generated.
void Generator::PrintResolvedFeatures() const {
// Since features are used during the descriptor build, it's impossible to do
// feature resolution at the normal point for descriptor.proto. Instead, we do
// feature resolution here in the generator, and embed a custom object on all
// of the generated descriptors. This object should act like any other
// FeatureSet message on normal descriptors, but will never have to be
// resolved by the python runtime.
ABSL_CHECK(GeneratingDescriptorProto());
// First emit the `_ResolvedFeatures` class definition, followed by the
// assignment of the file-level features to the file descriptor
// (`kDescriptorKey`).
printer_->Emit({{"resolved_features", GetResolvedFeatures(*file_)},
{"descriptor_name", kDescriptorKey}},
R"py(
class _ResolvedFeatures:
def __init__(self, features = None, **kwargs):
if features:
for k, v in features.FIELDS.items():
setattr(self, k, getattr(features, k))
else:
for k, v in kwargs.items():
setattr(self, k, v)
$descriptor_name$._features = $resolved_features$
)py");

// MAKE_NESTED(desc, CPP_FIELD, PY_FIELD) expands to a lambda that emits one
// `<descriptor>.<PY_FIELD>[<i>]._features = ...` line for each of the
// `desc.CPP_FIELD##_count()` children, using the C++ index `i` as the Python
// index.
// NOTE(review): `$descriptor_name$` is not among the variables the lambda
// passes to Emit(); presumably it is resolved from the enclosing Emit() call
// that invokes the lambda -- confirm against io::Printer.
#define MAKE_NESTED(desc, CPP_FIELD, PY_FIELD) \
[&] { \
for (int i = 0; i < desc.CPP_FIELD##_count(); ++i) { \
printer_->Emit( \
{{"resolved_subfeatures", GetResolvedFeatures(*desc.CPP_FIELD(i))}, \
{"index", absl::StrCat(i)}, \
{"field", PY_FIELD}}, \
"$descriptor_name$.$field$[$index$]._features = " \
"$resolved_subfeatures$\n"); \
} \
}

// For each message descriptor handed to the callback: assign the message's own
// features, then those of its fields, oneofs and extensions.
// NOTE(review): presumably VisitDescriptors also reaches nested messages --
// confirm in descriptor_visitor.h.
internal::VisitDescriptors(*file_, [&](const Descriptor& msg) {
printer_->Emit(
{{"resolved_features", GetResolvedFeatures(msg)},
{"descriptor_name", ModuleLevelDescriptorName(msg)},
{"field_features", MAKE_NESTED(msg, field, "fields")},
{"oneof_features", MAKE_NESTED(msg, oneof_decl, "oneofs")},
{"ext_features", MAKE_NESTED(msg, extension, "extensions")}},
R"py(
$descriptor_name$._features = $resolved_features$
$field_features$
$oneof_features$
$ext_features$
)py");
});
// For each enum descriptor handed to the callback: assign the enum's own
// features, then those of its values.
internal::VisitDescriptors(*file_, [&](const EnumDescriptor& enm) {
printer_->Emit({{"resolved_features", GetResolvedFeatures(enm)},
{"descriptor_name", ModuleLevelDescriptorName(enm)},
{"value_features", MAKE_NESTED(enm, value, "values")}},
R"py(
$descriptor_name$._features = $resolved_features$
$value_features$
)py");
});
// The helper macro is only meaningful inside this function.
#undef MAKE_NESTED
}

// Prints the single file descriptor for this file.
void Generator::PrintFileDescriptor() const {
absl::flat_hash_map<absl::string_view, std::string> m;
Expand Down
3 changes: 3 additions & 0 deletions src/google/protobuf/compiler/python/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
GeneratorOptions ParseParameter(absl::string_view parameter,
std::string* error) const;
void PrintImports() const;
// Returns the Python expression assigned to `descriptor`'s `_features`
// attribute in the generated code: "None" unless descriptor.proto is being
// generated, otherwise a `_ResolvedFeatures(...)` constructor call.
template <typename DescriptorT>
std::string GetResolvedFeatures(const DescriptorT& descriptor) const;
// Prints the `_ResolvedFeatures` helper class and the `_features` assignments
// for the file and the descriptors it contains.  Checks (ABSL_CHECK) that
// descriptor.proto is the file being generated.
void PrintResolvedFeatures() const;
void PrintFileDescriptor() const;
void PrintAllEnumsInFile() const;
void PrintNestedEnums(const Descriptor& descriptor) const;
Expand Down

0 comments on commit 63f4c50

Please sign in to comment.