diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 51c2543dee..e5c6b32ede 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -99,6 +99,7 @@ require_once __DIR__ . '/src/git/WP_Git_Filesystem.php';
 
 require_once __DIR__ . '/src/git/WP_Git_Server.php';
 require_once __DIR__ . '/src/git/WP_Git_Merge_Engine.php';
+require_once __DIR__ . '/src/git/functions.php';
 
 require_once __DIR__ . '/src/WP_Data_Liberation_HTML_Processor.php';
 require_once __DIR__ . '/src/utf8_decoder.php';
diff --git a/packages/playground/data-liberation/src/git/WP_Git_Client.php b/packages/playground/data-liberation/src/git/WP_Git_Client.php
index 3a80c1323b..b5ffa465a6 100644
--- a/packages/playground/data-liberation/src/git/WP_Git_Client.php
+++ b/packages/playground/data-liberation/src/git/WP_Git_Client.php
@@ -91,6 +91,7 @@ public function force_push_one_commit() {
 		$pack_objects = [];
 		foreach($delta as $oid) {
 			// @TODO: just stream the saved object instead of re-reading and re-encoding it.
+			$this->index->read_object($oid);
 			$body = '';
 			do {
 				$body .= $this->index->get_body_chunk();
diff --git a/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php b/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php
--- a/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php
+++ b/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php
@@ -165,13 +165,37 @@ static public function encode_packet_lines(array $payloads): string {
 	}
 
+	/**
+	 * Encodes a payload as one or more length-prefixed packet lines.
+	 *
+	 * The special packets '0000', '0001' and '0002' are returned verbatim when
+	 * no $channel is given. Any other payload is split into chunks of at most
+	 * 8000 bytes; every chunk is prefixed with $channel and framed with a
+	 * four-digit hexadecimal length that also counts the length field itself.
+	 *
+	 * @param string $payload Raw payload bytes.
+	 * @param string $channel Optional prefix (e.g. a sideband channel byte) added to every chunk.
+	 * @return string The concatenated packet lines.
+	 */
 	static public function encode_packet_line(string $payload, $channel=''): string {
-		$payload = $channel . $payload;
-		if($payload !== '0000' && $payload !== '0001' && $payload !== '0002') {
-			$length = sprintf("%04x", strlen($payload) + 4);
-		} else {
-			$length = '';
+		// @TODO: Stream instead of buffering
+		// A payload sent on a channel is always data, even if it happens to
+		// look like a special packet, so it must be framed like any other chunk.
+		if('' === $channel && ($payload === '0000' || $payload === '0001' || $payload === '0002')) {
+			return $payload;
 		}
-		return $length . $payload;
+
+		$chunk_size = 8000;
+		$payload_length = strlen($payload);
+		$offset = 0;
+		$lines = [];
+		// do/while: an empty payload still produces a single packet line,
+		// exactly as it did before chunking was introduced.
+		do {
+			$chunk = $channel . substr($payload, $offset, $chunk_size);
+			$lines[] = sprintf("%04x", strlen($chunk) + 4) . $chunk;
+			$offset += $chunk_size;
+		} while($offset < $payload_length);
+		return implode('', $lines);
 	}
 
 	/**
diff --git a/packages/playground/data-liberation/src/git/WP_Git_Repository.php b/packages/playground/data-liberation/src/git/WP_Git_Repository.php
--- a/packages/playground/data-liberation/src/git/WP_Git_Repository.php
+++ b/packages/playground/data-liberation/src/git/WP_Git_Repository.php
@@ -330,6 +330,11 @@ public function get_parsed_commit() {
 		if(null === $this->parsed_commit && $this->oid) {
 			$commit_body = $this->read_entire_object_contents();
 			$this->parsed_commit = WP_Git_Pack_Processor::parse_commit_body($commit_body);
+			// Only tag a successfully parsed commit: assigning the key first would turn
+			// a failed (falsy) parse result into a non-empty array and skip the check below.
+			if($this->parsed_commit) {
+				$this->parsed_commit['oid'] = $this->oid;
+			}
 			if(!$this->parsed_commit) {
 				$this->last_error = 'Failed to parse commit';
 				$this->parsed_commit = [];
@@ -433,84 +438,85 @@ public function find_path_descendants($path) {
 		return $oids;
 	}
 
-	public function find_objects_added_in($new_tree_oid, $old_tree_oid=WP_Git_Repository::NULL_OID, $options=[]) {
-		$old_tree_index = $options['old_tree_index'] ?? $this;
-		if($old_tree_index === null) {
-			$old_tree_index = $this;
-		}
-
-		// Resolve the actual tree oid if $new_tree_oid is a commit
-		if(false === $this->read_object($new_tree_oid)) {
-			$this->last_error = 'Failed to read object: ' . $new_tree_oid;
-			return false;
-		}
-		if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
-			// yield the commit object itself
-			$parsed_commit = $this->get_parsed_commit();
-			$new_tree_oid = $parsed_commit['tree'];
-			yield $this->oid;
-		}
-
-		// Resolve the actual tree oid if $old_tree_oid is a commit
-		if(!$this->is_null_oid($old_tree_oid)) {
-			if(false === $old_tree_index->read_object($old_tree_oid)) {
-				$this->last_error = 'Failed to read object: ' . $old_tree_oid;
-				return false;
-			}
-			if($old_tree_index->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
-				$old_tree_oid = $old_tree_index->get_parsed_commit()['tree'];
-			}
-		}
-
-		if($new_tree_oid === $old_tree_oid) {
-			return false;
-		}
-
-		$stack = [[$new_tree_oid, $old_tree_oid]];
-
-		while(!empty($stack)) {
-			list($current_new_oid, $current_old_oid) = array_pop($stack);
-
-			// Object is unchanged
-			if($current_new_oid === $current_old_oid) {
-				continue;
-			}
-			if($this->is_null_oid($current_new_oid)) {
-				continue;
-			}
-
-			if(false === $this->read_object($current_new_oid)) {
-				$this->last_error = 'Failed to read object: ' . $current_new_oid;
-				return false;
-			}
-			if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_BLOB) {
-				yield $this->get_oid();
-				continue;
-			} else if($this->get_type() !== WP_Git_Pack_Processor::OBJECT_TYPE_TREE) {
-				_doing_it_wrong(__METHOD__, 'Invalid object type in find_objects_added_in: ' . $this->get_type(), '1.0.0');
-				return false;
-			}
-
-			$new_tree = $this->get_parsed_tree();
-			yield $this->get_oid();
-
-			$old_tree = [];
-			if(!$this->is_null_oid($current_old_oid)) {
-				if(false === $old_tree_index->read_object($current_old_oid)) {
-					$this->last_error = 'Failed to read object: ' . $current_old_oid;
-					return false;
-				}
-				$old_tree = $old_tree_index->get_parsed_tree();
-			}
-
-			foreach($new_tree as $name => $object) {
-				$stack[] = [$object['sha1'], $old_tree[$name]['sha1'] ?? null];
-			}
-		}
-	}
-
-	private function is_null_oid($oid) {
-		return $oid === null || $oid === WP_Git_Repository::NULL_OID;
+	/**
+	 * Lists the ids of the objects reachable from $new_commit_hash that are
+	 * not already part of $old_commit_hash.
+	 *
+	 * Starting at $new_commit_hash, follows the `parent` entry of each parsed
+	 * commit until $old_commit_hash or a commit without a parent is reached.
+	 * Every visited commit, its root tree and all descendants of that tree are
+	 * collected, and the objects found in the old commit's tree are removed.
+	 *
+	 * @param string      $new_commit_hash Commit whose objects are collected.
+	 * @param string|null $old_commit_hash Commit (or tree) the receiver already has.
+	 *                                     A null oid means nothing is known yet.
+	 * @return array|false List of object ids, or false on failure
+	 *                     (details in $this->last_error).
+	 */
+	public function find_objects_added_in($new_commit_hash, $old_commit_hash=WP_Git_Repository::NULL_OID) {
+		$new_commit = wp_git_get_parsed_commit($this, $new_commit_hash);
+		if(!$new_commit) {
+			$this->last_error = 'Failed to read commit: ' . $new_commit_hash;
+			return false;
+		}
+
+		// Index every object that is already reachable from the old commit.
+		$old_objects_oids = [];
+		if(!wp_git_is_null_oid($old_commit_hash)) {
+			if(false === $this->read_object($old_commit_hash)) {
+				$this->last_error = 'Failed to read object: ' . $old_commit_hash;
+				return false;
+			}
+			// Resolve the actual tree oid if $old_commit_hash is a commit,
+			// otherwise treat the given oid as the tree itself.
+			$old_tree_hash = $old_commit_hash;
+			if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
+				$old_tree_hash = $this->get_parsed_commit()['tree'];
+			}
+			// The helper reports failures as false, which array_flip() cannot take.
+			$old_descendants = wp_git_get_all_descendant_oids_in_tree($this, $old_tree_hash);
+			if(false === $old_descendants) {
+				$this->last_error = 'Failed to read tree: ' . $old_tree_hash;
+				return false;
+			}
+			$old_objects_oids = array_flip($old_descendants);
+			$old_objects_oids[$old_commit_hash] = true;
+		}
+
+		$new_objects_oids = [];
+		// Optimization – don't process the same tree more than once.
+		$processed_trees = [];
+
+		while($new_commit_hash !== $old_commit_hash && !wp_git_is_null_oid($new_commit_hash)) {
+			// Fail like the rest of this method (false + last_error) rather than throwing.
+			if(false === $this->read_object($new_commit_hash)) {
+				$this->last_error = 'Failed to read object: ' . $new_commit_hash;
+				return false;
+			}
+			$parsed_commit = $this->get_parsed_commit();
+			if(empty($parsed_commit['tree'])) {
+				$this->last_error = 'Failed to parse commit: ' . $new_commit_hash;
+				return false;
+			}
+			$tree_oid = $parsed_commit['tree'];
+			$new_objects_oids[$new_commit_hash] = true;
+			$new_objects_oids[$tree_oid] = true;
+			if(!isset($processed_trees[$tree_oid])) {
+				$descendants = wp_git_get_all_descendant_oids_in_tree($this, $tree_oid);
+				if(false === $descendants) {
+					$this->last_error = 'Failed to read tree: ' . $tree_oid;
+					return false;
+				}
+				foreach($descendants as $descendant) {
+					$new_objects_oids[$descendant] = true;
+				}
+				$processed_trees[$tree_oid] = true;
+			}
+			$new_commit_hash = $parsed_commit['parent'] ?? WP_Git_Repository::NULL_OID;
+		}
+
+		$diff = array_diff_key($new_objects_oids, $old_objects_oids);
+		return array_keys($diff);
 	}
 
 	public function set_ref_head($ref, $oid) {
diff --git a/packages/playground/data-liberation/src/git/WP_Git_Server.php b/packages/playground/data-liberation/src/git/WP_Git_Server.php
index dfbf47532d..82f6903166 100644
--- a/packages/playground/data-liberation/src/git/WP_Git_Server.php
+++ b/packages/playground/data-liberation/src/git/WP_Git_Server.php
@@ -282,25 +282,28 @@ public function handle_fetch_request($request_bytes, $response) {
 
 				$parsed_commit = $this->repository->get_parsed_commit();
 				if(!isset($parsed_commit['parent'])) {
+					$common_parent_hash = WP_Git_Repository::NULL_OID;
 					break;
 				}
 
 				$commit_hash = $parsed_commit['parent'];
 				if(isset($have_oids[$commit_hash])) {
+					$common_parent_hash = $commit_hash;
 					break;
 				}
 			}
-			$common_parent_hash = $commit_hash;
 
 			// For each wanted commit, find objects not present in any of the have commits
 			$new_objects = $this->repository->find_objects_added_in(
 				$want_hash,
 				$common_parent_hash
 			);
-			$objects_to_send = array_merge(
-				$objects_to_send,
-				iterator_to_array($new_objects)
-			);
+			if(false !== $new_objects) {
+				$objects_to_send = array_merge(
+					$objects_to_send,
+					$new_objects
+				);
+			}
 			if($common_parent_hash !== WP_Git_Repository::NULL_OID) {
 				$acks[] = $common_parent_hash;
 			}
@@ -358,7 +361,7 @@ public function handle_fetch_request($request_bytes, $response) {
 
 		// @TODO: Stream the pack data instead of buffering it
 		$pack_data = WP_Git_Pack_Processor::encode($pack_objects);
-		$response->write(WP_Git_Pack_Processor::encode_packet_line("\x01" . $pack_data));
+		$response->write(WP_Git_Pack_Processor::encode_packet_line($pack_data, "\x01"));
 		$response->write(WP_Git_Pack_Processor::encode_packet_line("0000"));
 		return true;
 	}
diff --git a/packages/playground/data-liberation/src/git/functions.php b/packages/playground/data-liberation/src/git/functions.php
new file mode 100644
--- /dev/null
+++ b/packages/playground/data-liberation/src/git/functions.php
@@ -0,0 +1,64 @@
+<?php
+
+/**
+ * Collects the oid of a tree together with the oids of everything stored below it.
+ *
+ * Walks the tree iteratively: every entry of every visited tree is recorded and
+ * entries whose mode is FILE_MODE_DIRECTORY are queued to be visited as well.
+ *
+ * @param WP_Git_Repository $repository Repository to read the objects from.
+ * @param string            $tree_oid   Oid of the root tree.
+ * @return array|false List of oids (the root tree first), or false if a tree could not be read.
+ */
+function wp_git_get_all_descendant_oids_in_tree(WP_Git_Repository $repository, $tree_oid) {
+	if (!$repository->read_object($tree_oid)) {
+		return false;
+	}
+	$oids = [$tree_oid];
+	$trees = [$tree_oid];
+
+	while (!empty($trees)) {
+		$tree_hash = array_pop($trees);
+		if (!$repository->read_object($tree_hash)) {
+			_doing_it_wrong('wp_git_get_all_descendant_oids_in_tree', 'Failed to read object: ' . $tree_hash, '1.0.0');
+			return false;
+		}
+		$tree = $repository->get_parsed_tree();
+		foreach ($tree as $object) {
+			$oids[] = $object['sha1'];
+			if ($object['mode'] === WP_Git_Pack_Processor::FILE_MODE_DIRECTORY) {
+				$trees[] = $object['sha1'];
+			}
+		}
+	}
+	return $oids;
+}
+
+/**
+ * Reads an object and returns it as a parsed commit.
+ *
+ * @param WP_Git_Repository $repository Repository to read the object from.
+ * @param string            $commit_oid Oid of the commit.
+ * @return array|false The parsed commit, or false if the object cannot be read or is not a commit.
+ */
+function wp_git_get_parsed_commit(WP_Git_Repository $repository, $commit_oid) {
+	if(false === $repository->read_object($commit_oid)) {
+		_doing_it_wrong('wp_git_get_parsed_commit', 'Failed to read object: ' . $commit_oid, '1.0.0');
+		return false;
+	}
+	if($repository->get_type() !== WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
+		_doing_it_wrong('wp_git_get_parsed_commit', 'Object was not a commit in find_objects_added_in: ' . $repository->get_type(), '1.0.0');
+		return false;
+	}
+	return $repository->get_parsed_commit();
+}
+
+/**
+ * Tells whether an oid stands for "no object".
+ *
+ * @param string|null $oid Oid to check.
+ * @return bool True for null and for WP_Git_Repository::NULL_OID.
+ */
+function wp_git_is_null_oid($oid) {
+	return $oid === null || $oid === WP_Git_Repository::NULL_OID;
+}