Skip to content

Commit

Permalink
Fixed FileNormalizationIT after apache#5237
Browse files Browse the repository at this point in the history
The changes in apache#5237, which made the external compaction json
metadata entry easier to use, ended up breaking FileNormalizationIT.
apache#5237 removed the metadata entry string (whose double quotes
had to be escaped within the json) and replaced it with an object
containing the path, start row, and end row. The problem with this
is that the metadata entry retains any misnormalization of the file
name (e.g. double slashes in the path), while the normalized path
alone does not. This fixes the issue by including the metadata entry
in the new object that is serialized to json, so that the json for
the external compaction files now contains the metadata entry,
normalized path, start row, and end row.
  • Loading branch information
dlmarion committed Jan 9, 2025
1 parent 3f438b1 commit 17aca3b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public String toJson() {
GSonData jData = new GSonData();
jData.inputs =
jobFiles.stream().map(stf -> new TabletFileCqMetadataGson(stf)).collect(toList());
jData.tmp = new TabletFileCqMetadataGson(compactTmpName);
jData.tmp = new TabletFileCqMetadataGson(compactTmpName.insert());
jData.compactor = compactorId;
jData.kind = kind.name();
jData.groupId = cgid.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ private static String serialize(TabletFileCq tabletFileCq) {
* returned as an empty byte array
**/

private static byte[] encodeRow(final Key key) {
protected static byte[] encodeRow(final Key key) {
final Text row = key != null ? key.getRow() : null;
if (row != null) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
Expand All @@ -252,7 +252,7 @@ private static byte[] encodeRow(final Key key) {
return new byte[0];
}

private static Text decodeRow(byte[] serialized) {
protected static Text decodeRow(byte[] serialized) {
// Empty byte array means null row
if (serialized.length == 0) {
return null;
Expand Down Expand Up @@ -292,26 +292,26 @@ public TabletFileCq(Path path, Range range) {
}

static class TabletFileCqMetadataGson {
private String path;
private byte[] startRow;
private byte[] endRow;
protected String metadataEntry;
protected String path;
protected byte[] startRow;
protected byte[] endRow;

TabletFileCqMetadataGson() {}

TabletFileCqMetadataGson(AbstractTabletFile<?> atf) {
path = Objects.requireNonNull(atf.path.toString());
startRow = encodeRow(atf.range.getStartKey());
endRow = encodeRow(atf.range.getEndKey());
TabletFileCqMetadataGson(StoredTabletFile stf) {
metadataEntry = Objects.requireNonNull(stf.getMetadata());
path = Objects.requireNonNull(stf.getMetadataPath());
startRow = encodeRow(stf.range.getStartKey());
endRow = encodeRow(stf.range.getEndKey());
}

ReferencedTabletFile toReferencedTabletFile() {
return new ReferencedTabletFile(new Path(URI.create(path)),
new Range(decodeRow(startRow), true, decodeRow(endRow), false));
return new StoredTabletFile(metadataEntry).getTabletFile();
}

StoredTabletFile toStoredTabletFile() {
return StoredTabletFile.of(new Path(URI.create(path)),
new Range(decodeRow(startRow), true, decodeRow(endRow), false));
return new StoredTabletFile(metadataEntry);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,23 @@
*/
package org.apache.accumulo.core.metadata;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.net.URI;

import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.metadata.StoredTabletFile.TabletFileCqMetadataGson;
import org.apache.accumulo.core.util.json.ByteArrayToBase64TypeAdapter;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.Test;

import com.google.gson.Gson;

public class StoredTabletFileTest {

@Test
Expand All @@ -37,4 +49,28 @@ public void fileConversionTest() {
assertFalse(StoredTabletFile.fileNeedsConversion(s31));
assertFalse(StoredTabletFile.fileNeedsConversion(s31_untrimmed));
}

@Test
public void testSerDe() {
Gson gson = ByteArrayToBase64TypeAdapter.createBase64Gson();
String metadataEntry =
"{ \"path\":\"hdfs://localhost:8020/accumulo//tables//1/t-0000000/A000003v.rf\",\"startRow\":\"AmEA\",\"endRow\":\"AnoA\" }";
URI normalizedPath =
URI.create("hdfs://localhost:8020/accumulo/tables/1/t-0000000/A000003v.rf");
KeyExtent ke = new KeyExtent(TableId.of("t"), new Text("z"), new Text("a"));
Range r = ke.toDataRange();
StoredTabletFile expected = new StoredTabletFile(metadataEntry);
TabletFileCqMetadataGson meta = new TabletFileCqMetadataGson(expected);
assertEquals(metadataEntry, meta.metadataEntry);
assertEquals(normalizedPath.toString(), meta.path);
assertArrayEquals(StoredTabletFile.encodeRow(r.getStartKey()), meta.startRow);
assertArrayEquals(StoredTabletFile.encodeRow(r.getEndKey()), meta.endRow);
String json = gson.toJson(meta);
System.out.println(json);
TabletFileCqMetadataGson des = gson.fromJson(json, TabletFileCqMetadataGson.class);
assertEquals(metadataEntry, des.metadataEntry);
assertEquals(normalizedPath.toString(), des.path);
assertArrayEquals(StoredTabletFile.encodeRow(r.getStartKey()), des.startRow);
assertArrayEquals(StoredTabletFile.encodeRow(r.getEndKey()), des.endRow);
}
}

0 comments on commit 17aca3b

Please sign in to comment.