Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Structure type mapping refactoring #619

Merged
merged 1 commit into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 32 additions & 20 deletions src/main/java/org/verapdf/pd/structure/PDStructElem.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,15 @@
import org.verapdf.tools.StaticResources;
import org.verapdf.tools.TaggedPDFConstants;
import org.verapdf.tools.TaggedPDFHelper;

import java.util.List;
import java.util.Map;
import org.verapdf.tools.TaggedPDFRoleMapHelper;

/**
* @author Maksim Bezrukov
*/
public class PDStructElem extends PDStructTreeNode {

private final Map<ASAtom, ASAtom> rootRoleMap;

public PDStructElem(COSObject obj, Map<ASAtom, ASAtom> rootRoleMap) {
public PDStructElem(COSObject obj) {
super(obj);
this.rootRoleMap = rootRoleMap;
}

public ASAtom getType() {
Expand Down Expand Up @@ -101,7 +96,7 @@ public String getExpandedAbbreviation() {
public PDStructElem getParent() {
COSObject parentObject = getKey(ASAtom.P);
if (parentObject != null) {
return new PDStructElem(parentObject, this.rootRoleMap);
return new PDStructElem(parentObject);
}
return null;
}
Expand All @@ -115,34 +110,31 @@ public COSKey getPageObjectNumber() {
}

public StructureType getDefaultStructureType() {
return TaggedPDFHelper.getDefaultStructureType(this.getStructureType(), this.rootRoleMap);
return getDefaultStructureType(this.getStructureType());
}

@Override
public List<PDStructElem> getStructChildren() {
return TaggedPDFHelper.getStructElemStructChildren(getObject(), rootRoleMap);
/**
 * Resolves the default structure type for the given structure type by
 * delegating to {@link TaggedPDFHelper#getDefaultStructureType(StructureType)}.
 *
 * @param structureType the structure type to resolve
 * @return whatever the helper returns for {@code structureType}
 */
public static StructureType getDefaultStructureType(StructureType structureType) {
    StructureType resolvedType = TaggedPDFHelper.getDefaultStructureType(structureType);
    return resolvedType;
}

public String getRoleMapToSameNamespaceTag() {
return TaggedPDFHelper.getRoleMapToSameNamespaceTag(getStructureType(), rootRoleMap);
return TaggedPDFHelper.getRoleMapToSameNamespaceTag(getStructureType());
}

@Override
public List<Object> getChildren() {
return TaggedPDFHelper.getStructElemChildren(getObject(), rootRoleMap);
/**
 * Determines the standard structure type of a structure element by reading
 * the element's own structure type and passing it to
 * {@code getStructureTypeStandardStructureType}.
 *
 * @param pdStructElem the structure element to inspect; dereferenced without a null check
 * @return the result of {@code getStructureTypeStandardStructureType} for the element's type
 */
public static StructureType getStructureElementStandardStructureType(PDStructElem pdStructElem) {
    StructureType elementType = pdStructElem.getStructureType();
    return getStructureTypeStandardStructureType(elementType);
}

public static StructureType getStructureElementStandardStructureType(PDStructElem pdStructElem) {
public static StructureType getStructureTypeStandardStructureType(StructureType type) {
PDFFlavour flavour = StaticResources.getFlavour();
if (flavour.getSpecification() == PDFFlavour.Specification.ISO_19005_4 || flavour == PDFFlavour.PDFUA_2 ||
if (flavour.getSpecification() == PDFFlavour.Specification.ISO_19005_4 || flavour == PDFFlavour.PDFUA_2 ||
flavour.getSpecification().getFamily() == PDFFlavour.SpecificationFamily.WCAG) {
StructureType defaultStructureType = pdStructElem.getDefaultStructureType();
StructureType defaultStructureType = PDStructElem.getDefaultStructureType(type);
if (defaultStructureType != null) {
return defaultStructureType;
}
}
if (flavour.getSpecification() != PDFFlavour.Specification.ISO_19005_4 && flavour != PDFFlavour.PDFUA_2) {
StructureType type = pdStructElem.getStructureType();
if (type != null) {
return StructureType.createStructureType(ASAtom.getASAtom(
StaticResources.getRoleMapHelper().getStandardType(type.getType())));
Expand All @@ -151,11 +143,31 @@ public static StructureType getStructureElementStandardStructureType(PDStructEle
return null;
}

/**
 * Returns the name of the standard structure type that the given structure
 * type resolves to.
 *
 * @param structureType the structure type to resolve
 * @return the value of the resolved type's name, or {@code null} when
 *         {@code getStructureTypeStandardStructureType} yields no type
 */
public static String getStructureTypeStandardType(StructureType structureType) {
    StructureType standardType = getStructureTypeStandardStructureType(structureType);
    if (standardType == null) {
        return null;
    }
    return standardType.getType().getValue();
}

/**
 * Returns the name of the standard structure type that the given structure
 * element resolves to.
 *
 * @param pdStructElem the structure element to resolve
 * @return the value of the resolved type's name, or {@code null} when
 *         {@code getStructureElementStandardStructureType} yields no type
 */
public static String getStructureElementStandardType(PDStructElem pdStructElem) {
    StructureType standardType = getStructureElementStandardStructureType(pdStructElem);
    if (standardType == null) {
        return null;
    }
    return standardType.getType().getValue();
}

/**
 * Reports whether the given structure type counts as standard under the
 * flavour currently held by {@link StaticResources}.
 * <p>
 * Two checks may contribute, and the result is their logical OR:
 * <ul>
 * <li>{@code TaggedPDFHelper.isStandardType} is consulted when the flavour's
 * specification is ISO_19005_4, the flavour is PDFUA_2, or the
 * specification family is WCAG;</li>
 * <li>{@code TaggedPDFRoleMapHelper.isStandardType} is consulted when the
 * specification is not ISO_19005_4, the flavour is not PDFUA_2, and
 * {@code type} is not {@code null}.</li>
 * </ul>
 *
 * @param type the structure type to test
 * @return {@code true} if any applicable check accepts the type
 */
public static boolean isStandardStructureType(StructureType type) {
    PDFFlavour currentFlavour = StaticResources.getFlavour();
    PDFFlavour.Specification specification = currentFlavour.getSpecification();
    boolean isIso19005Part4OrPdfUa2 = specification == PDFFlavour.Specification.ISO_19005_4
            || currentFlavour == PDFFlavour.PDFUA_2;

    boolean acceptedByTaggedHelper = false;
    if (isIso19005Part4OrPdfUa2
            || specification.getFamily() == PDFFlavour.SpecificationFamily.WCAG) {
        acceptedByTaggedHelper = TaggedPDFHelper.isStandardType(type);
    }

    boolean acceptedByRoleMapHelper = false;
    if (!isIso19005Part4OrPdfUa2 && type != null) {
        // Always evaluated when applicable, regardless of the first check's outcome.
        acceptedByRoleMapHelper = TaggedPDFRoleMapHelper.isStandardType(type);
    }
    return acceptedByTaggedHelper || acceptedByRoleMapHelper;
}

public static boolean isMathStandardType(StructureType standardStructureType) {
return StaticResources.getFlavour() == PDFFlavour.PDFUA_2 && standardStructureType != null &&
TaggedPDFConstants.MATH_ML_NAMESPACE.equals(standardStructureType.getNameSpaceURI());
Expand Down
11 changes: 8 additions & 3 deletions src/main/java/org/verapdf/pd/structure/PDStructTreeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,24 @@

import org.verapdf.cos.COSObject;
import org.verapdf.pd.PDObject;
import org.verapdf.tools.TaggedPDFHelper;

import java.util.List;

/**
* @author Maksim Bezrukov
*/
public abstract class PDStructTreeNode extends PDObject {
public class PDStructTreeNode extends PDObject {

/**
 * Creates a structure tree node backed by the given COS object, which is
 * handed unchanged to the superclass constructor.
 *
 * @param obj the COS object this node wraps
 */
protected PDStructTreeNode(COSObject obj) {
super(obj);
}

public abstract List<PDStructElem> getStructChildren();
/**
 * Collects the structure element children of this node by delegating to
 * {@code TaggedPDFHelper.getStructNodeStructChildren} with this node's object.
 *
 * @return the list of child structure elements produced by the helper
 */
public List<PDStructElem> getStructChildren() {
    COSObject nodeObject = getObject();
    return TaggedPDFHelper.getStructNodeStructChildren(nodeObject);
}

public abstract List<Object> getChildren();
/**
 * Collects all children of this node by delegating to
 * {@code TaggedPDFHelper.getStructNodeChildren} with this node's object.
 *
 * @return the list of children produced by the helper
 */
public List<Object> getChildren() {
    COSObject nodeObject = getObject();
    return TaggedPDFHelper.getStructNodeChildren(nodeObject);
}
}
11 changes: 0 additions & 11 deletions src/main/java/org/verapdf/pd/structure/PDStructTreeRoot.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.verapdf.cos.COSObjType;
import org.verapdf.cos.COSObject;
import org.verapdf.tools.StaticResources;
import org.verapdf.tools.TaggedPDFHelper;

import java.util.*;

Expand All @@ -38,16 +37,6 @@ public PDStructTreeRoot(COSObject obj) {
StaticResources.setRoleMapHelper(getRoleMap());
}

@Override
public List<PDStructElem> getStructChildren() {
return TaggedPDFHelper.getStructTreeRootStructChildren(getObject(), getRoleMap());
}

@Override
public List<Object> getChildren() {
return TaggedPDFHelper.getStructTreeRootChildren(getObject(), getRoleMap());
}

public Map<ASAtom, ASAtom> getRoleMap() {
COSObject roleMap = getKey(ASAtom.ROLE_MAP);
if (roleMap != null && roleMap.getType() == COSObjType.COS_DICT && roleMap.size() > 0) {
Expand Down
52 changes: 22 additions & 30 deletions src/main/java/org/verapdf/tools/TaggedPDFHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,14 @@ private TaggedPDFHelper() {
// disable default constructor
}

public static StructureType getDefaultStructureType(StructureType type, Map<ASAtom, ASAtom> rootRoleMap) {
public static StructureType getDefaultStructureType(StructureType type) {
if (type == null) {
return null;
}
visitedWithNS.clear();
visitedWithoutNS.clear();
addVisited(type);
StructureType curr = getEquivalent(type, rootRoleMap);
StructureType curr = getEquivalent(type);
if (curr == null || isVisited(curr)) {
return isStandardType(type) ? type : null;
}
Expand All @@ -170,20 +170,20 @@ public static StructureType getDefaultStructureType(StructureType type, Map<ASAt
return curr;
}
addVisited(curr);
curr = getEquivalent(curr, rootRoleMap);
curr = getEquivalent(curr);
}
return null;
}

public static String getRoleMapToSameNamespaceTag(StructureType type, Map<ASAtom, ASAtom> rootRoleMap) {
public static String getRoleMapToSameNamespaceTag(StructureType type) {
if (type == null) {
return null;
}
visitedWithNS.clear();
visitedWithoutNS.clear();
addVisited(type);
StructureType prev = type;
StructureType curr = getEquivalent(prev, rootRoleMap);
StructureType curr = getEquivalent(prev);
while (curr != null) {
if (curr.getNameSpaceURI() != null && curr.getNameSpaceURI().equals(prev.getNameSpaceURI())) {
return curr.getNameSpaceURI();
Expand All @@ -193,12 +193,12 @@ public static String getRoleMapToSameNamespaceTag(StructureType type, Map<ASAtom
}
addVisited(curr);
prev = curr;
curr = getEquivalent(prev, rootRoleMap);
curr = getEquivalent(prev);
}
return null;
}

private static StructureType getEquivalent(StructureType type, Map<ASAtom, ASAtom> rootRoleMap) {
private static StructureType getEquivalent(StructureType type) {
PDStructureNameSpace nameSpace = type.getNameSpace();
if (nameSpace != null) {
PDNameSpaceRoleMapping nameSpaceMapping = nameSpace.getNameSpaceMapping();
Expand All @@ -208,7 +208,7 @@ private static StructureType getEquivalent(StructureType type, Map<ASAtom, ASAto
return null;
}
}
ASAtom equiv = rootRoleMap.get(type.getType());
ASAtom equiv = StaticResources.getRoleMapHelper().getRoleMap().get(type.getType());
return equiv == null ? null : StructureType.createStructureType(equiv);
}

Expand Down Expand Up @@ -271,23 +271,15 @@ private static boolean isVisited(StructureType type) {
}
}

public static List<PDStructElem> getStructTreeRootStructChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap) {
return getStructChildren(parent, roleMap, false);
/**
 * Returns the structure element children of the given parent object,
 * calling {@code getStructChildren} with type checking enabled.
 *
 * @param parent the parent object whose children are requested
 * @return the list of child structure elements
 */
public static List<PDStructElem> getStructNodeStructChildren(COSObject parent) {
    final boolean checkType = true;
    return getStructChildren(parent, checkType);
}

public static List<Object> getStructTreeRootChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap) {
return getChildren(parent, roleMap, false);
/**
 * Returns all children of the given parent object, calling
 * {@code getChildren} with type checking enabled.
 *
 * @param parent the parent object whose children are requested
 * @return the list of children
 */
public static List<Object> getStructNodeChildren(COSObject parent) {
    final boolean checkType = true;
    return getChildren(parent, checkType);
}

public static List<PDStructElem> getStructElemStructChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap) {
return getStructChildren(parent, roleMap, true);
}

public static List<Object> getStructElemChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap) {
return getChildren(parent, roleMap, true);
}

private static List<Object> getChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap, boolean checkType) {
private static List<Object> getChildren(COSObject parent, boolean checkType) {
if (parent == null || parent.getType() != COSObjType.COS_DICT) {
LOGGER.log(Level.FINE, "Parent element for struct elements is null or not a COSDictionary");
return Collections.emptyList();
Expand All @@ -297,7 +289,7 @@ private static List<Object> getChildren(COSObject parent, Map<ASAtom, ASAtom> ro
if (children != null) {
if (isStructElem(children, checkType)) {
List<Object> list = new ArrayList<>(MAX_NUMBER_OF_ELEMENTS);
list.add(new PDStructElem(children, roleMap));
list.add(new PDStructElem(children));
return Collections.unmodifiableList(list);
} else if (isMCR(children)) {
List<Object> list = new ArrayList<>(MAX_NUMBER_OF_ELEMENTS);
Expand All @@ -308,7 +300,7 @@ private static List<Object> getChildren(COSObject parent, Map<ASAtom, ASAtom> ro
list.add(children);
return Collections.unmodifiableList(list);
} else if (children.getType() == COSObjType.COS_ARRAY) {
return getChildrenFromArray(children, roleMap, checkType);
return getChildrenFromArray(children, checkType);
} else if (isOBJR(children)) {
List<Object> list = new ArrayList<>(MAX_NUMBER_OF_ELEMENTS);
list.add(new PDOBJRDictionary(children));
Expand All @@ -324,7 +316,7 @@ private static List<Object> getChildren(COSObject parent, Map<ASAtom, ASAtom> ro
* @param parent parent dictionary
* @return list of structure elements
*/
private static List<PDStructElem> getStructChildren(COSObject parent, Map<ASAtom, ASAtom> roleMap, boolean checkType) {
private static List<PDStructElem> getStructChildren(COSObject parent, boolean checkType) {
if (parent == null || parent.getType() != COSObjType.COS_DICT) {
LOGGER.log(Level.FINE, "Parent element for struct elements is null or not a COSDictionary");
return Collections.emptyList();
Expand All @@ -334,10 +326,10 @@ private static List<PDStructElem> getStructChildren(COSObject parent, Map<ASAtom
if (children != null) {
if (isStructElem(children, checkType)) {
List<PDStructElem> list = new ArrayList<>(MAX_NUMBER_OF_ELEMENTS);
list.add(new PDStructElem(children, roleMap));
list.add(new PDStructElem(children));
return Collections.unmodifiableList(list);
} else if (children.getType() == COSObjType.COS_ARRAY) {
return getStructChildrenFromArray(children, roleMap, checkType);
return getStructChildrenFromArray(children, checkType);
}
}
return Collections.emptyList();
Expand All @@ -349,27 +341,27 @@ private static List<PDStructElem> getStructChildren(COSObject parent, Map<ASAtom
* @param children array of children structure elements
* @return list of structure elements
*/
private static List<PDStructElem> getStructChildrenFromArray(COSObject children, Map<ASAtom, ASAtom> roleMap, boolean checkType) {
private static List<PDStructElem> getStructChildrenFromArray(COSObject children, boolean checkType) {
if (children.size() > 0) {
List<PDStructElem> list = new ArrayList<>();
for (int i = 0; i < children.size(); ++i) {
COSObject elem = children.at(i);
if (isStructElem(elem, checkType)) {
list.add(new PDStructElem(elem, roleMap));
list.add(new PDStructElem(elem));
}
}
return Collections.unmodifiableList(list);
}
return Collections.emptyList();
}

private static List<Object> getChildrenFromArray(COSObject children, Map<ASAtom, ASAtom> roleMap, boolean checkType) {
private static List<Object> getChildrenFromArray(COSObject children, boolean checkType) {
if (children.size() > 0) {
List<Object> list = new ArrayList<>();
for (int i = 0; i < children.size(); ++i) {
COSObject elem = children.at(i);
if (isStructElem(elem, checkType)) {
list.add(new PDStructElem(elem, roleMap));
list.add(new PDStructElem(elem));
} else if (isMCR(elem)) {
list.add(new PDMCRDictionary(elem));
} else if (elem.getType() == COSObjType.COS_INTEGER) {
Expand Down
24 changes: 17 additions & 7 deletions src/main/java/org/verapdf/tools/TaggedPDFRoleMapHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import org.verapdf.as.ASAtom;
import org.verapdf.parser.PDFFlavour;
import org.verapdf.pd.structure.StructureType;

import java.util.*;

Expand Down Expand Up @@ -50,21 +51,30 @@ public String getStandardType(ASAtom type) {
if (type == null) {
return null;
}
Set<String> currentStandardTypes;
Set<String> currentStandardTypes = getCurrentStandardTypes();
boolean isFastStop;
if (StaticResources.getFlavour().getSpecification() == PDFFlavour.Specification.ISO_19005_1) {
currentStandardTypes = TaggedPDFHelper.getPdf14StandardRoleTypes();
isFastStop = true;
} else {
if (StaticResources.getFlavour().getSpecification().getFamily() == PDFFlavour.SpecificationFamily.WCAG) {
currentStandardTypes = TaggedPDFHelper.getWcagStandardRoleTypes();
} else {
currentStandardTypes = TaggedPDFHelper.getPdf17StandardRoleTypes();
}
isFastStop = false;
}
return getStandardType(type, currentStandardTypes, isFastStop);
}

/**
 * Selects the set of standard role type names matching the flavour currently
 * held by {@link StaticResources}: the PDF 1.4 set for specification
 * ISO_19005_1, the WCAG set for the WCAG specification family, and the
 * PDF 1.7 set otherwise.
 *
 * @return the applicable set of standard type names
 */
private static Set<String> getCurrentStandardTypes() {
    PDFFlavour.Specification specification = StaticResources.getFlavour().getSpecification();
    if (specification == PDFFlavour.Specification.ISO_19005_1) {
        return TaggedPDFHelper.getPdf14StandardRoleTypes();
    }
    boolean isWcagFamily = specification.getFamily() == PDFFlavour.SpecificationFamily.WCAG;
    return isWcagFamily
            ? TaggedPDFHelper.getWcagStandardRoleTypes()
            : TaggedPDFHelper.getPdf17StandardRoleTypes();
}

/**
 * Checks whether the name of the given structure type is contained in the
 * set of standard role types selected by {@code getCurrentStandardTypes()}.
 *
 * @param type the structure type to test; may be {@code null}
 * @return {@code true} if the type's name is one of the current standard
 *         types; {@code false} if it is not, or if {@code type} or its
 *         underlying type name is {@code null}
 */
public static boolean isStandardType(StructureType type) {
    // Guard against a missing type or type name instead of failing with a
    // NullPointerException: an absent type cannot be a standard type.
    if (type == null || type.getType() == null) {
        return false;
    }
    return getCurrentStandardTypes().contains(type.getType().getValue());
}


private String getStandardType(ASAtom type, Set<String> currentStandardTypes, boolean isFastStop) {
Set<ASAtom> visitedTypes = new HashSet<>();
Expand Down
Loading