Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update name and number trees parsing #653

Merged
merged 1 commit into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions src/main/java/org/verapdf/pd/PDCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.verapdf.pd.actions.PDCatalogAdditionalActions;
import org.verapdf.pd.form.PDAcroForm;
import org.verapdf.pd.optionalcontent.PDOptionalContentProperties;
import org.verapdf.pd.structure.PDNumberTreeNode;
import org.verapdf.pd.structure.PDStructTreeRoot;
import org.verapdf.tools.PageLabels;

Expand All @@ -46,6 +47,9 @@ public class PDCatalog extends PDObject {
private static final Logger LOGGER = Logger.getLogger(PDCatalog.class.getCanonicalName());

private final PDPageTree pages;
private PDNamesDictionary namesDictionary;
private PDNumberTreeNode pageLabelsTree;
private PageLabels pageLabels;

public PDCatalog() {
super();
Expand Down Expand Up @@ -145,20 +149,34 @@ public PDAcroForm getAcroForm() {
}

public PDNamesDictionary getNamesDictionary() {
COSObject buffer = getKey(ASAtom.NAMES);
if (buffer != null && buffer.getType() == COSObjType.COS_DICT) {
return new PDNamesDictionary(buffer);
if (namesDictionary == null) {
COSObject buffer = getKey(ASAtom.NAMES);
if (buffer != null && buffer.getType() == COSObjType.COS_DICT) {
namesDictionary = new PDNamesDictionary(buffer);
}
}
return null;
return namesDictionary;
}

public PageLabels getPageLabels() {
COSObject labelsTree = getKey(ASAtom.PAGE_LABELS);
if (labelsTree != null && !labelsTree.empty() && labelsTree.getType() == COSObjType.COS_DICT) {
return new PageLabels((COSDictionary) labelsTree.getDirectBase());
if (pageLabels == null) {
PDNumberTreeNode pageLabelsTree = getPageLabelsTree();
if (pageLabelsTree != null) {
pageLabels = new PageLabels(pageLabelsTree);
}
}
return null;
}

/**
 * Returns the root of the number tree stored under the
 * {@code ASAtom.PAGE_LABELS} key of this catalog, creating and caching the
 * wrapper on the first successful lookup.
 *
 * @return the cached root node, or {@code null} if the key is absent, empty
 *         or not a dictionary
 */
public PDNumberTreeNode getPageLabelsTree() {
    if (pageLabelsTree != null) {
        return pageLabelsTree;
    }
    COSObject labelsObject = getKey(ASAtom.PAGE_LABELS);
    boolean isUsableDictionary = labelsObject != null
            && !labelsObject.empty()
            && labelsObject.getType() == COSObjType.COS_DICT;
    if (isUsableDictionary) {
        pageLabelsTree = new PDNumberTreeNode(labelsObject);
    }
    return pageLabelsTree;
}

public String getVersion() {
COSObject version = getKey(ASAtom.VERSION);
Expand Down
34 changes: 12 additions & 22 deletions src/main/java/org/verapdf/pd/PDNameTreeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public class PDNameTreeNode extends PDObject implements Iterable<COSObject> {

private final Set<COSKey> parents;

private String[] limitsArray = null;
private List<PDNameTreeNode> kids = null;
private Map<String, COSObject> names = null;

Expand All @@ -46,9 +47,8 @@ private PDNameTreeNode(COSObject obj, Set<COSKey> parents) {
if (objectKey != null) {
if (parents.contains(objectKey)) {
throw new LoopedException("Loop in name tree");
} else {
this.parents.add(objectKey);
}
this.parents.add(objectKey);
}
}

Expand Down Expand Up @@ -106,6 +106,13 @@ private Map<String, COSObject> parseNames() {
}

/**
 * Returns the limits of this name tree node, parsing them on the first call
 * and reusing the result afterwards.
 *
 * @return the array produced by {@link #parseLimitsArray()}; the cached
 *         instance itself is returned, not a copy
 */
public String[] getLimitsArray() {
    if (limitsArray != null) {
        return limitsArray;
    }
    limitsArray = parseLimitsArray();
    return limitsArray;
}

public String[] parseLimitsArray() {
COSObject limits = this.getKey(ASAtom.LIMITS);
if (limits != null && !limits.empty() && limits.getType() == COSObjType.COS_ARRAY
&& limits.size() >= 2 && limits.at(0).getType() == COSObjType.COS_STRING
Expand All @@ -115,21 +122,12 @@ public String[] getLimitsArray() {
res[1] = limits.at(1).getString();
return res;
}
return null;
return new String[0];
}

public COSObject getObject(String key) {
Set<COSKey> visitedKeys = new HashSet<>();
COSKey objectKey = getObject().getObjectKey();
if (objectKey != null) {
visitedKeys.add(objectKey);
}
return getObject(key, visitedKeys);
}

private COSObject getObject(String key, Set<COSKey> visitedKeys) {
String[] limits = this.getLimitsArray();
if (limits != null && (key.compareTo(limits[0]) < 0 || key.compareTo(limits[1]) > 0)) {
if (limits.length == 2 && (key.compareTo(limits[0]) < 0 || key.compareTo(limits[1]) > 0)) {
// string not in the limits
return null;
}
Expand All @@ -145,15 +143,7 @@ private COSObject getObject(String key, Set<COSKey> visitedKeys) {
List<PDNameTreeNode> kids = getKids();
if (kids != null) {
for (PDNameTreeNode kid : kids) {
COSKey kidObjectKey = kid.getObject().getObjectKey();
if (kidObjectKey != null) {
if (visitedKeys.contains(kidObjectKey)) {
throw new LoopedException("Loop inside name tree");
} else {
visitedKeys.add(kidObjectKey);
}
}
COSObject res = kid.getObject(key, visitedKeys);
COSObject res = kid.getObject(key);
if (res != null) {
return res;
}
Expand Down
19 changes: 16 additions & 3 deletions src/main/java/org/verapdf/pd/PDNamesDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,34 @@
* @author Maksim Bezrukov
*/
public class PDNamesDictionary extends PDObject {

private PDNameTreeNode embeddedFiles;
private PDNameTreeNode javaScript;
private PDNameTreeNode dests;

/**
 * Creates a names dictionary wrapper around the given object.
 *
 * @param obj the object passed on to the {@code PDObject} constructor
 */
public PDNamesDictionary(COSObject obj) {
super(obj);
}

/**
 * Returns the name tree looked up under {@code ASAtom.EMBEDDED_FILES}.
 * A non-null result is cached and returned on subsequent calls.
 *
 * @return the cached name tree, or whatever {@code getNameTreeByName}
 *         returned if nothing has been cached yet
 */
public PDNameTreeNode getEmbeddedFiles() {
    if (embeddedFiles == null) {
        embeddedFiles = getNameTreeByName(ASAtom.EMBEDDED_FILES);
    }
    return embeddedFiles;
}

/**
 * Returns the name tree looked up under {@code ASAtom.JAVA_SCRIPT}.
 * A non-null result is cached and returned on subsequent calls.
 *
 * @return the cached name tree, or whatever {@code getNameTreeByName}
 *         returned if nothing has been cached yet
 */
public PDNameTreeNode getJavaScript() {
    if (javaScript == null) {
        javaScript = getNameTreeByName(ASAtom.JAVA_SCRIPT);
    }
    return javaScript;
}

/**
 * Returns the name tree looked up under {@code ASAtom.DESTS}.
 * A non-null result is cached and returned on subsequent calls.
 *
 * @return the cached name tree, or whatever {@code getNameTreeByName}
 *         returned if nothing has been cached yet
 */
public PDNameTreeNode getDests() {
    if (dests == null) {
        dests = getNameTreeByName(ASAtom.DESTS);
    }
    return dests;
}

private PDNameTreeNode getNameTreeByName(ASAtom name) {
Expand Down
59 changes: 46 additions & 13 deletions src/main/java/org/verapdf/pd/structure/PDNumberTreeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,46 @@
*/
public class PDNumberTreeNode extends PDObject implements Iterable<COSObject> {

private final Set<COSKey> parents;

private long[] limitsArray = null;
private List<PDNumberTreeNode> kids = null;
private Map<Long, COSObject> nums = null;

/**
 * Creates a root number tree node from its dictionary. The node starts with
 * no ancestors recorded for loop detection.
 *
 * @param obj is a number tree node dictionary.
 */
public PDNumberTreeNode(COSObject obj) {
    // The delegate constructor copies the given set, so an empty shared
    // set is equivalent to passing a fresh one.
    this(obj, Collections.<COSKey>emptySet());
}

/**
 * Creates a node that remembers the object keys of its ancestors so that a
 * node referring back to one of them is rejected.
 *
 * @param obj     the number tree node dictionary
 * @param parents object keys of the ancestors of this node; the set is
 *                copied, never modified
 * @throws LoopedException if the object key of {@code obj} is already
 *                         contained in {@code parents}
 */
private PDNumberTreeNode(COSObject obj, Set<COSKey> parents) {
    super(obj);
    final COSKey ownKey = obj.getObjectKey();
    this.parents = new HashSet<>(parents);
    // Set.add returns false when the key was already present, i.e. when
    // this node is one of its own ancestors.
    if (ownKey != null && !this.parents.add(ownKey)) {
        throw new LoopedException("Loop inside number tree");
    }
}


/**
 * Returns the limits of this node, parsing them on the first call and
 * reusing the result afterwards.
 *
 * @return the array produced by {@link #parseLimitsArray()}; callers in
 *         this class treat a length of 2 as "limits present". The cached
 *         instance itself is returned, not a copy.
 */
public long[] getLimitsArray() {
    if (limitsArray != null) {
        return limitsArray;
    }
    limitsArray = parseLimitsArray();
    return limitsArray;
}

public long[] parseLimitsArray() {
COSObject limits = this.getKey(ASAtom.LIMITS);
if (limits != null && !limits.empty() && limits.getType() == COSObjType.COS_ARRAY
&& limits.size() >= 2 && limits.at(0).getType() == COSObjType.COS_INTEGER
Expand All @@ -60,19 +86,26 @@ public long[] getLimitsArray() {
res[1] = limits.at(1).getInteger();
return res;
}
return null;
return new long[0];
}

/**
 * Returns the kids of this node, parsing them on the first call and reusing
 * the parsed list afterwards.
 *
 * @return an unmodifiable view of the list produced by {@link #parseKids()}
 */
public List<PDNumberTreeNode> getKids() {
    List<PDNumberTreeNode> parsedKids = this.kids;
    if (parsedKids == null) {
        parsedKids = parseKids();
        this.kids = parsedKids;
    }
    return Collections.unmodifiableList(parsedKids);
}

/**
* @return the list of number tree nodes that are kids of this node or null
* if no kids are present.
*/
public List<PDNumberTreeNode> getKids() {
public List<PDNumberTreeNode> parseKids() {
COSObject kids = this.getKey(ASAtom.KIDS);
if (kids != null && !kids.empty() && kids.getType() == COSObjType.COS_ARRAY) {
List<PDNumberTreeNode> res = new ArrayList<>(kids.size());
for (COSObject obj : (COSArray) kids.get()) {
res.add(new PDNumberTreeNode(obj));
res.add(new PDNumberTreeNode(obj, parents));
}
return Collections.unmodifiableList(res);
}
Expand All @@ -84,7 +117,7 @@ public List<PDNumberTreeNode> getKids() {
* null if nums are not present.
* TODO: test method
*/
public Map<Long, COSObject> getNums() {
public Map<Long, COSObject> parseNums() {
COSObject nums = this.getKey(ASAtom.NUMS);
if (nums != null && !nums.empty() && nums.getType() == COSObjType.COS_ARRAY) {
Map<Long, COSObject> res = new HashMap<>();
Expand All @@ -100,6 +133,14 @@ public Map<Long, COSObject> getNums() {
return Collections.emptyMap();
}

/**
 * Returns the number-to-object entries of this node, parsing them on the
 * first call and reusing the parsed map afterwards.
 *
 * @return an unmodifiable view of the map produced by {@link #parseNums()}
 */
public Map<Long, COSObject> getNums() {
    Map<Long, COSObject> parsedNums = this.nums;
    if (parsedNums == null) {
        parsedNums = parseNums();
        this.nums = parsedNums;
    }
    return Collections.unmodifiableMap(parsedNums);
}


private List<COSObject> getObjects() {
List<COSObject> result = new LinkedList<>(getNums().values());
for (PDNumberTreeNode kid : getKids()) {
Expand All @@ -126,7 +167,7 @@ public COSObject getObject(Long key) {

private COSObject getObject(Long key, Set<COSKey> visitedKeys) {
long[] limits = this.getLimitsArray();
if (limits != null) {
if (limits.length == 2) {
if (key < limits[0] || key > limits[1]) {
// integer not in the limits
return null;
Expand All @@ -144,14 +185,6 @@ private COSObject getObject(Long key, Set<COSKey> visitedKeys) {
List<PDNumberTreeNode> kids = getKids();
if (kids != null) {
for (PDNumberTreeNode kid : kids) {
COSKey kidObjectKey = kid.getObject().getObjectKey();
if (kidObjectKey != null) {
if (visitedKeys.contains(kidObjectKey)) {
throw new LoopedException("Loop inside number tree");
} else {
visitedKeys.add(kidObjectKey);
}
}
COSObject res = kid.getObject(key, visitedKeys);
if (res != null) {
return res;
Expand Down
12 changes: 8 additions & 4 deletions src/main/java/org/verapdf/pd/structure/PDStructTreeRoot.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
* @author Maksim Bezrukov
*/
public class PDStructTreeRoot extends PDStructTreeNode {

private PDNumberTreeNode parentTree;

public PDStructTreeRoot(COSObject obj) {
super(obj);
Expand Down Expand Up @@ -58,10 +60,12 @@ public COSObject getClassMap() {
}

public PDNumberTreeNode getParentTree() {
COSObject parentTree = getKey(ASAtom.PARENT_TREE);
if (parentTree != null && parentTree.getType().isDictionaryBased()) {
return new PDNumberTreeNode(parentTree);
if (parentTree == null) {
COSObject parentTreeObject = getKey(ASAtom.PARENT_TREE);
if (parentTreeObject != null && parentTreeObject.getType().isDictionaryBased()) {
parentTree = new PDNumberTreeNode(parentTreeObject);
}
}
return null;
return parentTree;
}
}
36 changes: 11 additions & 25 deletions src/main/java/org/verapdf/tools/PageLabels.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
package org.verapdf.tools;

import org.verapdf.as.ASAtom;
import org.verapdf.cos.COSArray;
import org.verapdf.cos.COSDictionary;
import org.verapdf.cos.COSObjType;
import org.verapdf.cos.COSObject;
import org.verapdf.pd.structure.PDNumberTreeNode;

import java.util.Map;
import java.util.NavigableMap;
Expand All @@ -37,40 +37,26 @@ public class PageLabels {

private final NavigableMap<Integer, PageLabelDictionary> labelsMap;

/**
 * Builds the page label map from the given number tree.
 *
 * @param numberTree root node of the number tree to read labels from
 * @throws IllegalArgumentException if {@code numberTree} is {@code null}
 */
public PageLabels(PDNumberTreeNode numberTree) {
    if (numberTree == null) {
        throw new IllegalArgumentException("Number tree base element can not be null");
    }
    labelsMap = new TreeMap<>();
    parseTree(numberTree);
}

private void parseTree(COSDictionary numbTree) {
COSObject nums = numbTree.getKey(ASAtom.NUMS);
if (nums != null && !nums.empty() && nums.getType() == COSObjType.COS_ARRAY) {
addLabelsFromArray((COSArray) nums.getDirectBase());
}

COSObject kids = numbTree.getKey(ASAtom.KIDS);
if (kids != null && !kids.empty() && kids.getType() == COSObjType.COS_ARRAY) {
for (COSObject kid : (COSArray) kids.getDirectBase()) {
if (kid != null && !kid.empty() && kid.getType() == COSObjType.COS_DICT) {
parseTree((COSDictionary) kid.getDirectBase());
}
}
}
}

private void addLabelsFromArray(COSArray nums) {
for (int i = 0; i < nums.size(); i += 2) {
COSObject cosKey = nums.at(i);
Long key = cosKey == null ? null : cosKey.getInteger();
COSObject cosValue = nums.at(i + 1);
private void parseTree(PDNumberTreeNode numberTree) {
for (Map.Entry<Long, COSObject> entry : numberTree.getNums().entrySet()) {
Long key = entry.getKey();
COSObject cosValue = entry.getValue();
if (key != null && cosValue != null && !cosValue.empty() && cosValue.getType() == COSObjType.COS_DICT) {
PageLabelDictionary pageLabelDictionary = new PageLabelDictionary((COSDictionary) cosValue.getDirectBase(), key.intValue());
this.labelsMap.put(key.intValue(), pageLabelDictionary);
}
}
for (PDNumberTreeNode kid : numberTree.getKids()) {
parseTree(kid);
}
}

public String getLabel(int pageIndex) {
Expand Down
Loading