Skip to content

Commit

Permalink
Update name and number trees parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
MaximPlusov committed Nov 10, 2024
1 parent ee4eb2e commit f31f0a6
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 74 deletions.
32 changes: 25 additions & 7 deletions src/main/java/org/verapdf/pd/PDCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.verapdf.pd.actions.PDCatalogAdditionalActions;
import org.verapdf.pd.form.PDAcroForm;
import org.verapdf.pd.optionalcontent.PDOptionalContentProperties;
import org.verapdf.pd.structure.PDNumberTreeNode;
import org.verapdf.pd.structure.PDStructTreeRoot;
import org.verapdf.tools.PageLabels;

Expand All @@ -46,6 +47,9 @@ public class PDCatalog extends PDObject {
private static final Logger LOGGER = Logger.getLogger(PDCatalog.class.getCanonicalName());

private final PDPageTree pages;
private PDNamesDictionary namesDictionary;
private PDNumberTreeNode pageLabelsTree;
private PageLabels pageLabels;

public PDCatalog() {
super();
Expand Down Expand Up @@ -145,20 +149,34 @@ public PDAcroForm getAcroForm() {
}

public PDNamesDictionary getNamesDictionary() {
COSObject buffer = getKey(ASAtom.NAMES);
if (buffer != null && buffer.getType() == COSObjType.COS_DICT) {
return new PDNamesDictionary(buffer);
if (namesDictionary == null) {
COSObject buffer = getKey(ASAtom.NAMES);
if (buffer != null && buffer.getType() == COSObjType.COS_DICT) {
namesDictionary = new PDNamesDictionary(buffer);
}
}
return null;
return namesDictionary;
}

public PageLabels getPageLabels() {
COSObject labelsTree = getKey(ASAtom.PAGE_LABELS);
if (labelsTree != null && !labelsTree.empty() && labelsTree.getType() == COSObjType.COS_DICT) {
return new PageLabels((COSDictionary) labelsTree.getDirectBase());
if (pageLabels == null) {
PDNumberTreeNode pageLabelsTree = getPageLabelsTree();
if (pageLabelsTree != null) {
pageLabels = new PageLabels(pageLabelsTree);
}
}
return null;
}

/**
 * Returns the number tree stored under the catalog's PageLabels key.
 * The node is created once and reused on later calls.
 *
 * @return the cached root node, or null if the entry is absent, empty or
 *         not a dictionary
 */
public PDNumberTreeNode getPageLabelsTree() {
    if (pageLabelsTree != null) {
        return pageLabelsTree;
    }
    COSObject labels = getKey(ASAtom.PAGE_LABELS);
    boolean isDictionary = labels != null
            && !labels.empty()
            && labels.getType() == COSObjType.COS_DICT;
    if (isDictionary) {
        pageLabelsTree = new PDNumberTreeNode(labels);
    }
    return pageLabelsTree;
}

public String getVersion() {
COSObject version = getKey(ASAtom.VERSION);
Expand Down
34 changes: 12 additions & 22 deletions src/main/java/org/verapdf/pd/PDNameTreeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public class PDNameTreeNode extends PDObject implements Iterable<COSObject> {

private final Set<COSKey> parents;

private String[] limitsArray = null;
private List<PDNameTreeNode> kids = null;
private Map<String, COSObject> names = null;

Expand All @@ -46,9 +47,8 @@ private PDNameTreeNode(COSObject obj, Set<COSKey> parents) {
if (objectKey != null) {
if (parents.contains(objectKey)) {
throw new LoopedException("Loop in name tree");
} else {
this.parents.add(objectKey);
}
this.parents.add(objectKey);
}
}

Expand Down Expand Up @@ -106,6 +106,13 @@ private Map<String, COSObject> parseNames() {
}

/**
 * Returns the limits of this name tree node, parsing them on the first call
 * and reusing the parsed array afterwards.
 *
 * @return the array produced by {@link #parseLimitsArray()}
 */
public String[] getLimitsArray() {
    String[] cached = limitsArray;
    if (cached == null) {
        cached = parseLimitsArray();
        limitsArray = cached;
    }
    return cached;
}

public String[] parseLimitsArray() {
COSObject limits = this.getKey(ASAtom.LIMITS);
if (limits != null && !limits.empty() && limits.getType() == COSObjType.COS_ARRAY
&& limits.size() >= 2 && limits.at(0).getType() == COSObjType.COS_STRING
Expand All @@ -115,21 +122,12 @@ public String[] getLimitsArray() {
res[1] = limits.at(1).getString();
return res;
}
return null;
return new String[0];
}

public COSObject getObject(String key) {
Set<COSKey> visitedKeys = new HashSet<>();
COSKey objectKey = getObject().getObjectKey();
if (objectKey != null) {
visitedKeys.add(objectKey);
}
return getObject(key, visitedKeys);
}

private COSObject getObject(String key, Set<COSKey> visitedKeys) {
String[] limits = this.getLimitsArray();
if (limits != null && (key.compareTo(limits[0]) < 0 || key.compareTo(limits[1]) > 0)) {
if (limits.length == 2 && (key.compareTo(limits[0]) < 0 || key.compareTo(limits[1]) > 0)) {
// string not in the limits
return null;
}
Expand All @@ -145,15 +143,7 @@ private COSObject getObject(String key, Set<COSKey> visitedKeys) {
List<PDNameTreeNode> kids = getKids();
if (kids != null) {
for (PDNameTreeNode kid : kids) {
COSKey kidObjectKey = kid.getObject().getObjectKey();
if (kidObjectKey != null) {
if (visitedKeys.contains(kidObjectKey)) {
throw new LoopedException("Loop inside name tree");
} else {
visitedKeys.add(kidObjectKey);
}
}
COSObject res = kid.getObject(key, visitedKeys);
COSObject res = kid.getObject(key);
if (res != null) {
return res;
}
Expand Down
19 changes: 16 additions & 3 deletions src/main/java/org/verapdf/pd/PDNamesDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,34 @@
* @author Maksim Bezrukov
*/
public class PDNamesDictionary extends PDObject {

private PDNameTreeNode embeddedFiles;
private PDNameTreeNode javaScript;
private PDNameTreeNode dests;

/**
 * Creates a names dictionary wrapper around the given COS object.
 *
 * @param obj the COS object handed to the {@code PDObject} constructor
 */
public PDNamesDictionary(COSObject obj) {
super(obj);
}

/**
 * Returns the name tree registered under the EmbeddedFiles key, resolving it
 * on first use and caching a non-null result.
 *
 * @return the cached name tree root, or whatever
 *         {@code getNameTreeByName} yields when no tree has been cached yet
 */
public PDNameTreeNode getEmbeddedFiles() {
    if (embeddedFiles == null) {
        embeddedFiles = getNameTreeByName(ASAtom.EMBEDDED_FILES);
    }
    return embeddedFiles;
}

/**
 * Returns the name tree registered under the JavaScript key, resolving it
 * on first use and caching a non-null result.
 *
 * @return the cached name tree root, or whatever
 *         {@code getNameTreeByName} yields when no tree has been cached yet
 */
public PDNameTreeNode getJavaScript() {
    if (javaScript == null) {
        javaScript = getNameTreeByName(ASAtom.JAVA_SCRIPT);
    }
    return javaScript;
}

/**
 * Returns the name tree registered under the Dests key, resolving it
 * on first use and caching a non-null result.
 *
 * @return the cached name tree root, or whatever
 *         {@code getNameTreeByName} yields when no tree has been cached yet
 */
public PDNameTreeNode getDests() {
    if (dests == null) {
        dests = getNameTreeByName(ASAtom.DESTS);
    }
    return dests;
}

private PDNameTreeNode getNameTreeByName(ASAtom name) {
Expand Down
59 changes: 46 additions & 13 deletions src/main/java/org/verapdf/pd/structure/PDNumberTreeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,46 @@
*/
public class PDNumberTreeNode extends PDObject implements Iterable<COSObject> {

private final Set<COSKey> parents;

private long[] limitsArray = null;
private List<PDNumberTreeNode> kids = null;
private Map<Long, COSObject> nums = null;

/**
* Constructor from number tree node dictionary. Delegates to the private
* constructor with an empty set of ancestor keys, so the created node has
* no recorded parents.
*
* @param obj is a number tree node dictionary.
*/
public PDNumberTreeNode(COSObject obj) {
this(obj, new HashSet<>());
}

/**
 * Creates a node that remembers the object keys of its ancestors.
 * The given set is copied, so sibling nodes do not share state.
 *
 * @param obj     the number tree node object
 * @param parents object keys of the nodes above this one
 * @throws LoopedException if the key of {@code obj} is already among {@code parents}
 */
private PDNumberTreeNode(COSObject obj, Set<COSKey> parents) {
    super(obj);
    COSKey ownKey = obj.getObjectKey();
    this.parents = new HashSet<>(parents);
    if (ownKey == null) {
        // Object without a key: nothing to check or record.
        return;
    }
    if (parents.contains(ownKey)) {
        throw new LoopedException("Loop in number tree");
    }
    this.parents.add(ownKey);
}


/**
 * Returns the limits of this node, parsing the Limits entry on first access
 * and reusing the parsed array afterwards.
 *
 * @return an array holding the lower limit at index 0 and the upper limit at
 *         index 1, or an empty array (not null) if a proper limits array is
 *         not present. The cached array itself is returned, so callers
 *         should not modify it.
 */
public long[] getLimitsArray() {
    if (limitsArray == null) {
        limitsArray = parseLimitsArray();
    }
    return limitsArray;
}

public long[] parseLimitsArray() {
COSObject limits = this.getKey(ASAtom.LIMITS);
if (limits != null && !limits.empty() && limits.getType() == COSObjType.COS_ARRAY
&& limits.size() >= 2 && limits.at(0).getType() == COSObjType.COS_INTEGER
Expand All @@ -60,19 +86,26 @@ public long[] getLimitsArray() {
res[1] = limits.at(1).getInteger();
return res;
}
return null;
return new long[0];
}

/**
 * Returns the child nodes of this node, parsing them on the first call only.
 *
 * @return a read-only view of the cached list produced by {@link #parseKids()}
 */
public List<PDNumberTreeNode> getKids() {
    List<PDNumberTreeNode> cached = this.kids;
    if (cached == null) {
        cached = parseKids();
        this.kids = cached;
    }
    return Collections.unmodifiableList(cached);
}

/**
* @return the list of number tree nodes that are kids of this node or null
* if no kids are present.
*/
public List<PDNumberTreeNode> getKids() {
public List<PDNumberTreeNode> parseKids() {
COSObject kids = this.getKey(ASAtom.KIDS);
if (kids != null && !kids.empty() && kids.getType() == COSObjType.COS_ARRAY) {
List<PDNumberTreeNode> res = new ArrayList<>(kids.size());
for (COSObject obj : (COSArray) kids.get()) {
res.add(new PDNumberTreeNode(obj));
res.add(new PDNumberTreeNode(obj, parents));
}
return Collections.unmodifiableList(res);
}
Expand All @@ -84,7 +117,7 @@ public List<PDNumberTreeNode> getKids() {
* null if nums are not present.
* TODO: test method
*/
public Map<Long, COSObject> getNums() {
public Map<Long, COSObject> parseNums() {
COSObject nums = this.getKey(ASAtom.NUMS);
if (nums != null && !nums.empty() && nums.getType() == COSObjType.COS_ARRAY) {
Map<Long, COSObject> res = new HashMap<>();
Expand All @@ -100,6 +133,14 @@ public Map<Long, COSObject> getNums() {
return Collections.emptyMap();
}

/**
 * Returns the key/value pairs stored directly in this node, parsing them on
 * the first call only.
 *
 * @return a read-only view of the cached map produced by {@link #parseNums()}
 */
public Map<Long, COSObject> getNums() {
    Map<Long, COSObject> cached = this.nums;
    if (cached == null) {
        cached = parseNums();
        this.nums = cached;
    }
    return Collections.unmodifiableMap(cached);
}


private List<COSObject> getObjects() {
List<COSObject> result = new LinkedList<>(getNums().values());
for (PDNumberTreeNode kid : getKids()) {
Expand All @@ -126,7 +167,7 @@ public COSObject getObject(Long key) {

private COSObject getObject(Long key, Set<COSKey> visitedKeys) {
long[] limits = this.getLimitsArray();
if (limits != null) {
if (limits.length == 2) {
if (key < limits[0] || key > limits[1]) {
// integer not in the limits
return null;
Expand All @@ -144,14 +185,6 @@ private COSObject getObject(Long key, Set<COSKey> visitedKeys) {
List<PDNumberTreeNode> kids = getKids();
if (kids != null) {
for (PDNumberTreeNode kid : kids) {
COSKey kidObjectKey = kid.getObject().getObjectKey();
if (kidObjectKey != null) {
if (visitedKeys.contains(kidObjectKey)) {
throw new LoopedException("Loop inside number tree");
} else {
visitedKeys.add(kidObjectKey);
}
}
COSObject res = kid.getObject(key, visitedKeys);
if (res != null) {
return res;
Expand Down
12 changes: 8 additions & 4 deletions src/main/java/org/verapdf/pd/structure/PDStructTreeRoot.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
* @author Maksim Bezrukov
*/
public class PDStructTreeRoot extends PDStructTreeNode {

private PDNumberTreeNode parentTree;

public PDStructTreeRoot(COSObject obj) {
super(obj);
Expand Down Expand Up @@ -58,10 +60,12 @@ public COSObject getClassMap() {
}

public PDNumberTreeNode getParentTree() {
COSObject parentTree = getKey(ASAtom.PARENT_TREE);
if (parentTree != null && parentTree.getType().isDictionaryBased()) {
return new PDNumberTreeNode(parentTree);
if (parentTree == null) {
COSObject parentTreeObject = getKey(ASAtom.PARENT_TREE);
if (parentTreeObject != null && parentTreeObject.getType().isDictionaryBased()) {
parentTree = new PDNumberTreeNode(parentTreeObject);
}
}
return null;
return parentTree;
}
}
36 changes: 11 additions & 25 deletions src/main/java/org/verapdf/tools/PageLabels.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
package org.verapdf.tools;

import org.verapdf.as.ASAtom;
import org.verapdf.cos.COSArray;
import org.verapdf.cos.COSDictionary;
import org.verapdf.cos.COSObjType;
import org.verapdf.cos.COSObject;
import org.verapdf.pd.structure.PDNumberTreeNode;

import java.util.Map;
import java.util.NavigableMap;
Expand All @@ -37,40 +37,26 @@ public class PageLabels {

private final NavigableMap<Integer, PageLabelDictionary> labelsMap;

/**
 * Builds the page label map from the given number tree.
 *
 * @param numberTree root node of the page labels number tree
 * @throws IllegalArgumentException if {@code numberTree} is null
 */
public PageLabels(PDNumberTreeNode numberTree) {
    if (numberTree == null) {
        throw new IllegalArgumentException("Number tree base element can not be null");
    }
    labelsMap = new TreeMap<>();
    parseTree(numberTree);
}

private void parseTree(COSDictionary numbTree) {
COSObject nums = numbTree.getKey(ASAtom.NUMS);
if (nums != null && !nums.empty() && nums.getType() == COSObjType.COS_ARRAY) {
addLabelsFromArray((COSArray) nums.getDirectBase());
}

COSObject kids = numbTree.getKey(ASAtom.KIDS);
if (kids != null && !kids.empty() && kids.getType() == COSObjType.COS_ARRAY) {
for (COSObject kid : (COSArray) kids.getDirectBase()) {
if (kid != null && !kid.empty() && kid.getType() == COSObjType.COS_DICT) {
parseTree((COSDictionary) kid.getDirectBase());
}
}
}
}

private void addLabelsFromArray(COSArray nums) {
for (int i = 0; i < nums.size(); i += 2) {
COSObject cosKey = nums.at(i);
Long key = cosKey == null ? null : cosKey.getInteger();
COSObject cosValue = nums.at(i + 1);
private void parseTree(PDNumberTreeNode numberTree) {
for (Map.Entry<Long, COSObject> entry : numberTree.getNums().entrySet()) {
Long key = entry.getKey();
COSObject cosValue = entry.getValue();
if (key != null && cosValue != null && !cosValue.empty() && cosValue.getType() == COSObjType.COS_DICT) {
PageLabelDictionary pageLabelDictionary = new PageLabelDictionary((COSDictionary) cosValue.getDirectBase(), key.intValue());
this.labelsMap.put(key.intValue(), pageLabelDictionary);
}
}
for (PDNumberTreeNode kid : numberTree.getKids()) {
parseTree(kid);
}
}

public String getLabel(int pageIndex) {
Expand Down

0 comments on commit f31f0a6

Please sign in to comment.