Skip to content

Commit

Permalink
support swissmodel
Browse files Browse the repository at this point in the history
  • Loading branch information
jjgao committed May 27, 2018
1 parent 2ac34c5 commit ed6a4c2
Show file tree
Hide file tree
Showing 10 changed files with 378 additions and 99 deletions.
21 changes: 13 additions & 8 deletions MutationHotspotsDetection/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
<parent>
<groupId>org.cbioportal.mutationhotspots</groupId>
<artifactId>master</artifactId>
<version>1.1.0</version>
<version>1.2.0</version>
</parent>
<artifactId>MutationHotspotsDetection</artifactId>
<version>1.1.0</version>
<version>1.2.0</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand All @@ -19,7 +19,12 @@
<dependency>
<groupId>org.biojava</groupId>
<artifactId>biojava-core</artifactId>
<version>4.0.0</version>
<version>5.0.2</version>
</dependency>
<dependency>
<groupId>org.biojava</groupId>
<artifactId>biojava-structure</artifactId>
<version>5.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
Expand All @@ -36,11 +41,6 @@
<artifactId>commons-lang</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>org.biojava</groupId>
<artifactId>biojava-structure</artifactId>
<version>4.2.0</version>
</dependency>
<dependency>
<groupId>org.genomenexus</groupId>
<artifactId>genomenexus-g2s-client</artifactId>
Expand All @@ -63,6 +63,11 @@
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.5</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.stat.DetectedInDecoy;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.stat.StructureHotspotDetectedInDecoy;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.ContactMap;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.ProteinStructureUtils;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure.ContactMapFromPDB;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure.ProteinStructureContactMapCalculator;


/**
Expand Down Expand Up @@ -54,15 +55,18 @@ protected Set<Hotspot> processSingleHotspotsOnAProtein(MutatedProtein protein,
int[] counts = getMutationCountsOnProtein(mapResidueHotspot, protein.getLength());

Map<SortedSet<Integer>,Set<Hotspot>> mapResiduesHotspots3D = new HashMap<>();
Map<MutatedProtein3D,ContactMap> contactMaps = ProteinStructureUtils.getInstance().getContactMaps(protein,
Map<MutatedProtein3D,ContactMap> contactMaps = ProteinStructureContactMapCalculator.getPDBCalculator().getContactMaps(protein,
parameters.getIdentpThresholdFor3DHotspots(), parameters.getDistanceClosestAtomsThresholdFor3DHotspots());
contactMaps.putAll(ProteinStructureContactMapCalculator.getSwissModelCalculator().getContactMaps(protein,
parameters.getIdentpThresholdFor3DHotspots(), parameters.getDistanceClosestAtomsThresholdFor3DHotspots()));

int i = 0;
for (Map.Entry<MutatedProtein3D, ContactMap> entryContactMaps : contactMaps.entrySet()) {
MutatedProtein3D protein3D = entryContactMaps.getKey();
ContactMap contactMap = entryContactMaps.getValue();

if (contactMap.getProteinRight()>protein.getLength()) {
System.err.println("\tMapped Protein resisue longer than protein length.");
System.err.println("\tMapped Protein residue longer than protein length.");
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils;
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure;

import org.cbioportal.mutationhotspots.mutationhotspotsdetection.ContactMap;
import java.util.ArrayList;
Expand All @@ -29,7 +29,6 @@
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.MutatedProtein3D;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.impl.MutatedProtein3DImpl;
import org.genomenexus.g2s.client.ApiException;
import org.genomenexus.g2s.client.api.GetAlignmentsApi;
import org.genomenexus.g2s.client.api.GetResidueMappingApi;
import org.genomenexus.g2s.client.model.Alignment;
import org.genomenexus.g2s.client.model.ResidueMapping;
Expand All @@ -38,16 +37,11 @@
*
* @author jgao
*/
public final class ProteinStructureUtils {
private static final ProteinStructureUtils instance = new ProteinStructureUtils();
public static ProteinStructureUtils getInstance() {
return instance;
}
public class ContactMapFromPDB implements ProteinStructureContactMapCalculator {

private final GetAlignmentsApi g2sGetAlignmentsApi;
private final GetResidueMappingApi g2sGetResidueMappingApi;

private ProteinStructureUtils() {
ContactMapFromPDB() {
AtomCache atomCache = new AtomCache();
// if (dirPdbCache!=null) {
// atomCache.setCachePath(dirPdbCache);
Expand All @@ -58,10 +52,10 @@ private ProteinStructureUtils() {
atomCache.setFileParsingParams(params);
StructureIO.setAtomCache(atomCache);

g2sGetAlignmentsApi = new GetAlignmentsApi();
g2sGetResidueMappingApi = new GetResidueMappingApi();
}

@Override
public Map<MutatedProtein3D, ContactMap> getContactMaps(MutatedProtein mutatedProtein,
double identpThreshold, double distanceThresholdClosestAtoms) {
Map<MutatedProtein3D, ContactMap> contactMaps = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure;

import com.fasterxml.jackson.databind.JsonNode;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.ContactMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.contact.AtomContact;
import org.biojava.nbio.structure.contact.AtomContactSet;
import org.biojava.nbio.structure.contact.Pair;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.MutatedProtein;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.MutatedProtein3D;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.impl.MutatedProtein3DImpl;
import org.genomenexus.g2s.client.model.Alignment;
import org.genomenexus.g2s.client.model.ResidueMapping;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.biojava.nbio.structure.io.PDBFileReader;

/**
*
* @author jgao
*/
public class ContactMapFromSwissModel implements ProteinStructureContactMapCalculator {

ContactMapFromSwissModel() {

}

@Override
public Map<MutatedProtein3D, ContactMap> getContactMaps(MutatedProtein mutatedProtein,
double identpThreshold, double distanceThresholdClosestAtoms) {
Map<MutatedProtein3D, ContactMap> contactMaps = new HashMap<>();

String uniprotAcc = mutatedProtein.getUniprotAcc();
if (uniprotAcc == null) {
return Collections.emptyMap();
}

List<Map<String, Object>> structures = readSwissModel(uniprotAcc);
for (Map<String, Object> structure : structures) {
Object obj = structure.get("identity");
if (obj != null) {
double identity = Double.class.cast(obj);
if (identity < identpThreshold) {
continue;
}
} // if identity not available, still process

obj = structure.get("template");
if (obj == null) {
continue;
}
String[] template = String.class.cast(obj).split("\\.");
String pdbId = template[0];
String chain = template[2];

obj = structure.get("coordinates");
if (obj == null) {
continue;
}
String structureUrl = String.class.cast(obj);

obj = structure.get("from");
if (obj == null) {
continue;
}
int from = Integer.class.cast(obj);

obj = structure.get("to");
if (obj == null) {
continue;
}
int to = Integer.class.cast(obj);

MutatedProtein3DImpl protein3D = new MutatedProtein3DImpl(mutatedProtein);
protein3D.setPdbId("swissmodel:"+pdbId);
protein3D.setPdbChain(chain);

Map<Integer, Set<Integer>> pdbContactMap = getPdbContactMap(structureUrl, distanceThresholdClosestAtoms);

ContactMap contactMap = getContactMap(pdbContactMap, from, to, mutatedProtein.getLength());

contactMaps.put(protein3D, contactMap);
}

return contactMaps;
}

private List<Map<String, Object>> readSwissModel(String uniprotAcc) {
List<Map<String, Object>> ret = new ArrayList<>();

ObjectMapper mapper = new ObjectMapper();

JsonNode root;
try {
URL url = new URL("https://swissmodel.expasy.org/repository/uniprot/"+uniprotAcc+".json?provider=swissmodel");
root = mapper.readTree(url);
} catch (IOException ex) {
Logger.getLogger(ContactMapFromSwissModel.class.getName()).log(Level.SEVERE, null, ex);
return Collections.emptyList();
}

JsonNode structuresNode = root.get("result").get("structures");
Iterator<JsonNode> it = structuresNode.elements();
while (it.hasNext()) {
JsonNode strucNode = it.next();
Map<String, Object> map = mapper.convertValue(strucNode, Map.class);
ret.add(map);
}

return ret;
}

private ContactMap getContactMap(Map<Integer, Set<Integer>> pdbContactMap,
int start, int end, int proteinLength) {
ContactMap contactMap = new ContactMap(proteinLength);
contactMap.setProteinLeft(start);
contactMap.setProteinRight(end);

boolean[][] matrix = contactMap.getContact();

for (Map.Entry<Integer, Set<Integer>> entryPdbContactMap : pdbContactMap.entrySet()) {
int pdbPos = entryPdbContactMap.getKey();

Set<Integer> pdbNeighbors = entryPdbContactMap.getValue();
if (pdbNeighbors.isEmpty()) {
continue;
}

for (Integer pdbNeighbor : pdbNeighbors) {
matrix[pdbPos][pdbNeighbor] = true;
matrix[pdbNeighbor][pdbPos] = true;
}
}

return contactMap;
}

private Map<Integer, Set<Integer>> getPdbContactMap(String strUrl, double distanceThreashold) {
Map<Integer, Set<Integer>> map = new HashMap<>();
try {
PDBFileReader reader = new PDBFileReader();
URL url = new URL(strUrl);
Structure structure = reader.getStructure(url.openStream());
Chain chain = structure.getChainByIndex(0);
AtomContactSet contacts = StructureTools.getAtomsInContact(chain, distanceThreashold);

Iterator<AtomContact> it = contacts.iterator();
while (it.hasNext()) {
AtomContact atomContact = it.next();
Pair<Atom> atoms = atomContact.getPair();
Group group1 = atoms.getFirst().getGroup();
Group group2 = atoms.getSecond().getGroup();
if (!group1.hasAminoAtoms() || !group2.hasAminoAtoms() || group1==group2) {
continue;
}

Integer res1 = group1.getResidueNumber().getSeqNum();
Integer res2 = group2.getResidueNumber().getSeqNum();

Set<Integer> neighbors = map.get(res1);
if (neighbors == null) {
neighbors = new HashSet<>();
map.put(res1, neighbors);
}

neighbors.add(res2);

// we don't add res1 to the neighbors of res2, so it's not a complete map
}
} catch (Exception e) {
e.printStackTrace();
System.err.println("Unable to calculate contact map for "+strUrl);
}

return map;
}

private OneToOneMap<Integer, Integer> getPdbSeqResidueMapping(List<Alignment> alignments) {
Collections.sort(alignments, (Alignment align1, Alignment align2) -> {
int ret = align1.getEvalue().compareTo(align2.getEvalue()); // sort from small evalue to large evalue
if (ret == 0) {
ret = -align1.getIdentity().compareTo(align2.getIdentity());
}
return ret;
});

OneToOneMap<Integer, Integer> map = new OneToOneMap<Integer, Integer>();

for (Alignment alignment : alignments) {
List<ResidueMapping> residueMapping = alignment.getResidueMapping();

for (ResidueMapping r : residueMapping) {
if (!r.getQueryAminoAcid().equals(r.getPdbAminoAcid())) {
continue;
}

Integer iseq = r.getQueryPosition();
Integer ipdb = r.getPdbPosition();

map.put(ipdb, iseq);
}
}
return map;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils;
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure;

import java.util.Collection;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.cbioportal.mutationhotspots.mutationhotspotsdetection.utils.structure;

import java.util.Map;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.ContactMap;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.MutatedProtein;
import org.cbioportal.mutationhotspots.mutationhotspotsdetection.MutatedProtein3D;

/**
*
* @author jgao
*/
public interface ProteinStructureContactMapCalculator {
static final ProteinStructureContactMapCalculator pdbCalculator = new ContactMapFromPDB();
public static ProteinStructureContactMapCalculator getPDBCalculator() {
return pdbCalculator;
}

static final ProteinStructureContactMapCalculator swissModelCalculator = new ContactMapFromSwissModel();
public static ProteinStructureContactMapCalculator getSwissModelCalculator() {
return swissModelCalculator;
}

Map<MutatedProtein3D, ContactMap> getContactMaps(MutatedProtein mutatedProtein, double identpThreshold, double distanceThresholdClosestAtoms);

}
Loading

0 comments on commit ed6a4c2

Please sign in to comment.