Commit 6d88f368 authored by tcarver
Browse files

Merge pull request #175 from satta/filter_qualifiers

allow to exclude attributes from GFF3 output
parents f567d6d5 0179a734
......@@ -27,8 +27,10 @@ package uk.ac.sanger.artemis.io;
import java.util.Hashtable;
import java.util.HashSet;
import java.util.Enumeration;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import java.io.IOException;
......@@ -113,6 +115,28 @@ public class GFFStreamFeature extends SimpleDocumentFeature
{ "\n", "%5C" } // new-line
};
/**
 * Names of the attributes (qualifiers) that are currently excluded from
 * output. Entries are added by removeAttribute() and taken out again by
 * includeAttribute(); a qualifier whose name is in this set is skipped
 * when the qualifiers of a feature are written.
 * NOTE(review): shared static state with no synchronisation visible
 * here - presumably only configured once at start-up; confirm.
 **/
private static Set<String> attrs_to_filter = new HashSet<String>();
/**
 * Adds an attribute name to the set of attributes that are left out
 * when GFFStreamFeatures are written as GFF3. Excluding a name that is
 * already excluded changes nothing.
 * @param attr name of the GFF3 attribute to exclude from the output
 **/
public static void removeAttribute(String attr)
{
  final Set<String> excluded = attrs_to_filter;
  excluded.add(attr);
}
/**
 * Takes an attribute name back out of the exclusion set, so that it is
 * written to the GFF3 output of GFFStreamFeatures again. Names that
 * were not excluded in the first place are unaffected.
 * @param attr name of the GFF3 attribute to stop excluding
 **/
public static void includeAttribute(String attr)
{
  final Set<String> excluded = attrs_to_filter;
  excluded.remove(attr);
}
/**
* Create a new GFFStreamFeature object. The feature should be added
* to an Entry (with Entry.add()).
......@@ -893,6 +917,9 @@ public class GFFStreamFeature extends SimpleDocumentFeature
if(lname)
continue;
if(attrs_to_filter.contains(this_qualifier.getName()))
continue;
if( (this_qualifier.getName().equals("private") && System.getProperty("noprivate") != null) ||
(this_qualifier.getName().equals("history") && System.getProperty("nohistory") != null) )
continue;
......
......@@ -42,11 +42,11 @@ import uk.ac.sanger.artemis.Options;
public class ReadAndWriteEntry
{
private static org.apache.log4j.Logger logger4j =
private static org.apache.log4j.Logger logger4j =
org.apache.log4j.Logger.getLogger(ReadAndWriteEntry.class);
private static DatabaseEntrySource ENTRY_SOURCE;
/**
* Read from the database, given a srcFeature uniquename
* @param uniqueName
......@@ -56,7 +56,7 @@ public class ReadAndWriteEntry
* @throws IOException
*/
public static Entry readEntryFromDatabase(final String uniqueName,
DatabaseEntrySource entry_source)
DatabaseEntrySource entry_source)
throws OutOfRangeException, NoSequenceException, IOException
{
if(entry_source == null)
......@@ -71,23 +71,23 @@ public class ReadAndWriteEntry
if(!entry_source.setLocation(promptUser))
return null;
}
String url = (String)entry_source.getLocation();
int index = url.indexOf("?");
String userName = url.substring(index+1).trim();
if(userName.startsWith("user="))
userName = userName.substring(5);
final String srcFeatureId = getFeatureId(entry_source, uniqueName);
final InputStreamProgressListener stream_progress_listener =
new InputStreamProgressListener()
new InputStreamProgressListener()
{
public void progressMade(final InputStreamProgressEvent event)
public void progressMade(final InputStreamProgressEvent event)
{
final int char_count = event.getCharCount();
if(char_count != -1)
if(char_count != -1)
logger4j.debug("chars read so far: " + char_count);
}
public void progressMade(String progress)
......@@ -95,10 +95,10 @@ public class ReadAndWriteEntry
logger4j.debug(progress);
}
};
return entry_source.getEntry(srcFeatureId, userName,
return entry_source.getEntry(srcFeatureId, userName,
stream_progress_listener);
}
/**
* Read from the database, given a srcFeature uniquename
* @param uniqueName
......@@ -107,12 +107,12 @@ public class ReadAndWriteEntry
* @throws NoSequenceException
* @throws IOException
*/
public static Entry readEntryFromDatabase(final String uniqueName)
public static Entry readEntryFromDatabase(final String uniqueName)
throws OutOfRangeException, NoSequenceException, IOException
{
return readEntryFromDatabase(uniqueName, null);
}
/**
* Write entry to a file
* @param entry
......@@ -120,7 +120,7 @@ public class ReadAndWriteEntry
* @param flatten Flatten the gene model and combine the qualifiers if true.
* If false it will write all features and qualifiers out.
* @param ignore obsolete features if true
* @param force invalid qualifiers and any features with invalid keys will
* @param force invalid qualifiers and any features with invalid keys will
* be quietly thrown away when saving.
* @param include_diana_extensions false if writing EMBL submission format.
* @param destination_type Should be one of EMBL_FORMAT, GENBANK_FORMAT,
......@@ -137,7 +137,7 @@ public class ReadAndWriteEntry
final boolean force,
final boolean include_diana_extensions,
final int destination_type,
final JFrame parent)
final JFrame parent)
throws IOException, EntryInformationException
{
GeneUtils.lazyLoadAll(entry, parent);
......@@ -148,15 +148,15 @@ public class ReadAndWriteEntry
final FeatureVector features = entry.getAllFeatures();
for(int i=0; i<features.size(); i++)
addAllKeysQualifiers(artemis_entry_information, features.elementAt(i).getEmblFeature());
if(entry.getEMBLEntry() instanceof GFFDocumentEntry)
addQualifierToEntryInfo(artemis_entry_information,
addQualifierToEntryInfo(artemis_entry_information,
(String)PublicDBDocumentEntry.getDatabaseQualifiersToRemove()[0]);
}
PublicDBDocumentEntry.IGNORE_OBSOLETE_FEATURES = ignoreObsolete;
if(destination_type == DocumentEntryFactory.EMBL_FORMAT &&
(entry.getHeaderText() == null ||
(entry.getHeaderText() == null ||
entry.getHeaderText().equals("") ||
entry.getHeaderText().startsWith("#")))
{
......@@ -169,9 +169,9 @@ public class ReadAndWriteEntry
ind = name.lastIndexOf(".embl");
if(ind > -1)
name = name.substring(0, ind);
}
int length = entry.getBases().getLength();
String header = "ID "+name+"; SV ; ; ; ; ; "+length+" BP.";
if(entry.getFeatureCount() > 0)
......@@ -185,10 +185,10 @@ public class ReadAndWriteEntry
else
entry.saveStandardOnly(file, destination_type, force);
}
/**
* Add all keys and qualifiers for a given feature to the EntryInformation
* Add all keys and qualifiers for a given feature to the EntryInformation
* @param entry_information
* @param feature
*/
......@@ -196,18 +196,18 @@ public class ReadAndWriteEntry
final Feature feature)
{
Key new_key = feature.getKey();
new_key = PublicDBDocumentEntry.mapKeys(new_key);
boolean keyAdded = false;
if(!entry_information.isValidKey(new_key))
{
entry_information.addKey(new_key);
keyAdded = true;
}
final QualifierVector feature_qualifiers = feature.getQualifiers();
// check the qualifiers
for(int i = 0 ; i < feature_qualifiers.size() ; ++i)
{
......@@ -216,10 +216,10 @@ public class ReadAndWriteEntry
if(!entry_information.isValidQualifier(this_qualifier_name) ||
!entry_information.isValidQualifier(new_key, this_qualifier_name) ||
keyAdded)
keyAdded)
{
QualifierInfo qualifierInfo = entry_information.getQualifierInfo(this_qualifier_name);
if(qualifierInfo == null)
{
KeyVector keys = new KeyVector();
......@@ -234,14 +234,14 @@ public class ReadAndWriteEntry
e.printStackTrace();
}
}
if(qualifierInfo.getValidKeys() != null)
qualifierInfo.getValidKeys().add(new_key);
}
}
}
protected static void addQualifierToEntryInfo(final EntryInformation entry_information,
final String qualifier_name)
{
......@@ -257,7 +257,7 @@ public class ReadAndWriteEntry
e.printStackTrace();
}
}
/**
* Get feature id
* @param entry_source
......@@ -271,7 +271,7 @@ public class ReadAndWriteEntry
org.gmod.schema.sequence.Feature feature = doc.getFeatureByUniquename(srcUniqueName);
return Integer.toString(feature.getFeatureId());
}
/**
* return the ENTRY_SOURCE
*/
......@@ -287,17 +287,18 @@ public class ReadAndWriteEntry
String names[];
boolean flatten = true;
boolean ignoreObsolete = true;
if( (args != null && args.length == 1 && args[0].startsWith("-h")) ||
(args == null || args.length < 1))
{
System.out.println("-h\tshow help");
System.out.println("-f\t[y|n] flatten the gene model, default is y");
System.out.println("-flt\tspace separated list of qualifiers to ignore (GFF only)");
System.out.println("-i\t[y|n] ignore obsolete features, default is y");
System.out.println("-s\tspace separated list of sequences to read and write out");
System.out.println("-o\t[EMBL|GFF] output format, default is EMBL");
// note that read_only and noprompt -D parameters redundant now
System.out.println("Advanced parameters:");
System.out.println("-l\tlocation of EMBL mapping files (qualifier_mapping and key_mapping)");
......@@ -310,11 +311,11 @@ public class ReadAndWriteEntry
System.out.println("-p\tthe password for connecting to the Chado database");
System.out.println("-fp\t the file path (the folder you want to save the files in)");
System.out.println("-np\t[y|n] do not write out private qualifiers, default is y");
System.exit(0);
}
names = args;
int format = DocumentEntryFactory.EMBL_FORMAT;
boolean include_diana_extensions = true;
......@@ -322,9 +323,9 @@ public class ReadAndWriteEntry
boolean gzip = true;
boolean noprivates = true;
boolean removeProductForPseudo = false;
String filePath = "";
for(int i = 0; i < args.length; i++)
{
String key = args[i].toLowerCase();
......@@ -366,7 +367,7 @@ public class ReadAndWriteEntry
if(i + 1 < args.length && args[i + 1].toLowerCase().equals("y"))
LocalAndRemoteFileManager.domainLoad.setSelected(true);
}
// GSV :: added these command-line parameters
// note that read_only and noprompt -D parameters redundant now
else if (key.equals("-u"))
......@@ -391,10 +392,10 @@ public class ReadAndWriteEntry
filePath = args[i + 1];
}
}
// run this after all the system properties have been set
UI.initalise();
java.util.Vector<String> files = null;
for(int i = 0; i < args.length; i++)
{
......@@ -407,6 +408,18 @@ public class ReadAndWriteEntry
if(args[j].startsWith("-"))
break;
files.add(args[j]);
i++;
}
}
else if(args[i].toLowerCase().equals("-flt"))
{
for(int j = i + 1; j < args.length; j++)
{
if(args[j].startsWith("-")) {
break;
}
GFFStreamFeature.removeAttribute(args[j]);
i++;
}
}
else if(args[i].startsWith("-"))
......@@ -426,7 +439,7 @@ public class ReadAndWriteEntry
names = new String[files.size()];
files.toArray(names);
}
if(filePath.length() != 0)
{
filePath += "/";
......@@ -437,19 +450,19 @@ public class ReadAndWriteEntry
if(noprivates)
System.setProperty("noprivate", "true");
for(int i=0;i < names.length; i++)
{
System.out.println("read :: "+names[i]+" write :: "+names[i]+suffix);
logger4j.info("read :: "+names[i]+" write :: "+names[i]+suffix);
Entry entry = ReadAndWriteEntry.readEntryFromDatabase(names[i], ENTRY_SOURCE);
DocumentEntryFactory.REMOVE_PRODUCT_FROM_PSEUDOGENE = removeProductForPseudo;
try
{
ReadAndWriteEntry.writeDatabaseEntryToFile(
entry, new File(filePath + names[i]+suffix), flatten, ignoreObsolete,
entry, new File(filePath + names[i]+suffix), flatten, ignoreObsolete,
false, include_diana_extensions, format, null);
System.out.println("done");
logger4j.info("done");
......@@ -458,10 +471,10 @@ public class ReadAndWriteEntry
{
String label = "Destination format can't handle all keys/qualifiers - continue?";
boolean canContinue = UI.booleanUserInput(label, eie.getMessage());
if (canContinue)
{
ReadAndWriteEntry.writeDatabaseEntryToFile(entry, new File(filePath + names[i] + suffix),
ReadAndWriteEntry.writeDatabaseEntryToFile(entry, new File(filePath + names[i] + suffix),
flatten, ignoreObsolete, true,
include_diana_extensions, format, null);
System.out.println("done");
......@@ -477,5 +490,5 @@ public class ReadAndWriteEntry
}
System.exit(0);
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment