Java - Text Segmentation using GATE
I am trying to write my own program in Java to segment a set of text files into sentences. I searched the available NLP tools and found GATE, but I couldn't work out how to use its pipeline to do only the segmentation.
- Any ideas on how to limit the functionality of the pipeline?
- Any pieces of code that can help me write my program?
Adapted from a different answer:
import gate.*; import gate.creole.serialanalysercontroller; import java.io.file; import java.util.*; public class segmenter { public static void main(string[] args) throws exception { gate.setgatehome(new file("c:\\program files\\gate_developer_8.0")); gate.init(); regitergateplugin("annie"); serialanalysercontroller pipeline = (serialanalysercontroller) factory.createresource("gate.creole.serialanalysercontroller"); pipeline.add((processingresource) factory.createresource("gate.creole.tokeniser.defaulttokeniser")); pipeline.add((processingresource) factory.createresource("gate.creole.splitter.sentencesplitter")); corpus corpus = factory.newcorpus("segmentercorpus"); document document = factory.newdocument("text segmented."); corpus.add(document); pipeline.setcorpus(corpus); pipeline.execute(); annotationset defaultas = document.getannotations(); annotationset sentences = defaultas.get("sentence"); (annotation sentence : sentences) { system.err.println(utils.stringfor(document, sentence)); } //clean factory.deleteresource(document); factory.deleteresource(corpus); (processingresource pr : pipeline.getprs()) { factory.deleteresource(pr); } factory.deleteresource(pipeline); } public static void regitergateplugin(string name) throws exception { gate.getcreoleregister().registerdirectories(new file(gate.getpluginshome(), name).touri().tourl()); } }
Comments
Post a Comment