Skip to content

Commit 2f70646

Browse files
committed
Moved the filtering of markings for the D2KB and Entity Typing experiment types out of ExperimentTask and into an evaluator decorator (SearcherBasedNotMatchingMarkingFilter), so that the filtering is applied in the same way to experiment tasks and to experiment sub tasks.
1 parent eead8c5 commit 2f70646

File tree

7 files changed

+97
-104
lines changed

7 files changed

+97
-104
lines changed

src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,15 @@
3232
import org.aksw.gerbil.evaluate.impl.SpanMergingEvaluatorDecorator;
3333
import org.aksw.gerbil.evaluate.impl.SubTaskAverageCalculator;
3434
import org.aksw.gerbil.evaluate.impl.filter.MarkingFilteringEvaluatorDecorator;
35+
import org.aksw.gerbil.evaluate.impl.filter.SearcherBasedNotMatchingMarkingFilter;
3536
import org.aksw.gerbil.matching.Matching;
3637
import org.aksw.gerbil.matching.MatchingsSearcher;
3738
import org.aksw.gerbil.matching.MatchingsSearcherFactory;
3839
import org.aksw.gerbil.matching.impl.CompoundMatchingsCounter;
3940
import org.aksw.gerbil.matching.impl.HierarchicalMatchingsCounter;
4041
import org.aksw.gerbil.matching.impl.MatchingsCounterImpl;
4142
import org.aksw.gerbil.matching.impl.MeaningMatchingsSearcher;
43+
import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher;
4244
import org.aksw.gerbil.semantic.kb.ExactWhiteListBasedUriKBClassifier;
4345
import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier;
4446
import org.aksw.gerbil.semantic.kb.UriKBClassifier;
@@ -142,20 +144,25 @@ protected Evaluator createEvaluator(ExperimentType type, ExperimentTaskConfigura
142144
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
143145
}
144146
case D2KB: {
145-
return new ConfidenceScoreEvaluatorDecorator<NamedEntity>(
146-
new InKBClassBasedFMeasureCalculator<NamedEntity>(new CompoundMatchingsCounter<NamedEntity>(
147-
(MatchingsSearcher<NamedEntity>) MatchingsSearcherFactory
148-
.createSpanMatchingsSearcher(configuration.matching),
149-
new MeaningMatchingsSearcher<NamedEntity>(globalClassifier)), globalClassifier),
150-
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
147+
return new SearcherBasedNotMatchingMarkingFilter<NamedEntity>(
148+
new StrongSpanMatchingsSearcher<NamedEntity>(),
149+
new ConfidenceScoreEvaluatorDecorator<NamedEntity>(
150+
new InKBClassBasedFMeasureCalculator<NamedEntity>(
151+
new CompoundMatchingsCounter<NamedEntity>(
152+
(MatchingsSearcher<NamedEntity>) MatchingsSearcherFactory
153+
.createSpanMatchingsSearcher(configuration.matching),
154+
new MeaningMatchingsSearcher<NamedEntity>(globalClassifier)),
155+
globalClassifier),
156+
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()));
151157
}
152158
case ETyping: {
153-
return new ConfidenceScoreEvaluatorDecorator<TypedSpan>(
154-
new HierarchicalFMeasureCalculator<TypedSpan>(new HierarchicalMatchingsCounter<TypedSpan>(
155-
(MatchingsSearcher<TypedSpan>) MatchingsSearcherFactory
156-
.createSpanMatchingsSearcher(configuration.matching),
157-
globalClassifier, inferencer)),
158-
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
159+
return new SearcherBasedNotMatchingMarkingFilter<TypedSpan>(new StrongSpanMatchingsSearcher<TypedSpan>(),
160+
new ConfidenceScoreEvaluatorDecorator<TypedSpan>(
161+
new HierarchicalFMeasureCalculator<TypedSpan>(new HierarchicalMatchingsCounter<TypedSpan>(
162+
(MatchingsSearcher<TypedSpan>) MatchingsSearcherFactory
163+
.createSpanMatchingsSearcher(configuration.matching),
164+
globalClassifier, inferencer)),
165+
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()));
159166
}
160167
case OKE_Task1: {
161168
ExperimentTaskConfiguration subTaskConfig;
src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package org.aksw.gerbil.evaluate.impl.filter;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
import org.aksw.gerbil.evaluate.AbstractEvaluatorDecorator;
7+
import org.aksw.gerbil.evaluate.EvaluationResultContainer;
8+
import org.aksw.gerbil.evaluate.Evaluator;
9+
import org.aksw.gerbil.matching.MatchingsSearcher;
10+
import org.aksw.gerbil.transfer.nif.Marking;
11+
12+
import com.carrotsearch.hppc.BitSet;
13+
14+
/**
15+
* This evaluator decorator removes every marking from the given list that does
16+
* not match the given gold standard list based on a given
17+
* {@link MatchingsSearcher} instance.
18+
*
19+
* @author Michael R&ouml;der (roeder@informatik.uni-leipzig.de)
20+
*
21+
*/
22+
public class SearcherBasedNotMatchingMarkingFilter<T extends Marking> extends AbstractEvaluatorDecorator<T> {
23+
24+
protected MatchingsSearcher<T> searcher;
25+
26+
public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher<T> searcher, Evaluator<T> evaluator) {
27+
super(evaluator);
28+
this.searcher = searcher;
29+
}
30+
31+
protected List<List<T>> filterListOfMarkings(List<List<T>> markings, List<List<T>> goldStandard) {
32+
List<List<T>> filteredMarkings = new ArrayList<List<T>>(markings.size());
33+
for (int i = 0; i < markings.size(); ++i) {
34+
filteredMarkings.add(filterMarkings(markings.get(i), goldStandard.get(i)));
35+
}
36+
return filteredMarkings;
37+
}
38+
39+
protected List<T> filterMarkings(List<T> markings, List<T> goldStandard) {
40+
BitSet matchingElements;
41+
BitSet alreadyUsedResults = new BitSet(goldStandard.size());
42+
List<T> filteredMarkings = new ArrayList<T>(markings.size());
43+
for (T marking : markings) {
44+
matchingElements = searcher.findMatchings(marking, goldStandard, alreadyUsedResults);
45+
if (!matchingElements.isEmpty()) {
46+
filteredMarkings.add(marking);
47+
alreadyUsedResults.set(matchingElements.nextSetBit(0));
48+
}
49+
}
50+
return filteredMarkings;
51+
}
52+
53+
@Override
54+
public void evaluate(List<List<T>> annotatorResults, List<List<T>> goldStandard,
55+
EvaluationResultContainer results) {
56+
evaluator.evaluate(filterListOfMarkings(annotatorResults, goldStandard), goldStandard, results);
57+
}
58+
59+
}

src/main/java/org/aksw/gerbil/execute/ExperimentTask.java

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@
4545
import org.aksw.gerbil.evaluate.SubTaskResult;
4646
import org.aksw.gerbil.evaluate.impl.FMeasureCalculator;
4747
import org.aksw.gerbil.exceptions.GerbilException;
48-
import org.aksw.gerbil.matching.filter.SearcherBasedNotMatchingMarkingFilter;
49-
import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher;
5048
import org.aksw.gerbil.semantic.sameas.DatasetBasedSameAsRetriever;
5149
import org.aksw.gerbil.semantic.sameas.MultipleSameAsRetriever;
5250
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
@@ -307,19 +305,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator,
307305
List<List<MeaningSpan>> results = new ArrayList<List<MeaningSpan>>(dataset.size());
308306
List<List<MeaningSpan>> goldStandard = new ArrayList<List<MeaningSpan>>(dataset.size());
309307
D2KBAnnotator linker = ((D2KBAnnotator) annotator);
310-
// For D2KB we have to filter the results to get those results
311-
// that are matching the positions
312-
SearcherBasedNotMatchingMarkingFilter<MeaningSpan> filter = new SearcherBasedNotMatchingMarkingFilter<MeaningSpan>(
313-
new StrongSpanMatchingsSearcher<MeaningSpan>());
314-
List<MeaningSpan> documentGS;
315308

316309
for (Document document : dataset.getInstances()) {
317-
documentGS = document.getMarkings(MeaningSpan.class);
318310
// reduce the document to a text and a list of Spans
319-
results.add(filter.filterMarkings(
320-
linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)),
321-
documentGS));
322-
goldStandard.add(documentGS);
311+
results.add(linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)));
312+
goldStandard.add(document.getMarkings(MeaningSpan.class));
323313
taskState.increaseExperimentStepCount();
324314
}
325315
if (annotatorOutputWriter != null) {
@@ -413,20 +403,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator,
413403
List<List<TypedSpan>> results = new ArrayList<List<TypedSpan>>(dataset.size());
414404
List<List<TypedSpan>> goldStandard = new ArrayList<List<TypedSpan>>(dataset.size());
415405
EntityTyper typer = ((EntityTyper) annotator);
416-
// For ETyping we have to filter the results to get those
417-
// results
418-
// that are matching the positions
419-
SearcherBasedNotMatchingMarkingFilter<TypedSpan> filter = new SearcherBasedNotMatchingMarkingFilter<TypedSpan>(
420-
new StrongSpanMatchingsSearcher<TypedSpan>());
421-
List<TypedSpan> documentGS;
422406

423407
for (Document document : dataset.getInstances()) {
424-
documentGS = document.getMarkings(TypedSpan.class);
425408
// reduce the document to a text and a list of Spans
426-
results.add(filter.filterMarkings(
427-
typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)),
428-
documentGS));
429-
goldStandard.add(documentGS);
409+
results.add(typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)));
410+
goldStandard.add(document.getMarkings(TypedSpan.class));
430411
taskState.increaseExperimentStepCount();
431412
}
432413
if (annotatorOutputWriter != null) {

src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java

Lines changed: 0 additions & 24 deletions
This file was deleted.

src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java

Lines changed: 0 additions & 42 deletions
This file was deleted.

src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,20 @@
2222

2323
public interface MarkingFilter<T extends Marking> {
2424

25+
/**
26+
* Returns true if the marking is good and does not have to be filtered out.
27+
*
28+
* @param marking
29+
* @return
30+
*/
2531
public boolean isMarkingGood(T marking);
2632

33+
/**
34+
* Returns a filtered list based on the given list.
35+
*
36+
* @param markings
37+
* @return
38+
*/
2739
public List<T> filterList(List<T> markings);
2840

2941
public List<List<T>> filterListOfLists(List<List<T>> markings);

src/test/java/org/aksw/gerbil/SingleRunTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ public class SingleRunTest implements TaskObserver {
4343

4444
private static final Logger LOGGER = LoggerFactory.getLogger(SingleRunTest.class);
4545

46-
private static final String ANNOTATOR_NAME = "FOX";
47-
private static final String DATASET_NAME = "N3-Reuters-128";
46+
private static final String ANNOTATOR_NAME = "TagMe 2";
47+
private static final String DATASET_NAME = "ACE2004";
4848
private static final ExperimentType EXPERIMENT_TYPE = ExperimentType.D2KB;
49-
private static final Matching MATCHING = Matching.WEAK_ANNOTATION_MATCH;
49+
private static final Matching MATCHING = Matching.STRONG_ENTITY_MATCH;
5050

5151
public static void main(String[] args) throws Exception {
5252
SingleRunTest test = new SingleRunTest();

0 commit comments

Comments
 (0)