// CorpusDocumentAnnotationSubannotationsSimilarity.java

package io.outofprintmagazine.web.rest.api;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.servlet.ServletConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.Context;

import org.apache.commons.text.similarity.CosineSimilarity;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;

import io.outofprintmagazine.web.servlets.AbstractOOPCacheableServlet;

/**
 * JAX-RS resource that scores how similar one document is to a target corpus
 * for a single annotation.
 *
 * <p>The score is the cosine similarity between two vectors keyed by
 * subannotation name: the document's normalized scores and the corpus's median
 * normalized scores. Only subannotations present in both the document and the
 * corpus aggregates contribute to the vectors.
 */
@Path("/CorpusDocumentAnnotationSubannotationsSimilarity")
public class CorpusDocumentAnnotationSubannotationsSimilarity extends AbstractOOPCacheableServlet {

	private static final long serialVersionUID = 1L;

	/** Name of the JSON field that holds the per-subannotation entries of an annotation. */
	private static final String AGGREGATED_SCORES = "aggregatedScores";

	/**
	 * Factor applied to a normalized score before it is truncated to an {@code int};
	 * {@link CosineSimilarity} only accepts integer-valued vectors.
	 */
	private static final BigDecimal SCORE_SCALE = BigDecimal.valueOf(10000);

	@Context
	private ServletConfig servletConfig;
	@Context
	private ServletContext servletContext;
	@Context
	private HttpServletRequest httpServletRequest;
	@Context
	private HttpServletResponse httpServletResponse;

	/**
	 * Handles {@code GET} requests and returns the similarity as JSON.
	 *
	 * @param targetCorpus corpus whose aggregates the document is compared against
	 * @param corpus corpus that contains the document
	 * @param document document whose aggregates are compared
	 * @param annotation name of the annotation whose subannotations are compared
	 * @return a JSON object of the form {@code {"value": <similarity>}}
	 * @throws JsonProcessingException if the response cannot be serialized
	 * @throws IOException declared for the storage and serialization calls
	 * @throws ServletException if the lazy {@code init} call fails
	 */
	@GET
	@Produces("application/json; charset=utf-8")
	public String doGet(
			@QueryParam("TargetCorpus") String targetCorpus,
			@QueryParam("Corpus") String corpus,
			@QueryParam("Document") String document,
			@QueryParam("Annotation") String annotation
			) throws JsonProcessingException, IOException, ServletException
	{
		// Initialize on first use when no storage has been set up yet.
		if (getStorage() == null) {
			super.init(servletConfig);
		}

		ObjectNode corpusAggregates = (ObjectNode) getStorage().getCorpusAggregatesJson(targetCorpus);
		ObjectNode documentAggregates = (ObjectNode) getStorage().getCorpusDocumentAggregatesJson(corpus, document);

		ObjectNode retval = getMapper().createObjectNode();
		retval.put("value", calculateSimilarity(corpusAggregates, documentAggregates, annotation));
		return getMapper().writeValueAsString(retval);
	}

	/**
	 * Computes the cosine similarity between the document's and the corpus's
	 * subannotation scores for one annotation.
	 *
	 * <p>Empty vectors are compared (instead of throwing) when the annotation is
	 * {@code null}, is missing or not a JSON object in either aggregate, or when the
	 * document annotation has no {@code aggregatedScores} array. Subannotations that
	 * the corpus does not know are skipped.
	 *
	 * @param corpusAggregates aggregates of the target corpus, keyed by annotation name
	 * @param documentAggregates aggregates of the document, keyed by annotation name
	 * @param annotation name of the annotation to compare
	 * @return the cosine similarity of the two score vectors
	 * @throws NumberFormatException if a normalized score is not a valid decimal number
	 */
	protected Double calculateSimilarity(ObjectNode corpusAggregates, ObjectNode documentAggregates, String annotation) {
		Map<CharSequence, Integer> corpusScores = new HashMap<CharSequence, Integer>();
		Map<CharSequence, Integer> documentScores = new HashMap<CharSequence, Integer>();

		// Field names are unique within a JSON object, so a direct lookup replaces the scan over all fields.
		JsonNode documentAnnotation = (annotation == null) ? null : documentAggregates.get(annotation);
		JsonNode corpusAnnotation = (annotation == null) ? null : corpusAggregates.get(annotation);

		boolean comparable = documentAnnotation != null && documentAnnotation.isObject()
				&& corpusAnnotation != null && corpusAnnotation.isObject();
		if (comparable) {
			JsonNode aggregatedScores = documentAnnotation.get(AGGREGATED_SCORES);
			// Mirror the corpus-side lookup: an annotation without subannotation entries contributes nothing.
			if (aggregatedScores != null && aggregatedScores.isArray()) {
				for (JsonNode documentSubscore : aggregatedScores) {
					String subscoreName = documentSubscore.get("name").asText();
					ObjectNode corpusSubscore = getSubscoreFromCorpusAggregate(corpusAggregates, annotation, subscoreName);
					if (corpusSubscore != null) {
						int documentValue = toScaledInt(documentSubscore.get("score").get("normalized"));
						int corpusValue = toScaledInt(corpusSubscore.get("score").get("normalized").get("median"));
						corpusScores.put(subscoreName, corpusValue);
						documentScores.put(subscoreName, documentValue);
					}
				}
			}
		}
		return new CosineSimilarity().cosineSimilarity(corpusScores, documentScores);
	}

	/**
	 * Finds the corpus-level entry for one subannotation of an annotation.
	 *
	 * @param corpusAggregates aggregates of the corpus, keyed by annotation name
	 * @param score name of the annotation
	 * @param subscore name of the subannotation to look up
	 * @return the matching entry of the annotation's {@code aggregatedScores} array, or
	 *         {@code null} if the annotation, the array, or a matching entry is missing
	 */
	protected ObjectNode getSubscoreFromCorpusAggregate(ObjectNode corpusAggregates, String score, String subscore) {
		JsonNode corpusAnnotation = corpusAggregates.get(score);
		if (corpusAnnotation == null) {
			return null;
		}
		JsonNode aggregatedScores = corpusAnnotation.get(AGGREGATED_SCORES);
		if (aggregatedScores == null || !aggregatedScores.isArray()) {
			return null;
		}
		for (JsonNode aggregatedScore : aggregatedScores) {
			JsonNode name = aggregatedScore.get("name");
			// Entries without a name can never match; skip them instead of failing.
			if (name != null && name.asText().equals(subscore)) {
				return (ObjectNode) aggregatedScore;
			}
		}
		return null;
	}

	/**
	 * Converts a decimal score node to the integer representation used in the vectors.
	 * The fractional part left after scaling is discarded by {@link BigDecimal#intValue()}.
	 */
	private static int toScaledInt(JsonNode scoreNode) {
		return new BigDecimal(scoreNode.asText()).multiply(SCORE_SCALE).intValue();
	}

}