Wednesday, August 17, 2011

A Spring Web Interface for TGNI

Having successfully run some unit tests for various use cases I wanted to cover (see previous post), it was time to build a web interface for TGNI. The web interface I envisioned would allow someone to check out the concept mapping functionality from a web browser, as well as provide an (application-centric) means of navigating the graph database.

As you would expect, there is not much to building a web interface once you have your JUnit tests working, especially if you use something like Spring. Of course, web interface development is slower, because I am not as fluent in JSTL/HTML/CSS as I am in Java, so I end up having to RTFM more often. But when you want to have other people look at and use your software, a web interface is an essential (and often the cheapest) tool to provide.

There are some things I ended up changing in order to make the web interface work the way I wanted it to. In this post, I will list these changes and provide the code and screenshots for the interesting components.

Overview

The central class in the integration is a Spring multi-action controller (written with Spring 3 annotations). It wraps the NodeService to provide the navigation interface, and the aggregate UIMA Analysis Engine (AE) to do concept mapping. Here is the code for the ConceptMappingController.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// Source: src/main/java/com/mycompany/tgni/spring/ConceptMappingController.java
package com.mycompany.tgni.spring;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.PostConstruct;

import org.apache.commons.collections15.Bag;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.jcas.JCas;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.servlet.ModelAndView;

import com.mycompany.tgni.beans.TConcept;
import com.mycompany.tgni.beans.TRelTypes;
import com.mycompany.tgni.beans.TRelation;
import com.mycompany.tgni.neo4j.NodeService;
import com.mycompany.tgni.neo4j.NodeServiceFactory;
import com.mycompany.tgni.uima.annotators.concept.ConceptAnnotation;
import com.mycompany.tgni.uima.utils.UimaUtils;

/**
 * Controller to expose TGNI functionality via a web application.
 *
 * <p>Three request handlers are exposed:
 * <ul>
 *   <li>{@code /find.html} - look up concepts by name through the
 *       NodeService;</li>
 *   <li>{@code /map.html} - run the concept mapping Analysis Engine over
 *       a string, a block of text, or the content behind a URL;</li>
 *   <li>{@code /show.html} - show one concept and its related concepts
 *       by OID.</li>
 * </ul>
 * A query that consists of exactly {@link #OID_LENGTH} digits is treated
 * as an OID by find() and map() and is delegated to show().
 */
@Controller
public class ConceptMappingController {

  /** Number of digits in a query string that is treated as an OID. */
  private static final int OID_LENGTH = 7;

  private String conceptMappingAEDescriptor;
  
  private AnalysisEngine conceptMappingAE;
  private NodeService nodeService;
  
  /**
   * Sets the descriptor handed to UimaUtils.getAE() in {@link #init()}.
   * @param conceptMappingAEDescriptor the descriptor for the concept
   *        mapping Analysis Engine.
   */
  public void setConceptMappingAEDescriptor(
      String conceptMappingAEDescriptor) {
    this.conceptMappingAEDescriptor = conceptMappingAEDescriptor;
  }
  
  /**
   * Builds the Analysis Engine from the configured descriptor and picks
   * up the shared NodeService from NodeServiceFactory.
   * @throws Exception if thrown while obtaining the Analysis Engine.
   */
  @PostConstruct
  public void init() throws Exception {
    conceptMappingAE = UimaUtils.getAE(conceptMappingAEDescriptor, null);
    nodeService = NodeServiceFactory.getInstance();
  }

  /**
   * Finds concepts matching the query string. An empty query renders the
   * bare "find" view; a query that looks like an OID is delegated to
   * {@link #show(int)}.
   * @param q the query string, may be null or empty.
   * @return the model and view; on failure the model carries an "error"
   *         attribute instead of "concepts".
   */
  @RequestMapping(value="/find.html")
  public ModelAndView find(
      @RequestParam(value="q", required=false) String q) {
    ModelAndView mav = new ModelAndView();
    mav.addObject("operation", "find");
    if (StringUtils.isEmpty(q)) {
      mav.setViewName("find");
      return mav;
    }
    try {
      if (isOid(q)) {
        return show(Integer.parseInt(q));
      } else {
        long startTs = System.currentTimeMillis();
        List<TConcept> concepts = nodeService.findConcepts(q);
        mav.addObject("concepts", concepts);
        long endTs = System.currentTimeMillis();
        mav.addObject("q", q);
        mav.addObject("elapsed", Long.valueOf(endTs - startTs));
      }
    } catch (Exception e) {
      mav.addObject("error", describe(e));
    }
    mav.setViewName("find");
    return mav;
  }
  
  /**
   * Maps the supplied input to concepts using the concept mapping
   * Analysis Engine. The first non-empty value of q1, q2, q3 (in that
   * order) is used as the input. An input that looks like an OID is
   * delegated to {@link #show(int)}; an input starting with "http://"
   * is fetched when the input format is UimaUtils.MIMETYPE_HTML.
   * @param q1 first candidate input, may be null or empty.
   * @param q2 second candidate input, may be null or empty.
   * @param q3 third candidate input, may be null or empty.
   * @param inputFormat the input format passed to UimaUtils.runAE().
   * @param outputFormat one of "html", "xml", "json" or "jsonp".
   * @return the model and view; on failure, or when no annotations are
   *         found, the model carries an "error" attribute.
   */
  @RequestMapping(value="/map.html")
  public ModelAndView map(
      @RequestParam(value="q1", required=false) String q1,
      @RequestParam(value="q2", required=false) String q2,
      @RequestParam(value="q3", required=false) String q3,
      @RequestParam(value="if", required=false, 
        defaultValue=UimaUtils.MIMETYPE_STRING) String inputFormat,
      @RequestParam(value="of", required=true, 
        defaultValue="html") String outputFormat) {

    ModelAndView mav = new ModelAndView();
    mav.addObject("operation", "map");
    // validate parameters: at least one of q1, q2 or q3 must be
    // supplied, otherwise show the input form
    mav.addObject("q1", StringUtils.isEmpty(q1) ? "" : q1);
    mav.addObject("q2", StringUtils.isEmpty(q2) ? "" : q2);
    mav.addObject("q3", StringUtils.isEmpty(q3) ? "" : q3);
    String q = StringUtils.isNotEmpty(q1) ? q1 : 
      StringUtils.isNotEmpty(q2) ? q2 : 
      StringUtils.isNotEmpty(q3) ? q3 : null;
    if (StringUtils.isEmpty(q)) {
      setViewName(mav, outputFormat);
      return mav;
    }
    try {
      if (isOid(q)) {
        return show(Integer.parseInt(q));
      } else {
        // show list of concepts
        String text = q;
        if ((q.startsWith("http://") && 
            UimaUtils.MIMETYPE_HTML.equals(inputFormat))) {
          // NOTE(review): this fetches a caller-supplied URL from the
          // server side; consider restricting the hosts that may be
          // requested if this is exposed to untrusted users.
          text = fetchText(q);
        }
        List<ConceptAnnotation> annotations = 
          new ArrayList<ConceptAnnotation>();
        long startTs = System.currentTimeMillis();
        JCas jcas = UimaUtils.runAE(conceptMappingAE, text, inputFormat);
        FSIndex fsindex = jcas.getAnnotationIndex(ConceptAnnotation.type);
        for (Iterator<ConceptAnnotation> it = fsindex.iterator(); it.hasNext(); ) {
          ConceptAnnotation annotation = it.next();
          annotations.add(annotation);
        }
        if (annotations.size() == 0) {
          mav.addObject("error", "No concepts found");
        } else {
          mav.addObject("text", text);
          mav.addObject("annotations", annotations);
          long endTs = System.currentTimeMillis();
          mav.addObject("elapsed", Long.valueOf(endTs - startTs));
        }
        setViewName(mav, outputFormat);
      }
    } catch (Exception e) {
      mav.addObject("error", describe(e));
      setViewName(mav, outputFormat);
    }
    return mav;
  }
  
  /**
   * Shows a single concept together with its related concepts, grouped
   * by relation type name. The model also receives an "oidmap" from each
   * related OID to the related concept's pname.
   * @param q the OID of the concept to show.
   * @return the model and view for the "show" view; on failure the
   *         model carries an "error" attribute.
   */
  @RequestMapping(value="/show.html", method=RequestMethod.GET)
  public ModelAndView show(
      @RequestParam(value="q", required=true) int q) {
    ModelAndView mav = new ModelAndView();
    mav.addObject("operation", "show");
    try {
      long startTs = System.currentTimeMillis();
      // show all details about the concept
      TConcept concept = nodeService.getConcept(q);
      Bag<TRelTypes> relCounts = nodeService.getRelationCounts(concept);
      Map<String,List<TRelation>> relmap = 
        new HashMap<String,List<TRelation>>();
      Map<Integer,String> oidmap = new HashMap<Integer,String>();
      for (TRelTypes reltype : relCounts.uniqueSet()) {
        List<TRelation> rels = nodeService.getRelatedConcepts(
          concept, reltype);
        for (TRelation rel : rels) {
          TConcept toConcept = nodeService.getConcept(rel.getToOid());
          oidmap.put(rel.getToOid(), toConcept.getPname());
        }
        relmap.put(reltype.name(), rels);
      }
      mav.addObject("concept", concept);
      mav.addObject("relmap", relmap);
      mav.addObject("oidmap", oidmap);
      long endTs = System.currentTimeMillis();
      mav.addObject("elapsed", Long.valueOf(endTs - startTs));
    } catch (Exception e) {
      mav.addObject("error", describe(e));
    }
    mav.setViewName("show");
    return mav;
  }
  
  /**
   * Selects the view for map() from the requested output format. The
   * "jsonp" format reuses the "map-json" view and additionally puts a
   * "mapPrefix" attribute into the model. Any other value leaves the
   * view name unset.
   */
  private void setViewName(ModelAndView mav, String format) {
    if ("html".equals(format)) {
      mav.setViewName("map");
    } else if ("xml".equals(format)) {
      mav.setViewName("map-xml");
    } else if ("json".equals(format)) {
      mav.setViewName("map-json");
    } else if ("jsonp".equals(format)) {
      mav.addObject("mapPrefix", "map");
      mav.setViewName("map-json");
    }
  }

  /**
   * Tells whether the query should be treated as an OID, i.e. whether it
   * consists of exactly OID_LENGTH digit characters. A digits-only check
   * is used (rather than NumberUtils.isNumber) so that inputs such as
   * "123.456" or "0x12345" are handled as ordinary queries instead of
   * failing integer conversion.
   */
  private static boolean isOid(String q) {
    return StringUtils.length(q) == OID_LENGTH && NumberUtils.isDigits(q);
  }

  /**
   * Reads the content behind the URL line by line, terminating each line
   * with "\n". The reader is closed even when reading fails part way.
   */
  private static String fetchText(String address)
      throws java.io.IOException {
    BufferedReader br = new BufferedReader(
      new InputStreamReader(new URL(address).openStream()));
    try {
      StringBuilder tbuf = new StringBuilder();
      String line = null;
      while ((line = br.readLine()) != null) {
        tbuf.append(line).append("\n");
      }
      return tbuf.toString();
    } finally {
      br.close();
    }
  }

  /**
   * Returns a displayable description of the exception: its message, or
   * its class name when it has no message (so that the "error" model
   * attribute is never null or empty).
   */
  private static String describe(Exception e) {
    String message = e.getMessage();
    return StringUtils.isNotEmpty(message) ? 
      message : e.getClass().getName();
  }
}

As you can see, it exposes the map(), find() and show() methods. The map() method is the one that exposes the Concept Mapping AE via the web. It takes its input as either a single short string, an OID, a block of plain text or HTML copy-pasted into its textbox, or a URL from which it will pull in the content. It analyzes the string or text and provides a list of concepts that it found. The map() method can also output results in XML, JSON or JSON-P.

The find() method allows you to quickly find concepts by name. Its filtering criteria are not as strict as those used by the Concept Annotator; you can think of it as a basic search interface into the Lucene index, allowing you to quickly find what concepts exist in your database that match your search string.

The show() method, on the other hand, can be thought of as an interface into the graph database, allowing you to look up a node and all its details by OID, including references to nodes immediately adjacent to it. Of course, all of these are interlinked via JSP references. Here are some screenshots to make things clear.

Some concepts mapped off a block of plain text copy-pasted into the text box.
The same output as above but in XML, for a remote client to consume. Other formats supported are JSON and JSON-P.
Clicking on one of these concepts leads to the Node view screen, with details about the concept and its immediate neighbors.
A list of concept nodes that match "Heart Attack".

I have been curious about how to build tabbed navigation, since it looks nicer than a list of links across the top of the page, so I built one based on the advice provided here. The image for the logo was snagged from this blog post.

Configuration and Data needs to be centrally located

The Concept Mapping AE in the controller described above is an aggregate AE, composed of a Boilerplate removal AE, a Sentence Annotating AE, and the Concept Annotator AE. Each of these primitive AEs, as well as the aggregate AE, is instantiated by the UIMA Framework using its XML descriptor files. In addition, some of these AEs have external references to their own properties files.

Since I wasn't very confident that I would be able to load everything from the classpath, and I needed to move the data (the Lucene index, Neo4j database and the EHCache cache files) to a central location anyway, I decided to do something like Solr does with its SOLR_HOME: move everything to a central location, and have everything be accessed as files.

To do this, I replaced the absolute path names in the various XML and properties files with a @tgni.home@ followed by a relative path. I then added an Ant task (using the Ant replace task) that allows me to "push" the latest configuration changes to $TGNI_HOME/conf. Here is the XML snippet for the target.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
  <!-- Fallback value for tgni.home, used when the property has not
       already been set -->
  <property name="tgni.home" value="/prod/web/data/tgni"/>
  <!-- Copies src/main/resources into ${tgni.home}/conf, then expands the
       @tgni.home@ placeholder in the copied .properties and .xml files -->
  <target name="copyconf" description="Create data/config instance">
    <echo message="Copying config and data files to ${tgni.home}"/>
    <mkdir dir="${tgni.home}/conf"/>
    <copy todir="${tgni.home}/conf">
      <fileset dir="src/main/resources"/>
    </copy>
    <replace dir="${tgni.home}/conf"
             includes="**/*.properties,**/*.xml"
             token="@tgni.home@"
             value="${tgni.home}"/>
  </target>

In the code, I added an extra method to UimaUtils that provided the value for the TGNI_HOME environment variable to the code. If no TGNI_HOME is defined in the environment, then the default value (hardcoded) is used.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
  /**
   * Returns the TGNI home directory, resolving it on first use from the
   * TGNI_HOME environment variable and caching the result. When the
   * variable is not set, or is set to a blank value, DEFAULT_TGNI_HOME
   * is used instead.
   *
   * <p>This method is not synchronized.
   *
   * @return the resolved TGNI home directory.
   */
  public static String getTgniHome() {
    if (tgniHome == null) {
      String fromEnv = System.getenv("TGNI_HOME");
      // An exported-but-blank variable is treated like an unset one;
      // otherwise paths built from it would be resolved against the
      // filesystem root (e.g. "/conf/...").
      if (fromEnv != null && fromEnv.trim().length() > 0) {
        tgniHome = fromEnv;
      } else {
        tgniHome = DEFAULT_TGNI_HOME;
      }
    }
    return tgniHome;
  }

Any files accessed by the UIMA framework through the XML descriptors are already expanded to the absolute path. Files accessed from the code prefix supplied paths (relative to TGNI_HOME) with the value returned by the method above. The approach resulted in very minimal changes to the code.

The data is stored under $TGNI_HOME/data with separate subdirectories for the Neo4j database, the Lucene index, and the EHCache cache files.

NodeService needs to be a singleton

The other major change to the code was that Neo4j has a limitation (or feature) that a JVM can have only a single reference to the Neo4j GraphDatabaseService. In my controller, the Concept Mapper AE needs one reference (used by the Concept Annotator) and the controller itself needs another reference to support the navigation interface. So I was getting errors advising me of this every time the webapp started.

The solution was to have all the components reuse the same instance of the NodeService (the combo service interface that hides a reference to a Neo4j database and a Lucene index behind it). For this, I needed a factory that would instantiate a single instance of a NodeService on (Spring) container startup and destroy it on shutdown. Here is the code for this factory, named (appropriately enough) NodeServiceFactory.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// Source: src/main/java/com/mycompany/tgni/neo4j/NodeServiceFactory.java
package com.mycompany.tgni.neo4j;

import java.io.File;
import java.io.FileInputStream;
import java.util.Properties;
import java.util.ResourceBundle;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.neo4j.kernel.EmbeddedGraphDatabase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mycompany.tgni.uima.utils.UimaUtils;

/**
 * Factory for NodeService to ensure that a single instance
 * of NodeService is created (Neo4j allows a single reference
 * to it per JVM). Clients requiring a NodeService reference
 * call NodeServiceFactory.getInstance().
 *
 * <p>The NodeService is configured from the file
 * conf/nodeservice.properties under the directory returned by
 * UimaUtils.getTgniHome(), and is built once, when this class is
 * initialized. Initialization failures are logged rather than thrown,
 * so {@link #getInstance()} can return null (properties could not be
 * loaded) or a NodeService whose init() did not complete.
 */
public class NodeServiceFactory {

  private static final Logger logger = LoggerFactory.getLogger(
    NodeServiceFactory.class);
  
  // Declared before 'factory' (and deliberately without an initializer)
  // because constructing the factory assigns this field; a later static
  // initializer here would overwrite that assignment.
  private static NodeService instance;
  
  // Never read; creating it triggers init() during class initialization.
  private static NodeServiceFactory factory = new NodeServiceFactory();
  
  private NodeServiceFactory() {
    init();
  }
  
  /**
   * Loads nodeservice.properties and uses it to configure and
   * initialize the shared NodeService. Any failure is logged.
   */
  private void init() {
    try {
      File propsFile = new File(
        StringUtils.join(new String[] {
        UimaUtils.getTgniHome(),
        "conf",
        "nodeservice.properties"}, File.separator));
      Properties props = new Properties();
      FileInputStream propsIn = new FileInputStream(propsFile);
      try {
        props.load(propsIn);
      } finally {
        // release the file handle whether or not the load succeeded
        propsIn.close();
      }
      instance = new NodeService();
      instance.setGraphDir(props.getProperty("graphDir"));
      instance.setIndexDir(props.getProperty("indexDir"));
      instance.setStopwordsFile(props.getProperty("stopwordsFile"));
      instance.setTaxonomyMappingAEDescriptor(
        props.getProperty("taxonomyMappingAEDescriptor"));
      instance.setCacheDescriptor(
        props.getProperty("cacheDescriptor"));
      instance.init();
    } catch (Exception e) {
      logger.error("Can't initialize NodeService", e);
    }
  }
  
  /**
   * Returns the shared NodeService.
   * @return the shared instance; null if the properties file could not
   *         be loaded or after {@link #destroy()} has succeeded.
   */
  public static NodeService getInstance() {
    return instance;
  }
  
  /**
   * Destroys the shared NodeService, if there is one, and clears the
   * reference. If NodeService.destroy() throws, the failure is logged
   * and the reference is kept.
   */
  public static void destroy() {
    if (instance != null) {
      try {
        instance.destroy();
        instance = null;
      } catch (Exception e) {
        logger.error("Can't destroy NodeService", e);
      }
    }
  }
}

The Spring XML configuration snippet to declare the nodeService singleton is as follows.

1
2
  <!-- Declares the shared NodeService, obtained by calling the static
       NodeServiceFactory.getInstance() factory method.
       NOTE(review): destroy-method presumably resolves against the
       returned NodeService object rather than the static
       NodeServiceFactory.destroy() - confirm against the Spring docs. -->
  <bean id="nodeService" class="com.mycompany.tgni.neo4j.NodeServiceFactory" 
    factory-method="getInstance" destroy-method="destroy"/>

This reference can either be injected into the controller, or the controller gets the common NodeService instance using NodeServiceFactory.getInstance(). I chose the latter method because that's how I had to do it in the ConceptAnnotator (no possibility of setter injection for that, this is UIMA land), and I wanted to keep it consistent.

No comments:

Post a Comment

Comments are moderated to prevent spam.