Class CorpusBatchStep
- java.lang.Object
-
- io.outofprintmagazine.corpus.batch.CorpusBatchStep
-
- All Implemented Interfaces:
ICorpusBatchStep
- Direct Known Subclasses:
Analyze
,CleanText
,CoreNLPMyersBriggsScores
,CoreNLPTfidfScores
,CoreNLPZScores
,CorpusAggregate
,CorpusWord2Vec
,DocumentWord2Vec
,ExtractText
,FilterOutput
,GenerateDocID
,HttpDownload
,ImportDirectory
,ParseArchive
,ParseBodyStory
,ParseStory
,ParseStory
,ParseStory
,ParseTableTOC
,ParseTOC
,ParseTOC
,ParseTOC
,Polly
,Stamper
,ThreadedCorpusBatchStep
,ThreadedCorpusBatchStepTask
,TomcatExport
public abstract class CorpusBatchStep extends Object implements ICorpusBatchStep
-
-
Field Summary
Fields (columns: Modifier and Type, Field, Description)
protected List<String>
dictionaryPOS
-
Constructor Summary
Constructors (columns: Constructor, Description)
CorpusBatchStep()
-
Method Summary
Method tabs: All Methods, Instance Methods, Abstract Methods, Concrete Methods (columns: Modifier and Type, Method, Description)
protected com.fasterxml.jackson.databind.node.ObjectNode
copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected void
copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getAuthor(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getAuthor(org.jsoup.nodes.Document doc)
CorpusBatchStepModel
getData()
protected String
getDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getDate(org.jsoup.nodes.Document doc)
protected SimpleDateFormat
getDateFormat()
com.fasterxml.jackson.databind.node.ObjectNode
getDefaultProperties()
protected String
getDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getExtensionFromMimeType(String mimeType)
protected com.fasterxml.jackson.databind.JsonNode
getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected com.fasterxml.jackson.databind.JsonNode
getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property)
protected org.jsoup.nodes.Document
getJsoupDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected org.jsoup.nodes.Document
getJsoupDocumentFromStorageNormalized(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected String
getLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected com.fasterxml.jackson.databind.ObjectMapper
getMapper()
protected String
getMimeTypeFromExtension(String extension)
protected String
getOutputScratchFilePath(String fileName)
protected String
getOutputScratchFilePath(String fileName, String extension)
protected String
getOutputScratchFilePathFromInput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String extension)
protected io.outofprintmagazine.util.IParameterStore
getParameterStore()
protected IScratchStorage
getStorage()
protected String
getStorageLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getText(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getText(org.jsoup.nodes.Element element)
protected String
getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected String
getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property)
protected String
getTextWithSelector(org.jsoup.nodes.Element element, String selector)
protected String
getThumbnail(org.jsoup.nodes.Document doc)
protected String
getTitle(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String
getTitle(org.jsoup.nodes.Document doc)
protected boolean
isDictionaryWord(String pos)
com.fasterxml.jackson.databind.node.ArrayNode
run(com.fasterxml.jackson.databind.node.ArrayNode input)
abstract com.fasterxml.jackson.databind.node.ArrayNode
runOne(com.fasterxml.jackson.databind.node.ObjectNode input)
protected void
setAuthor(String author, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setAuthor(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
void
setData(CorpusBatchStepModel data)
protected void
setDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setDate(String date, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setDate(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem, String docID)
protected void
setLink(String link, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
void
setParameterStore(io.outofprintmagazine.util.IParameterStore parameterStore)
void
setStorage(IScratchStorage storage)
protected void
setStorageLink(String storage, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setThumbnail(String thumbnail, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setThumbnail(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setTitle(String title, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void
setTitle(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
-
-
Method Detail
-
getMapper
protected com.fasterxml.jackson.databind.ObjectMapper getMapper()
-
getParameterStore
protected io.outofprintmagazine.util.IParameterStore getParameterStore() throws IOException
- Throws:
IOException
-
setParameterStore
public void setParameterStore(io.outofprintmagazine.util.IParameterStore parameterStore)
- Specified by:
setParameterStore
in interface ICorpusBatchStep
-
setStorage
public void setStorage(IScratchStorage storage)
- Specified by:
setStorage
in interface ICorpusBatchStep
-
getStorage
protected IScratchStorage getStorage() throws IOException
- Throws:
IOException
-
getDateFormat
protected SimpleDateFormat getDateFormat()
-
getData
public CorpusBatchStepModel getData()
- Specified by:
getData
in interface ICorpusBatchStep
-
setData
public void setData(CorpusBatchStepModel data)
- Specified by:
setData
in interface ICorpusBatchStep
-
getDefaultProperties
public com.fasterxml.jackson.databind.node.ObjectNode getDefaultProperties()
- Specified by:
getDefaultProperties
in interface ICorpusBatchStep
-
copyInputToOutput
protected void copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem) throws IOException
- Throws:
IOException
-
copyInputToOutput
protected com.fasterxml.jackson.databind.node.ObjectNode copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws IOException
- Throws:
IOException
-
run
public com.fasterxml.jackson.databind.node.ArrayNode run(com.fasterxml.jackson.databind.node.ArrayNode input)
- Specified by:
run
in interface ICorpusBatchStep
-
runOne
public abstract com.fasterxml.jackson.databind.node.ArrayNode runOne(com.fasterxml.jackson.databind.node.ObjectNode input) throws Exception
- Specified by:
runOne
in interface ICorpusBatchStep
- Throws:
Exception
-
getText
protected String getText(org.jsoup.nodes.Element element)
-
getText
protected String getText(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getTextWithSelector
protected String getTextWithSelector(org.jsoup.nodes.Element element, String selector)
-
getAuthor
protected String getAuthor(org.jsoup.nodes.Document doc)
-
getAuthor
protected String getAuthor(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setAuthor
protected void setAuthor(String author, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setAuthor
protected void setAuthor(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getTitle
protected String getTitle(org.jsoup.nodes.Document doc)
-
getTitle
protected String getTitle(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setTitle
protected void setTitle(String title, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setTitle
protected void setTitle(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getThumbnail
protected String getThumbnail(org.jsoup.nodes.Document doc)
-
setThumbnail
protected void setThumbnail(String thumbnail, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setThumbnail
protected void setThumbnail(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getDate
protected String getDate(org.jsoup.nodes.Document doc)
-
getDate
protected String getDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(String date, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setLink
protected void setLink(String link, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getLink
protected String getLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setStorageLink
protected void setStorageLink(String storage, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDocID
protected void setDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem, String docID)
-
getDocID
protected String getDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getStorageLink
protected String getStorageLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getJsoupDocumentFromStorage
protected org.jsoup.nodes.Document getJsoupDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getJsoupDocumentFromStorageNormalized
protected org.jsoup.nodes.Document getJsoupDocumentFromStorageNormalized(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getTextDocumentFromStorage
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getTextDocumentFromStorage
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property) throws Exception
- Throws:
Exception
-
getJsonNodeFromStorage
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getJsonNodeFromStorage
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property) throws Exception
- Throws:
Exception
-
getOutputScratchFilePathFromInput
protected String getOutputScratchFilePathFromInput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String extension) throws Exception
- Throws:
Exception
-
getOutputScratchFilePath
protected String getOutputScratchFilePath(String fileName) throws Exception
- Throws:
Exception
-
getOutputScratchFilePath
protected String getOutputScratchFilePath(String fileName, String extension) throws Exception
- Throws:
Exception
-
getExtensionFromMimeType
protected String getExtensionFromMimeType(String mimeType) throws org.apache.tika.mime.MimeTypeException
- Throws:
org.apache.tika.mime.MimeTypeException
-
isDictionaryWord
protected boolean isDictionaryWord(String pos)
-
-