Class CorpusBatchStep
- java.lang.Object
-
- io.outofprintmagazine.corpus.batch.CorpusBatchStep
-
- All Implemented Interfaces:
ICorpusBatchStep
- Direct Known Subclasses:
Analyze, CleanText, CoreNLPMyersBriggsScores, CoreNLPTfidfScores, CoreNLPZScores, CorpusAggregate, CorpusWord2Vec, DocumentWord2Vec, ExtractText, FilterOutput, GenerateDocID, HttpDownload, ImportDirectory, ParseArchive, ParseBodyStory, ParseStory, ParseStory, ParseStory, ParseTableTOC, ParseTOC, ParseTOC, ParseTOC, Polly, Stamper, ThreadedCorpusBatchStep, ThreadedCorpusBatchStepTask, TomcatExport
public abstract class CorpusBatchStep extends Object implements ICorpusBatchStep
-
-
Field Summary
Fields Modifier and Type Field Description protected List<String> dictionaryPOS
-
Constructor Summary
Constructors Constructor Description CorpusBatchStep()
-
Method Summary
All Methods Instance Methods Abstract Methods Concrete Methods
Modifier and Type Method Description
protected com.fasterxml.jackson.databind.node.ObjectNode copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected void copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getAuthor(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getAuthor(org.jsoup.nodes.Document doc)
CorpusBatchStepModel getData()
protected String getDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getDate(org.jsoup.nodes.Document doc)
protected SimpleDateFormat getDateFormat()
com.fasterxml.jackson.databind.node.ObjectNode getDefaultProperties()
protected String getDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getExtensionFromMimeType(String mimeType)
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property)
protected org.jsoup.nodes.Document getJsoupDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected org.jsoup.nodes.Document getJsoupDocumentFromStorageNormalized(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected String getLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected com.fasterxml.jackson.databind.ObjectMapper getMapper()
protected String getMimeTypeFromExtension(String extension)
protected String getOutputScratchFilePath(String fileName)
protected String getOutputScratchFilePath(String fileName, String extension)
protected String getOutputScratchFilePathFromInput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String extension)
protected io.outofprintmagazine.util.IParameterStore getParameterStore()
protected IScratchStorage getStorage()
protected String getStorageLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getText(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getText(org.jsoup.nodes.Element element)
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem)
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property)
protected String getTextWithSelector(org.jsoup.nodes.Element element, String selector)
protected String getThumbnail(org.jsoup.nodes.Document doc)
protected String getTitle(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected String getTitle(org.jsoup.nodes.Document doc)
protected boolean isDictionaryWord(String pos)
com.fasterxml.jackson.databind.node.ArrayNode run(com.fasterxml.jackson.databind.node.ArrayNode input)
abstract com.fasterxml.jackson.databind.node.ArrayNode runOne(com.fasterxml.jackson.databind.node.ObjectNode input)
protected void setAuthor(String author, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setAuthor(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
void setData(CorpusBatchStepModel data)
protected void setDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setDate(String date, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setDate(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem, String docID)
protected void setLink(String link, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
void setParameterStore(io.outofprintmagazine.util.IParameterStore parameterStore)
void setStorage(IScratchStorage storage)
protected void setStorageLink(String storage, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setThumbnail(String thumbnail, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setThumbnail(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setTitle(String title, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
protected void setTitle(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
-
-
Method Detail
-
getMapper
protected com.fasterxml.jackson.databind.ObjectMapper getMapper()
-
getParameterStore
protected io.outofprintmagazine.util.IParameterStore getParameterStore() throws IOException
- Throws:
IOException
-
setParameterStore
public void setParameterStore(io.outofprintmagazine.util.IParameterStore parameterStore)
- Specified by:
setParameterStore in interface ICorpusBatchStep
-
setStorage
public void setStorage(IScratchStorage storage)
- Specified by:
setStorage in interface ICorpusBatchStep
-
getStorage
protected IScratchStorage getStorage() throws IOException
- Throws:
IOException
-
getDateFormat
protected SimpleDateFormat getDateFormat()
-
getData
public CorpusBatchStepModel getData()
- Specified by:
getData in interface ICorpusBatchStep
-
setData
public void setData(CorpusBatchStepModel data)
- Specified by:
setData in interface ICorpusBatchStep
-
getDefaultProperties
public com.fasterxml.jackson.databind.node.ObjectNode getDefaultProperties()
- Specified by:
getDefaultProperties in interface ICorpusBatchStep
-
copyInputToOutput
protected void copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem) throws IOException
- Throws:
IOException
-
copyInputToOutput
protected com.fasterxml.jackson.databind.node.ObjectNode copyInputToOutput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws IOException
- Throws:
IOException
-
run
public com.fasterxml.jackson.databind.node.ArrayNode run(com.fasterxml.jackson.databind.node.ArrayNode input)
- Specified by:
run in interface ICorpusBatchStep
-
runOne
public abstract com.fasterxml.jackson.databind.node.ArrayNode runOne(com.fasterxml.jackson.databind.node.ObjectNode input) throws Exception
- Specified by:
runOne in interface ICorpusBatchStep
- Throws:
Exception
-
getText
protected String getText(org.jsoup.nodes.Element element)
-
getText
protected String getText(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getTextWithSelector
protected String getTextWithSelector(org.jsoup.nodes.Element element, String selector)
-
getAuthor
protected String getAuthor(org.jsoup.nodes.Document doc)
-
getAuthor
protected String getAuthor(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setAuthor
protected void setAuthor(String author, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setAuthor
protected void setAuthor(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getTitle
protected String getTitle(org.jsoup.nodes.Document doc)
-
getTitle
protected String getTitle(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setTitle
protected void setTitle(String title, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setTitle
protected void setTitle(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getThumbnail
protected String getThumbnail(org.jsoup.nodes.Document doc)
-
setThumbnail
protected void setThumbnail(String thumbnail, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setThumbnail
protected void setThumbnail(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getDate
protected String getDate(org.jsoup.nodes.Document doc)
-
getDate
protected String getDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(String date, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(org.jsoup.nodes.Document doc, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDate
protected void setDate(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setLink
protected void setLink(String link, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getLink
protected String getLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setStorageLink
protected void setStorageLink(String storage, com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
setDocID
protected void setDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem, String docID)
-
getDocID
protected String getDocID(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getStorageLink
protected String getStorageLink(com.fasterxml.jackson.databind.node.ObjectNode outputStepItem)
-
getJsoupDocumentFromStorage
protected org.jsoup.nodes.Document getJsoupDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getJsoupDocumentFromStorageNormalized
protected org.jsoup.nodes.Document getJsoupDocumentFromStorageNormalized(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getTextDocumentFromStorage
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getTextDocumentFromStorage
protected String getTextDocumentFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property) throws Exception
- Throws:
Exception
-
getJsonNodeFromStorage
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem) throws Exception
- Throws:
Exception
-
getJsonNodeFromStorage
protected com.fasterxml.jackson.databind.JsonNode getJsonNodeFromStorage(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String property) throws Exception
- Throws:
Exception
-
getOutputScratchFilePathFromInput
protected String getOutputScratchFilePathFromInput(com.fasterxml.jackson.databind.node.ObjectNode inputStepItem, String extension) throws Exception
- Throws:
Exception
-
getOutputScratchFilePath
protected String getOutputScratchFilePath(String fileName) throws Exception
- Throws:
Exception
-
getOutputScratchFilePath
protected String getOutputScratchFilePath(String fileName, String extension) throws Exception
- Throws:
Exception
-
getExtensionFromMimeType
protected String getExtensionFromMimeType(String mimeType) throws org.apache.tika.mime.MimeTypeException
- Throws:
org.apache.tika.mime.MimeTypeException
-
isDictionaryWord
protected boolean isDictionaryWord(String pos)
-
-