HttpUtils.java

package io.outofprintmagazine.nlp.utils;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.ServiceUnavailableRetryStrategy;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;

import io.outofprintmagazine.util.IParameterStore;

/**
 * Thin convenience layer over Apache HttpClient and Jackson for issuing GET/POST
 * requests and reading the response body as a {@code String} or a {@link JsonNode}.
 * Also provides canned query-parameter lists for {@code /w/api.php} on
 * {@code en.wikipedia.org} and a helper that follows the API's {@code continue}
 * tokens across pages.
 *
 * <p>Instances are normally obtained through {@link #getInstance(IParameterStore)},
 * which keeps one instance per parameter store. Every instance owns one pooled
 * connection manager that is shared by all HTTP clients it builds.
 */
public class HttpUtils {

	private static final Logger logger = LogManager.getLogger(HttpUtils.class);

	/** Connect, connection-request and socket timeout applied to every request, in seconds. */
	private static final int TIMEOUT_SECONDS = 5;

	/** A request answered with 429/500/503 is re-executed while its execution count is below this. */
	private static final int MAX_EXECUTIONS = 5;

	/**
	 * Pause between those automatic re-executions. HttpClient interprets the value as
	 * milliseconds, so this is five seconds, in line with {@link #TIMEOUT_SECONDS}.
	 */
	private static final long RETRY_INTERVAL_MILLIS = 5000L;

	/** How often a single page is re-requested after the API answers with a "ratelimited" error. */
	private static final int MAX_RATE_LIMIT_RETRIES = 5;

	/** Pause before re-requesting a rate-limited page, in milliseconds. */
	private static final long RATE_LIMIT_BACKOFF_MILLIS = 1000L;

	/** One instance per parameter store; only touched inside {@link #getInstance(IParameterStore)}. */
	private static final Map<IParameterStore, HttpUtils> instances = new HashMap<IParameterStore, HttpUtils>();

	// Initialised inline so that instances created through the public no-arg
	// constructor are fully usable as well (they used to be left null there).
	private final ObjectMapper mapper = new ObjectMapper();
	private final HttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();

	/**
	 * Creates a stand-alone instance with its own JSON mapper and connection pool.
	 */
	public HttpUtils() {
		super();
	}

	/**
	 * Creates the instance registered for {@code parameterStore}. The store is currently
	 * only used as the registry key; no settings are read from it.
	 *
	 * @param parameterStore the store this instance is registered under
	 * @throws IOException never thrown by the current implementation; kept for callers
	 */
	private HttpUtils(IParameterStore parameterStore) throws IOException {
		this();
	}

	private Logger getLogger() {
		return logger;
	}

	/**
	 * Returns the instance associated with {@code parameterStore}, creating it on first use.
	 * Synchronized because the backing registry is a plain {@link HashMap}.
	 *
	 * @param parameterStore registry key
	 * @return the instance registered for that key, never {@code null}
	 * @throws IOException if the instance cannot be created
	 */
	public static synchronized HttpUtils getInstance(IParameterStore parameterStore) throws IOException {
		HttpUtils instance = instances.get(parameterStore);
		if (instance == null) {
			instance = new HttpUtils(parameterStore);
			instances.put(parameterStore, instance);
		}
		return instance;
	}

	/**
	 * Reads the body of a 2xx response as text. UTF-8 is passed to
	 * {@code EntityUtils.toString} as the default charset.
	 *
	 * @param response the response to read
	 * @return the body text, or {@code null} when the response carries no entity
	 * @throws ClientProtocolException if the status code is outside 200-299
	 * @throws IOException if the body cannot be read
	 */
	private static String readBody(HttpResponse response) throws ClientProtocolException, IOException {
		int status = response.getStatusLine().getStatusCode();
		if (status < 200 || status >= 300) {
			throw new ClientProtocolException("Unexpected response status: " + status);
		}
		HttpEntity entity = response.getEntity();
		if (entity == null) {
			// e.g. 204 No Content: EntityUtils.toString would reject a null entity.
			return null;
		}
		String body = EntityUtils.toString(entity, StandardCharsets.UTF_8);
		EntityUtils.consume(entity);
		return body;
	}

	/**
	 * Converts a successful response into a JSON tree using this instance's mapper.
	 * Yields {@code null} when the response has no body.
	 */
	class JsonResponseHandler implements ResponseHandler<JsonNode> {

		@Override
		public JsonNode handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
			String body = readBody(response);
			return body == null ? null : mapper.readTree(body);
		}
	}

	/**
	 * Converts a successful response into its body text.
	 * Yields {@code null} when the response has no body.
	 */
	class StringResponseHandler implements ResponseHandler<String> {

		@Override
		public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
			return readBody(response);
		}
	}

	/**
	 * Builds a client on top of this instance's connection pool. The client re-executes
	 * requests answered with 429, 500 or 503, uses {@link LaxRedirectStrategy}, applies
	 * {@link #TIMEOUT_SECONDS} to all three request timeouts and uses the STANDARD cookie spec.
	 *
	 * @return a newly built client; closing it leaves the shared connection pool open
	 */
	protected CloseableHttpClient getHttpClient() {
		final int timeoutMillis = TIMEOUT_SECONDS * 1000;
		RequestConfig requestConfig = RequestConfig.custom()
				.setConnectTimeout(timeoutMillis)
				.setConnectionRequestTimeout(timeoutMillis)
				.setSocketTimeout(timeoutMillis)
				.setCookieSpec(CookieSpecs.STANDARD)
				.build();
		ServiceUnavailableRetryStrategy retryStrategy = new ServiceUnavailableRetryStrategy() {
			@Override
			public boolean retryRequest(
					final HttpResponse response, final int executionCount, final HttpContext context) {
				int statusCode = response.getStatusLine().getStatusCode();
				boolean transientFailure = statusCode == 503 || statusCode == 500 || statusCode == 429;
				return transientFailure && executionCount < MAX_EXECUTIONS;
			}

			@Override
			public long getRetryInterval() {
				return RETRY_INTERVAL_MILLIS;
			}
		};
		return HttpClients.custom()
				.setConnectionManager(connManager)
				// The pool is handed to every client built here, so no single client may own it.
				.setConnectionManagerShared(true)
				.setServiceUnavailableRetryStrategy(retryStrategy)
				.setRedirectStrategy(new LaxRedirectStrategy())
				.setDefaultRequestConfig(requestConfig)
				.build();
	}

	/**
	 * Returns a copy of {@code uri} whose query parameters are set to {@code nvps}.
	 *
	 * @param uri  base URI
	 * @param nvps query parameters to set
	 * @return the resulting URI
	 * @throws URISyntaxException if the result is not a valid URI
	 */
	public URI buildUri(URI uri, List<NameValuePair> nvps) throws URISyntaxException {
		URIBuilder builder = new URIBuilder(uri);
		builder.setParameters(nvps);
		return builder.build();
	}

	/** Converts a header list to an array; a {@code null} list means "no headers". */
	private static Header[] toHeaderArray(List<Header> headers) {
		if (headers == null) {
			return new Header[0];
		}
		return headers.toArray(new Header[0]);
	}

	/**
	 * GETs {@code url} and parses the body as JSON.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers; {@code null} is treated as none
	 * @return the parsed body, or {@code null} when the response has no body
	 * @throws IOException on transport failure, non-2xx status or unparsable JSON
	 */
	public JsonNode httpGetJson(URI url, List<Header> headers) throws IOException {
		return httpGetJson(url, toHeaderArray(headers));
	}

	/**
	 * GETs {@code url} and parses the body as JSON.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers
	 * @return the parsed body, or {@code null} when the response has no body
	 * @throws IOException on transport failure, non-2xx status or unparsable JSON
	 */
	public JsonNode httpGetJson(URI url, Header[] headers) throws IOException {
		HttpGet http = new HttpGet(url);
		http.setHeaders(headers);
		return getHttpClient().execute(http, new JsonResponseHandler());
	}

	/**
	 * GETs {@code url} as JSON and passes the result to {@code handler.onPage}.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers; {@code null} is treated as none
	 * @param handler receiver of the parsed body
	 * @throws IOException on transport failure, non-2xx status or unparsable JSON
	 */
	public void httpGetJsonCB(URI url, List<Header> headers, IJsonResponseHandler handler) throws IOException {
		handler.onPage(httpGetJson(url, toHeaderArray(headers)));
	}

	/**
	 * GETs {@code url} as JSON and passes the result to {@code handler.onPage}.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers
	 * @param handler receiver of the parsed body
	 * @throws IOException on transport failure, non-2xx status or unparsable JSON
	 */
	public void httpGetJsonCB(URI url, Header[] headers, IJsonResponseHandler handler) throws IOException {
		handler.onPage(httpGetJson(url, headers));
	}

	/**
	 * GETs {@code url} and returns the body as text.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers; {@code null} is treated as none
	 * @return the body text, or {@code null} when the response has no body
	 * @throws IOException on transport failure or non-2xx status
	 */
	public String httpGetString(URI url, List<Header> headers) throws IOException {
		return httpGetString(url, toHeaderArray(headers));
	}

	/**
	 * GETs {@code url} and returns the body as text.
	 *
	 * @param url     the address to fetch
	 * @param headers request headers
	 * @return the body text, or {@code null} when the response has no body
	 * @throws IOException on transport failure or non-2xx status
	 */
	public String httpGetString(URI url, Header[] headers) throws IOException {
		HttpGet http = new HttpGet(url);
		http.setHeaders(headers);
		return getHttpClient().execute(http, new StringResponseHandler());
	}

	/**
	 * Executes a caller-prepared POST and parses the body as JSON.
	 *
	 * @param http the fully configured request
	 * @return the parsed body, or {@code null} when the response has no body
	 * @throws IOException on transport failure, non-2xx status or unparsable JSON
	 */
	public JsonNode httpPostJson(HttpPost http) throws IOException {
		return getHttpClient().execute(http, new JsonResponseHandler());
	}

	/**
	 * Executes a caller-prepared POST and returns the body as text.
	 *
	 * @param http the fully configured request
	 * @return the body text, or {@code null} when the response has no body
	 * @throws IOException on transport failure or non-2xx status
	 */
	public String httpPostString(HttpPost http) throws IOException {
		return getHttpClient().execute(http, new StringResponseHandler());
	}

	/**
	 * @return {@code https://en.wikipedia.org/w/api.php}
	 * @throws URISyntaxException if the URI cannot be constructed
	 */
	public URI getWikipediaApi() throws URISyntaxException {
		return new URI("https", "en.wikipedia.org", "/w/api.php", null);
	}

	/**
	 * Parameters common to every query built by this class:
	 * {@code utf8=1}, {@code format=json}, {@code action=query}, {@code maxlag=1}.
	 *
	 * @return a new mutable list, so callers may append their own parameters
	 */
	private static List<NameValuePair> baseQueryParameters() {
		List<NameValuePair> nvps = new ArrayList<NameValuePair>();
		nvps.add(new BasicNameValuePair("utf8", "1"));
		nvps.add(new BasicNameValuePair("format", "json"));
		nvps.add(new BasicNameValuePair("action", "query"));
		nvps.add(new BasicNameValuePair("maxlag", "1"));
		return nvps;
	}

	/**
	 * @return a new mutable list: the common query parameters plus
	 *         {@code generator=images} and {@code prop=info}
	 */
	public List<NameValuePair> getWikimediaImageParameters() {
		List<NameValuePair> nvps = baseQueryParameters();
		nvps.add(new BasicNameValuePair("generator", "images"));
		nvps.add(new BasicNameValuePair("prop", "info"));
		return nvps;
	}

	/**
	 * @return a new mutable list: the common query parameters plus
	 *         {@code prop=imageinfo} and the {@code iiprop} field selection
	 */
	public List<NameValuePair> getWikimediaImageInfoParameters() {
		List<NameValuePair> nvps = baseQueryParameters();
		nvps.add(new BasicNameValuePair("prop", "imageinfo"));
		nvps.add(new BasicNameValuePair("iiprop", "timestamp|user|userid|comment|canonicaltitle|url|size|dimensions|sha1|mime|thumbmime|mediatype|bitdepth"));
		return nvps;
	}

	/**
	 * Returns only the common query parameters. The method name is spelled
	 * "Wikionary" (presumably "Wiktionary") and is kept as is for existing callers.
	 *
	 * @return a new mutable list of the common query parameters
	 */
	public List<NameValuePair> getWikionaryParameters() {
		return baseQueryParameters();
	}

	/**
	 * @return a new mutable list: the common query parameters plus {@code prop=categories}
	 */
	public List<NameValuePair> getWikipediaCategoriesParameters() {
		List<NameValuePair> nvps = baseQueryParameters();
		nvps.add(new BasicNameValuePair("prop", "categories"));
		return nvps;
	}

	/**
	 * @return a new mutable list: the common query parameters plus
	 *         {@code formatversion=2} and a value-less {@code redirects} flag
	 */
	public List<NameValuePair> getWikipediaPagesParameters() {
		List<NameValuePair> nvps = baseQueryParameters();
		nvps.add(new BasicNameValuePair("formatversion", "2"));
		nvps.add(new BasicNameValuePair("redirects", null));
		return nvps;
	}

	/**
	 * @return a new mutable list: the common query parameters plus {@code prop=extracts},
	 *         {@code exlimit=20}, {@code exsentences=1} and the value-less flags
	 *         {@code exintro} and {@code explaintext}
	 */
	public List<NameValuePair> getWikipediaExtractsParameters() {
		List<NameValuePair> nvps = baseQueryParameters();
		nvps.add(new BasicNameValuePair("prop", "extracts"));
		nvps.add(new BasicNameValuePair("exlimit", "20"));
		nvps.add(new BasicNameValuePair("exsentences", "1"));
		nvps.add(new BasicNameValuePair("exintro", null));
		nvps.add(new BasicNameValuePair("explaintext", null));
		return nvps;
	}

	/**
	 * Fetches a paginated JSON resource page by page.
	 *
	 * <p>The first request uses {@code parameters} unchanged. While a response contains
	 * {@code continue.<gimcontinueName>}, that value (spaces replaced by underscores) is
	 * added as an extra parameter named {@code gimcontinueName} and the next page is requested.
	 *
	 * <p>A response without an {@code errors} member is passed to {@code handler.onPage}.
	 * A response with {@code errors} is never passed on: if one of the errors has the code
	 * {@code ratelimited}, the same page is requested again after a pause, at most
	 * {@link #MAX_RATE_LIMIT_RETRIES} times; otherwise the errors are logged and the page
	 * is skipped.
	 *
	 * @param baseUri         endpoint without query parameters
	 * @param parameters      query parameters sent with every request; not modified
	 * @param headers         request headers; {@code null} is treated as none
	 * @param gimcontinueName name of the continuation parameter / token
	 * @param handler         receives each successfully fetched page
	 * @throws IOException        on transport failure, non-2xx status, unparsable JSON,
	 *                            or if the thread is interrupted while backing off
	 * @throws URISyntaxException if a page URI cannot be built
	 */
	public void httpGetJsonPaginated(
			URI baseUri,
			List<NameValuePair> parameters,
			List<Header> headers,
			String gimcontinueName,
			IJsonResponseHandler handler
		) throws IOException, URISyntaxException {
		// null means "first page"; a sentinel string could collide with a real token.
		String continuation = null;
		int rateLimitRetries = 0;
		boolean morePages = true;
		while (morePages) {
			List<NameValuePair> query = new ArrayList<NameValuePair>(parameters);
			if (continuation != null) {
				query.add(new BasicNameValuePair(gimcontinueName, continuation));
			}
			JsonNode rootNode = httpGetJson(buildUri(baseUri, query), headers);
			if (rootNode == null) {
				getLogger().warn("Empty response body from {}; stopping pagination", baseUri);
				return;
			}
			if (rootNode.has("errors")) {
				JsonNode errors = rootNode.get("errors");
				if (isRateLimited(errors) && rateLimitRetries < MAX_RATE_LIMIT_RETRIES) {
					rateLimitRetries++;
					pauseForRateLimit();
					// Leave the continuation untouched so the very same page is requested again.
					continue;
				}
				getLogger().error("Skipping page from {} because the response reported errors: {}", baseUri, errors);
			}
			else {
				handler.onPage(rootNode);
			}
			rateLimitRetries = 0;
			continuation = nextContinuation(rootNode, gimcontinueName);
			morePages = continuation != null;
		}
	}

	/**
	 * Extracts the token for the next page.
	 *
	 * @return {@code continue.<gimcontinueName>} with spaces replaced by underscores,
	 *         or {@code null} when the response carries no such token
	 */
	private static String nextContinuation(JsonNode rootNode, String gimcontinueName) {
		JsonNode token = rootNode.path("continue").path(gimcontinueName);
		if (token.isMissingNode() || token.isNull()) {
			return null;
		}
		return token.asText().replace(' ', '_');
	}

	/** Tells whether any entry of an {@code errors} node has the code {@code ratelimited}. */
	private static boolean isRateLimited(JsonNode errors) {
		// JsonNode is iterable for every node type, so no cast to ArrayNode is needed.
		for (JsonNode err : errors) {
			if ("ratelimited".equals(err.path("code").asText())) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Sleeps for {@link #RATE_LIMIT_BACKOFF_MILLIS}.
	 *
	 * @throws IOException if the thread is interrupted; the interrupt flag is restored first
	 */
	private void pauseForRateLimit() throws IOException {
		try {
			Thread.sleep(RATE_LIMIT_BACKOFF_MILLIS);
		}
		catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			throw new IOException("Interrupted while backing off after a rate-limit response", e);
		}
	}
}