Java get file size and download using HTTP

Today I wanted to download a file over HTTP, but first determine its size.
First of all, I open an HTTP connection and read the file size from the Content-Length header (see the getFileSize method). Then I download the binary file.
You can download the source code from here.


import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A simple utility class which provides methods for determining the size of a
 * remote file and for downloading it, either to a file on disk or into a byte
 * array.
 */
public class FileDownloader {

	private static final Logger logger = LoggerFactory
			.getLogger(FileDownloader.class);

	/** Size in bytes of the buffer used while copying streams. */
	private static final int BUFFER_SIZE = 153600;

	/**
	 * Gets file size using an HTTP connection with GET method. The size is
	 * taken from the {@code Content-Length} response header; the response
	 * body is not read.
	 * 
	 * @param fileUrl
	 *            URL of the remote file; the connection is cast to
	 *            {@link HttpURLConnection}, so it must be an HTTP(S) URL
	 * @return file size in bytes, or 0 when the {@code Content-Length} header
	 *         is missing or is not a valid number
	 * @throws IOException
	 *             if the URL is malformed or the connection cannot be created
	 */
	public static long getFileSize(String fileUrl) throws IOException {

		URL url = new URL(fileUrl);

		HttpURLConnection connection = (HttpURLConnection) url
				.openConnection();
		populateDesktopHttpHeaders(connection);

		try {
			// reading a header field implicitly performs the request
			// NOTE(review): getHeaderField appears to return null (rather
			// than throw) when the request itself fails, so 0 may also mean
			// "could not connect" - confirm before relying on it
			String contentLength = connection.getHeaderField("Content-Length");
			if (contentLength == null) {
				logger.debug("No Content-Length header returned for {}",
						fileUrl);
				return 0;
			}

			try {
				return Long.parseLong(contentLength);
			} catch (NumberFormatException nfe) {
				logger.debug("Invalid Content-Length header '{}' for {}",
						contentLength, fileUrl);
				return 0;
			}
		} finally {
			// only the header is needed, so release the connection without
			// reading the response body
			connection.disconnect();
		}
	}

	/**
	 * Downloads a file from a given url and writes it to a file in current
	 * working directory. The output file name is the part of the url that
	 * follows the last '/' (the whole url string when it contains no '/').
	 * 
	 * @param urli
	 *            Input file url
	 * 
	 * @throws IOException
	 *             if the download or the write to disk fails
	 * @throws MalformedURLException
	 *             if the given url cannot be parsed
	 */
	public static void downloadFile(String urli) throws IOException,
			MalformedURLException {
		String fileName = urli.substring(urli.lastIndexOf('/') + 1);

		// a relative path, so the file is created in the current working
		// directory
		File outFile = new File(fileName);

		logger.debug("url file name: {}", fileName);
		logger.debug("Output file path: {}", outFile.getAbsolutePath());

		downloadFile(urli, outFile);
	}

	/**
	 * Downloads a file from a given url and writes it to a given File object.
	 * Both the network stream and the output file are closed even when the
	 * transfer fails part-way.
	 * 
	 * @param urli
	 *            Input file url
	 * @param outputFile
	 *            The output file to write to
	 * 
	 * @throws IOException
	 *             if the download or the write to disk fails
	 * @throws MalformedURLException
	 *             if the given url cannot be parsed
	 */
	public static void downloadFile(String urli, File outputFile)
			throws IOException, MalformedURLException {

		long startTime = System.currentTimeMillis();

		URL url = new URL(urli);
		InputStream in = url.openStream();
		try {
			FileOutputStream out = new FileOutputStream(outputFile);
			try {
				long totalBytesRead = copy(in, out);

				long endTime = System.currentTimeMillis();

				logger.debug("Done. {} bytes read in : {} millseconds",
						totalBytesRead, endTime - startTime);
			} finally {
				out.close();
			}
		} finally {
			in.close();
		}
	}

	/**
	 * Downloads a file from a given url and returns its content as a byte
	 * array. Both timeouts are passed as 0.
	 * 
	 * @param urli
	 *            Input file url
	 * @return the downloaded bytes
	 * 
	 * @throws IOException
	 *             if the download fails
	 * @throws MalformedURLException
	 *             if the given url cannot be parsed
	 */
	public static byte[] downloadFileToArray(String urli) throws IOException,
			MalformedURLException {

		return downloadFileToArray(urli, 0, 0);
	}

	/**
	 * Downloads a file from a given url and returns its content as a byte
	 * array. The whole response is held in memory. The input stream is closed
	 * and the connection disconnected even when the transfer fails part-way.
	 * 
	 * @param urli
	 *            Input file URL; the connection is cast to
	 *            {@link HttpURLConnection}, so it must be an HTTP(S) URL
	 * @param connectionTimeout
	 *            the maximum time in milliseconds to wait while connecting
	 * @param readTimeout
	 *            the read timeout in milliseconds, or 0 if reads never timeout
	 * @return the downloaded bytes
	 * 
	 * @throws IOException
	 *             if the download fails
	 * @throws MalformedURLException
	 *             if the given url cannot be parsed
	 */
	public static byte[] downloadFileToArray(String urli,
			int connectionTimeout, int readTimeout) throws IOException,
			MalformedURLException {

		URL url = new URL(urli);
		HttpURLConnection con = (HttpURLConnection) url.openConnection();

		populateDesktopHttpHeaders(con);

		con.setConnectTimeout(connectionTimeout);
		con.setReadTimeout(readTimeout);

		try {
			InputStream in = con.getInputStream();
			try {
				ByteArrayOutputStream out = new ByteArrayOutputStream();
				copy(in, out);
				return out.toByteArray();
			} finally {
				// close the stream before the connection is torn down
				in.close();
			}
		} finally {
			con.disconnect();
		}
	}

	/**
	 * Copies everything from the input stream to the output stream. Neither
	 * stream is closed.
	 * 
	 * @param in
	 *            stream to read from
	 * @param out
	 *            stream to write to
	 * @return the number of bytes copied
	 * @throws IOException
	 *             if reading or writing fails
	 */
	private static long copy(InputStream in, OutputStream out)
			throws IOException {
		byte[] buffer = new byte[BUFFER_SIZE];
		long total = 0;
		int bytesRead;

		// read() signals end of stream with -1
		while ((bytesRead = in.read(buffer)) != -1) {
			out.write(buffer, 0, bytesRead);
			total += bytesRead;
		}

		return total;
	}

	/**
	 * Sets browser-like request headers on the given connection: a Firefox 25
	 * on Windows User-Agent plus Accept-Language and Accept-Charset values
	 * that prefer Greek. Must be called before the connection is opened.
	 * 
	 * @param urlCon
	 *            the not yet connected connection to configure
	 */
	private static void populateDesktopHttpHeaders(URLConnection urlCon) {
		urlCon.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0");
		urlCon.setRequestProperty("Accept-Language",
				"el-gr,el;q=0.8,en-us;q=0.5,en;q=0.3");
		urlCon.setRequestProperty("Accept-Charset",
				"ISO-8859-7,utf-8;q=0.7,*;q=0.7");
	}

}

UPDATE:
2013-12-31: Added more methods and timeout options
2013-06-14: Minor fixes

Advertisements

About cmanios

programming
This entry was posted in Java and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s