package shop;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import map.Const;

/**
 * Scraper that collects store addresses of Sumitomo Mitsui Banking Corporation (SMBC).
 * <p>
 * The site is walked in three levels: the top page lists prefectures, each prefecture
 * page lists municipalities, and each municipality page (possibly paginated) lists
 * stores together with their addresses.
 * <p>
 * All regular expressions are matched line by line against the raw HTML, so they depend
 * on the exact markup the site serves.
 * @author Kumano Tatsuo
 * 2005/11/27
 * @since 3.10
 */
public class SMBC implements Emap {
	/** Store link on a municipality page; group 1 is the link text (the store name). */
	private static final Pattern CAPTION_PATTERN = Pattern
			.compile("<a href=\"ssactl.htm\\?ENC=.+\">(.+)</a>");

	/** Address cell on a municipality page; group 1 is the address text. */
	private static final Pattern ADDRESS_PATTERN = Pattern
			.compile("<span class=\"t10\">([^<>]+)</span></td>");

	/** Store-type cell whose text is either "branch" (支店) or "sub-branch" (出張所). */
	private static final Pattern STORE_TYPE_PATTERN = Pattern
			.compile("<td[^<>]+><span[^<>]+>(支店|出張所)</span></td>");

	/** "Next 20 items" pagination link; group 1 is the relative URL of the next page. */
	private static final Pattern NEXT_PAGE_PATTERN = Pattern
			.compile("<a href=\"(ssactl.htm\\?.+)\"><span class=\"t12\">次の20件</span></a>");

	/** Prefecture heading image on a prefecture page; group 1 is the ALT text. */
	private static final Pattern PREFECTURE_HEADING_PATTERN = Pattern
			.compile("<IMG SRC=\"img/ken/[0-9]+.gif\" WIDTH=\"[0-9]+\" ALT=\"([^> ]+) +\">");

	/**
	 * Municipality link inside one "&lt;/A&gt;"-delimited fragment of a prefecture page;
	 * group 1 is the relative URL and group 2 is the municipality name.
	 */
	private static final Pattern CITY_LINK_PATTERN = Pattern
			.compile("<A HREF=(ssactl.htm\\?ENC=.+)\">(.+)");

	/**
	 * Prefecture link on the top page; group 1 is the relative URL and group 2 is the
	 * ALT text of the image inside the link (the prefecture name).
	 */
	private static final Pattern PREFECTURE_LINK_PATTERN = Pattern
			.compile("<a href=\"(ssactl.htm\\?ENC=[^<> ]+)\".+><img src=\".+\".+alt=\"([^<>\"]+)\".+></a>");

	/**
	 * Opens the given URL as a line scanner decoded with {@code Const.Smbc.ENCODING}.
	 * The caller owns the returned scanner and must close it; closing the scanner also
	 * closes the underlying network stream.
	 * @param url page to read
	 * @return scanner over the page contents
	 * @throws IOException if the stream cannot be opened or the encoding is unsupported
	 */
	private static Scanner openScanner(final URL url) throws IOException {
		return new Scanner(new InputStreamReader(url.openStream(), Const.Smbc.ENCODING));
	}

	/**
	 * Parses a municipality page and collects the addresses of the stores listed on it,
	 * following the "next 20 items" pagination link recursively.
	 * <p>
	 * An entry is recorded each time a store-type cell (branch or sub-branch) is seen,
	 * using the most recently seen store name and address. A later store with the same
	 * address replaces the earlier entry.
	 * @param url URL of the municipality page
	 * @return map from address to store name, in the order the addresses were first seen
	 * @throws IOException if a page cannot be opened
	 * @throws UnsupportedEncodingException if the configured encoding is unsupported
	 */
	public Map<String, String> getAddresses(final URL url) throws UnsupportedEncodingException,
			IOException {
		final Map<String, String> ret = new LinkedHashMap<String, String>();
		final Scanner scanner = openScanner(url);
		try {
			String caption = null;
			String address = null;
			boolean nextLinkSeen = false;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher captionMatcher = CAPTION_PATTERN.matcher(line);
				if (captionMatcher.find()) {
					caption = captionMatcher.group(1);
				}
				final Matcher addressMatcher = ADDRESS_PATTERN.matcher(line);
				if (addressMatcher.find()) {
					address = addressMatcher.group(1);
				}
				if (STORE_TYPE_PATTERN.matcher(line).find() && caption != null && address != null) {
					ret.put(address, caption);
				}
				final Matcher nextMatcher = NEXT_PAGE_PATTERN.matcher(line);
				if (nextMatcher.find()) {
					// The first occurrence of the next-page link is only noted; the link is
					// followed from its second occurrence onwards.
					// NOTE(review): presumably the link is rendered both above and below the
					// list, so this follows it once after the whole page was read - confirm.
					if (nextLinkSeen) {
						ret.putAll(getAddresses(new URL(Const.Smbc.BASE_URL + nextMatcher.group(1))));
					} else {
						nextLinkSeen = true;
					}
				}
			}
		} finally {
			scanner.close();
		}
		return ret;
	}

	/**
	 * Parses a prefecture page and collects the URLs of its municipality pages.
	 * <p>
	 * Keys have the form {@code prefecture + "," + city}, where the prefecture is taken
	 * from the most recent heading image seen on the page. If a municipality link appears
	 * before any heading image, the key starts with the literal text {@code "null"}.
	 * @param url URL of the prefecture page
	 * @return map from "prefecture,city" to the municipality page URL, in page order
	 * @throws IOException if the page cannot be opened
	 * @throws MalformedURLException if a municipality URL cannot be built
	 */
	public Map<String, URL> getCities(final URL url) throws MalformedURLException, IOException {
		final Map<String, URL> ret = new LinkedHashMap<String, URL>();
		final Scanner scanner = openScanner(url);
		try {
			String prefecture = null;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher headingMatcher = PREFECTURE_HEADING_PATTERN.matcher(line);
				if (headingMatcher.find()) {
					prefecture = headingMatcher.group(1);
				}
				// A single line may hold several links, so it is split at each closing tag
				// and every fragment is examined separately.
				for (final String fragment : line.split("</A>")) {
					final Matcher cityMatcher = CITY_LINK_PATTERN.matcher(fragment);
					if (cityMatcher.find()) {
						final String city = cityMatcher.group(2);
						final URL cityUrl = new URL(Const.Smbc.BASE_URL + cityMatcher.group(1));
						ret.put(prefecture + "," + city, cityUrl);
					}
				}
			}
		} finally {
			scanner.close();
		}
		return ret;
	}

	/**
	 * Parses the top page ({@code Const.Smbc.TOP_PAGE}) and collects the URLs of the
	 * prefecture pages.
	 * @return map from prefecture name to the prefecture page URL, in page order
	 * @throws IOException if the page cannot be opened
	 * @throws MalformedURLException if a URL cannot be built
	 */
	public Map<String, URL> getPrefectures() throws MalformedURLException, IOException {
		final Map<String, URL> ret = new LinkedHashMap<String, URL>();
		final Scanner scanner = openScanner(new URL(Const.Smbc.TOP_PAGE));
		try {
			while (scanner.hasNextLine()) {
				final Matcher linkMatcher = PREFECTURE_LINK_PATTERN.matcher(scanner.nextLine());
				if (linkMatcher.find()) {
					final URL prefectureUrl = new URL(Const.Smbc.BASE_URL + linkMatcher.group(1));
					ret.put(linkMatcher.group(2), prefectureUrl);
				}
			}
		} finally {
			scanner.close();
		}
		return ret;
	}

	/**
	 * Entry point. Exits immediately with status 0, so the crawl written below the
	 * {@code System.exit} call never runs.
	 * <p>
	 * If the exit call were removed, the code would walk every prefecture and
	 * municipality page, sleeping a random time of up to five seconds before each
	 * request, and print one CSV line per address to standard output.
	 * @param args command line arguments (unused)
	 * @throws IOException if a page cannot be opened
	 * @throws MalformedURLException if a URL cannot be built
	 * @throws InterruptedException if a sleep between requests is interrupted
	 */
	public static void main(String[] args) throws MalformedURLException, IOException,
			InterruptedException {
		// NOTE(review): the unconditional exit looks like a guard against crawling the
		// site by accident - confirm before removing it.
		System.exit(0);
		final Random random = new Random();
		final Scanner topScanner = openScanner(new URL(Const.Smbc.TOP_PAGE));
		try {
			while (topScanner.hasNextLine()) {
				final Matcher linkMatcher = PREFECTURE_LINK_PATTERN.matcher(topScanner.nextLine());
				if (linkMatcher.find()) {
					Thread.sleep(random.nextInt(5000));
					final URL prefectureUrl = new URL(Const.Smbc.BASE_URL + linkMatcher.group(1));
					printCities(linkMatcher.group(2), prefectureUrl, random);
				}
			}
		} finally {
			topScanner.close();
		}
	}

	/**
	 * Reads one prefecture page and prints the addresses of every municipality linked
	 * from it, sleeping a random time of up to five seconds before each municipality.
	 * @param initialPrefecture prefecture name to use until a heading image is seen
	 * @param url URL of the prefecture page
	 * @param random source of the random delays
	 * @throws IOException if a page cannot be opened
	 * @throws InterruptedException if a sleep between requests is interrupted
	 */
	private static void printCities(final String initialPrefecture, final URL url,
			final Random random) throws IOException, InterruptedException {
		final Scanner scanner = openScanner(url);
		try {
			String prefecture = initialPrefecture;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher headingMatcher = PREFECTURE_HEADING_PATTERN.matcher(line);
				if (headingMatcher.find()) {
					prefecture = headingMatcher.group(1);
				}
				for (final String fragment : line.split("</A>")) {
					final Matcher cityMatcher = CITY_LINK_PATTERN.matcher(fragment);
					if (cityMatcher.find()) {
						final String city = cityMatcher.group(2);
						Thread.sleep(random.nextInt(5000));
						get(prefecture, city, new URL(Const.Smbc.BASE_URL + cityMatcher.group(1)));
					}
				}
			}
		} finally {
			scanner.close();
		}
	}

	/**
	 * Reads a municipality page and prints one line
	 * {@code prefecture,city,address,storeName} to standard output for every address
	 * cell found, following the "next 20 items" pagination link recursively.
	 * <p>
	 * Nothing is printed for an address while the prefecture, the city or the store name
	 * is still unknown.
	 * @param prefecture prefecture name
	 * @param city municipality name
	 * @param url URL of the municipality page
	 * @throws UnsupportedEncodingException if the configured encoding is unsupported
	 * @throws IOException if a page cannot be opened
	 */
	private static void get(String prefecture, String city, final URL url)
			throws UnsupportedEncodingException, IOException {
		final Scanner scanner = openScanner(url);
		try {
			String caption = null;
			boolean nextLinkSeen = false;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher captionMatcher = CAPTION_PATTERN.matcher(line);
				if (captionMatcher.find()) {
					caption = captionMatcher.group(1);
				}
				final Matcher addressMatcher = ADDRESS_PATTERN.matcher(line);
				if (addressMatcher.find()) {
					// Group 1 is mandatory in the pattern, so it is never null after a match.
					final String address = addressMatcher.group(1);
					if (prefecture != null && city != null && caption != null) {
						System.out.println(prefecture + "," + city + "," + address + "," + caption);
					}
				}
				final Matcher nextMatcher = NEXT_PAGE_PATTERN.matcher(line);
				if (nextMatcher.find()) {
					// The first occurrence of the next-page link is only noted; the link is
					// followed from its second occurrence onwards.
					if (nextLinkSeen) {
						get(prefecture, city, new URL(Const.Smbc.BASE_URL + nextMatcher.group(1)));
					} else {
						nextLinkSeen = true;
					}
				}
			}
		} finally {
			scanner.close();
		}
	}
}
