/*
 * $Id: RobotAcceptanceRule.java,v 1.2 2005/10/22 16:27:27 rampil Exp $
 * Copyright (c) 2005 LOGICAL-PARADOX.ORG
 */
package org.logical_paradox.rss.http;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.ListIterator;

/**
 * Access rules read from a robots.txt stream, kept per user agent.
 *
 * <p>Only <code>User-agent</code> and <code>Disallow</code> records are
 * interpreted. <code>Allow</code> records are recognised solely so that they
 * terminate a run of <code>User-agent</code> lines; they never grant access.
 * Pattern characters such as <code>*</code> and <code>$</code> inside a path
 * are not expanded: every rule is treated as a literal path prefix.</p>
 *
 * @author satoshi akabane@logical-paradox.org
 * @version $Revision: 1.2 $
 */
public class RobotAcceptanceRule {
	/** Canonical spelling of the user-agent record prefix (kept for callers). */
	public static final String HEADER_USER_AGENT = "User-agent: ";
	/** Canonical spelling of the disallow record prefix (kept for callers). */
	public static final String HEADER_DISALLOW = "Disallow: ";

	// Field names are compared case-insensitively against the text before ':'.
	private static final String FIELD_USER_AGENT = "User-agent";
	private static final String FIELD_DISALLOW = "Disallow";
	private static final String FIELD_ALLOW = "Allow";

	/** User-agent value that applies to every robot without its own group. */
	private static final String WILDCARD_AGENT = "*";

	/** Charset used to decode the stream instead of the platform default. */
	private static final String ROBOTS_CHARSET = "UTF-8";

	/** One {@link UserAgentRuleDirective} per accepted User-agent line, in file order. */
	private final LinkedList disallows;

	/**
	 * Creates an empty rule set. Instances are normally obtained through
	 * {@link #getInstance(InputStream)}.
	 */
	protected RobotAcceptanceRule() {
		disallows = new LinkedList();
	}

	/**
	 * Parses robots.txt content from the given stream.
	 *
	 * <p>The stream is always closed before this method returns, whether
	 * parsing succeeds or fails.</p>
	 *
	 * @param in stream positioned at the start of the robots.txt content
	 * @return the parsed rules, or <code>null</code> when the content declares
	 *         no user agent at all
	 * @throws IOException if reading from the stream fails
	 */
	public static RobotAcceptanceRule getInstance(InputStream in) throws IOException {
		RobotAcceptanceRule rule = new RobotAcceptanceRule();

		BufferedReader reader = new BufferedReader(new InputStreamReader(in, ROBOTS_CHARSET));
		try {
			rule.parse(reader);
		} finally {
			// Close even when readLine() throws so the stream is not leaked.
			reader.close();
		}

		// Callers rely on null meaning "nothing usable was found".
		return rule.disallows.isEmpty() ? null : rule;
	}

	/**
	 * Reads every line from the reader and fills {@link #disallows}.
	 *
	 * @param reader source of robots.txt lines
	 * @throws IOException if reading fails
	 */
	private void parse(BufferedReader reader) throws IOException {
		// Directives that receive the rule lines currently being read.
		LinkedList group = new LinkedList();
		// True while consecutive User-agent lines are still extending the group.
		boolean readingAgents = false;

		String line;
		while ((line = reader.readLine()) != null) {
			// Everything from '#' to the end of the line is a comment.
			int hash = line.indexOf('#');
			if (hash >= 0) {
				line = line.substring(0, hash);
			}

			int colon = line.indexOf(':');
			if (colon < 0) {
				// Blank line, comment-only line, or something that is not a record.
				continue;
			}
			String field = line.substring(0, colon).trim();
			String value = line.substring(colon + 1).trim();

			if (FIELD_USER_AGENT.equalsIgnoreCase(field)) {
				if (value.length() == 0) {
					// A User-agent line without a name declares nothing.
					continue;
				}
				if (!readingAgents) {
					// First agent line after a rule line: a new group begins.
					group.clear();
					readingAgents = true;
				}
				UserAgentRuleDirective directive = newRuleDirective(value);
				disallows.add(directive);
				group.add(directive);
			} else if (FIELD_DISALLOW.equalsIgnoreCase(field)) {
				readingAgents = false;
				// Every agent named in the current group shares this rule.
				// Rules seen before any User-agent line find an empty group
				// and are dropped.
				ListIterator members = group.listIterator();
				while (members.hasNext()) {
					((UserAgentRuleDirective) members.next()).add(value);
				}
			} else if (FIELD_ALLOW.equalsIgnoreCase(field)) {
				// Not interpreted, but it still ends the run of agent lines.
				readingAgents = false;
			}
		}
	}

	/**
	 * Creates a directive bound to this rule set.
	 *
	 * @param r user-agent name the directive is for
	 * @return a new, empty directive
	 */
	private UserAgentRuleDirective newRuleDirective(String r) {
		return new UserAgentRuleDirective(r);
	}

	/**
	 * Tells whether the given agent may fetch the given URL.
	 *
	 * @param url absolute URL to check
	 * @param agent user-agent name of the robot; may be <code>null</code>
	 * @return <code>true</code> when no directive applies to the agent or no
	 *         disallowed prefix matches the URL path; <code>false</code> when a
	 *         prefix matches or the URL cannot be parsed
	 */
	protected boolean isAllowed(String url, String agent) {
		UserAgentRuleDirective directive = matchedDirective(agent);
		if (directive == null) {
			// Nothing is declared for this agent, so nothing is forbidden.
			return true;
		}

		String path;
		try {
			path = new URL(url).getPath();
		} catch (MalformedURLException e) {
			// An unparsable URL is treated as forbidden.
			return false;
		}
		if (path.length() == 0) {
			// "http://host" has an empty path but addresses the root document.
			path = "/";
		}

		ListIterator rules = directive.getIterator();
		while (rules.hasNext()) {
			String rule = (String) rules.next();
			if (rule == null) {
				continue;
			}
			String prefix = rule.trim();
			// An empty Disallow value forbids nothing.
			if (prefix.length() > 0 && path.startsWith(prefix)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Finds the directive that applies to the given user agent.
	 *
	 * <p>A directive whose name equals the agent (ignoring case) wins over the
	 * wildcard directive, regardless of their order in the file. When several
	 * directives qualify, the first one in file order is returned.</p>
	 *
	 * @param agent user-agent name to look up; may be <code>null</code>
	 * @return the matching directive, or <code>null</code> when the agent is
	 *         <code>null</code> or neither a named nor a wildcard directive exists
	 */
	protected UserAgentRuleDirective matchedDirective(String agent) {
		if (agent == null) {
			return null;
		}

		UserAgentRuleDirective wildcard = null;
		ListIterator i = disallows.listIterator();
		while (i.hasNext()) {
			UserAgentRuleDirective candidate = (UserAgentRuleDirective) i.next();
			String name = candidate.getUserAgent();
			if (agent.equalsIgnoreCase(name)) {
				return candidate;
			}
			if (wildcard == null && WILDCARD_AGENT.equals(name)) {
				// Remember it, but keep looking for a directive naming the agent.
				wildcard = candidate;
			}
		}

		return wildcard;
	}

	/**
	 * The disallowed path prefixes recorded for one user agent.
	 */
	class UserAgentRuleDirective {
		private final String agent;
		private final LinkedList disallows;

		/**
		 * @param name user-agent name this directive belongs to
		 * @throws NullPointerException if <code>name</code> is <code>null</code>
		 */
		public UserAgentRuleDirective(String name) {
			if (name == null) {
				throw new NullPointerException("name");
			}
			agent = name;
			disallows = new LinkedList();
		}

		/** @return the user-agent name given at construction */
		public String getUserAgent() {
			return agent;
		}

		/** @return an iterator over the recorded path prefixes, in insertion order */
		public ListIterator getIterator() {
			return disallows.listIterator();
		}

		/** @return the number of recorded path prefixes, empty ones included */
		public int size() {
			return disallows.size();
		}

		/** @param path path prefix to record */
		public void add(String path) {
			disallows.add(path);
		}

		/** @param path path prefix to remove; only its first occurrence is removed */
		public void remove(String path) {
			disallows.remove(path);
		}
	}
}

// end of RobotAcceptanceRule.java
