/*
 * Copyright (c) 2003 The Ochusha Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id$
 */

#include "ochusha.h"
#include "ochusha_private.h"

#include <glib.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GUESS_2CH_BOARD_URL	1


/*
 * ochusha_bbs_table_new
 *
 * Allocates a zero-filled BBSTable and creates its two string-keyed hash
 * tables.  Values removed from category_table are released with
 * ochusha_board_category_free and values removed from board_table with
 * ochusha_bulletin_board_free; no key destructor is installed, so the
 * tables never free their keys themselves.
 *
 * Returns the new table, or NULL when calloc fails.
 */
BBSTable *
ochusha_bbs_table_new(void)
{
  BBSTable *new_table = (BBSTable *)calloc(1, sizeof(BBSTable));

  if (new_table != NULL)
    {
      new_table->category_table
	= g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
				(GDestroyNotify)ochusha_board_category_free);
      new_table->board_table
	= g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
				(GDestroyNotify)ochusha_bulletin_board_free);
    }

  return new_table;
}


/*
 * ochusha_bbs_table_add_category
 *
 * Registers category in table under category->name and appends it to the
 * ordered category_list.  When a category with the same name is already
 * registered the call does nothing (the passed category is neither stored
 * nor freed).
 *
 * The hash table key is category->name itself, not a copy.
 */
void
ochusha_bbs_table_add_category(BBSTable *table, BoardCategory *category)
{
  if (g_hash_table_lookup(table->category_table, category->name) == NULL)
    {
      g_hash_table_insert(table->category_table, category->name, category);
      table->category_list = g_slist_append(table->category_list, category);
    }
}


/*
 * ochusha_bbs_table_add_board
 *
 * Registers board in table->board_table, keyed by board->base_url (the
 * string itself, not a copy).
 *
 * g_hash_table_replace is used rather than g_hash_table_insert: when an
 * entry with an equal URL already exists, insert would keep the OLD key
 * pointer while the old board (which owns that key string) is destroyed
 * by the table's value destructor, leaving a dangling key.  replace swaps
 * in the new board's base_url as the key together with the new value.
 */
void
ochusha_bbs_table_add_board(BBSTable *table, BulletinBoard *board)
{
  g_hash_table_replace(table->board_table, board->base_url, board);
}


/*
 * ochusha_bbs_table_free
 *
 * Releases everything a BBSTable refers to: the nodes of category_list
 * and both hash tables (whose value destructors free the categories and
 * boards they hold).
 *
 * NOTE(review): the BBSTable struct itself is not released here, although
 * ochusha_bbs_table_new() obtains it from calloc -- confirm whether
 * callers are expected to free(table) afterwards.
 */
void
ochusha_bbs_table_free(BBSTable *table)
{
  g_return_if_fail(table != NULL);

  /* Only the list nodes are freed; the categories belong to the table. */
  g_slist_free(table->category_list);

  g_hash_table_destroy(table->category_table);
  g_hash_table_destroy(table->board_table);
}


/*
 * ochusha_board_category_new
 *
 * Creates a zero-filled BoardCategory whose name field points at the
 * given string.  The string is not copied; it is later released by
 * ochusha_board_category_free together with the category.
 *
 * Returns NULL when name is NULL or when calloc fails; in both cases the
 * caller keeps ownership of name.
 */
BoardCategory *
ochusha_board_category_new(gchar *name)
{
  BoardCategory *new_category = NULL;

  if (name != NULL)
    {
      new_category = (BoardCategory *)calloc(1, sizeof(BoardCategory));
      if (new_category != NULL)
	new_category->name = name;
    }

  return new_category;
}


/*
 * ochusha_board_category_free
 *
 * Releases a BoardCategory: its name string, the nodes of its board_list
 * and the struct itself.  The boards referenced from board_list are not
 * freed here.
 */
void
ochusha_board_category_free(BoardCategory *category)
{
  /* free() and g_slist_free() both accept NULL, so no guards are needed. */
  free(category->name);
  g_slist_free(category->board_list);
  free(category);
}


/*
 * ochusha_bulletin_board_new
 *
 * Allocates a BulletinBoard describing the board called name whose base
 * URL is url, and returns a pointer to it.
 *
 * name and url are NOT copied: the new board stores the pointers as they
 * are, and ochusha_bulletin_board_free() later passes them to free().
 * Callers that need to keep their own strings must hand in copies.
 *
 * Returns NULL (leaving name and url untouched) when either argument is
 * NULL, when the server or the root path cannot be extracted from url, or
 * when memory allocation fails.
 */
BulletinBoard *
ochusha_bulletin_board_new(gchar *name, char *url)
{
  BulletinBoard *new_board = NULL;
  char *server_part;
  char *path_part = NULL;

  if (name == NULL || url == NULL)
    return NULL;

  /* Each step is attempted only when the previous one succeeded. */
  server_part = url_extract_http_server(url);
  if (server_part != NULL)
    path_part = url_extract_http_root_pathname(url);
  if (path_part != NULL)
    new_board = (BulletinBoard *)calloc(1, sizeof(BulletinBoard));

  if (new_board == NULL)
    {
      /* Single failure path: drop whatever was extracted so far. */
      free(server_part);
      free(path_part);
      return NULL;
    }

  new_board->name = name;
  new_board->base_url = url;
  new_board->server = server_part;
  new_board->base_path = path_part;
  new_board->thread_table
    = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
			    (GDestroyNotify)ochusha_bbs_thread_free);

  return new_board;
}


/*
 * ochusha_bulletin_board_free
 *
 * Releases a BulletinBoard together with the strings it holds (name,
 * base_url, server, base_path), the nodes of its thread_list and its
 * thread_table.  Destroying thread_table runs the value destructor that
 * was installed when the table was created.
 */
void
ochusha_bulletin_board_free(BulletinBoard *board)
{
  /* free() and g_slist_free() accept NULL, so these need no guards. */
  free(board->name);
  free(board->base_url);
  free(board->server);
  free(board->base_path);
  g_slist_free(board->thread_list);

  /* The hash table is destroyed only when it actually exists. */
  if (board->thread_table != NULL)
    g_hash_table_destroy(board->thread_table);

  free(board);
}


/*
 * is_2ch_board_url
 *
 * Heuristic filter for board-list links: a URL is rejected (FALSE) when
 * it contains ".html" or "del_2ch" anywhere, and accepted (TRUE)
 * otherwise.  url must not be NULL.
 */
static gboolean
is_2ch_board_url(char *url)
{
  static const char *const rejected_parts[] = { ".html", "del_2ch" };
  size_t i;

  for (i = 0; i < sizeof(rejected_parts) / sizeof(rejected_parts[0]); i++)
    {
      if (strstr(url, rejected_parts[i]) != NULL)
	return FALSE;
    }

  return TRUE;
}


/*
 * extract_boards
 *
 * Scans the text in [head, tail) for anchors of the form
 * "<A HREF=http...>name</A>".  For every accepted anchor the board is
 * looked up in table->board_table by URL; when it is not there yet a new
 * BulletinBoard is created and added to the table.  Either way the board
 * is appended to category->board_list unless it is already listed.
 *
 * converter, when non-NULL, is used to convert the anchor text into the
 * board name; otherwise the raw bytes are duplicated.
 *
 * each_board_cb is accepted but not invoked by this function.
 *
 * With GUESS_2CH_BOARD_URL enabled, anchors carrying a TARGET= attribute,
 * URLs rejected by is_2ch_board_url() and boards whose base_path is empty
 * are skipped.
 *
 * Scanning stops silently at the first incomplete anchor or on allocation
 * failure; boards registered up to that point are kept.
 */
static void
extract_boards(BBSTable *table, BoardCategory *category, iconv_t converter,
	       EachBoardCallback *each_board_cb, char *head, char *tail)
{
  /* Locate the first candidate anchor inside the category body. */
  char *cur_pos = g_strstr_len(head, tail - head, "<A HREF=http");
  if (cur_pos == NULL)
    return;
  cur_pos += 8;	/* skip "<A HREF=" */

  while (cur_pos != NULL && cur_pos < tail)
    {
      char *tag_tail = memchr(cur_pos, '>', tail - cur_pos);
      char *url_tail;
      gchar *name;
      char *url;
      char *close_tag;
      BulletinBoard *board;

      if (tag_tail == NULL)
	return;	/* the opening tag is not complete within the range */

      /*
       * The URL ends at the first blank or at the closing '>' of the tag.
       * strpbrk stops at a NUL byte, so a NUL before tag_tail yields NULL.
       */
      url_tail = strpbrk(cur_pos, " \t\r\n>");
      if (url_tail == NULL)
	return;

      /*
       * Search for the closing tag only after the opening tag has ended,
       * so that close_tag can never precede tag_tail (which would give a
       * negative name length below).
       */
      close_tag = g_strstr_len(tag_tail + 1, tail - (tag_tail + 1), "</A>");
      if (close_tag == NULL)
	return;	/* the anchor is not complete within the range */

#if GUESS_2CH_BOARD_URL
      {
	/* Anchors with a TARGET attribute are taken to be non-board links. */
	char *target = g_strstr_len(cur_pos, close_tag - cur_pos, "TARGET=");
	if (target != NULL)
	  goto search_next;
      }
#endif
      if (converter != NULL)
	name = convert_string(converter,
			      tag_tail + 1, close_tag - tag_tail - 1);
      else
	name = g_strndup(tag_tail + 1, close_tag - tag_tail - 1);
      url = g_strndup(cur_pos, url_tail - cur_pos);
      if (url == NULL)
	{
	  free(name);
	  return;	/* Out of memory */
	}

#if GUESS_2CH_BOARD_URL
      if (!is_2ch_board_url(url))
	{
	  /* Nothing took ownership of these strings; release them. */
	  free(url);
	  free(name);
	  goto search_next;
	}
#endif

      board = g_hash_table_lookup(table->board_table, url);

      if (board == NULL)
	{
	  if (name == NULL)
	    {
	      free(url);
	      return;	/* Out of memory */
	    }

	  /* On success the board takes ownership of name and url. */
	  board = ochusha_bulletin_board_new(name, url);
	  if (board == NULL)
	    {
	      free(url);
	      free(name);
	      return;
	    }

#if GUESS_2CH_BOARD_URL
	  /* A link to the server root itself is not treated as a board. */
	  if (board->base_path[0] == '\0')
	    {
	      ochusha_bulletin_board_free(board);
	      goto search_next;
	    }
#endif
	  ochusha_bbs_table_add_board(table, board);
	}
      else
	{
	  /* The board is already known; the fresh strings are not needed. */
	  free(url);
	  free(name);
	}

      if (g_slist_find(category->board_list, board) == NULL)
	category->board_list = g_slist_append(category->board_list, board);

    search_next:
      /* Continue right after "</A>", never looking beyond tail. */
      cur_pos = g_strstr_len(close_tag + 4, tail - (close_tag + 4),
			     "<A HREF=http");
      if (cur_pos == NULL)
	return;
      cur_pos += 8;	/* skip "<A HREF=" */
    }
}


/*
 * ochusha_bbs_table_analyze
 *
 * Treats the contents of buffer as a bbstable.html-style board list and
 * updates table from it.  A category is recognised as "<B>name</B>"
 * followed by its anchors, and extends up to the next "<B>" or, for the
 * last category, up to "</BODY>".  For each complete category the
 * matching BoardCategory is looked up or created, and its boards are
 * collected by extract_boards().  A newly created category is added to
 * table only when at least one board was found for it; otherwise it is
 * discarded.
 *
 * If converter is non-NULL it is used to convert category and board
 * names; otherwise the raw bytes are used.
 *
 * If each_category_cb is non-NULL it is called with (category, user_data)
 * for every category that has boards; when it returns FALSE the analysis
 * stops at once.  each_board_cb is only forwarded to extract_boards().
 *
 * While the buffer is not yet marked fixed the function waits on it with
 * async_buffer_wait() and resumes from the first category that was not
 * completely available.
 *
 * Returns TRUE when the whole buffer was analysed; FALSE when the buffer
 * could not be referenced or waited on, the cache state became
 * OCHUSHA_CACHE_IS_DIRTY, a callback asked to stop, or an allocation
 * failed.
 */
gboolean
ochusha_bbs_table_analyze(BBSTable *table, AsyncBuffer *buffer,
			  iconv_t converter,
			  EachCategoryCallback *each_category_cb,
			  EachBoardCallback *each_board_cb,
			  gpointer user_data)
{
  gboolean result = TRUE;

  if (!async_buffer_active_ref(buffer, "boardlist.c: ochusha_bbs_table_analyze"))
    {
#if DEBUG_ASYNC_BUFFER_MOST
      fprintf(stderr, "buffer has been terminated.\n");
#endif
      return FALSE;
    }

  async_buffer_lock(buffer);
  {
    /* Offset of the first byte that has not been fully analysed yet. */
    unsigned int offset = 0;

    while (result)
      {
	/* Re-read pointer and length each round; both are taken afresh. */
	char *buffer_top = (char *)buffer->buffer;
	char *buffer_end = buffer_top + buffer->length;
	char *cur_pos = buffer_top + offset;

	while (cur_pos < buffer_end)
	  {
	    char *name_pos;
	    char *end_name_pos;
	    char *body_pos;
	    char *end_category_pos;
	    gchar *category_name;
	    BoardCategory *category;
	    gboolean is_new_category;

	    /*
	     * Every search length is derived from buffer_end so that no
	     * search can run past the valid part of the buffer.
	     */
	    name_pos = g_strstr_len(cur_pos, buffer_end - cur_pos, "<B>");
	    if (name_pos == NULL)
	      break;	/* not enough data yet, or no more categories */
	    name_pos += 3;	/* skip "<B>" */

	    end_name_pos = g_strstr_len(name_pos, buffer_end - name_pos,
					"</B>");
	    if (end_name_pos == NULL)
	      break;	/* not enough data yet */
	    body_pos = end_name_pos + 4;	/* skip "</B>" */

	    /* The category extends up to the next category heading... */
	    end_category_pos = g_strstr_len(body_pos, buffer_end - body_pos,
					    "<B>");
	    /* ...or, for the last one, up to the end of the body. */
	    if (end_category_pos == NULL)
	      end_category_pos = g_strstr_len(body_pos,
					      buffer_end - body_pos,
					      "</BODY>");
	    if (end_category_pos == NULL)
	      break;	/* the category is not complete yet */

	    if (converter != NULL)
	      category_name = convert_string(converter, name_pos,
					     end_name_pos - name_pos);
	    else
	      category_name = g_strndup(name_pos, end_name_pos - name_pos);
	    if (category_name == NULL)
	      {
		/* A NULL key must not reach the string-hashed table. */
		result = FALSE;
		break;
	      }

	    category = g_hash_table_lookup(table->category_table,
					   category_name);
	    is_new_category = (category == NULL);
	    if (is_new_category)
	      {
		/* On success the category takes ownership of the name. */
		category = ochusha_board_category_new(category_name);
		if (category == NULL)
		  {
		    free(category_name);
		    result = FALSE;
		    break;
		  }
	      }
	    else
	      free(category_name);	/* it was needed only as lookup key */

	    extract_boards(table, category, converter, each_board_cb,
			   body_pos, end_category_pos);

	    if (category->board_list == NULL)
	      {
		/*
		 * Only a category created above may be discarded; one that
		 * came from the table is still referenced by it.
		 */
		if (is_new_category)
		  ochusha_board_category_free(category);
	      }
	    else
	      {
		if (is_new_category)
		  ochusha_bbs_table_add_category(table, category);

		if (each_category_cb != NULL
		    && !(*each_category_cb)(category, user_data))
		  {
		    result = FALSE;
		    break;
		  }
	      }

	    cur_pos = end_category_pos;
	    offset = (unsigned int)(cur_pos - buffer_top);
	  }

	/* Do not wait for more data once the analysis has been aborted. */
	if (!result || buffer->fixed)
	  break;

	if (!async_buffer_wait(buffer, "boardlist.c: ochusha_bbs_table_analyze"))
	  {
	    /* The buffer is no longer usable; report failure to the caller. */
#if DEBUG_ASYNC_BUFFER_MOST
	    fprintf(stderr, "ochusha_bbs_table_analyze: buffer has been terminated.\n");
#endif
	    result = FALSE;
	    break;
	  }

	if (((OchushaNetworkStatus *)buffer->user_data)->state
	    == OCHUSHA_CACHE_IS_DIRTY)
	  {
	    result = FALSE;
	    break;
	  }
      }
  }
  async_buffer_unlock(buffer);

  async_buffer_active_unref(buffer, "boardlist.c: ochusha_bbs_table_analyze");

  return result;
}


/*
 * ochusha_bbs_table_parse_url_2ch
 *
 * Analyses url as a 2ch thread URL of the form
 *   http://SERVER/test/read.cgi/BOARD/THREAD-PART
 *
 * On success *board receives the BulletinBoard registered in table under
 * "http://SERVER/BOARD/" (NULL when no such board is registered) and
 * *thread_part receives a newly allocated copy of everything that follows
 * "BOARD/".  The caller must free() *thread_part when done with it.
 *
 * Returns TRUE when url has the expected form.  Returns FALSE, with
 * *board and *thread_part both set to NULL, when url is NULL, when its
 * server or path cannot be extracted, when the path does not start with
 * "test/read.cgi/" followed by a board directory, when the board URL does
 * not fit in PATH_MAX bytes, or when memory allocation fails.
 */
gboolean
ochusha_bbs_table_parse_url_2ch(BBSTable *table, const char *url,
				BulletinBoard **board, char **thread_part)
{
  char base_url[PATH_MAX];
  char *server = NULL;
  char *path = NULL;
  char *base_path;
  char *board_end;
  char *thread_copy;
  int written;
  gboolean result = FALSE;

  *board = NULL;
  *thread_part = NULL;

  /* Check url before handing it to the extract helpers. */
  if (url == NULL)
    goto done;

  server = url_extract_http_server(url);
  path = url_extract_http_root_pathname(url);
  if (server == NULL || path == NULL)
    goto done;

  if (strncmp(path, "test/read.cgi/", 14) != 0)
    goto done;

  base_path = path + 14;	/* skip "test/read.cgi/" */
  board_end = strchr(base_path, '/');
  if (board_end == NULL)
    goto done;

  /*
   * Build "http://SERVER/BOARD/".  The precision limits the copy to the
   * board directory including its trailing '/', so path itself does not
   * have to be modified.
   */
  written = snprintf(base_url, sizeof(base_url), "http://%s/%.*s",
		     server, (int)(board_end + 1 - base_path), base_path);
  if (written < 0 || written >= (int)sizeof(base_url))
    goto done;

  thread_copy = strdup(board_end + 1);
  if (thread_copy == NULL)
    goto done;	/* Out of memory */

  *board = g_hash_table_lookup(table->board_table, base_url);
  *thread_part = thread_copy;
  result = TRUE;

#if 0 /* debug */
  if (*board == NULL)
    {
      fprintf(stderr, "Couldn't find board for base_url=\"%s\"\n", base_url);
      fprintf(stderr, "url=\"%s\", server=\"%s\", path=\"%s\", base_path=\"%s\", thread_part=\"%s\"\n", url, server, path, base_path, *thread_part);
    }
#endif

 done:
  /* free(NULL) is a no-op, so both can be released unconditionally. */
  free(server);
  free(path);

  return result;
}
