<?php

// PukiWiki - EUC-JP to UTF-8 convert tool
// version 2.0
// Date: 2022/04/03
//
// Copyright 2022 PukiWiki Development Team

// Upper bounds used by the file name length check in convert_eucjp_to_utf8().
// Both are compared against strlen() results, i.e. they are byte counts.
const MAX_PAGE_NAME_LENGTH = 115,       // limit for a converted page name
      MAX_ATTACH_BASENAME_LENGTH = 240; // limit for a converted attach basename

/**
 * Write the tool name and version, followed by a newline, to standard output.
 */
function show_version() {
    $banner = "pw_convert_eucjp_utf8.php v2.0\n";
    echo $banner;
}

/**
 * Decode a hex-encoded name back to its raw byte string.
 *
 * @param string $filename Candidate hex string (case-insensitive).
 * @return string The packed bytes when $filename consists only of hex digits,
 *                otherwise $filename itself, unchanged.
 */
function name_decode($filename) {
    $looks_like_hex = preg_match('/^[0-9a-f]+$/i', $filename);
    if (!$looks_like_hex) {
        // Not a hex string: hand it back untouched.
        return $filename;
    }
    return pack('H*', (string)$filename);
}

/**
 * Encode a raw name as an upper-case hex string.
 *
 * @param string $key Raw byte string.
 * @return string Upper-case hexadecimal form of $key, or '' when $key
 *                compares loosely equal to the empty string.
 */
function name_encode($key) {
    if ($key == '') {
        return '';
    }
    $hex = bin2hex($key);
    return strtoupper($hex);
}

// Names of the "converted name" keys; the values equal the array keys
// that update_item_names() writes into an item.
const TO_BASENAME = 'to_basename',
      TO_NAME = 'to_name',
      TO_LEAF = 'to_leaf',
      TO_PATH = 'to_path';
/**
 * Compute the converted file names of a scanned entry and store them in it.
 *
 * The hex-encoded page name embedded in $item['path'] is decoded, converted
 * from $from_enc to $to_enc and hex-encoded again. The following keys are
 * written into $item:
 *   - 'from_name'   decoded page name in the source encoding (matched paths only)
 *   - 'to_name'     converted name ("<page>_<file>" for attachments)
 *   - 'to_basename' hex-encoded converted name without suffix
 *   - 'to_leaf'     'to_basename' plus the original suffix
 *   - 'to_path'     directory prefix plus 'to_leaf'
 *   - 'is_attach'   set to 1 for entries under attach/ only
 * Paths that match no known data directory layout keep their 'leaf' and
 * 'path', and get an empty 'to_basename' / 'to_name'.
 *
 * @param array  $item     Entry with at least 'path' and 'leaf'; modified in place.
 * @param string $from_enc Encoding of the existing names.
 * @param string $to_enc   Encoding to convert the names to.
 * @return void
 */
function update_item_names(&$item, $from_enc, $to_enc)
{
    $path = $item['path'];
    $m = array();

    // Attachments: "attach/<HEX page>_<HEX file>", optionally followed by
    // ".log" or ".<age>". The age alternative needs its leading dot;
    // without it a name such as "AA_BB.1" can never match and the backup
    // would be copied under its unconverted name.
    if (preg_match('#^(attach/)([0-9A-Fa-f]+)_([0-9A-Fa-f]+)(\.log|\.\d+)?$#', $path, $m)) {
        $rawname = name_decode($m[2]);
        $attachname = name_decode($m[3]);
        $to_name = mb_convert_encoding($rawname, $to_enc, $from_enc);
        $to_attach = mb_convert_encoding($attachname, $to_enc, $from_enc);
        $to_basename = name_encode($to_name) . '_' . name_encode($to_attach);
        // $m[4] is absent when the optional suffix group did not participate.
        $to_leaf = $to_basename . (isset($m[4]) ? $m[4] : '');
        $item['from_name'] = $rawname;
        $item['to_basename'] = $to_basename;
        $item['to_name'] = $to_name . '_' . $to_attach;
        $item['to_leaf'] = $to_leaf;
        $item['to_path'] = $m[1] . $to_leaf;
        $item['is_attach'] = 1;
        return;
    }

    // Data directories holding "<HEX page><suffix>" files:
    // directory prefix => regex alternatives for the allowed suffixes.
    $page_dirs = array(
        'wiki/' => '\.txt',
        'wiki.en/' => '\.txt',
        'backup/' => '\.txt|\.gz',
        'cache/' => '\.[0-9a-z]+',
        'counter/' => '\.count',
        'diff/' => '\.txt',
        'trackback/' => '\.txt|\.ref',
    );
    foreach ($page_dirs as $dir => $suffix) {
        $pattern = '#^(' . preg_quote($dir, '#') . ')([0-9A-Fa-f]+)(' . $suffix . ')$#';
        if (!preg_match($pattern, $path, $m)) {
            continue;
        }
        $rawname = name_decode($m[2]);
        $to_name = mb_convert_encoding($rawname, $to_enc, $from_enc);
        $to_basename = name_encode($to_name);
        $to_leaf = $to_basename . $m[3];
        $item['from_name'] = $rawname;
        $item['to_basename'] = $to_basename;
        $item['to_name'] = $to_name;
        $item['to_leaf'] = $to_leaf;
        $item['to_path'] = $m[1] . $to_leaf;
        return;
    }

    // Anything else keeps its current name.
    $item['to_basename'] = '';
    $item['to_name'] = '';
    $item['to_leaf'] = $item['leaf'];
    $item['to_path'] = $item['path'];
}

/**
 * Convert a UTF-8 tree under $src_dir into a CP51932 (EUC-JP) tree under $dest_dir.
 *
 * Every entry returned by list_files() is printed, directories are created
 * on the destination side and files go through convert_single_file().
 * Finally lib/init.php inside the destination tree is patched with
 * convert_init_file_logic_utf8_to_eucjp().
 *
 * @param string $src_dir  Root of the tree to read.
 * @param string $dest_dir Root of the tree to write; created when missing.
 * @return void
 */
function convert_utf8_to_eucjp($src_dir, $dest_dir) {
    $source_encoding = 'UTF-8';
    $target_encoding = 'CP51932';
    $entries = list_files('', $src_dir);

    // Make sure the destination root exists.
    if (!is_dir($dest_dir)) {
        mkdir($dest_dir);
    }

    foreach ($entries as $entry) {
        update_item_names($entry, $source_encoding, $target_encoding);
        print($entry['path'] . "\n");

        if ($entry['is_dir']) {
            // Directories keep their name on the destination side.
            $target_dir = $dest_dir . '/' . $entry['path'];
            if (!is_dir($target_dir)) {
                mkdir($target_dir);
            }
            continue;
        }

        if ($entry['is_file']) {
            convert_single_file($entry, $src_dir, $dest_dir, $source_encoding, $target_encoding);
        }
    }

    convert_init_file_logic_utf8_to_eucjp($dest_dir . '/lib/init.php');
}

/**
 * Convert a CP51932 (EUC-JP) tree under $eucjp_src_dir into a UTF-8 tree
 * under $utf8_dest_dir.
 *
 * Runs in two phases:
 *   1. Compute the converted names of every entry and report names whose
 *      byte length exceeds MAX_PAGE_NAME_LENGTH (counted as an error) or
 *      attach basenames exceeding MAX_ATTACH_BASENAME_LENGTH (reported only).
 *   2. Unless an error was counted or $filename_check_only is true, create
 *      the directories, convert the files and patch lib/init.php in the
 *      destination tree.
 *
 * @param string $eucjp_src_dir       Root of the tree to read.
 * @param string $utf8_dest_dir       Root of the tree to write; created when missing.
 * @param bool   $filename_check_only When true (default) stop after phase 1.
 * @return void
 */
function convert_eucjp_to_utf8($eucjp_src_dir, $utf8_dest_dir, $filename_check_only=true) {
    $eucjp = 'CP51932';
    $utf8 = 'UTF-8';
    $files = list_files('', $eucjp_src_dir);
    // Create dest_dir
    if (!is_dir($utf8_dest_dir)) {
        mkdir($utf8_dest_dir);
    }
    $error_count = 0;
    foreach ($files as &$item) {
        update_item_names($item, $eucjp, $utf8);
        $to_name = $item['to_name'];
        if (!is_null($to_name) && MAX_PAGE_NAME_LENGTH < strlen($to_name)) {
            $error_count += 1;
            print("Too long name: '" . $to_name . "' (length:" . strlen($to_name) . ")\n" .
                "\tsrc path:" . $item['path'] . " (length:" . strlen($item['path']) . ")\n" .
                "\tnew_path:" . $item['to_path'] . " (length:" . strlen($item['to_path']) . ")\n" .
                "");
        }
        if (isset($item['is_attach']) && $item['is_attach']) {
            $to_basename = $item['to_basename'];
            // NOTE(review): an over-long attach basename is only reported and
            // does not increase $error_count - confirm that this is intended.
            if (MAX_ATTACH_BASENAME_LENGTH < strlen($to_basename)) {
                print("Too long attach name: '" . $to_name . "' (length:" . strlen($to_basename) . ")\n" .
                    "\tbasename:" . $to_basename . " (length:" . strlen($to_basename) . ")\n" .
                    "\tnew_path:" . $item['to_path'] . " (length:" . strlen($item['to_path']) . ")\n" .
                    "");
            }
        }
    }
    // Break the reference left behind by the by-reference loop. Without
    // this, the by-value loop below would assign every entry into the last
    // element of $files, so the last file would never be converted and the
    // one before it would be converted twice.
    unset($item);

    $file_count = count($files);
    print("DETECTED: $file_count files to convert.\n");
    if ($error_count > 0) {
        print("\nERROR occured.\nPlease check error output\n");
        return;
    }
    print("PASS - file name length check.\n");
    if ($filename_check_only) {
        print("Please execute convert with '\$filename_check_only=false'.\n");
        return;
    }
    $itr = 0;
    foreach ($files as $item) {
        $itr += 1;
        print("($itr/$file_count) " . $item['path'] . "\n");
        if ($item['is_dir']) {
            $dir_path = "$utf8_dest_dir/" . $item['path'];
            if (!is_dir($dir_path)) {
                mkdir($dir_path);
            }
        } else if ($item['is_file']) {
            convert_single_file($item, $eucjp_src_dir, $utf8_dest_dir, $eucjp, $utf8);
        }
    }
    convert_init_file_logic_eucjp_to_utf8("$utf8_dest_dir/lib/init.php");
}

// Conversion strategies returned by get_convert_type() and dispatched on
// by convert_single_file().
const TYPE_HISTORY_GZ = 'history_gz',         // gzip file under backup/
      TYPE_HISTORY_GZ_JIS = 'history_gz_jis', // gzip file under backup/ for which is_dist_file() is true
      TYPE_BINARY = 'binary',                 // copied without conversion
      TYPE_TEXT = 'text',                     // plain text, re-encoded
      TYPE_TEXT_JIS = 'text_jis';             // plain text for which is_dist_file() is true
/**
 * Tell whether an entry is treated as a "dist" file.
 *
 * An entry qualifies when its converted name is 'FormattingRules' or starts
 * with 'PukiWiki/1.4/Manual/Plugin', when its path is 'README.txt', or when
 * it sits directly in the plugin, lib or skin directory.
 *
 * @param array $item Entry with 'to_name', 'path' and 'rel_dir'.
 * @return bool
 */
function is_dist_file($item) {
    $page_name = $item['to_name'];
    if ($page_name === 'FormattingRules') {
        return true;
    }
    if (strpos($page_name, 'PukiWiki/1.4/Manual/Plugin') === 0) {
        return true;
    }
    if ($item['path'] === 'README.txt') {
        return true;
    }
    $dist_dirs = array('plugin', 'lib', 'skin');
    return in_array($item['rel_dir'], $dist_dirs, true);
}
/**
 * Choose the conversion strategy (one of the TYPE_* constants) for an entry.
 *
 * @param array $item Entry with 'to_path', 'rel_dir' and the keys read by
 *                    is_dist_file().
 * @return string TYPE_BINARY, TYPE_HISTORY_GZ, TYPE_HISTORY_GZ_JIS,
 *                TYPE_TEXT or TYPE_TEXT_JIS.
 */
function get_convert_type($item) {
    $dest_path = $item['to_path'];
    $dir = $item['rel_dir'];

    // Everything in these directories is copied as is.
    if (in_array($dir, array('attach', 'image', 'shortner'))) {
        return TYPE_BINARY;
    }

    // Gzipped backups; other files under backup/ fall through to the
    // generic rules below.
    if ($dir == 'backup' && preg_match("/\.gz$/", $dest_path)) {
        return is_dist_file($item) ? TYPE_HISTORY_GZ_JIS : TYPE_HISTORY_GZ;
    }

    // Generic rules: well-known binary extensions first, then text.
    if (preg_match("/\.(zip|gz|png|jpg|jpeg|gif|swf|pdf|mp4|avi|mp3|ico)$/i", $dest_path)) {
        return TYPE_BINARY;
    }
    return is_dist_file($item) ? TYPE_TEXT_JIS : TYPE_TEXT;
}

/**
 * Convert or copy one file from the source tree into the destination tree.
 *
 * The strategy comes from get_convert_type(): gzip files are re-encoded via
 * convert_gz_text_file(), binary files are copied, text files are re-encoded
 * in memory. Afterwards the modification time and permissions of the source
 * file are applied to the destination file.
 *
 * @param array  $item     Entry with 'path' (source-relative) and 'to_path'
 *                         (destination-relative).
 * @param string $src_dir  Root of the source tree.
 * @param string $dest_dir Root of the destination tree.
 * @param string $from_enc Encoding of the source content.
 * @param string $to_enc   Encoding of the destination content.
 * @return void
 * @throws Exception When get_convert_type() returns an unknown type.
 */
function convert_single_file($item, $src_dir, $dest_dir, $from_enc, $to_enc) {
    $src_path = "$src_dir/" . $item['path'];
    $dest_path = "$dest_dir/" . $item['to_path'];
    switch (get_convert_type($item)) {
        // The "_JIS" variants are currently handled exactly like their
        // plain counterparts.
        case TYPE_HISTORY_GZ:
        case TYPE_HISTORY_GZ_JIS:
            convert_gz_text_file($src_path, $dest_path, $from_enc, $to_enc);
            break;
        case TYPE_BINARY:
            copy($src_path, $dest_path);
            break;
        case TYPE_TEXT:
        case TYPE_TEXT_JIS:
            $src_content = file_get_contents($src_path);
            if ($src_content === false) {
                // Do not write an empty file in place of unreadable content.
                trigger_error('file_get_contents failed: ' . $src_path);
                return;
            }
            $dest_content = mb_convert_encoding($src_content, $to_enc, $from_enc);
            file_put_contents($dest_path, $dest_content);
            break;
        default:
            // Report the path actually being processed; the previous
            // message referenced variables that do not exist here.
            throw new Exception('file is neither file nor dir: ' . $src_path);
    }
    $mtime = filemtime($src_path);
    if ($mtime) {
        touch($dest_path, $mtime);
        $perms = fileperms($src_path);
        chmod($dest_path, $perms);
    }
}
/**
 * Re-encode the text inside a gzip file and write it as a new gzip file.
 *
 * The whole source file is decompressed into memory, converted from
 * $from_enc to $to_enc and compressed into $dest_path. Failures are
 * reported with trigger_error(); nothing is written when the source cannot
 * be opened or read.
 *
 * @param string $src_path  Gzip file to read.
 * @param string $dest_path Gzip file to create.
 * @param string $from_enc  Encoding of the decompressed source text.
 * @param string $to_enc    Encoding of the text to write.
 * @return void
 */
function convert_gz_text_file($src_path, $dest_path, $from_enc, $to_enc) {
    $zp = gzopen($src_path, 'rb');
    if (!$zp) {
        trigger_error('gzopen failed.');
        return;
    }

    $src_content = '';
    while (!gzeof($zp)) {
        $piece = gzread($zp, 1024);
        if ($piece === false) {
            // gzread() signals failure with false; stop here instead of
            // looping on a stream that may never reach EOF.
            gzclose($zp);
            trigger_error('gzread failed: ' . $src_path);
            return;
        }
        $src_content .= $piece;
    }
    gzclose($zp);

    $dest_content = mb_convert_encoding($src_content, $to_enc, $from_enc);
    $zp_out = gzopen($dest_path, 'wb');
    if (!$zp_out) {
        trigger_error('zp_out failed: ' . $dest_path);
        return;
    }
    gzwrite($zp_out, $dest_content);
    gzclose($zp_out);
}

/**
 * Copy a text file while converting its character encoding.
 *
 * @param string $from_file File to read.
 * @param string $to_file   File to write (overwritten when it exists).
 * @param string $from_enc  Encoding of $from_file.
 * @param string $to_enc    Encoding to write $to_file in.
 * @return void
 */
function copy_file_with_enc($from_file, $to_file, $from_enc, $to_enc) {
    $body = file_get_contents($from_file);
    if ($body === false) {
        // Do not create an empty target for an unreadable source.
        trigger_error('file_get_contents failed: ' . $from_file);
        return;
    }
    $body_to_enc = mb_convert_encoding($body, $to_enc, $from_enc);
    // The target path was previously missing from this call, so the
    // converted body was never written anywhere.
    file_put_contents($to_file, $body_to_enc);
}

/**
 * Recursively list the files and directories below $root_dir/$dir.
 *
 * Entries named '.', '..', '.git' and 'CVS' are skipped. Each directory
 * entry is immediately followed by the entries found inside it. Anything
 * that is neither a regular file nor a directory is reported on standard
 * output and left out of the result.
 *
 * @param string $dir      Directory relative to $root_dir; '' means the root itself.
 * @param string $root_dir Root directory of the scan.
 * @return array List of entries, each with the keys 'leaf', 'is_file',
 *               'is_dir', 'root_dir', 'rel_dir', 'path' (relative to
 *               $root_dir) and 'mtime'.
 */
function list_files($dir, $root_dir) {
    $rel_dir = ($dir === '') ? '.' : $dir;
    $base_dir = "$root_dir/$rel_dir";
    $names = scandir($base_dir);
    $skip_names = array('.', '..', '.git', 'CVS');
    $list = array();
    foreach ($names as $name) {
        if (in_array($name, $skip_names)) {
            continue;
        }
        $full_path = "$base_dir/$name";
        // Path relative to the root; entries at the root carry no "./" prefix.
        $path = ($rel_dir === '.') ? $name : "$dir/$name";

        $is_regular_file = is_file($full_path);
        if (!$is_regular_file && !is_dir($full_path)) {
            echo "invalid state: $base_dir/$name\n";
            continue;
        }

        $entry = array(
            "leaf" => $name,
            "is_file" => $is_regular_file,
            "is_dir" => !$is_regular_file,
            "root_dir" => $root_dir,
            "rel_dir" => $rel_dir,
            "path" => $path,
        );
        $entry['mtime'] = filemtime($full_path);
        $list[] = $entry;

        if (!$is_regular_file) {
            // Descend right away so children directly follow their directory.
            $list = array_merge($list, list_files($path, $root_dir));
        }
    }
    return $list;
}

/**
 * Disable the "UTF-8 only" lines of an init file by commenting them out.
 *
 * Every line ending in "//UTF-8 only" plus a newline gets the prefix
 * "//UTF-8:". The file is rewritten in place and its previous modification
 * time and permissions are restored.
 *
 * @param string $init_file_path Path of the file to patch.
 * @return array 'status' ('ok' when the file size changed, otherwise 'ng'),
 *               'size_before', 'size_after' (both false when the file does
 *               not exist) and 'file_path'.
 */
function convert_init_file_logic_utf8_to_eucjp($init_file_path) {
    $file_path = $init_file_path;
    if (!is_file($file_path)) {
        // Nothing to patch; report failure instead of a misleading 'ok'.
        return array(
            'status' => 'ng',
            'size_before' => false,
            'size_after' => false,
            'file_path' => $file_path,
        );
    }
    clearstatcache(true, $file_path);
    $mtime = filemtime($file_path);
    $fileperm = fileperms($file_path);
    $size_before = filesize($file_path);
    // Stays equal to $size_before (status 'ng') when the file cannot be opened.
    $size_after = $size_before;
    $fp = fopen($file_path, 'rb');
    if ($fp) {
        $body = '';
        while (($line = fgets($fp)) !== false) {
            if (preg_match('#//UTF-8 only\n$#', $line)) {
                $body .= '//UTF-8:' . $line;
            } else {
                $body .= $line;
            }
        }
        fclose($fp);
        file_put_contents($file_path, $body);
        touch($file_path, $mtime);
        chmod($file_path, $fileperm);
        // Drop the cached stat data so the new size is read from disk.
        clearstatcache(true, $file_path);
        $size_after = filesize($file_path);
    }
    return array(
        'status' => ($size_before === $size_after ? 'ng' : 'ok'),
        'size_before' => $size_before,
        'size_after' => $size_after,
        'file_path' => $file_path,
    );
}

/**
 * Re-enable the lines of an init file that were commented out with "//UTF-8:".
 *
 * The prefix "//UTF-8:" is removed from every line that starts with it. The
 * file is rewritten in place and its previous modification time and
 * permissions are restored.
 *
 * @param string $init_file_path Path of the file to patch.
 * @return array 'status' ('ok' when the file size changed, otherwise 'ng'),
 *               'size_before', 'size_after' (both false when the file does
 *               not exist) and 'file_path'.
 */
function convert_init_file_logic_eucjp_to_utf8($init_file_path) {
    $file_path = $init_file_path;
    if (!is_file($file_path)) {
        // Nothing to patch; report failure instead of a misleading 'ok'.
        return array(
            'status' => 'ng',
            'size_before' => false,
            'size_after' => false,
            'file_path' => $file_path,
        );
    }
    clearstatcache(true, $file_path);
    $mtime = filemtime($file_path);
    $fileperm = fileperms($file_path);
    $size_before = filesize($file_path);
    // Stays equal to $size_before (status 'ng') when the file cannot be opened.
    $size_after = $size_before;
    $fp = fopen($file_path, 'rb');
    if ($fp) {
        $marker = '//UTF-8:';
        $body = '';
        while (($line = fgets($fp)) !== false) {
            if (strpos($line, $marker) === 0) {
                // Strip only the leading marker; the rest of the line is kept.
                $body .= substr($line, strlen($marker));
            } else {
                $body .= $line;
            }
        }
        fclose($fp);
        file_put_contents($file_path, $body);
        touch($file_path, $mtime);
        chmod($file_path, $fileperm);
        // Drop the cached stat data so the new size is read from disk.
        clearstatcache(true, $file_path);
        $size_after = filesize($file_path);
    }
    return array(
        'status' => ($size_before === $size_after ? 'ng' : 'ok'),
        'size_before' => $size_before,
        'size_after' => $size_after,
        'file_path' => $file_path,
    );
}
