#!/usr/bin/perl -w
# vim: set ts=4 sw=4:
#
# Copyright (c) 2002-2010 Martin A. Godisch <martin@godisch.de>.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use DBI qw(:sql_types);
use strict;

my @lang = ('Deutsch', 'English');
my $idw = 1;
my $idt = 1;
my $dbh;

sub add ($$)
{
	return 0 if ($a =~ /^00-/);

	my @word = ();
	my $sth;

	for (my $i = 0; $i < 2; $i++) {
		$word[$i] = shift;
		$sth = $dbh->prepare("INSERT INTO wordlist (language, word) VALUES (?, ?)");
		$sth->bind_param(1, $lang[$i], SQL_INTEGER);
		$sth->bind_param(2, $word[$i], SQL_VARCHAR);
		if ($sth->execute) {
			$idw++;
		}
	}

	$sth = $dbh->prepare("INSERT INTO translation (id, word1, word2) \
		SELECT IFNULL(MAX(translation.id) + 1, 1), word1.id, word2.id \
		FROM wordlist AS word1 INNER JOIN wordlist AS word2 LEFT OUTER JOIN translation \
		WHERE word1.language = ? AND word1.word = ? AND word2.language = ? AND word2.word = ?");
	$sth->bind_param(1, $lang[0], SQL_INTEGER);
	$sth->bind_param(2, $word[0], SQL_VARCHAR);
	$sth->bind_param(3, $lang[1], SQL_INTEGER);
	$sth->bind_param(4, $word[1], SQL_VARCHAR);
	if ($sth->execute) {
		$idt++;
	} else {
		warn "Cannot insert (" . $word[0] . ", " . $word[1] . ") into translation";
	}

	return 0;
}

my $state = 0;
my $i = 0;
my $n = 0;

#if ($#ARGV != 3) {
#	printf STDERR "Usage: dict2db sqlite3.db dict.dz\n";
#	exit 1;
#}

open R, "gzip -cd $ARGV[1] |" or die "Cannot open $ARGV[1]: $!";
$dbh = DBI->connect("dbi:SQLite:dbname=$ARGV[0]", '', '', {AutoCommit => 1, PrintError => 1, RaiseError => 0});
$dbh->{unicode} = 1;
$dbh->begin_work;

for (my $i = 0; $i < 2; $i++) {
	my $sth = $dbh->prepare("INSERT INTO language (name) VALUES (?)");
	$sth->bind_param(1, $lang[$i], SQL_VARCHAR);
	if ($sth->execute) {
		$lang[$i] = $dbh->last_insert_id(undef, undef, undef, undef);
	} else {
		my $qh = $dbh->prepare("SELECT id FROM language WHERE name = ?");
		$qh->bind_param(1, $lang[$i], SQL_VARCHAR);
		if ($qh->execute) {
			my @row = $qh->fetchrow_array;
			$lang[$i] = $row[0];
		} else {
			warn "Cannot insert " . $lang[$i] . " into language table";
			exit 1;
		}
	}
}

while (<R>) {
	chomp;
	s/\s*\[[^\]]*\].*//;
	s/.*\}//;
	s/\{.*//;
	s/\t/ /g;
	if (/^\S/) {
		if ($state == 2) {
			if (add($a, $b) < 0) {
				printf STDERR "cannot insert: $a\n$b\n";
			}
			$a = '';
			$b = '';
		}
		if ($state != 1) {
			$a .= ", ";
			$state = 1;
		}
		$a .= "$_";
	} elsif (/^\s+\S/) {
		if ($state != 2) {
			$a .= ", ";
			$state = 2;
		}
		$b .= "$_";
	} elsif (/^$/) {
		if ($state == 2) {
			if (add($a, $b) < 0) {
				printf STDERR "cannot insert: $a\n$b\n";
			}
			$a = '';
			$b = '';
		}
		$state = 0;
	}
	$n++;
	if ($i++ > 10000) {
		$dbh->commit;
		$dbh->begin_work;
		$i = 0;
	}
}

$dbh->commit;
$dbh->disconnect();
close R;

printf "%d lines processed, %d translations (%d words) inserted\n", $n, $idt - 1, $idw - 1;

exit 0;
