#!/usr/bin/perl -w
# dgit repos policy hook script for Debian
#
# Copyright (C) 2015-2019  Ian Jackson
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

use strict;

use Debian::Dgit::Infra; # must precede Debian::Dgit; - can change @INC!
use Debian::Dgit qw(:DEFAULT :policyflags);
setup_sigwarn();

use POSIX;
use JSON;
use File::Temp qw(tempfile);
use DBI;
use IPC::Open2;
use Data::Dumper;

use Debian::Dgit::Policy::Debian;

# Debug output setup; the debug level is taken from the environment
# variable DGIT_DRS_DEBUG.
initdebug('%');
enabledebuglevel $ENV{'DGIT_DRS_DEBUG'};

# Any exit which runs END blocks (die, or a plain `exit') has its
# status forced to 127 here.  The normal completion path at the end of
# this script calls _exit, which does not run END blocks.
END { $? = 127; } # deliberate exit uses _exit

# Fixed positional arguments, consumed in this order.  Whatever is left
# in @ARGV afterwards is consumed by the selected action (see
# getpackage and getpushinfo).
our $distro = shift @ARGV // die "need DISTRO";
our $repos = shift @ARGV // die "need DGIT-REPOS-DIR";
our $dgitlive = shift @ARGV // die "need DGIT-LIVE-DIR";
our $distrodir = shift @ARGV // die "need DISTRO-DIR";
our $action = shift @ARGV // die "need ACTION";

# Mode which check_package applies (chmod) to a package's repo
# directory when it makes that repo public.
our $publicmode = 02775;
# Seconds.  check_package leaves a secret repo alone while the mtime of
# its directory is more recent than this.
our $new_upload_propagation_slop = 3600*4 + 100;# fixme config;

# Handle used for all policy database queries.  It is not assigned
# anywhere in this file; presumably poldb_setup sets it - TODO confirm.
our $poldbh;
# Package currently being processed, and the path of its repo
# ("$repos/$pkg.git"), as set by getpackage / statpackage.
our $pkg;
our $pkgdir;
# Results of statpackage: whether $pkgdir exists and, if so, whether
# it lacks the "other" read or execute permission bit.
our ($pkg_exists,$pkg_secret);

# Text accumulated by the action; printed to STDERR only once the
# action has completed and its database transaction has committed.
our $stderr;

# Details of the push being processed, filled in by getpushinfo.
# Keys of %deliberately are the comma-separated items of the
# DELIBERATELIES argument, used verbatim.
our ($version,$suite,$tagname);
our %deliberately;

# We assume that it is not possible for NEW to have a version older
# than sid.

# Whenever pushing, we check for
#   source-package-local tainted history
#   global tainted history
#   can be overridden by --deliberately except for an admin prohib taint
# 
# ALL of the following apply only if history is secret:
# 
# if NEW has no version, or a version which is not in our history[1]
#   (always)
#   check all suites
#   if any suite's version is in our history[1], publish our history
#   otherwise discard our history,
#     tainting --deliberately-include-questionable-history
# 
# if NEW has a version which is in our history[1]
#   (on push only)
#   require explicit specification of one of
#     --deliberately-include-questionable-history
#     --deliberately-not-fast-forward
#       (latter will taint old NEW version --d-i-q-h)
#   (otherwise)
#   leave it be
# 
# [1] looking for the relevant git tag for the version number and not
#    caring what that tag refers to.
#
#    When we are doing a push to a fresh repo, any version will do: in
#    this case, this is the first dgit upload of an existing package,
#    and we trust that the uploader hasn't included in their git
#    history any previous non-dgit uploads.
#
# A wrinkle: if we approved a push recently, we treat NEW as having
# a version which is in our history.  This is because the package may
# still be being uploaded.  (We record this using the timestamp of the
# package's git repo directory.)

# We aim for the following invariants and properties:
#
# - .dsc of published dgit package will have corresponding publicly
#   visible dgit-repo (soon)
#
# - when a new package is rejected we help maintainer avoid
#   accidentally including bad objects in published dgit history
#
# - .dsc of NEW dgit package has corresponding dgit-repo but not
#   publicly readable

# Runs `dgit archive-api-query SUBPATH' for $distro and returns the
# decoded JSON reply.
#
# $dgitlive/dgit is used if it exists, otherwise `dgit' from PATH.
# The command is run via the shell, so $DGIT_TEST_OPTS is expanded
# there.  Our debug level is passed on as -D, -DD, etc.
#
# Dies if the command produced no output value or exited nonzero.
sub apiquery ($) {
    my ($subpath) = @_;
    local $/=undef;

    my $livedgit = "$dgitlive/dgit";
    my $dgit = stat_exists($livedgit) ? $livedgit : "dgit";

    my @pieces = ("$dgit -d$distro \$DGIT_TEST_OPTS");
    push @pieces, "-".("D" x $debuglevel) if $debuglevel;
    push @pieces, "archive-api-query $subpath";
    my $cmd = join ' ', @pieces;
    printdebug "apiquery $cmd\n";

    $! = 0;
    $? = 0;
    my $json = `$cmd`;
    die "$subpath $! $?" unless defined $json && !$?;

    my $r = decode_json $json;
    if ($debuglevel >= 2) {
        my $dumper = Data::Dumper->new([$r], [qw(r)]);
        printdebug "apiquery $subpath | ", $dumper->Dump();
    }
    return $r;
}

# Returns 1 if the git repo we are running in has a tag corresponding
# to version $vsn (any of the tag names given by debiantags for
# $distro), and 0 if not.  Only the existence of the tag ref matters,
# not what it refers to.
#
# Dies if git for-each-ref cannot be started, or if it fails without
# having reported any ref.
sub vsn_in_our_history ($) {
    my ($vsn) = @_;

    # Eventually, when we withdraw support for old-format (DEP-14
    # namespace) tags, we will need to change this to only look
    # for debiantag_new.  See the commit
    #   "Tag change: Update dgit-repos-policy-debian"
    # (reverting which is a good start for that change).

    my @tagrefs = map { "refs/tags/".$_ } debiantags $vsn, $distro;
    printdebug " checking history  vsn=$vsn tagrefs=@tagrefs\n";

    # If we could not even start git, we must not go on to report
    # "not in our history": callers may discard or taint history on
    # the strength of that answer.
    open my $refsfh, "-|", qw(git for-each-ref), @tagrefs
        or die "$pkg tagrefs @tagrefs $!";
    # The first line of output is enough to know a tag exists.
    my $firstline = <$refsfh>;
    close $refsfh;

    # Check for a hit before looking at $?: having got our answer we
    # do not care how git exited.
    return 1 if defined $firstline && $firstline =~ m/\S/;
    die "$pkg tagrefs @tagrefs $? $!" if $?;
    return 0;
}

# Asks the archive which versions of $pkg are in suite $suite, and
# returns 1 as soon as $vsn_check->($version) is true for one of
# them; returns 0 if it is true for none.
# Dies if an entry in the archive's reply has no version.
sub specific_suite_has_suitable_vsn ($$) {
    my ($suite, $vsn_check) = @_;
    my $entries = apiquery "dsc_in_suite/$suite/$pkg";
    for my $entry (@$entries) {
        my $vsn = $entry->{version} // die "$pkg ?";
        printdebug " checking history found suite=$suite vsn=$vsn\n";
        if ($vsn_check->($vsn)) {
            return 1;
        }
    }
    return 0;
}

# True iff suite `new' contains a version of $pkg for which we have
# a tag (see vsn_in_our_history).
sub new_has_vsn_in_our_history () {
    my $in_history = \&vsn_in_our_history;
    return specific_suite_has_suitable_vsn('new', $in_history);
}

# Goes through every suite the archive knows about, except those
# whose name ends in the word `new', and returns 1 as soon as one
# of them has a version of $pkg accepted by $vsn_check; returns 0
# otherwise.  $vsn_check is as for specific_suite_has_suitable_vsn.
sub good_suite_has_suitable_vsn ($) {
    my ($vsn_check) = @_;
    my $all_suites = apiquery "suites";
    for my $info (@$all_suites) {
        my $name = $info->{name};
        defined $name or die;
        unless ($name =~ m/\bnew$/) {
            return 1 if specific_suite_has_suitable_vsn($name, $vsn_check);
        }
    }
    return 0;
}

# Sets $pkgdir for the current $pkg, and records in $pkg_exists
# whether that directory exists.  If it does, $pkg_secret is set to
# say whether the directory is missing the read or the execute
# permission bit for "other".  ($pkg_secret is left untouched when
# the directory does not exist.)
sub statpackage () {
    $pkgdir = "$repos/$pkg.git";
    if (stat_exists $pkgdir) {
        # `_' reuses the result of the stat just done by stat_exists
        my $mode = (stat _)[2];
        $pkg_exists = 1;
        $pkg_secret = !!(~$mode & 05);
        printdebug "statpackage $pkg => exists, secret=$pkg_secret.\n";
    } else {
        printdebug "statpackage $pkg => ENOENT\n";
        $pkg_exists = 0;
    }
}

# Takes the package name from the front of @ARGV into $pkg, insists
# that it matches $package_re, and stats its repo (see statpackage).
sub getpackage () {
    @ARGV >= 1 or die;
    $pkg = shift @ARGV;
    $pkg =~ m/^$package_re$/ or die;

    statpackage();
}

# Records a taint against a git object, for the current $pkg, in the
# policy database.
#
#   $refobj   anything git can resolve to an object; we look up
#             "$refobj^0" with git cat-file --batch
#   $reason   human-readable explanation, stored as the taint comment
#
# For commits and tags the whole object content is stored alongside
# the object id; for other object types only id and type are stored.
# The new taint also gets a taintoverrides row, so that it can be
# overridden with --deliberately-include-questionable-history.
#
# Dies if git's reply cannot be obtained or parsed.
sub add_taint ($$) {
    my ($refobj, $reason) = @_;

    printdebug "TAINTING $refobj\n",
        (map { "\%| $_" } split "\n", $reason),
        "\n";

    # git cat-file --batch takes its queries on stdin; we give it a
    # temporary file containing our one query.
    my $tf = File::Temp->new or die $!;
    print $tf "$refobj^0\n" or die $!;
    flush $tf or die $!;
    seek $tf,0,0 or die $!;

    my $gcfpid = open GCF, "-|";
    defined $gcfpid or die $!;
    if (!$gcfpid) {
	# child
	open STDIN, "<&", $tf or die $!;
	exec 'git', 'cat-file', '--batch';
	die $!;
    }

    close $tf or die $!;

    # Reply starts with a header line "<objid> <type> <size>".
    my $header = <GCF>;
    defined $header or die;
    $header =~ m/^(\w+) (\w+) (\d+)\n/ or die "$header ?";
    my $gitobjid = $1;
    my $gitobjtype = $2;
    my $bytes = $3;

    my $gitobjdata;
    if ($gitobjtype eq 'commit' or $gitobjtype eq 'tag') {
	# The parentheses matter: without them the comparison would
	# become read's length argument and we would read 1 byte.
	$!=0;
	(read GCF, $gitobjdata, $bytes) == $bytes
	    or die "$gitobjid $bytes $!";
    }
    # Not checked: we may not have consumed all of git's output.
    close GCF;

    $poldbh->do("INSERT INTO taints".
		" (package, gitobjid, gitobjtype, gitobjdata, time, comment)".
		" VALUES (?,?,?,?,?,?)", {},
		$pkg, $gitobjid, $gitobjtype, $gitobjdata, time, $reason);

    my $taint_id = $poldbh->last_insert_id(undef,undef,"taints","taint_id");
    die unless defined $taint_id;

    $poldbh->do("INSERT INTO taintoverrides".
		" (taint_id, deliberately)".
		" VALUES (?, '--deliberately-include-questionable-history')", 
		{}, $taint_id);
}

# Taints object $refobjid, giving as the reason that tag $tagname
# pointed to it when all our pushed versions turned out to be gone
# from NEW.
sub add_taint_by_tag ($$) {
    my ($tagname,$refobjid) = @_;
    my $why = join '',
        "tag $tagname referred to this object in git tree but all",
        " previously pushed versions were found to have been",
        " removed from NEW (ie, rejected) (or never arrived)";
    add_taint($refobjid, $why);
}

# Decides what should happen to the repo of the current $pkg.
# Only does anything if the repo exists and is secret.
#
#  - If some suite other than NEW has a version which is in our
#    history, the repo is made public ($publicmode), and
#    $distrodir/mirror-hook (if there is one) is run with
#    `updated-hook'.  Failure of the mirror hook is only warned about.
#    Returns 0.
#  - Otherwise, if the repo directory was modified within
#    $new_upload_propagation_slop seconds, or NEW has a version which
#    is in our history, nothing is done.  Returns 0.
#  - Otherwise every object referred to by a tag is tainted, and
#    FRESHREPO is returned.
#
# Changes directory into the repo.
sub check_package () {
    return 0 unless $pkg_exists && $pkg_secret;

    printdebug "check_package\n";

    chdir $pkgdir or die "$pkgdir $!";

    my @dirstat = stat '.';
    @dirstat or die "$pkgdir $!";
    my $age = time - $dirstat[9];
    printdebug "check_package age=$age\n";

    if (good_suite_has_suitable_vsn(\&vsn_in_our_history)) {
        chmod $publicmode, "." or die $!;
        $pkg_secret = 0;

        my $mirror_hook = "$distrodir/mirror-hook";
        eval {
            if (stat_exists $mirror_hook) {
                my @mirror_cmd =
                    ($mirror_hook, $distrodir, "updated-hook", $pkg);
                debugcmd " (mirror)",@mirror_cmd;
                system @mirror_cmd and failedcmd @mirror_cmd;
            }
        };
        my $mirror_err = $@;
        if (length $mirror_err) {
            chomp $mirror_err;
            print STDERR "policy hook: warning: failed to mirror publication of \`$pkg': $mirror_err\n";
        }
        return 0;
    }

    # recently pushed: the upload may not have reached NEW yet
    return 0 if $age < $new_upload_propagation_slop;

    return 0 if new_has_vsn_in_our_history();

    printdebug "check_package secret, deleted, tainting\n";

    git_for_each_ref('refs/tags', sub {
        # called with (objid, objtype, full ref name, tag name)
        add_taint_by_tag($_[3], $_[0]);
    });

    return FRESHREPO;
}

# `check-package' action: remaining argument is PACKAGE.
# Exit status is whatever check_package decides.
sub action_check_package () {
    getpackage();
    my $result = check_package();
    return $result;
}

# Takes VERSION SUITE TAGNAME DELIBERATELIES from the front of @ARGV.
# The first three go into $version, $suite and $tagname; each
# comma-separated item of the last becomes a key of %deliberately.
sub getpushinfo () {
    @ARGV >= 4 or die;
    my $delibs;
    ($version, $suite, $tagname, $delibs) = splice @ARGV, 0, 4;
    $deliberately{$_} = 1 foreach split /\,/, $delibs;
}

# Was --deliberately-WHAT given for this push?  WHAT is passed
# without the `--deliberately-' prefix.
sub deliberately ($) {
    my ($what) = @_;
    return $deliberately{"--deliberately-$what"};
}

# `push' action: remaining arguments are
#   PACKAGE VERSION SUITE TAGNAME DELIBERATELIES
#
# Returns 0 if the repo does not exist or is (now) public.  For a
# repo which is still secret, the pusher must have said what to do
# with the previously pushed history:
#   --deliberately-not-fast-forward
#       taints the suite's current server ref and returns
#       NOFFCHECK|FRESHREPO
#   --deliberately-include-questionable-history
#       returns 0
# With neither, dies with an explanation.
sub action_push () {
    getpackage();
    getpushinfo();

    # This might make the package public, or might add taints.
    # Its return value is deliberately not used here.
    check_package();

    return 0 unless $pkg_exists && $pkg_secret;

    # we suppose that NEW has a version which is already in our
    # history, as otherwise the repo would have been blown away

    if (deliberately('not-fast-forward')) {
        my $oldref = server_ref($suite);
        my $why =
            "rewound suite $suite; --deliberately-not-fast-forward".
            " specified in signed tag $tagname for upload of".
            " version $version";
        add_taint($oldref, $why);
        return NOFFCHECK|FRESHREPO;
    }

    return 0 if deliberately('include-questionable-history');

    die <<END;

Package is in NEW and has not been accepted or rejected yet.
Unfortunately, we cannot determine automatically what should happen.
You will have to pass either --deliberately-not-fast-forward or
--deliberately-include-questionable-history to specify whether you are
keeping or discarding the previously pushed history.

The choice is important, to ensure that your git history is both
suitable for public distribution and as useful as possible.  Please
see the descriptions of these options in dgit(1).

END
}

# `push-confirm' action: remaining arguments are
#   PACKAGE VERSION SUITE TAGNAME DELIBERATELIES FRESHREPO
# FRESHREPO may be the empty string.
#
# Checks whether any object recorded as tainted (for this package,
# or with an empty package name) is present in the repo: in FRESHREPO
# if that is non-empty, otherwise in whatever repo git finds by
# default.  For each tainted object which is present, an explanation
# is appended to $stderr, and then:
#   - if the taint has an override matching one of the DELIBERATELIES
#     which were supplied, the taint is deleted;
#   - otherwise the push is to be rejected.
#
# Returns 1 if the push is to be rejected, 0 if it is OK.
#
# If the push is OK, FRESHREPO is non-empty, and no suite other than
# NEW has any version of the package, FRESHREPO's permissions are
# reduced (see comment below) and its timestamps set to now.
sub action_push_confirm () {
    getpackage();
    getpushinfo();
    die unless @ARGV >= 1;
    my $freshrepo = shift @ARGV;

    my $initq = $poldbh->prepare(<<END);
        SELECT taint_id, gitobjid FROM taints t
            WHERE (package = ? OR package = '')
END
    $initq->execute($pkg);

    my @objscatcmd = qw(git);
    push @objscatcmd, qw(--git-dir), $freshrepo if length $freshrepo;
    push @objscatcmd, qw(cat-file --batch);
    debugcmd '|',@objscatcmd if $debuglevel>=2;

    # Write all the object ids we want to ask about into a temporary
    # file, which becomes git cat-file's stdin.  @taintids holds the
    # taint ids in the same order, so that replies can be matched up.
    my @taintids;
    my $chkinput = tempfile();
    while (my $taint = $initq->fetchrow_hashref()) {
	push @taintids, $taint->{taint_id};
	print $chkinput $taint->{gitobjid}, "\n" or die $!;
	printdebug '|> ', $taint->{gitobjid}, "\n" if $debuglevel>=2;
    }
    flush $chkinput or die $!;
    seek $chkinput,0,0 or die $!;

    # A forking open returns undef if the fork failed.  That has to be
    # told apart from 0 (which means we are the child), or the parent
    # would go on to exec git itself.
    my $checkpid = open CHKOUT, "-|";
    defined $checkpid or die $!;
    if (!$checkpid) {
	open STDIN, "<&", $chkinput or die $!;
	delete $ENV{GIT_ALTERNATE_OBJECT_DIRECTORIES};
	# ^ recent versions of git set this in the environment of
	# receive hooks.  This can cause us to see things which
	# the user is trying to abolish.
	exec @objscatcmd or die $!;
    }

    my ($taintinfoq,$overridesanyq,$untaintq,$overridesq);

    # Query for the overrides of one taint which match any of the
    # DELIBERATELIES we were given.  It has one placeholder for each
    # of those, followed by one for the taint id.
    my $overridesstmt = <<END;
        SELECT deliberately FROM taintoverrides WHERE (
            1=0
END
    my @overridesv = sort keys %deliberately;
    $overridesstmt .= <<END foreach @overridesv;
            OR deliberately = ?
END
    $overridesstmt .= <<END;
	) AND taint_id = ?
        ORDER BY deliberately ASC
END

    my $mustreject=0;

    while (my $taintid = shift @taintids) {
	# One reply per query, in order.
	$!=0; $_ = <CHKOUT>;
	die "($taintid @objscatcmd) $!" unless defined $_;
	printdebug "|< ", $_ if $debuglevel>=2;

	# Object not in the repo: this taint is not relevant.
	next if m/^\w+ missing$/;
	die "($taintid @objscatcmd) $_ ?" unless m/^(\w+) (\w+) (\d+)\s/;
	my ($objid,$objtype,$nbytes) = ($1,$2,$3);

	# Skip the object's content, and the newline which follows it.
	my $drop;
	(read CHKOUT, $drop, $nbytes) == $nbytes
	    or die "($taintid @objscatcmd) $!";

	$!=0; $_ = <CHKOUT>;
	die "($taintid @objscatcmd) $!" unless defined $_;
	die "($taintid @objscatcmd) $_ ?" if m/\S/;

	$taintinfoq ||= $poldbh->prepare(<<END);
            SELECT package, time, comment FROM taints WHERE taint_id =  ?
END
        $taintinfoq->execute($taintid);

	my $ti = $taintinfoq->fetchrow_hashref();
	die "($taintid)" unless $ti;

	my $timeshow = defined $ti->{time}
	    ? " at time ".strftime("%Y-%m-%d %H:%M:%S Z", gmtime $ti->{time})
	    : "";
	my $pkgshow = length $ti->{package}
	    ? "package $ti->{package}"
	    : "any package";

	$stderr .= <<END;

History contains tainted $objtype $objid
Taint recorded$timeshow for $pkgshow
Reason: $ti->{comment}
END

        printdebug "SQL overrides: @overridesv $taintid /\n$overridesstmt\n";

        $overridesq ||= $poldbh->prepare($overridesstmt);
	$overridesq->execute(@overridesv, $taintid);
	my ($ovwhy) = $overridesq->fetchrow_array();
	if (!defined $ovwhy) {
	    # Nothing we were given overrides this taint.  Tell the
	    # user whether anything at all could have done.
	    $overridesanyq ||= $poldbh->prepare(<<END);
	        SELECT 1 FROM taintoverrides WHERE taint_id = ? LIMIT 1
END
	    $overridesanyq->execute($taintid);
	    my ($ovany) = $overridesanyq->fetchrow_array();
	    $stderr .= $ovany ? <<END : <<END;
Could be forced using --deliberately.  Consult documentation.
END
Uncorrectable error.  If confused, consult administrator.
END
            $mustreject = 1;
	} else {
	    $stderr .= <<END;
Forcing due to $ovwhy
END
            $untaintq ||= $poldbh->prepare(<<END);
                DELETE FROM taints WHERE taint_id = ?
END
            $untaintq->execute($taintid);
        }
    }
    close CHKOUT;

    if ($mustreject) {
	$stderr .= <<END;

Rejecting push due to questionable history.
END
        return 1;
    }

    if (length $freshrepo) {
	# `sub { 1; }' accepts any version at all, so this asks
	# whether the package is in any suite other than NEW.
	if (!good_suite_has_suitable_vsn(sub { 1; })) {
	    stat $freshrepo or die "$freshrepo $!";
	    my $oldmode = ((stat _)[2]);
	    my $oldwrites = $oldmode & 0222;
	    # remove r and x bits which have corresponding w bits clear
	    my $newmode = $oldmode &
		(~0555 | ($oldwrites << 1) | ($oldwrites >> 1));
	    printdebug sprintf "chmod %#o (was %#o) %s\n",
	        $newmode, $oldmode, $freshrepo;
	    chmod $newmode, $freshrepo or die $!;
	    # set atime and mtime to now
	    utime undef, undef, $freshrepo or die $!;
	}
    }

    return 0;
}

# `check-list' action: prints to stdout, one per line, the name of
# each package which has a repo PACKAGE.git directly in $repos that
# statpackage reports as secret.  Returns 0.
sub action_check_list () {
    opendir my $reposdh, $repos or die "$repos $!";
    while (defined(my $entry = readdir $reposdh)) {
        if ($entry =~ m/^($package_re)\.git$/) {
            $pkg = $1;
            statpackage();
            if ($pkg_exists && $pkg_secret) {
                print "$pkg\n" or die $!;
            }
        }
    }
    closedir $reposdh or die $!;
    # closing stdout here lets us notice write errors
    close STDOUT or die $!;
    return 0;
}

# Find the sub implementing ACTION: `check-package' is handled by
# action_check_package, and so on.  The lookup is done in the main
# package's symbol table.
$action =~ y/-/_/;
my $fn = ${*::}{"action_$action"};
if (!$fn) {
    printdebug "dgit-repos-policy-debian: unknown action $action\n";
    # We want exit status 0 here.  A plain `exit 0' would run our END
    # block, which forces the status to 127; so, as for the normal
    # exit path at the end of this script, flush and use _exit.
    flush STDERR or die $!;
    _exit 0;
}

# Run the action inside a policy database transaction, starting again
# from scratch each time the database reports that it is busy.

# Number of retries so far; we give up after 20.
my $sleepy=0;
# Exit status computed by the action.
my $rcode;

# Class of the exception object thrown by our HandleError hook (below)
# to signal a busy database.
my $db_busy_exception= 'Debian::Dgit::Policy::Debian::DB_BUSY';

# The action subs consume @ARGV, so each attempt needs a fresh copy.
my @orgargv = @ARGV;

for (;;) {
    @ARGV = @orgargv;
    eval {
	# The second argument is a callback for poldb_setup; presumably
	# it is called once $poldbh has been set up - TODO confirm.
	poldb_setup(poldb_path($repos), sub {
	    # Turn a database error with code 5 into our special
	    # exception; decline (return false) to handle anything else.
	    $poldbh->{HandleError} = sub {
		return 0 unless $poldbh->err == 5; # SQLITE_BUSY, not in .pm :-(
		die bless { }, $db_busy_exception;
	    };

	    # Test hook: perl code from the environment, run here.
	    eval ($ENV{'DGIT_RPD_TEST_DBLOOP_HOOK'}//'');
	    die $@ if length $@;
	    # used by tests/tests/debpolicy-dbretry
        });

	# Discard any messages accumulated by a previous attempt.
	$stderr = '';

	$rcode = $fn->();
	die unless defined $rcode;

	$poldbh->commit;
	printdebug "poldbh commit\n";
    };
    # Success: leave the loop.  Any failure other than "database
    # busy" is passed on.
    last unless length $@;
    die $@ unless ref $@ eq $db_busy_exception;

    die if $sleepy >= 20;
    $sleepy++;
    # NOTE(review): the message suggests a delay of $sleepy seconds,
    # but there is no sleep in this loop - confirm whether one is
    # intended, or whether the waiting happens in the database layer.
    print STDERR "[policy database busy, retrying (${sleepy}s)]\n";

    # Best effort: errors from the rollback are ignored.
    eval { $poldbh->rollback; };
}

# Only now, with the transaction committed, do the action's messages
# get shown.  _exit avoids the END block, which would otherwise
# replace our exit status with 127.
print STDERR $stderr or die $!;
flush STDERR or die $!;
_exit $rcode;
