#!/usr/bin/perl 

#compare_lists.cgi -Provide gene info based on web query.
#
#This script provides access to my Worm DB and to Acedb.  Genes are entered,
#and the database info for the gene is displayed.
#
#---------------------------
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#---------------------------
#
#Version: 0.005
#
#Last modified: 3/01
#Written by Jim Lund in the lab of Stuart Kim, Stanford University.
#jiml@stanford.edu
#
#Postal address: Department of Developmental Biology, B365
#		 279 Campus Dr.
#                Stanford, CA 94305
#
#Web site: http://worm-chip.stanford.edu
#---------------------------
#HISTORY:
#v.005	3/2/01
#Fix a bug that counted the first line of input as a gene if it was a blank
#line.
#


use CGI qw(:standard);
use DBI;
use WebConfig;
use Worm;


#
#Oracle Specific environment variables.
#
#$ENV{'ORACLE_HOME'}='/data/oracle/u01/app/oracle/product/8.1.5';
#$ENV{'ORACLE_SID'}='ORCL';


#
#Subroutines
#
sub PARSE_FORM;
sub SELECT_GENES;
sub SELECT_GENES2;
sub ACE_INFO;
sub PRINT_OUTPUT;
sub COMBINE_LISTS;



BEGIN {
  WebConfig::CGI_init;
}


#
#Initailize db connection.
#
my ($dbh,$DB_DATE) = WebConfig::DB_init;


#
#Get query gene lists and comparison type from form.
#
($gene_list1,$gene_list2,$extra,$compare_type,$results_as_text,$keep_extra,$not_worm)=
								&PARSE_FORM;

$count1=$#{$gene_list1}+1;
$count2=$#{$gene_list2}+1;



#
#Combine the two lists into one using the indicated logic.
#
@query_genes=&COMBINE_LISTS($gene_list1,$gene_list2,$compare_type);



#
#Search the database for the genes.  Use SELECT_GENES2 if there are many
#genes because it is faster.
#
if ($not_worm || $do_not_use_database) {
    foreach $gene (@query_genes) {push(@search_rows,[$gene]);}
    $search_rows_ref=\@search_rows;
    $table_rows_ref=['Gene'];
    $columns=1;   
    }
else {
    if ($#query_genes > 300) {
	($search_rows_ref,$table_rows_ref,$columns)=
				&SELECT_GENES2(@query_genes);
    	}
    else {
	($search_rows_ref,$table_rows_ref,$columns)=
				&SELECT_GENES(@query_genes);
	}
    }


#
#Print the output of the search.
#
&PRINT_OUTPUT($search_rows_ref,$table_rows_ref,$columns,$results_as_text);

exit;
#
#End of main section.
#


#
#Subroutines
#

############################################################################
#Called by: main
#Calls: none
#
#Get the list of genes from the form.
#
sub PARSE_FORM {
my ($form_result,@list1,@list2,$compare_type);
my (@genes_lines,$results_text,$gene,%list1,%list2);
my ($keep_extra,%extra,$not_worm,$gene_lines);

$form_result= new CGI;

#
#User want extra fields returned?.
#
$keep_extra=$form_result->param('keep_extra');

#
#Not worm genes?
#
$not_worm=$form_result->param('not_worm');


#
#Now import gene list 1.
#
$gene_lines=$form_result->param('Gene_list1');
$gene_lines=~s/(^\s+|\s+$)//gs;

@gene_lines=split(/\s*[\015\012]+\s*/,$gene_lines);
foreach $line (@gene_lines) {
    $line=~s/^(\S+)\s*//;
    $gene=$1;
    $list1{$gene}=1;
    if ($keep_extra && $line) {$extra{$gene}=$line;}
    }

if (!$list1[0]) {
    undef @gene_lines;
    undef @list1;
    undef $buffer;
    $max_file_size=10000000;
    $file_handle=$form_result->param('Gene_file1');
    while ($bytes = read($file_handle,$buffer,1024)) {
	$genes_file.=$buffer;
	$bytes_read+=$bytes;
	if ($bytes_read > $max_file_size) {last;}
	}
    $genes_file=~s/(^\s+|\s+$)//gs;
    @gene_lines=split(/\s*[\015\012]+\s*/,$genes_file);
    foreach $line (@gene_lines) {
	$line=~s/^(\S+)\s*//;
	$gene=$1;
	$list1{$gene}=1;
	if ($keep_extra && $line) {
	    if ($extra{$gene}) {$extra{$gene}.="  ".$line;}
	    else {$extra{$gene}=$line;}
	    }
	}
    }

@list1=keys %list1;



#
#Now import gene list 2.
#
undef @genes_lines;
$gene_lines=$form_result->param('Gene_list2');
$gene_lines=~s/(^\s+|\s+$)//gs;

@gene_lines=split(/\s*[\015\012]+\s*/,$gene_lines);
foreach $line (@gene_lines) {
    $line=~s/^(\S+)\s*//;
    $gene=$1;
    $list2{$gene}=1;
    if ($keep_extra && $line) {
	if ($extra{$gene}) {$extra{$gene}.="  ".$line;}
	else {$extra{$gene}=$line;}
	}
    }

if (!$list2[0]) {
    undef @gene_lines;
    undef @list2;
    undef $buffer;
    undef $file_handle;
    undef $bytes;
    undef $genes_file;
    undef $bytes_read;
    $max_file_size=10000000;
    $file_handle=$form_result->param('Gene_file2');
    while ($bytes = read($file_handle,$buffer,1024)) {
	$genes_file.=$buffer;
	$bytes_read+=$bytes;
	if ($bytes_read > $max_file_size) {last;}
	}
    $genes_file=~s/(^\s+|\s+$)//gs;
    @gene_lines=split(/\s*[\015\012]+\s*/,$genes_file);
    foreach $line (@gene_lines) {
	$line=~s/^(\S+)\s*//;
	$gene=$1;
	$list2{$gene}=1;
	if ($keep_extra && $line) {
	    if ($extra{$gene}) {$extra{$gene}.="  ".$line;}
	    else {$extra{$gene}=$line;}
	    }
	}
    }

@list2=keys %list2;



#
#Read in comparison type.
#
$compare_type=$form_result->param('compare_type');

#
#User want results as text-only?
#
$text_results=$form_result->param('text_only');

return (\@list1,\@list2,\%extra,$compare_type,$text_results,$keep_extra,$not_worm);
}



############################################################################
#Called by: main
#Calls: none
#
#Do the search.
#Ruturn the number of cols in the result rows, the row headers, and
#the data rows.
#
sub SELECT_GENES2 {
my (@query_genes)=@_;

my (@table_rows,@search_rows);
my $cols;


my $search="SELECT * FROM genes_map";
my $sth= $dbh->prepare($search);
$sth->execute();



#
#Get row labels and number of resulting columns.
#
if (!$cols) {
    $cols_this_row=$sth->{NUM_OF_FIELDS};
    if ($cols_this_row > $cols) {
	$cols=$cols_this_row;
   	@table_rows=@{$sth->{NAME}};
	}
    unshift(@table_rows,'Gene name entered');
    $cols++;
    }


my $rows_returned=0;
while ( my @row = $sth->fetchrow_array ) {
    $rows_returned++;
    my $gene=shift(@row);
    my $gene2=shift(@row);
    $all_rows{$gene}=[$gene,$gene,$gene2,@row];
    if ($gene2 && ($gene2 ne 'NULL')) {
	    $all_rows{$gene2}=[$gene,$gene,$gene2,@row];
	    }
    }



#
#Get other gene names.
#
$search = "SELECT g.gene_cos_name FROM sjj_primers p, gene g WHERE p.gene_name_key=g.gene_name_key";
$sth= $dbh->prepare($search);
$sth->execute();
while ( my @row = $sth->fetchrow_array ) {
    $rows_returned++;
    my $gene=shift(@row);
    if (!(exists $all_rows{$gene})) {$all_rows{$gene}=[$gene,$gene,$gene];}
    }



foreach $gene (@query_genes) {
    if (exists $all_rows{$gene}) { push(@search_rows,[@{$all_rows{$gene}}]); }
    else { push(@search_rows,[$gene,'-','-']); }
    }

#
#Done with database.
#
#$dbh->disconnect;

return (\@search_rows,\@table_rows,$cols);
}





############################################################################
#Called by: main
#Calls: none
#
#Do the search.
#Ruturn the number of cols in the result rows, the row headers, and
#the data rows.
#
sub SELECT_GENES {
my (@query_genes)=@_;

my (@table_rows,@search_rows);
my $cols;

my $search1="SELECT * FROM genes_map WHERE (gene_cos_name=? OR gene=?)";
my $search2="SELECT g.gene_cos_name FROM sjj_primers p, gene g where g.gene_cos_name=? AND g.gene_name_key=p.gene_name_key";


my $sth= $dbh->prepare($search1);
my $sth2= $dbh->prepare($search2);
foreach my $gene (@query_genes) {
    $sth->execute($gene,$gene);

#
#Get row labels and number of resulting columns.
#
    if (!$cols) {
	$cols_this_row=$sth->{NUM_OF_FIELDS};
    	if ($cols_this_row > $cols) {
	    $cols=$cols_this_row;
   	    @table_rows=@{$sth->{NAME}};
	    }
	unshift(@table_rows,'Gene name entered');
	$cols++;
	}

    my $rows_returned=0;
    while ( my @row = $sth->fetchrow_array ) {
        $rows_returned++;
        push(@search_rows,[$gene,@row]);
        }
    if (!$rows_returned) {
#
#Get other gene names.
#
        $sth2->execute($gene);
        while ( my @row = $sth2->fetchrow_array ) {
            $rows_returned++;
            push(@search_rows,[$gene,$gene,'-']);
            }
        }
    if (!$rows_returned) {
	push(@search_rows,[$gene,'-','-']);
	}
    }

#
#Done with datbase.
#
#$dbh->disconnect;

return (\@search_rows,\@table_rows,$cols);
}




############################################################################
#Called by: main
#Calls: none
#
#Format and print the output of the search.
#The table labels are in an array ref, $table_rows_ref.
#The number of columns is in $cols.
#The rows are in a ref to an array of refs, $search_rows_ref.
#
sub PRINT_OUTPUT {
my ($search_rows_ref,$table_rows_ref,$cols,$results_as_text)=@_;

my ($row_ref,$gene_rough_name_column,$gene_column);


#
#Print the search results as either an HTML table with links, 
#or as text.
#
#If there are too many rows for a table, force text output.
#
if ($#{$search_rows_ref} > 300) {$results_as_text=1;}



#
#Print HTML page header.
#
if ($results_as_text) {print header('text/plain');}
else {print header();}




if ($results_as_text) {
    &PRINT_TEXT($search_rows_ref,$table_rows_ref,$cols);
    return;
    }


#
#HTML header
#
my $HTML_results = new CGI;

print <<EOM;
<HTML>
<HEAD>
<link rel="icon" href="../../favicon.ico" type="image/x-icon">
<link REL="shortcut icon" HREF="../../favicon.ico" TYPE="image/x-icon">
<link rel="stylesheet" title="Worm (Default)" href="../../worm-bg.css" type="text/css" media="screen">
<TITLE> Results from comparison of the two gene lists </TITLE>
</HEAD>

<BODY>

<!--#include virtual="/links.php" -->

<div id="content">

EOM

print $HTML_results->center(h1('Results from comparison of the two gene lists')),"\n";


#
#Print the search results as an HTML table with links.
#
&PRINT_HTML($search_rows_ref,$table_rows_ref,$cols);


#
#HTML tail. Close enclosing tables.
#
print <<EOM;
</PRE>

<P>
The lists are combined in the manner you specify to generate the combination 
list.  The combination list is searched against the database of genes on the 
Kim lab microarrays. 

<P>
<FORM NAME='Back_to_compare' METHOD=POST 
        ACTION='./back_to_form.cgi'>
<INPUT type='submit' value="Use these results in a new comparison.">
EOM

#
#Make a hidden form with values which are the gene names, used to 
#fill the form if user wants to continue the comparison.
#
foreach $row_ref (@{$search_rows_ref}) {
    if ($$row_ref[0] eq '-') {$hidden.=$$row_ref[1].',';}
    else {$hidden.=$$row_ref[0].',';}
    }
print "<INPUT type='HIDDEN' NAME='Gene_list' VALUE='".$hidden."'>\n";

print <<EOM;
</FORM>
<P>

<a href="compare_prog.html">View the program.</a>
	    
<!--#include virtual="/tail.php" -->

	</div>

</BODY>
</HTML>
EOM
}



############################################################################
#Called by: PRINT_OUTPUT
#Calls: none
#
#Prints comparison results as text.
#
sub PRINT_TEXT {
my ($search_rows_ref,$table_rows_ref,$cols)=@_;

my ($result);

$result=$#{$search_rows_ref}+1;
$compare_type=uc($compare_type);

print <<EOM;
Gene list comparison results

$count1 genes in list 1.
$count2 genes in list 2.

List comparison type: $compare_type

$result genes meet the comparison criteria.

EOM

#
#Print out the extra user gene info, if $keep_extra is selected.
#
if ($keep_extra) {push(@{$table_rows_ref},'Extra input');}


#
#Print result field names.  Also find the column which has the gene name.
#This column will be linked to proteome.
#
for ($line=0;$line <= 2;$line++) { 
for ($field=0;$field <= $#{$table_rows_ref};$field++) {
    if (lc($$table_rows_ref[$field]) eq 'gene_cos_name') {
	$$table_rows_ref[$field]='Gene cosmid name';
	}

    if (length($$table_rows_ref[$field]) <= ((2-$line)*15)) {$field_print='';}
    else {
	($field_print)= $$table_rows_ref[$field]=~/^(.{1,15})\b/;
	substr($$table_rows_ref[$field],0,length($field_print),'');
	$field_print=~s/(^\s*|\s*$)//g;
	}
    if (!$field) {printf("%-16s",$field_print);}
    elsif ($field == 1) {printf("%16s",$field_print);}
    else {printf("%16s",ucfirst(lc($field_print)));}
    }
print"\n";
    }


#
#Print rows.
#
foreach $row_ref (@{$search_rows_ref}) {
#
#Print data.
#
    for ($i=0;$i< $cols;$i++) {
        if (!($datum=$$row_ref[$i]) 
		|| ($datum eq 'NULL')) {$datum="";}
        if (!$i) {printf("%-16s",$datum);}
        else {printf("%16s",$datum);}
        }
    if ($keep_extra) {
	$gene=$$row_ref[0];
	if (exists $$extra{$gene}) {printf(" %15s",$$extra{$gene});}
	}
    print"\n";
    }
}



############################################################################
#Called by: PRINT_OUTPUT
#Calls: none
#
#Prints comparison results as HTML with links to AceDB and Proteome.
#
sub PRINT_HTML {
my ($search_rows_ref,$table_rows_ref,$cols)=@_;

my ($result);

$result=$#{$search_rows_ref}+1;
$compare_type=uc($compare_type);

#
#Print header info.
#
print <<EOM;
$count1 genes in list 1.<BR>
$count2 genes in list 2.<BR>
List comparison type: $compare_type<BR>
$result genes meet the comparison criteria.<BR>
EOM



#
#Print table header.
#
if ($not_worm) {
    print"<H2 ALIGN='left'> Genes </H2>\n";
    }
else {
    print"<H2 ALIGN='left'> <I>C. elegans</I> genes </H2>\n";
    }

printf("<TABLE COLS=%d BORDER=1 CELLSPACING=0 CELLPADDING=1>\n",$cols);

#
#Add Proteome link and ACEDB link columns to table header row.
#
if (!$not_worm) {
    unshift(@{$table_rows_ref},'Proteome link');
    $proteome_col=1;
    unshift(@{$table_rows_ref},'ACEDB link');
    $ACEDB_col=0;
    }


#
#Print out the extra user gene info, if $keep_extra is selected.
#
if ($keep_extra) {push(@{$table_rows_ref},'Extra input');}


#
#Print result field names.  Also find the column which has the gene name.
#This column will be linked to proteome.
#
for ($field=0;$field <= $#{$table_rows_ref};$field++) {

    if ((!$gene_entered_column) && ($$table_rows_ref[$field] eq "Gene name entered")) {
        $gene_entered_column=$field;
        }
    if ((!$gene_column) && (lc($$table_rows_ref[$field]) eq "gene")) {
        $gene_column=$field;
        }
    if ((!$gene_rough_name_column)
                &&
        ($$table_rows_ref[$field] eq 'gene_cos_name')) {
        $gene_rough_name_column=$field;
        $$table_rows_ref[$field] = 'Gene cosmid name';
        }

    printf("<TH> %s </TH>\n",ucfirst(lc($$table_rows_ref[$field])));
    }
print"</TR>\n";


#
#Print rows.
#
foreach $row_ref (@{$search_rows_ref}) {
    print"<TR ALIGN=CENTER> ";

#
#Find which cell to link to ACEDB, and build the url.
#

if (!$not_worm) {
    if ($$row_ref[$gene_column-2] && $$row_ref[$gene_column-2] !~ /NULL/i && $$row_ref[$gene_column-2] !~ /;|\|/) {
        $datum=$$row_ref[$gene_column-2];
        }
    elsif ($$row_ref[$gene_rough_name_column-2] !~ /^-/) {
        $datum=$$row_ref[$gene_rough_name_column-2];
        }
    else { $datum=$$row_ref[$gene_entered_column-2]; }
    ($ACEDB_link) = Worm::Wormbase_url($datum);
    printf("<TD> %s </TD>\n",$ACEDB_link);


#
#Find which cell to link to proteome, and build the url.
#
	my $proteome_url = 0;
    if ($$row_ref[$gene_rough_name_column-2] !~ /^-/) {
        $proteome_url=1;
        if ($$row_ref[$gene_column-2] 
		&& 
	   ($$row_ref[$gene_column-2] !~ /NULL/i && $$row_ref[$gene_column-2] !~ /;|\|/)) {
            $datum=$$row_ref[$gene_column-2];
            }
        else {
            $datum=$$row_ref[$gene_rough_name_column-2];
            }
        }
    if ($proteome_url) {
        ($proteome_link) = Worm::Proteome_url($datum);
        printf("<TD> %s </TD>\n",$proteome_link);
        }
    else {print"<TD> &nbsp; </TD>\n";}
    }


#
#Print data.
#
    for ($i=0;$i< $cols;$i++) {
        if (!($datum=$$row_ref[$i])) {$datum="&nbsp;";}
        printf("<TD> %s </TD>\n",$datum);
        }
    if ($keep_extra) {
	$gene=$$row_ref[0];
	if (exists $$extra{$gene}) {printf("<TD> %s </TD>\n",$$extra{$gene});}
	else {print "<TD> &nbsp; </TD>\n";}
	}
    }



#
#Close table.
#
print"</TABLE>\n";
}



############################################################################
#Called by: main
#Calls: none
#
#Combine the two lists using the indicated logic.
#
sub COMBINE_LISTS {
my ($gene_list1,$gene_list2,$compare_type)=@_;

my (%both_lists,$gene,@keep_genes);

foreach $gene (@{$gene_list1}) {$both_lists{$gene}=1;}
foreach $gene (@{$gene_list2}) {$both_lists{$gene}+=2;}

if ($compare_type eq 'or') {
    @keep_genes=sort keys %both_lists;
    return (@keep_genes);
    }

foreach $gene (sort keys %both_lists) {
    if ($compare_type eq '2not1') {
	if ($both_lists{$gene} == 2) {push(@keep_genes,$gene);}
	}
    elsif ($compare_type eq '1not2') {
        if ($both_lists{$gene} == 1) {push(@keep_genes,$gene);}
        }
    elsif ($compare_type eq 'and') {
        if ($both_lists{$gene} == 3) {push(@keep_genes,$gene);}
        }
    elsif ($compare_type eq 'xor') {
        if (($both_lists{$gene} == 1) || ($both_lists{$gene} == 2)) {
	    push(@keep_genes,$gene);
	    }
        }
    }

return (@keep_genes);
}

#
#End of program.

