# Makefile for the matrix multiply contest
#
# To make the driver,
#   make matmul
# To time a dgemm routine,
#   make timing
# To plot the timing results,
#   make timing.ps
#
# The driver (matmul.c) is written in C, but you can write your code
# in C or in Fortran.  The file f2c_dgemm.c provides an interface
# between the C driver and a Fortran matrix multiply routine, if you
# would prefer to work with Fortran.  It should work on Intel/Linux
# systems and Sun systems, and it may or may not work on other systems.
# Mixed-language programming is a pain that way.
#
# I have also provided commented-out settings to link against an
# optimized implementation of the Basic Linear Algebra Subprograms
# (BLAS).  I use the ATLAS BLAS, available from
#
#     www.netlib.org/atlas
#
# You might find it interesting to compare the speed of your matrix
# multiply to the speed of the ATLAS routines.  Then compare how long it
# took you to install and figure out ATLAS (or another optimized library)
# with how long you spent trying to optimize your own routine.  Using
# existing libraries can save you a lot of time and grief, and let
# you concentrate on wheels that haven't already been invented.

# ---
# Add -DFASTTEST to the DRIVER_CFLAGS variable (the flags used to compile
# matmul.c) in order to use a shorter list of test cases.  Add -DNOVALIDATE
# to turn off the routine to check your answers.  You should probably only
# use -DNOVALIDATE when you're really sure that everything is working.

# For Solaris, optimized for UltraSparcs only when compiled on an Ultra...
#CC = cc
#CFLAGS = -xO5 -xtarget=native -xarch=v8plusa

# For Linux on a PPro or better:
#  ... and a recent (2.95+) version of gcc.  Previous versions may
#  encounter internal compiler errors with P6 instructions.
#CC = gcc
#CFLAGS = -march=pentiumpro -mcpu=pentiumpro -O4 \
#         -funroll-all-loops -fmove-all-movables -freduce-all-givs \
#         -falign-loops -falign-loops -falign-functions \
#         -fschedule-insns

# Generic:
#CC = cc
#CFLAGS = -O

# gprof testing
#LDFLAGS = -pg
#CFLAGS = -O -pg
#DRIVER_CFLAGS = -DFASTTEST -DNOVALIDATE

# ---

# Default configuration: a pure C build.  The driver object is linked
# with the C kernel from basic_dgemm.c and the math library.
OBJS = matmul.o basic_dgemm.o
LIBS = -lm

# Compile a Fortran version (basic_fdgemm.f, in this case):
#LIBS = -lg2c -lm
#OBJS = matmul.o f2c_dgemm.o basic_fdgemm.o

# Compile a version using a vendor BLAS (ATLAS, in this case):
#LIBS = /home/eecs/dbindel/work/ATLAS/lib/Linux_PII/libf77blas.a \
#  	     /home/eecs/dbindel/work/ATLAS/lib/Linux_PII/libatlas.a \
# 	     -lg2c -lm
#OBJS = matmul.o wrap_dgemm.o

# ---

# Link step: combine the object files in OBJS with the libraries in LIBS
# (plus any LDFLAGS) into the executable named by the target.
matmul: $(OBJS)
	$(CC) $(OBJS) $(LIBS) $(LDFLAGS) -o $@

# Compile the benchmark driver.  Only DRIVER_CFLAGS is passed to the
# compiler here; CFLAGS is not used for matmul.c.
matmul.o: matmul.c
	$(CC) $(DRIVER_CFLAGS) -c $< -o $@

# ---
# This is just a suggestion on how to generate timing plots...  Feel
# free to improve on it, so long as you show MFlop/s vs. matrix size.

# Run the driver through `gexec -n 1` and capture its standard output in
# the file `timing`.  The output is written to a temporary file and only
# renamed onto the target when the run succeeds; redirecting straight
# into `timing` would leave an empty or truncated file behind after a
# failed run, and make would then consider it up to date.
timing: matmul
	gexec -n 1 matmul > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Render the timing results as PostScript.  The terminal and output-file
# settings are fed to gnuplot on stdin ("-") ahead of the plot script
# timing.gnuplot.  The script is listed as a prerequisite because the
# recipe reads it: editing it should regenerate the plot.
timing.ps: timing timing.gnuplot
	echo "set term postscript; set output '$@';" \
	  | gnuplot - timing.gnuplot

# Render the timing results as a colour PPM image.  The terminal and
# output-file settings are fed to gnuplot on stdin ("-") ahead of the
# plot script timing.gnuplot.  The script is listed as a prerequisite
# because the recipe reads it: editing it should regenerate the plot.
timing.ppm: timing timing.gnuplot
	echo "set term ppm color; set output '$@';" \
	  | gnuplot - timing.gnuplot

# ---

# Remove the executable and all object files.  The target is declared
# phony so that a file named `clean` in this directory cannot make it
# look up to date and silently skip the cleanup.
.PHONY: clean
clean:
	rm -f matmul *.o
