# Build file for a family of matrix-multiply experiments (mm1 ... mmc), the
# limit1..3 programs, and BLAS-backed reference binaries.
#
# Most programs have no explicit recipe: make's built-in rules compile each
# object from its matching source with $(CC) $(CFLAGS) and link the listed
# objects into the executable.  Only mm2-O2.o and the BLAS binaries need
# hand-written recipes.

# --- User-tunable knobs (override on the command line, e.g. `make CC=clang`) --
CC = gcc
# SIMD feature flags, shared by the default build and the -O2 variant.
AVX = -mavx2 -mfma
CFLAGS = -O3 $(AVX)

# Executables built by `all`.
# openblas is deliberately left out: openblas not present on g0.
EXES = mm1 mm2 mm2-O2 mm3 mm4 mm5 mm6 mm7 mm8 mm9 mma mmb mmc \
       limit1 limit2 limit3 atlas blas

# Command line (program + arguments) profiled by the `perf` target.
# NOTE(review): the program has no ./ prefix, so this presumably relies on
# "." being on PATH -- confirm, or run `make perf MM='./mm5 500'`.
MM = mm5 500

# These names are commands, not files; declaring them phony keeps a stray file
# called "all", "clean" or "perf" from disabling the target.
.PHONY: all clean perf

all: $(EXES)

# $(RM) is `rm -f`, so cleaning an already-clean tree succeeds.  openblas is
# listed explicitly because it is not part of $(EXES) but can still be built.
clean:
	$(RM) $(EXES) openblas *.o

# Profile $(MM) under `perf stat`.  Alternative invocations that were disabled
# in the original file are kept here for reference:
#   @openblas 500   # get clock up to speed
#   @openblas 500   # get clock up to speed
#   perf stat -e cycles -e instructions -e branch-misses -e cpu/event=0xb7,umask=0x1,offcore_rsp=0x300400091,name=offcore_response_all_data_rd_llc_miss_dram/ $(MM)
#   perf stat -e L1-dcache-loads -e L1-dcache-load-misses $(MM)
#   perf stat -e LLC-loads -e LLC-prefetches $(MM)
#   perf stat -e dTLB-loads -e dTLB-load-misses $(MM)
perf:
	perf stat -e cycles -e instructions -e branch-misses $(MM)

# --- Matrix-multiply variants (linked by make's built-in rule) ----------------
mm1: mm1.o main.o
mm2: mm2.o main.o

# Same source as mm2, but compiled at -O2 instead of the -O3 in $(CFLAGS).
mm2-O2.o: mm2.c
	$(CC) -O2 $(AVX) -c $< -o $@
mm2-O2: mm2-O2.o main.o

mm3: mm3.o main.o
mm4: mm4.o main.o
mm5: mm5.o main.o
mm6: mm6.o main.o
mm7: mm7.o main.o
mm8: mm8.o main.o
mm9: mm9.o main.o
mma: mma.o main.o
mmb: mmb.o mmbj.o main.o
# mmc links mmb.o together with mmc.o.
mmc: mmb.o mmc.o main.o

limit1: limit1.o limit1a.o main.o
limit2: limit2.o limit2a.o main.o
limit3: limit3.o limit3a.o main.o

# --- BLAS-backed binaries: same driver object, different libraries ------------
# Libraries come after the object file on the link line so the linker can
# resolve the object's references against them.
openblas: mainblas.o
	$(CC) $(CFLAGS) $^ -lopenblas -o $@

atlas: mainblas.o
	$(CC) $(CFLAGS) $^ -latlas -lcblas -o $@

blas: mainblas.o
	$(CC) $(CFLAGS) $^ -lblas -o $@