Skip to content

numpy,taco: updates to run full set of suitesparse benchmarks #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions numpy/ufuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,35 @@ def bench():
def ufunc_bench_key(tensorName, funcName):
return tensorName + "-" + funcName + "-numpy"

# UfuncInputCache attempts to avoid reading the same tensor from disk multiple
# times in a benchmark run. It holds a single cached entry: the most recently
# loaded tensor (cast to ints) and its last-mode-shifted companion operand.
class UfuncInputCache:
    def __init__(self):
        # Raw tensor exactly as loaded from disk, before the int cast.
        self.lastLoaded = None
        # str() of the tensor the cache currently holds; None means empty.
        self.lastName = None
        # Whether the cached entry was read with the SuiteSparse loader.
        # Part of the cache key: the same tensor name loaded with a different
        # loader/format must not hit the cache (the original keyed on the name
        # alone and could hand back stale data in that case).
        self.lastSuiteSparse = None
        # Int-cast tensor and the shifted "other" operand handed to benchmarks.
        self.tensor = None
        self.other = None

    def load(self, tensor, suiteSparse):
        # Return (tensor, shiftedTensor) for `tensor`, touching disk only on a
        # cache miss.
        #
        # tensor      -- object with load() / load(loader) and a stable str().
        # suiteSparse -- True to read via PydataMatrixMarketTensorLoader,
        #                False to use the tensor's default loader.
        if self.lastName == str(tensor) and self.lastSuiteSparse == suiteSparse:
            return self.tensor, self.other
        if suiteSparse:
            self.lastLoaded = tensor.load(PydataMatrixMarketTensorLoader())
        else:
            self.lastLoaded = tensor.load()
        self.lastName = str(tensor)
        self.lastSuiteSparse = suiteSparse
        self.tensor = safeCastPydataTensorToInts(self.lastLoaded)
        self.other = PydataTensorShifter().shiftLastMode(self.tensor)
        return self.tensor, self.other
inputCache = UfuncInputCache()

# Run benchmarks against the FROSTT collection.
FROSTTTensors = TensorCollectionFROSTT()
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors())
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc):
frTensor = safeCastPydataTensorToInts(tensor.load())
shifter = PydataTensorShifter()
other = shifter.shiftLastMode(frTensor)
frTensor, other = inputCache.load(tensor, False)
def bench():
c = ufunc(frTensor, other)
return c
Expand All @@ -118,9 +139,7 @@ def bench():
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
def bench_pydata_suitesparse_ufunc_sparse(tacoBench, ufunc):
tensor = SuiteSparseTensor(os.getenv('SUITESPARSE_TENSOR_PATH'))
ssTensor = safeCastPydataTensorToInts(tensor.load(PydataMatrixMarketTensorLoader()))
shifter = PydataTensorShifter()
other = shifter.shiftLastMode(ssTensor)
ssTensor, other = inputCache.load(tensor, True)
def bench():
c = ufunc(ssTensor, other)
return c
Expand Down
35 changes: 35 additions & 0 deletions scripts/suitesparse_runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
#SBATCH -N 1
#SBATCH --mem 120000
#SBATCH -p lanka-v3
#SBATCH --exclusive

# Run the numpy SuiteSparse ufunc benchmarks over every matrix named in the
# input file. Usage: suitesparse_runner.sh <file with one matrix name per line>
# Writes one JSON result file per matrix under $out.

set -u

source venv/bin/activate

sspath=data/suitesparse
# out=suitesparse-ufunc-bench/taco
out=suitesparse-ufunc-bench/numpy

mkdir -p "$out"

# read -r keeps backslashes in matrix names literal; "$1" must be quoted so
# paths containing spaces or glob characters are passed through intact.
while read -r line; do
	matrix="$sspath/$line/$line.mtx"
	# csvout="$out/result-$line.csv"
	# LANKA=ON SUITESPARSE_TENSOR_PATH="$matrix" TACO_OUT="$csvout" make -j8 taco-bench BENCHES="bench_suitesparse_ufunc"
	jsonout="$out/result-$line.json"
	LANKA=ON SUITESPARSE_TENSOR_PATH="$matrix" NUMPY_JSON="$jsonout" make python-bench BENCHES="numpy/ufuncs.py::bench_pydata_suitesparse_ufunc_sparse"
done < "$1"

# for path in $sspath/*; do
# 	if [ ! -d $path ]; then
# 		continue
# 	fi
# 	name="$(cut -d'/' -f3 <<< "$path")"
# 	matrix="$path/$name.mtx"
#
# 	csvout="$out/result-$name.csv"
#
# 	LANKA=ON SUITESPARSE_TENSOR_PATH="$matrix" TACO_OUT="$csvout" make -j8 taco-bench BENCHES="bench_suitesparse_ufunc"
# done
12 changes: 6 additions & 6 deletions scripts/taco_bench_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def aggregateTacoBenches(folder, outfile, labelSet=None):
with open(fname, 'r') as f:
# Discard the first 10 lines. This corresponds to the
# google-benchmark generated header.
for i in range(0, 10):
f.readline()
# for i in range(0, 10):
# f.readline()
# Open the rest of the file as a CSV.
reader = csv.reader(f)
# Attempt to read the header from CSV. If this fails,
Expand All @@ -33,14 +33,14 @@ def aggregateTacoBenches(folder, outfile, labelSet=None):
continue
# Find the column that contains label. We're going to skip
# entries that have a skip marker in the label.
labelIdx = header.index("label")
# labelIdx = header.index("label", 0)
if first:
writer.writerow(header)
first = False
for row in reader:
if "SKIPPED" not in row[labelIdx]:
validLabels.add(row[labelIdx])
writer.writerow(row)
# if "SKIPPED" not in row[labelIdx]:
# validLabels.add(row[labelIdx])
writer.writerow(row)
# Write out the set of valid labels.
if labelSet is not None:
with open(labelSet, 'w+') as validSet:
Expand Down
34 changes: 19 additions & 15 deletions taco/ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ TACO_BENCH_ARGS(bench_ufunc_sparse, rightShift_0.01, 0.01, ">>")->Apply(applyBen
// time from caching these inputs.
struct UfuncInputCache {
template<typename U>
std::pair<taco::Tensor<int64_t>, taco::Tensor<int64_t>> getUfuncInput(std::string path, U format) {
std::pair<taco::Tensor<int64_t>, taco::Tensor<int64_t>> getUfuncInput(std::string path, U format, bool countNNZ = false) {
// See if the paths match.
if (this->lastPath == path) {
// TODO (rohany): Not worrying about whether the format was the same as what was asked for.
Expand All @@ -175,6 +175,12 @@ struct UfuncInputCache {
this->lastPath = path;
this->inputTensor = castToType<int64_t>("A", this->lastLoaded);
this->otherTensor = shiftLastMode<int64_t, int64_t>("B", this->inputTensor);
if (countNNZ) {
this->nnz = 0;
for (auto& it : iterate<int64_t>(this->inputTensor)) {
this->nnz++;
}
}
return std::make_pair(this->inputTensor, this->otherTensor);
}

Expand All @@ -183,6 +189,7 @@ struct UfuncInputCache {

taco::Tensor<int64_t> inputTensor;
taco::Tensor<int64_t> otherTensor;
int64_t nnz;
};
UfuncInputCache inputCache;

Expand Down Expand Up @@ -295,29 +302,32 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) {
// Counters must be present in every run to get reported to the CSV.
state.counters["dimx"] = 0;
state.counters["dimy"] = 0;
if (ssTensors.tensors.size() == 0) {
state.counters["nnz"] = 0;

auto tensorPath = getEnvVar("SUITESPARSE_TENSOR_PATH");
if (tensorPath == "") {
state.error_occurred();
return;
}
int tensorIdx = state.range(0);
auto tensorPath = ssTensors.tensors[tensorIdx];

auto pathSplit = taco::util::split(tensorPath, "/");
auto filename = pathSplit[pathSplit.size() - 1];
auto tensorName = taco::util::split(filename, ".")[0];
state.SetLabel(tensorName);

taco::Tensor<int64_t> ssTensor, other;
try {
std::tie(ssTensor, other) = inputCache.getUfuncInput(tensorPath, CSR);
std::tie(ssTensor, other) = inputCache.getUfuncInput(tensorPath, CSR, true /* countNNZ */);
} catch (TacoException& e) {
// Counters don't show up in the generated CSV if we used SkipWithError, so
// just add in the label that this run is skipped.
state.SetLabel(tensorName+"-SKIPPED-FAILED-READ");
state.SetLabel(tensorName+"/SKIPPED-FAILED-READ");
return;
}

state.counters["dimx"] = ssTensor.getDimension(0);
state.counters["dimy"] = ssTensor.getDimension(1);
state.counters["nnz"] = inputCache.nnz;

for (auto _ : state) {
state.PauseTiming();
Expand All @@ -339,12 +349,6 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) {
}
}

static void applySuiteSparse(benchmark::internal::Benchmark* b) {
for (int i = 0; i < ssTensors.tensors.size(); i++) {
b->Arg(i);
}
}

TACO_BENCH_ARGS(bench_suitesparse_ufunc, xor, xorOp)->Apply(applySuiteSparse);
TACO_BENCH_ARGS(bench_suitesparse_ufunc, ldExp, ldExp)->Apply(applySuiteSparse);
TACO_BENCH_ARGS(bench_suitesparse_ufunc, rightShift, rightShift)->Apply(applySuiteSparse);
TACO_BENCH_ARGS(bench_suitesparse_ufunc, xor, xorOp);
TACO_BENCH_ARGS(bench_suitesparse_ufunc, ldExp, ldExp);
TACO_BENCH_ARGS(bench_suitesparse_ufunc, rightShift, rightShift);
Loading