diff --git a/src/include/core.h b/src/include/core.h index ee21e291..8ce13c70 100644 --- a/src/include/core.h +++ b/src/include/core.h @@ -55,16 +55,17 @@ namespace Harp { DomStackEntry( unsigned p, const std::vector > >& m, std::vector &tm, Word pc - ): pc(pc), fallThrough(false) + ): pc(pc), fallThrough(false), uni(false) { for (unsigned i = 0; i < m.size(); ++i) tmask.push_back(!bool(m[i][p]) && tm[i]); } DomStackEntry(const std::vector &tmask): - tmask(tmask), fallThrough(true) {} + tmask(tmask), fallThrough(true), uni(false) {} bool fallThrough; + bool uni; std::vector tmask; Word pc; }; diff --git a/src/instruction.cpp b/src/instruction.cpp index 48b8ba73..84cbc4d4 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -112,6 +112,28 @@ ostream &Harp::operator<<(ostream& os, Instruction &inst) { return os; } +bool checkUnanimous(unsigned p, const std::vector > >& m, + const std::vector &tm) { + bool same; + unsigned i; + for (i = 0; i < m.size(); ++i) { + if (tm[i]) { + same = m[i][p]; + break; + } + } + if (i == m.size()) + throw DivergentBranchException(); + for (; i < m.size(); ++i) { + if (tm[i]) { + if (same != (bool(m[i][p]))) { + return false; + } + } + } + return true; +} + void Instruction::executeOn(Warp &c) { D(3, "Begin instruction execute."); @@ -345,7 +367,12 @@ void Instruction::executeOn(Warp &c) { break; case SPLIT: if (sjOnce) { sjOnce = false; - // TODO: if mask becomes all-zero, fall through + if (checkUnanimous(pred, c.pred, c.tmask)) { + DomStackEntry e(c.tmask); + e.uni = true; + c.domStack.push(e); + break; + } DomStackEntry e(pred, c.pred, c.tmask, c.pc); c.domStack.push(c.tmask); c.domStack.push(e); @@ -355,7 +382,12 @@ void Instruction::executeOn(Warp &c) { break; case JOIN: if (sjOnce) { sjOnce = false; - // TODO: if mask becomes all-zero, fall through + if (!c.domStack.empty() && c.domStack.top().uni) { + D(2, "Uni branch at join"); + c.tmask = c.domStack.top().tmask; + c.domStack.pop(); + break; + } if (!c.domStack.top().fallThrough) { if (!pcSet) nextPc = c.domStack.top().pc; pcSet = true;