/******************************************************************************* * * MIT License * * Copyright 2021-2025 AMD ROCm(TM) Software * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * *******************************************************************************/ #pragma once #include #include #include #include #include namespace rocRoller { /** * Represents whether we need to insert a `waitcnt` before a given instruction. * * Can represent waiting for any counter on any generation of device, including * architectures with separate `vscnt` counters. * * Internally represents -1 as not having to wait for a particular counter. */ class WaitCount { public: WaitCount() = default; explicit WaitCount(GPUArchitecture const& arch, std::string const& message = ""); WaitCount(GPUArchitecture const& arch, int loadcnt, int storecnt, int vscnt, int dscnt, int kmcnt, int expcnt); WaitCount(GPUArchitecture const& arch, GPUWaitQueue, int count); ~WaitCount() = default; bool operator==(WaitCount const& a) const { return a.m_loadcnt == m_loadcnt && a.m_storecnt == m_storecnt && a.m_vscnt == m_vscnt && a.m_dscnt == m_dscnt && a.m_kmcnt == m_kmcnt && a.m_expcnt == m_expcnt; } bool operator!=(WaitCount const& a) const { return !(*this == a); } static WaitCount LoadCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount StoreCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount VSCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount DSCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount KMCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount EXPCnt(GPUArchitecture const& arch, int value, std::string const& message = ""); static WaitCount Zero(GPUArchitecture const& arch, std::string const& message = " "); static WaitCount Max(GPUArchitecture const& arch, std::string const& message = " "); std::string toString(LogLevel level) const; void toStream(std::ostream& os, LogLevel level) const; static int CombineValues(int lhs, int rhs); WaitCount& combine(WaitCount const& other); int loadcnt() const; int storecnt() const; int vscnt() const; int dscnt() const; int kmcnt() const; int expcnt() const; /** * vmcnt is the combination of loadcnt and storecnt for non-split counters */ int vmcnt() const; int getCount(GPUWaitQueue) const; void setLoadcnt(int value); void setStorecnt(int value); void setVscnt(int value); void setDScnt(int value); void setKMcnt(int value); void setExpcnt(int value); WaitCount& combineLoadcnt(int value); WaitCount& combineStorecnt(int value); WaitCount& combineVscnt(int value); WaitCount& combineDScnt(int value); WaitCount& combineKMcnt(int value); WaitCount& combineExpcnt(int value); std::vector const& comments() const; void addComment(std::string const& comment); void addComment(std::string&& comment); WaitCount getAsSaturatedWaitCount(GPUArchitecture const& arch) const; private: /** * -1 means don't care. * * On machines without separate vscnt, the vscnt field should not be used. * */ int m_loadcnt = -1; int m_storecnt = -1; int m_vscnt = -1; int m_dscnt = -1; int m_kmcnt = -1; int m_expcnt = -1; std::vector m_comments; bool m_isSplitCounter = false; bool m_hasVSCnt = false; bool m_hasEXPCnt = false; }; std::ostream& operator<<(std::ostream& stream, WaitCount const& wait); }