ridY*gridZ > 0) { HIP_CHECK(hipModuleLaunchKernel(function, gridX, gridY, gridZ, 64*num_warps, 1, 1, shared_memory, stream, params, 0)); } }

/* Result of converting a Python argument into a device pointer. */
typedef struct _DevicePtrInfo {
  hipDeviceptr_t dev_ptr; /* converted device address (0 for None / null) */
  bool valid;             /* false when conversion failed; a Python error is set */
} DevicePtrInfo;

/*
 * Convert a Python kernel argument into a device pointer.
 *
 * Accepted inputs:
 *   - a Python int, used directly as the address;
 *   - None, which yields a null pointer;
 *   - any object with a callable `data_ptr` attribute returning a Python int;
 *     a non-zero result is translated through hipPointerGetAttribute.
 *
 * obj: the argument to convert (borrowed reference).
 * idx: position of the argument, used only in error messages.
 *
 * Returns a DevicePtrInfo. On every failure path `valid` is false and a
 * Python exception has been set; on success `valid` is true.
 */
static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;

  if (PyLong_Check(obj)) {
    unsigned long long raw = PyLong_AsUnsignedLongLong(obj);
    /* (unsigned long long)-1 together with a pending error signals a
     * negative or out-of-range int. */
    if (raw == (unsigned long long)-1 && PyErr_Occurred()) {
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)raw;
    return ptr_info;
  }

  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }

  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if (ptr) {
    PyObject *empty_tuple = PyTuple_New(0);
    /* PyObject_Call must not be given a NULL args tuple. */
    PyObject *ret = empty_tuple ? PyObject_Call(ptr, empty_tuple, NULL) : NULL;
    Py_XDECREF(empty_tuple);
    Py_DECREF(ptr);

    if (!ret) {
      /* data_ptr() raised (or the tuple allocation failed); keep that
       * exception and report the failure to the caller. */
      ptr_info.valid = false;
      return ptr_info;
    }
    if (!PyLong_Check(ret)) {
      Py_DECREF(ret);
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }

    unsigned long long raw = PyLong_AsUnsignedLongLong(ret);
    Py_DECREF(ret); /* new reference from PyObject_Call; release it */
    if (raw == (unsigned long long)-1 && PyErr_Occurred()) {
      ptr_info.valid = false;
      return ptr_info;
    }

    ptr_info.dev_ptr = (hipDeviceptr_t)raw;
    if (!ptr_info.dev_ptr)
      return ptr_info;

    uint64_t dev_ptr = 0;
    hipError_t status = hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == hipErrorInvalidValue) {
      PyErr_Format(PyExc_ValueError,
                   "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
      ptr_info.valid = false;
      return ptr_info;
    }
    if (status != hipSuccess) {
      /* dev_ptr was not written by the runtime; never hand it to a kernel. */
      PyErr_Format(PyExc_RuntimeError,
                   "Pointer argument (at %d): hipPointerGetAttribute failed with HIP error %d",
                   idx, (int)status);
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
    return ptr_info;
  }

  /* No usable data_ptr attribute: replace the pending AttributeError with a
   * clearer TypeError and mark the result invalid. */
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  ptr_info.valid = false;
  return ptr_info;
}

static PyObject* launch(PyObject* self, PyObject* args) { int gridX, gridY, gridZ; uint64_t _stream; uint64_t _function; int num_warps; int shared_memory; PyObject *launch_enter_hook = NULL; PyObject *launch_exit_hook = NULL; PyObject *compiled_kernel = NULL; ú