CUdeviceptr dev_ptr; bool valid; } DevicePtrInfo; static inline DevicePtrInfo getPointer(PyObject *obj, int idx) { DevicePtrInfo ptr_info; ptr_info.dev_ptr = 0; ptr_info.valid = true; if (PyLong_Check(obj)) { ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(obj); return ptr_info; } if (obj == Py_None) { // valid nullptr return ptr_info; } PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr"); if(ptr){ PyObject *empty_tuple = PyTuple_New(0); PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL); Py_DECREF(empty_tuple); Py_DECREF(ptr); if (!PyLong_Check(ret)) { PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int"); ptr_info.valid = false; return ptr_info; } ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret); unsigned attr; CUresult status = cuPointerGetAttribute(&attr, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr_info.dev_ptr); if (ptr_info.dev_ptr && (!(attr == CU_MEMORYTYPE_DEVICE || attr == CU_MEMORYTYPE_UNIFIED) || !(status == CUDA_SUCCESS))) { PyErr_Format(PyExc_ValueError, "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx); ptr_info.valid = false; } Py_DECREF(ret); // Thanks ChatGPT! return ptr_info; } PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method"); return ptr_info; } static PyObject* launch(PyObject* self, PyObject* args) { int gridX, gridY, gridZ; uint64_t _stream; uint64_t _function; int num_warps; int shared_memory; PyObject *launch_enter_hook = NULL; PyObject *launch_exit_hook = NULL; PyObject *compiled_kernel = NULL; PyObject *hook_ret = NULL; r›