UDA_CHECK(cuFuncSetCacheConfig(fun, CU_FUNC_CACHE_PREFER_SHARED)); int shared_total, shared_static; CUDA_CHECK(cuDeviceGetAttribute(&shared_total, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, device)); CUDA_CHECK(cuFuncGetAttribute(&shared_static, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, fun)); CUDA_CHECK(cuFuncSetAttribute(fun, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, shared_optin - shared_static)); } if(PyErr_Occurred()) { return NULL; } return Py_BuildValue("(KKii)", (uint64_t)mod, (uint64_t)fun, n_regs, n_spills); } static PyMethodDef ModuleMethods[] = { {"load_binary", loadBinary, METH_VARARGS, "Load provided cubin into CUDA driver"}, {"get_device_properties", getDeviceProperties, METH_VARARGS, "Get the properties for a given device"}, {NULL, NULL, 0, NULL} // sentinel }; static struct PyModuleDef ModuleDef = { PyModuleDef_HEAD_INIT, "cuda_utils", NULL, //documentation -1, //size ModuleMethods }; PyMODINIT_FUNC PyInit_cuda_utils(void) { PyObject *m = PyModule_Create(&ModuleDef); if(m == NULL) { return NULL; } PyModule_AddFunctions(m, ModuleMethods); return m; } r'