StarPU Internal Handbook
driver_cuda.h
Go to the documentation of this file.
1 /* StarPU --- Runtime system for heterogeneous multicore architectures.
2  *
3  * Copyright (C) 2008-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
4  * Copyright (C) 2015 Mathieu Lirzin
5  *
6  * StarPU is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version.
10  *
11  * StarPU is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  *
15  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16  */
17 
18 #ifndef __DRIVER_CUDA_H__
19 #define __DRIVER_CUDA_H__
20 
23 #include <common/config.h>
24 
25 #ifdef STARPU_USE_CUDA
26 #include <cuda.h>
27 #include <cuda_runtime_api.h>
28 #include <cublas.h>
29 #endif
30 
31 #include <starpu.h>
32 #include <core/workers.h>
33 #include <datawizard/node_ops.h>
34 
/* Operation tables for the CUDA driver and for CUDA memory nodes.
 * These are extern declarations only: the objects themselves are defined
 * outside this header. */
35 extern struct _starpu_driver_ops _starpu_driver_cuda_ops;
36 extern struct _starpu_node_ops _starpu_driver_cuda_node_ops;
37 
/* The three declarations below are visible whether or not STARPU_USE_CUDA
 * is defined (they sit outside every #if in this header). */

/* Takes no argument and returns nothing.
 * NOTE(review): presumably performs the driver's one-time setup -- confirm
 * against the definition. */
38 void _starpu_cuda_init(void);
/* Returns a device count as an unsigned value.
 * NOTE(review): presumably the number of CUDA devices detected -- confirm. */
39 unsigned _starpu_get_cuda_device_count(void);
/* Square table of int with STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES rows and
 * as many columns; defined elsewhere.
 * NOTE(review): indexing convention (which dimension is source, which is
 * destination) is not visible from this header -- confirm. */
40 extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES];
41 
/* Device discovery and worker entry points.
 * With STARPU_USE_CUDA or STARPU_SIMGRID defined, three real functions are
 * declared. Otherwise only _starpu_cuda_discover_devices exists, as a macro
 * that discards its argument; _starpu_init_cuda and _starpu_cuda_worker are
 * not declared at all in that configuration. */
42 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
/* Takes a pointer to the machine configuration; returns nothing. */
43 void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
44 void _starpu_init_cuda(void);
/* void *(void *) signature.
 * NOTE(review): presumably used as a thread start routine -- confirm. */
45 void *_starpu_cuda_worker(void *);
46 #else
/* No-op fallback: the argument is cast to void so that callers compile
 * unchanged.
 * NOTE(review): the macro parameter is not parenthesised; an argument that
 * is a compound expression would not expand as intended. */
47 # define _starpu_cuda_discover_devices(config) ((void) config)
48 #endif
49 
/* Accessors returning the cudaStream_t used for data transfers.
 * Only declared when STARPU_USE_CUDA is defined, since cudaStream_t comes
 * from the CUDA headers included under the same guard.
 * The "local" variants take no argument; the others are selected by memory
 * node number(s).
 * NOTE(review): presumably "local" means the calling worker's own device,
 * and "in"/"out" mean transfers towards/away from the device -- confirm. */
50 #ifdef STARPU_USE_CUDA
51 cudaStream_t starpu_cuda_get_local_in_transfer_stream(void);
52 cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node);
53 cudaStream_t starpu_cuda_get_local_out_transfer_stream(void);
54 cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node);
/* Selected by a (source node, destination node) pair. */
55 cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node);
56 #endif
57 
/* Completion handling for an asynchronous transfer described by
 * async_channel.
 * The "test" form returns an unsigned status; the "wait" form returns
 * nothing.
 * NOTE(review): presumably "test" is a non-blocking poll returning non-zero
 * once the transfer is done, and "wait" blocks until then -- confirm. */
58 unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel);
59 void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel);
60 
/* Interface-level copies, one function per direction (CPU->CUDA,
 * CUDA->CUDA, CUDA->CPU). All three share the same parameter list:
 *   handle         data handle being transferred
 *   src_interface  source data interface (opaque pointer)
 *   src_node       source memory node
 *   dst_interface  destination data interface (opaque pointer)
 *   dst_node       destination memory node
 *   req            associated data request
 * Each returns an int.
 * NOTE(review): the meaning of the return value and whether req may be NULL
 * are not visible from this header -- confirm. */
61 int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
62 int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
63 int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
64 
/* Raw copies, one function per direction (CUDA->CUDA, CUDA->CPU,
 * CPU->CUDA). All three share the same parameter list:
 *   src, src_offset, src_node   source address, offset and memory node
 *   dst, dst_offset, dst_node   destination address, offset and memory node
 *   size                        amount of data to copy
 *   async_channel               channel describing the asynchronous transfer
 * Each returns an int.
 * NOTE(review): presumably offsets and size are in bytes, and a NULL
 * async_channel requests a synchronous copy -- confirm. */
65 int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
66 int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
67 int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
68 
/* 2D copies, one function per direction. Same leading parameters as the
 * plain copy_data functions, with the single size replaced by a 2D layout:
 *   blocksize       size of one block
 *   numblocks       number of blocks
 *   ld_src, ld_dst  leading dimension on the source / destination side
 * Each returns an int.
 * NOTE(review): presumably numblocks blocks of blocksize bytes are copied,
 * consecutive blocks being ld_src (resp. ld_dst) apart -- confirm units. */
69 int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
70 int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
71 int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
72 
/* 3D copies, one function per direction. Same leading parameters as the
 * plain copy_data functions, with the single size replaced by a 3D layout:
 *   blocksize            size of one block
 *   numblocks_1          number of blocks along the first dimension
 *   ld1_src, ld1_dst     first leading dimension, source / destination
 *   numblocks_2          number of blocks along the second dimension
 *   ld2_src, ld2_dst     second leading dimension, source / destination
 * Each returns an int.
 * NOTE(review): units of blocksize and of the leading dimensions are not
 * visible from this header -- presumably bytes; confirm. */
73 int _starpu_cuda_copy3d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
74 int _starpu_cuda_copy3d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
75 int _starpu_cuda_copy3d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
76 
/* Node-level capability query and memory management. */

/* Returns an int for the pair (node, handling_node).
 * NOTE(review): presumably non-zero when handling_node can access node's
 * memory directly -- confirm. */
77 int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node);
/* Takes a node, a size and flags; returns the result as a uintptr_t.
 * NOTE(review): the value returned on failure and the accepted flags are
 * not visible from this header -- confirm. */
78 uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags);
/* Takes the same node, size and flags parameters as the malloc function
 * above, plus the address to release. */
79 void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
80 
81 #endif // __DRIVER_CUDA_H__
82 
node_ops.h
_starpu_machine_config
Definition: workers.h:353
_starpu_async_channel
Definition: copy_driver.h:124
_starpu_driver_ops
Definition: drivers.h:24
_starpu_node_ops
Definition: node_ops.h:48