-
Notifications
You must be signed in to change notification settings - Fork 97
Cuda and Omp matrix conversion of Csr and Hybrid and Omp count_nonzeros of Ell #310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
thoasm
merged 11 commits into
ginkgo-project:develop
from
yhmtsai:csr_hybrid_on_cuda_omp
Jun 4, 2019
Merged
Changes from 10 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
d4494bd
cuda csr->hybrid
yhmtsai f03144d
cuda hybrid -> csr
yhmtsai f8559dc
omp csr -> hybrid
yhmtsai 7f39b29
omp hybrid -> csr and ell count_zero
yhmtsai 1e61c16
implement 16/32/64 bit atomic_add
yhmtsai 3e51a52
delete duplicate function
yhmtsai 89ab7f6
fix omp typo
yhmtsai 6e60328
delete initial_coo of csr->hyb and modify template
yhmtsai 1002488
modify by suggestion
yhmtsai b85e058
fix include name
yhmtsai c255b78
add description and use static_assert
yhmtsai File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/*******************************<GINKGO LICENSE>****************************** | ||
Copyright (c) 2017-2019, the Ginkgo authors | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions | ||
are met: | ||
|
||
1. Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
|
||
3. Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
******************************<GINKGO LICENSE>*******************************/ | ||
|
||
#ifndef GKO_CUDA_COMPONENTS_FORMAT_CONVERSION_CUH_ | ||
#define GKO_CUDA_COMPONENTS_FORMAT_CONVERSION_CUH_ | ||
|
||
|
||
#include <ginkgo/core/base/std_extensions.hpp> | ||
|
||
|
||
#include "cuda/components/cooperative_groups.cuh" | ||
#include "cuda/components/thread_ids.cuh" | ||
|
||
|
||
namespace gko { | ||
namespace kernels { | ||
namespace cuda { | ||
namespace ell { | ||
namespace kernel { | ||
|
||
|
||
// Declaration only: the body of this ELL kernel is not visible in this header.
// NOTE(review): judging by the name and signature, this presumably counts the
// nonzero entries in each of the `num_rows` rows of an ELL-stored `values`
// array (addressed via `stride` and `max_nnz_per_row`) and stores one count
// per row in `result`; confirm against the definition.
template <typename ValueType, typename IndexType> | ||
__global__ void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, | ||
size_type stride, | ||
const ValueType *__restrict__ values, | ||
IndexType *__restrict__ result); | ||
|
||
|
||
} // namespace kernel | ||
} // namespace ell | ||
|
||
|
||
namespace coo { | ||
namespace kernel { | ||
|
||
|
||
// Declaration only: the body of this COO kernel is not visible in this header.
// NOTE(review): judging by the name and signature, this presumably turns the
// `num_nonzeros` row indices in `idxs` into row pointers written to `ptrs`,
// with `length` being the number of entries of `ptrs`; confirm against the
// definition.
template <typename IndexType> | ||
__global__ void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, | ||
size_type num_nonzeros, | ||
IndexType *__restrict__ ptrs, | ||
size_type length); | ||
|
||
|
||
} // namespace kernel | ||
|
||
|
||
namespace host_kernel { | ||
|
||
|
||
template <size_type subwarp_size = cuda_config::warp_size> | ||
__host__ size_type calculate_nwarps(std::shared_ptr<const CudaExecutor> exec, | ||
const size_type nnz) | ||
thoasm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
size_type warps_per_sm = exec->get_num_cores_per_sm() / subwarp_size; | ||
size_type nwarps_in_cuda = exec->get_num_multiprocessor() * warps_per_sm; | ||
size_type multiple = 8; | ||
if (nnz >= 2000000) { | ||
multiple = 128; | ||
} else if (nnz >= 200000) { | ||
multiple = 32; | ||
} | ||
return std::min( | ||
multiple * nwarps_in_cuda, | ||
static_cast<size_type>(ceildiv(nnz, cuda_config::warp_size))); | ||
} | ||
|
||
|
||
} // namespace host_kernel | ||
} // namespace coo | ||
} // namespace cuda | ||
} // namespace kernels | ||
} // namespace gko | ||
|
||
|
||
#endif // GKO_CUDA_COMPONENTS_FORMAT_CONVERSION_CUH_ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.