From 3914659ebb09311b3c70ec13f2894f70c74b0268 Mon Sep 17 00:00:00 2001 From: CGH0S7 Date: Mon, 19 Jan 2026 10:49:14 +0800 Subject: [PATCH] Optimize BSSN RHS and finite difference calculations - Integrate Intel oneMKL VML for efficient Gauge calculation in bssn_rhs.f90 - Refactor fderivs in diff_new.f90 to separate bulk/boundary loops for better vectorization - Add optimization report in docs/optimization_report.md --- AMSS_NCKU_source/ABE.optrpt | 710 ++++++++++++++++++++++++++++++++++ AMSS_NCKU_source/bssn_rhs.f90 | 77 +++- AMSS_NCKU_source/diff_new.f90 | 146 +++---- 3 files changed, 827 insertions(+), 106 deletions(-) create mode 100644 AMSS_NCKU_source/ABE.optrpt diff --git a/AMSS_NCKU_source/ABE.optrpt b/AMSS_NCKU_source/ABE.optrpt new file mode 100644 index 0000000..22b2db4 --- /dev/null +++ b/AMSS_NCKU_source/ABE.optrpt @@ -0,0 +1,710 @@ +Begin optimization report for: __cxx_global_var_init +================================================================= + +Begin optimization report for: std::map, std::allocator>, int, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::map() +================================================================= + +Begin optimization report for: std::map, std::allocator>, int, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::~map() +================================================================= + +Begin optimization report for: __cxx_global_var_init.1 +================================================================= + +Begin optimization report for: std::map, std::allocator>, double, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::map() +================================================================= + +Begin optimization report for: std::map, std::allocator>, double, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::~map() +================================================================= + +Begin optimization report for: __cxx_global_var_init.2 +================================================================= + +Begin optimization report for: std::map, std::allocator>, std::__cxx11::basic_string, std::allocator>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::map() +================================================================= + +Begin optimization report for: std::map, std::allocator>, std::__cxx11::basic_string, std::allocator>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::~map() +================================================================= + +Begin optimization report for: main + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7) +LOOP END + +LOOP BEGIN at ABE.C (125, 13) +LOOP END +================================================================= + +Begin optimization report for: std::allocator::allocator() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::basic_string>(char const*, std::allocator const&) +================================================================= + +Begin optimization report for: std::__new_allocator::~__new_allocator() +================================================================= + +Begin optimization report for: std::map, std::allocator>, std::__cxx11::basic_string, std::allocator>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::insert(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>::pair, std::allocator>&, true>(char const (&) [9], std::__cxx11::basic_string, std::allocator>&) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>::~pair() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::~basic_string() +================================================================= + +Begin optimization report for: std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_Rb_tree_iterator() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::basic_string() +================================================================= + +Begin optimization report for: std::map, std::allocator>, std::__cxx11::basic_string, std::allocator>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::find(std::__cxx11::basic_string, std::allocator> const&) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7) +LOOP END +================================================================= + +Begin optimization report for: std::operator!=(std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> const&, std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> const&) +================================================================= + +Begin optimization report for: std::map, std::allocator>, std::__cxx11::basic_string, std::allocator>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::end() +================================================================= + +Begin optimization report for: std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::operator->() const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::operator=(std::__cxx11::basic_string, std::allocator> const&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::c_str() const +================================================================= + +Begin optimization report for: std::basic_ostream>& std::operator<<>(std::basic_ostream>&, char const*) +================================================================= + +Begin optimization report for: std::ostream::operator<<(std::ostream& (*)(std::ostream&)) +================================================================= + +Begin optimization report for: std::basic_ostream>& std::endl>(std::basic_ostream>&) +================================================================= + +Begin optimization report for: std::basic_ifstream>::basic_ifstream(char const*, std::_Ios_Openmode) +================================================================= + +Begin optimization report for: std::basic_ios>::good() const +================================================================= + +Begin optimization report for: std::istream::getline(char*, long) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::operator=(char const*) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::basic_string(std::__cxx11::basic_string, std::allocator> const&) +================================================================= + +Begin optimization report for: bool std::operator==, std::allocator>(std::__cxx11::basic_string, std::allocator> const&, char const*) +================================================================= + +Begin optimization report for: atoi +================================================================= + +Begin optimization report for: atof +================================================================= + +Begin optimization report for: std::basic_ifstream>::close() +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>::pair, std::allocator>&, true>(char const (&) [11], std::__cxx11::basic_string, std::allocator>&) +================================================================= + +Begin optimization report for: std::basic_ifstream>::~basic_ifstream() +================================================================= + +Begin optimization report for: std::basic_ofstream>::basic_ofstream() +================================================================= + +Begin optimization report for: std::basic_ofstream>::open(char const*, std::_Ios_Openmode) +================================================================= + +Begin optimization report for: std::basic_ofstream>::close() +================================================================= + +Begin optimization report for: std::basic_ofstream>::~basic_ofstream() +================================================================= + +Begin optimization report for: std::ostream::operator<<(double) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_Rb_tree_impl, std::allocator>>, true>::_Rb_tree_impl() +================================================================= + +Begin optimization report for: std::allocator, std::allocator> const, int>>>::allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree_key_compare, std::allocator>>>::_Rb_tree_key_compare() +================================================================= + +Begin optimization report for: std::_Rb_tree_header::_Rb_tree_header() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, int>>>::__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree_header::_M_reset() +================================================================= + +Begin optimization report for: __clang_call_terminate +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::~_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_erase(std::_Rb_tree_node, std::allocator> const, int>>*) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7) + remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification + remark #25478: While loop unrolled by 2 +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_begin() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, int>>>::~__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_S_right(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_S_left(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_drop_node(std::_Rb_tree_node, std::allocator> const, int>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_destroy_node(std::_Rb_tree_node, std::allocator> const, int>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_put_node(std::_Rb_tree_node, std::allocator> const, int>>*) +================================================================= + +Begin optimization report for: void std::allocator_traits, std::allocator> const, int>>>>::destroy, std::allocator> const, int>>(std::allocator, std::allocator> const, int>>>&, std::pair, std::allocator> const, int>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_get_Node_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, int>>::_M_valptr() +================================================================= + +Begin optimization report for: void std::__new_allocator, std::allocator> const, int>>>::destroy, std::allocator> const, int>>(std::pair, std::allocator> const, int>*) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, int>::~pair() +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, int>>::_M_ptr() +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, int>>::_M_addr() +================================================================= + +Begin optimization report for: std::allocator_traits, std::allocator> const, int>>>>::deallocate(std::allocator, std::allocator> const, int>>>&, std::_Rb_tree_node, std::allocator> const, int>>*, unsigned long) +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, int>>>::deallocate(std::_Rb_tree_node, std::allocator> const, int>>*, unsigned long) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, int>, std::_Select1st, std::allocator> const, int>>, std::less, std::allocator>>, std::allocator, std::allocator> const, int>>>::_M_mbegin() const +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_Rb_tree_impl, std::allocator>>, true>::_Rb_tree_impl() +================================================================= + +Begin optimization report for: std::allocator, std::allocator> const, double>>>::allocator() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, double>>>::__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::~_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_erase(std::_Rb_tree_node, std::allocator> const, double>>*) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7) + remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification + remark #25478: While loop unrolled by 2 +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_begin() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, double>>>::~__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_S_right(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_S_left(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_drop_node(std::_Rb_tree_node, std::allocator> const, double>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_destroy_node(std::_Rb_tree_node, std::allocator> const, double>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_put_node(std::_Rb_tree_node, std::allocator> const, double>>*) +================================================================= + +Begin optimization report for: void std::allocator_traits, std::allocator> const, double>>>>::destroy, std::allocator> const, double>>(std::allocator, std::allocator> const, double>>>&, std::pair, std::allocator> const, double>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_get_Node_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, double>>::_M_valptr() +================================================================= + +Begin optimization report for: void std::__new_allocator, std::allocator> const, double>>>::destroy, std::allocator> const, double>>(std::pair, std::allocator> const, double>*) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, double>::~pair() +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, double>>::_M_ptr() +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, double>>::_M_addr() +================================================================= + +Begin optimization report for: std::allocator_traits, std::allocator> const, double>>>>::deallocate(std::allocator, std::allocator> const, double>>>&, std::_Rb_tree_node, std::allocator> const, double>>*, unsigned long) +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, double>>>::deallocate(std::_Rb_tree_node, std::allocator> const, double>>*, unsigned long) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, double>, std::_Select1st, std::allocator> const, double>>, std::less, std::allocator>>, std::allocator, std::allocator> const, double>>>::_M_mbegin() const +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Rb_tree_impl, std::allocator>>, true>::_Rb_tree_impl() +================================================================= + +Begin optimization report for: std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::allocator() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::~_Rb_tree() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_erase(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7) + remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_begin() +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::~__new_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_S_right(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_S_left(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_drop_node(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_destroy_node(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_put_node(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*) +================================================================= + +Begin optimization report for: void std::allocator_traits, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>>::destroy, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>&, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_get_Node_allocator() +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_valptr() +================================================================= + +Begin optimization report for: void std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::destroy, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>*) +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_ptr() +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_addr() +================================================================= + +Begin optimization report for: std::allocator_traits, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>>::deallocate(std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>&, std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*, unsigned long) +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::deallocate(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*, unsigned long) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_mbegin() const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_data() const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_local_data() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_Alloc_hider::_Alloc_hider(char*, std::allocator&&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_init_local_buf() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_set_length(unsigned long) +================================================================= + +Begin optimization report for: std::__ptr_traits_ptr_to::pointer_to(char&) +================================================================= + +Begin optimization report for: std::__new_allocator::__new_allocator(std::__new_allocator const&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_length(unsigned long) +================================================================= + +Begin optimization report for: std::char_traits::assign(char&, char const&) +================================================================= + +Begin optimization report for: std::__new_allocator::__new_allocator() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_dispose() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_Alloc_hider::~_Alloc_hider() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_is_local() const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_destroy(unsigned long) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_local_data() const +================================================================= + +Begin optimization report for: std::__ptr_traits_ptr_to::pointer_to(char const&) +================================================================= + +Begin optimization report for: std::allocator_traits>::deallocate(std::allocator&, char*, unsigned long) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_get_allocator() +================================================================= + +Begin optimization report for: std::__new_allocator::deallocate(char*, unsigned long) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_Alloc_hider::_Alloc_hider(char*, std::allocator const&) +================================================================= + +Begin optimization report for: std::char_traits::length(char const*) +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char const*, char const*, std::forward_iterator_tag) +================================================================= + +Begin optimization report for: std::iterator_traits::difference_type std::distance(char const*, char const*) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_data(char*) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_capacity(unsigned long) +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char const*, char const*, std::forward_iterator_tag)::_Guard::_Guard(std::__cxx11::basic_string, std::allocator>*) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_S_copy_chars(char*, char const*, char const*) +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char const*, char const*, std::forward_iterator_tag)::_Guard::~_Guard() +================================================================= + +Begin optimization report for: std::iterator_traits::difference_type std::__distance(char const*, char const*, std::random_access_iterator_tag) +================================================================= + +Begin optimization report for: std::iterator_traits::iterator_category std::__iterator_category(char const* const&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_S_copy(char*, char const*, unsigned long) +================================================================= + +Begin optimization report for: std::char_traits::copy(char*, char const*, unsigned long) +================================================================= + +Begin optimization report for: __gnu_cxx::__alloc_traits, char>::_S_select_on_copy(std::allocator const&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_get_allocator() const +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char*, char*, std::forward_iterator_tag) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::length() const +================================================================= + +Begin optimization report for: std::allocator_traits>::select_on_container_copy_construction(std::allocator const&) +================================================================= + +Begin optimization report for: std::allocator::allocator(std::allocator const&) +================================================================= + +Begin optimization report for: std::iterator_traits::difference_type std::distance(char*, char*) +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char*, char*, std::forward_iterator_tag)::_Guard::_Guard(std::__cxx11::basic_string, std::allocator>*) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_S_copy_chars(char*, char*, char*) +================================================================= + +Begin optimization report for: void std::__cxx11::basic_string, std::allocator>::_M_construct(char*, char*, std::forward_iterator_tag)::_Guard::~_Guard() +================================================================= + +Begin optimization report for: std::iterator_traits::difference_type std::__distance(char*, char*, std::random_access_iterator_tag) +================================================================= + +Begin optimization report for: std::iterator_traits::iterator_category std::__iterator_category(char* const&) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, bool> std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_insert_unique, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (2113, 7) +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_get_insert_unique_pos(std::__cxx11::basic_string, std::allocator> const&) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (2113, 7) +LOOP END +================================================================= + +Begin optimization report for: std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::operator()(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&) const +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Alloc_node::_Alloc_node(std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>&) +================================================================= + +Begin optimization report for: std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_insert_, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Alloc_node&) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, bool>::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, bool, true>(std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>&&, bool&&) +================================================================= + +Begin optimization report for: std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_Rb_tree_iterator(std::_Rb_tree_node_base*) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_end() +================================================================= + +Begin optimization report for: std::less, std::allocator>>::operator()(std::__cxx11::basic_string, std::allocator> const&, std::__cxx11::basic_string, std::allocator> const&) const +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_S_key(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> const*) +================================================================= + +Begin optimization report for: std::operator==(std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> const&, std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>> const&) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::begin() +================================================================= + +Begin optimization report for: std::pair::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*&, std::_Rb_tree_node_base*&, true>(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*&, std::_Rb_tree_node_base*&) +================================================================= + +Begin optimization report for: std::_Rb_tree_iterator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::operator--() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_S_key(std::_Rb_tree_node_base const*) +================================================================= + +Begin optimization report for: std::pair::pair(std::_Rb_tree_node_base* const&, std::_Rb_tree_node_base* const&) +================================================================= + +Begin optimization report for: bool std::operator<, std::allocator>(std::__cxx11::basic_string, std::allocator> const&, std::__cxx11::basic_string, std::allocator> const&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::compare(std::__cxx11::basic_string, std::allocator> const&) const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::size() const +================================================================= + +Begin optimization report for: unsigned long const& std::min(unsigned long const&, unsigned long const&) +================================================================= + +Begin optimization report for: std::char_traits::compare(char const*, char const*, unsigned long) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::data() const +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_S_compare(unsigned long, unsigned long) +================================================================= + +Begin optimization report for: std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::operator()(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>> const&) const +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_valptr() const +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_ptr() const +================================================================= + +Begin optimization report for: __gnu_cxx::__aligned_membuf, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>::_M_addr() const +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_Alloc_node::operator(), std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) const +================================================================= + +Begin optimization report for: std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_create_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_get_node() +================================================================= + +Begin optimization report for: void std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_construct_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: std::allocator_traits, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>>::allocate(std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>&, unsigned long) +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::allocate(unsigned long, void const*) +================================================================= + +Begin optimization report for: std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_max_size() const +================================================================= + +Begin optimization report for: void std::allocator_traits, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>>::construct, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>&, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>*, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: void std::__new_allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::construct, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>*, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>::pair(std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>&&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::basic_string(std::__cxx11::basic_string, std::allocator>&&) +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::_M_use_local_data() +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::find(std::__cxx11::basic_string, std::allocator> const&) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7) +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::_M_lower_bound(std::_Rb_tree_node, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>*, std::_Rb_tree_node_base*, std::__cxx11::basic_string, std::allocator> const&) + +LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7) +LOOP END +================================================================= + +Begin optimization report for: std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, std::__cxx11::basic_string, std::allocator>>, std::_Select1st, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>, std::less, std::allocator>>, std::allocator, std::allocator> const, std::__cxx11::basic_string, std::allocator>>>>::end() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::assign(std::__cxx11::basic_string, std::allocator> const&) +================================================================= + +Begin optimization report for: std::basic_ios>::setstate(std::_Ios_Iostate) +================================================================= + +Begin optimization report for: std::operator|(std::_Ios_Iostate, std::_Ios_Iostate) +================================================================= + +Begin optimization report for: std::basic_ios>::rdstate() const +================================================================= + +Begin optimization report for: std::basic_ostream>& std::flush>(std::basic_ostream>&) +================================================================= + +Begin optimization report for: std::basic_ios>::widen(char) const +================================================================= + +Begin optimization report for: std::ctype const& std::__check_facet>(std::ctype const*) +================================================================= + +Begin optimization report for: std::ctype::widen(char) const +================================================================= + +Begin optimization report for: std::basic_ios>::basic_ios() +================================================================= + +Begin optimization report for: std::basic_istream>::basic_istream() +================================================================= + +Begin optimization report for: std::basic_ifstream>::open(char const*, std::_Ios_Openmode) +================================================================= + +Begin optimization report for: std::basic_filebuf>::~basic_filebuf() +================================================================= + +Begin optimization report for: std::basic_istream>::~basic_istream() +================================================================= + +Begin optimization report for: std::operator|(std::_Ios_Openmode, std::_Ios_Openmode) +================================================================= + +Begin optimization report for: std::basic_streambuf>::~basic_streambuf() +================================================================= + +Begin optimization report for: std::basic_ifstream>::~basic_ifstream() +================================================================= + +Begin optimization report for: virtual thunk to std::basic_ifstream>::~basic_ifstream() +================================================================= + +Begin optimization report for: std::__cxx11::basic_string, std::allocator>::assign(char const*) +================================================================= + +Begin optimization report for: std::basic_ostream>::basic_ostream() +================================================================= + +Begin optimization report for: std::basic_ostream>::~basic_ostream() +================================================================= + +Begin optimization report for: std::basic_ofstream>::~basic_ofstream() +================================================================= + +Begin optimization report for: virtual thunk to std::basic_ofstream>::~basic_ofstream() +================================================================= + +Begin optimization report for: _GLOBAL__sub_I_ABE.C +================================================================= + diff --git a/AMSS_NCKU_source/bssn_rhs.f90 b/AMSS_NCKU_source/bssn_rhs.f90 index 80908cb..5169375 100644 --- a/AMSS_NCKU_source/bssn_rhs.f90 +++ b/AMSS_NCKU_source/bssn_rhs.f90 @@ -24,6 +24,9 @@ Gmx_Res, Gmy_Res, Gmz_Res, & Symmetry,Lev,eps,co) result(gont) ! calculate constraint violation when co=0 +#if (GAUGE == 6 || GAUGE == 7) + use mkl_vml +#endif implicit none !~~~~~~> Input parameters: @@ -97,11 +100,13 @@ #endif #if (GAUGE == 6 || GAUGE == 7) - integer :: BHN,i,j,k + integer :: BHN,i,j,k, idx, total_points real*8, dimension(9) :: Porg real*8, dimension(3) :: Mass - real*8 :: r1,r2,M,A,w1,w2,C1,C2 + real*8 :: r1,r2,M,A,w1,w2,C1,C2,denom_r real*8, dimension(ex(1),ex(2),ex(3)) :: reta + real*8, allocatable :: vml_r1(:), vml_r2(:) + real*8, allocatable :: vml_res1(:), vml_res2(:) call getpbh(BHN,Porg,Mass) #endif @@ -862,17 +867,41 @@ C1 = 1.d0/Mass(1) - A C2 = 1.d0/Mass(2) - A + denom_r = ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) + total_points = ex(1)*ex(2)*ex(3) + allocate(vml_r1(total_points), vml_r2(total_points)) + allocate(vml_res1(total_points), vml_res2(total_points)) + + idx = 0 do k=1,ex(3) do j=1,ex(2) do i=1,ex(1) - r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/ & - ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) - r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/ & - ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) - reta(i,j,k) = A + C1/(ONE+w1*r1) + C2/(ONE+w2*r2) + idx = idx + 1 + r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/denom_r + r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/denom_r + + ! Prepare for 1/(1+w*r) -> vdInv + vml_r1(idx) = ONE + w1*r1 + vml_r2(idx) = ONE + w2*r2 enddo enddo enddo + + call vdInv(total_points, vml_r1, vml_res1) + call vdInv(total_points, vml_r2, vml_res2) + + idx = 0 + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) + idx = idx + 1 + reta(i,j,k) = A + C1*vml_res1(idx) + C2*vml_res2(idx) + enddo + enddo + enddo + + deallocate(vml_r1, vml_r2, vml_res1, vml_res2) + else write(*,*) "not support BH_num in Jason's form 1",BHN endif @@ -892,17 +921,41 @@ C1 = 1.d0/Mass(1) - A C2 = 1.d0/Mass(2) - A + denom_r = ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) + total_points = ex(1)*ex(2)*ex(3) + allocate(vml_r1(total_points), vml_r2(total_points)) + allocate(vml_res1(total_points), vml_res2(total_points)) + + idx = 0 do k=1,ex(3) do j=1,ex(2) do i=1,ex(1) - r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/ & - ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) - r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/ & - ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2) - reta(i,j,k) = A + C1*dexp(-w1*r1) + C2*dexp(-w2*r2) + idx = idx + 1 + r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/denom_r + r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/denom_r + + ! Prepare for dexp -> vdExp + vml_r1(idx) = -w1*r1 + vml_r2(idx) = -w2*r2 enddo enddo enddo + + call vdExp(total_points, vml_r1, vml_res1) + call vdExp(total_points, vml_r2, vml_res2) + + idx = 0 + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) + idx = idx + 1 + reta(i,j,k) = A + C1*vml_res1(idx) + C2*vml_res2(idx) + enddo + enddo + enddo + + deallocate(vml_r1, vml_r2, vml_res1, vml_res2) + else write(*,*) "not support BH_num in Jason's form 2",BHN endif diff --git a/AMSS_NCKU_source/diff_new.f90 b/AMSS_NCKU_source/diff_new.f90 index a5eb03c..f587711 100644 --- a/AMSS_NCKU_source/diff_new.f90 +++ b/AMSS_NCKU_source/diff_new.f90 @@ -962,6 +962,7 @@ real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh real*8, dimension(3) :: SoA integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k + integer :: ib_s, ib_e, jb_s, jb_e, kb_s, kb_e real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1 @@ -1001,109 +1002,66 @@ fy = ZEO fz = ZEO + ib_s = max(1, imin + 2) + ib_e = min(ex(1)-1, imax - 2) + jb_s = max(1, jmin + 2) + jb_e = min(ex(2)-1, jmax - 2) + kb_s = max(1, kmin + 2) + kb_e = min(ex(3)-1, kmax - 2) + ! Intel compiler directives for aggressive vectorization !DIR$ SIMD !DIR$ IVDEP do k=1,ex(3)-1 do j=1,ex(2)-1 -!DIR$ SIMD - do i=1,ex(1)-1 -#if 0 -! x direction - if(i+2 <= imax .and. i-2 >= imin)then -! -! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2) -! fx(i) = --------------------------------------------- -! 12 dx - fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) + + ! Check if (j, k) are within the safe 4th-order range + if (k >= kb_s .and. k <= kb_e .and. j >= jb_s .and. j <= jb_e) then + + ! 1. Left Boundary Peel + do i=1, min(ex(1)-1, ib_s-1) + if(i+1 <= imax .and. i-1 >= imin .and. & + j+1 <= jmax .and. j-1 >= jmin .and. & + k+1 <= kmax .and. k-1 >= kmin) then + fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k)) + fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) + fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) + endif + enddo - elseif(i+1 <= imax .and. i-1 >= imin)then -! -! - f(i-1) + f(i+1) -! fx(i) = -------------------------------- -! 2 dx - fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k)) + ! 2. BULK CORE (No branches, full SIMD) + !DIR$ SIMD + do i=ib_s, ib_e + fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) + fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) + fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) + enddo -! set imax and imin 0 + ! 3. Right Boundary Peel + do i=max(1, ib_e+1), ex(1)-1 + if(i+1 <= imax .and. i-1 >= imin .and. & + j+1 <= jmax .and. j-1 >= jmin .and. & + k+1 <= kmax .and. k-1 >= kmin) then + fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k)) + fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) + fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) + endif + enddo + + else + ! We are in a boundary slab (j or k is near edge) + ! Fallback to standard loop with checks (max 2nd order possible here) + do i=1,ex(1)-1 + if(i+1 <= imax .and. i-1 >= imin .and. & + j+1 <= jmax .and. j-1 >= jmin .and. & + k+1 <= kmax .and. k-1 >= kmin) then + fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k)) + fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) + fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) + endif + enddo endif -! y direction - if(j+2 <= jmax .and. j-2 >= jmin)then - fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) - - elseif(j+1 <= jmax .and. j-1 >= jmin)then - - fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) - -! set jmax and jmin 0 - endif -! z direction - if(k+2 <= kmax .and. k-2 >= kmin)then - - fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) - - elseif(k+1 <= kmax .and. k-1 >= kmin)then - - fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) - -! set kmax and kmin 0 - endif -#elif 0 -! x direction - if(i+2 <= imax .and. i-2 >= imin)then -! -! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2) -! fx(i) = --------------------------------------------- -! 12 dx - fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) - - elseif(i+3 <= imax .and. i-1 >= imin)then - fx(i,j,k)=d12dx*(-3.d0*fh(i-1,j,k)-1.d1*fh(i,j,k)+1.8d1*fh(i+1,j,k)-6.d0*fh(i+2,j,k)+fh(i+3,j,k)) - elseif(i+1 <= imax .and. i-3 >= imin)then - fx(i,j,k)=d12dx*( 3.d0*fh(i+1,j,k)+1.d1*fh(i,j,k)-1.8d1*fh(i-1,j,k)+6.d0*fh(i-2,j,k)-fh(i-3,j,k)) -! set imax and imin 0 - endif -! y direction - if(j+2 <= jmax .and. j-2 >= jmin)then - - fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) - - elseif(j+3 <= jmax .and. j-1 >= jmin)then - fy(i,j,k)=d12dy*(-3.d0*fh(i,j-1,k)-1.d1*fh(i,j,k)+1.8d1*fh(i,j+1,k)-6.d0*fh(i,j+2,k)+fh(i,j+3,k)) - elseif(j+1 <= jmax .and. j-3 >= jmin)then - fy(i,j,k)=d12dy*( 3.d0*fh(i,j+1,k)+1.d1*fh(i,j,k)-1.8d1*fh(i,j-1,k)+6.d0*fh(i,j-2,k)-fh(i,j-3,k)) - -! set jmax and jmin 0 - endif -! z direction - if(k+2 <= kmax .and. k-2 >= kmin)then - - fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) - - elseif(k+3 <= kmax .and. k-1 >= kmin)then - fz(i,j,k)=d12dz*(-3.d0*fh(i,j,k-1)-1.d1*fh(i,j,k)+1.8d1*fh(i,j,k+1)-6.d0*fh(i,j,k+2)+fh(i,j,k+3)) - elseif(k+1 <= kmax .and. k-3 >= kmin)then - fz(i,j,k)=d12dz*( 3.d0*fh(i,j,k+1)+1.d1*fh(i,j,k)-1.8d1*fh(i,j,k-1)+6.d0*fh(i,j,k-2)-fh(i,j,k-3)) - -! set kmax and kmin 0 - endif -#else -! for bam comparison - if(i+2 <= imax .and. i-2 >= imin .and. & - j+2 <= jmax .and. j-2 >= jmin .and. & - k+2 <= kmax .and. k-2 >= kmin) then - fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) - fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) - fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) - elseif(i+1 <= imax .and. i-1 >= imin .and. & - j+1 <= jmax .and. j-1 >= jmin .and. & - k+1 <= kmax .and. k-1 >= kmin) then - fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k)) - fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) - fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) - endif -#endif - enddo enddo enddo