mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 00:31:30 +08:00
backport: Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and libmyo-client.
Merge liboffloadmic from upstream, version 20150803. liboffloadmic/ * Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and libmyo-client. liboffloadmic_host loads them dynamically. * Makefile.in: Regenerate. * doc/doxygen/header.tex: Merge from upstream, version 20150803 <https://openmprtl.org/sites/default/files/liboffload_oss_20150803.tgz>. * runtime/cean_util.cpp: Likewise. * runtime/cean_util.h: Likewise. * runtime/coi/coi_client.cpp: Likewise. * runtime/coi/coi_client.h: Likewise. * runtime/coi/coi_server.cpp: Likewise. * runtime/coi/coi_server.h: Likewise. * runtime/compiler_if_host.cpp: Likewise. * runtime/compiler_if_host.h: Likewise. * runtime/compiler_if_target.cpp: Likewise. * runtime/compiler_if_target.h: Likewise. * runtime/dv_util.cpp: Likewise. * runtime/dv_util.h: Likewise. * runtime/liboffload_error.c: Likewise. * runtime/liboffload_error_codes.h: Likewise. * runtime/liboffload_msg.c: Likewise. * runtime/liboffload_msg.h: Likewise. * runtime/mic_lib.f90: Likewise. * runtime/offload.h: Likewise. * runtime/offload_common.cpp: Likewise. * runtime/offload_common.h: Likewise. * runtime/offload_engine.cpp: Likewise. * runtime/offload_engine.h: Likewise. * runtime/offload_env.cpp: Likewise. * runtime/offload_env.h: Likewise. * runtime/offload_host.cpp: Likewise. * runtime/offload_host.h: Likewise. * runtime/offload_iterator.h: Likewise. * runtime/offload_myo_host.cpp: Likewise. * runtime/offload_myo_host.h: Likewise. * runtime/offload_myo_target.cpp: Likewise. * runtime/offload_myo_target.h: Likewise. * runtime/offload_omp_host.cpp: Likewise. * runtime/offload_omp_target.cpp: Likewise. * runtime/offload_orsl.cpp: Likewise. * runtime/offload_orsl.h: Likewise. * runtime/offload_table.cpp: Likewise. * runtime/offload_table.h: Likewise. * runtime/offload_target.cpp: Likewise. * runtime/offload_target.h: Likewise. * runtime/offload_target_main.cpp: Likewise. * runtime/offload_timer.h: Likewise. * runtime/offload_timer_host.cpp: Likewise. 
* runtime/offload_timer_target.cpp: Likewise. * runtime/offload_trace.cpp: Likewise. * runtime/offload_trace.h: Likewise. * runtime/offload_util.cpp: Likewise. * runtime/offload_util.h: Likewise. * runtime/ofldbegin.cpp: Likewise. * runtime/ofldend.cpp: Likewise. * runtime/orsl-lite/include/orsl-lite.h: Likewise. * runtime/orsl-lite/lib/orsl-lite.c: Likewise. * runtime/use_mpss2.txt: Likewise. * include/coi/common/COIEngine_common.h: Merge from upstream, MPSS version 3.5 <http://registrationcenter.intel.com/irc_nas/7445/mpss-src-3.5.tar>. * include/coi/common/COIEvent_common.h: Likewise. * include/coi/common/COIMacros_common.h: Likewise. * include/coi/common/COIPerf_common.h: Likewise. * include/coi/common/COIResult_common.h: Likewise. * include/coi/common/COISysInfo_common.h: Likewise. * include/coi/common/COITypes_common.h: Likewise. * include/coi/sink/COIBuffer_sink.h: Likewise. * include/coi/sink/COIPipeline_sink.h: Likewise. * include/coi/sink/COIProcess_sink.h: Likewise. * include/coi/source/COIBuffer_source.h: Likewise. * include/coi/source/COIEngine_source.h: Likewise. * include/coi/source/COIEvent_source.h: Likewise. * include/coi/source/COIPipeline_source.h: Likewise. * include/coi/source/COIProcess_source.h: Likewise. * include/myo/myo.h: Likewise. * include/myo/myoimpl.h: Likewise. * include/myo/myotypes.h: Likewise. * plugin/Makefile.am (myo_inc_dir): Remove. (libgomp_plugin_intelmic_la_CPPFLAGS): Do not define MYO_SUPPORT. (AM_CPPFLAGS): Likewise for offload_target_main. * plugin/Makefile.in: Regenerate. * runtime/emulator/coi_common.h: Update copyright years. (OFFLOAD_EMUL_KNC_NUM_ENV): Replace with ... (OFFLOAD_EMUL_NUM_ENV): ... this. (enum cmd_t): Add CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.cpp: Update copyright years. (COIProcessWaitForShutdown): Add space between string constants. Return handle to host in CMD_OPEN_LIBRARY. Support CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.h: Update copyright years. 
* runtime/emulator/coi_host.cpp: Update copyright years. (knc_engines_num): Replace with ... (num_engines): ... this. (init): Replace OFFLOAD_EMUL_KNC_NUM_ENV with OFFLOAD_EMUL_NUM_ENV. (COIEngineGetCount): Replace COI_ISA_KNC with COI_ISA_MIC, and knc_engines_num with num_engines. (COIEngineGetHandle): Likewise. (COIProcessCreateFromMemory): Add space between string constants. (COIProcessCreateFromFile): New function. (COIProcessLoadLibraryFromMemory): Rename arguments according to COIProcess_source.h. Return handle, received from target. (COIProcessUnloadLibrary): New function. (COIPipelineClearCPUMask): New function. (COIPipelineSetCPUMask): New function. (COIEngineGetInfo): New function. * runtime/emulator/coi_host.h: Update copyright years. * runtime/emulator/coi_version_asm.h: Regenerate. * runtime/emulator/coi_version_linker_script.map: Regenerate. * runtime/emulator/myo_client.cpp: Update copyright years. * runtime/emulator/myo_service.cpp: Update copyright years. (myoArenaRelease): New function. (myoArenaAcquire): New function. (myoArenaAlignedFree): New function. (myoArenaAlignedMalloc): New function. * runtime/emulator/myo_service.h: Update copyright years. * runtime/emulator/myo_version_asm.h: Regenerate. * runtime/emulator/myo_version_linker_script.map: Regenerate. From-SVN: r227532
This commit is contained in:
parent
761f8e2f8a
commit
2eab96661b
@ -1,3 +1,123 @@
|
||||
2015-09-08 Ilya Verbin <ilya.verbin@intel.com>
|
||||
|
||||
* Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host
|
||||
and libmyo-client. liboffloadmic_host loads them dynamically.
|
||||
* Makefile.in: Regenerate.
|
||||
* doc/doxygen/header.tex: Merge from upstream, version 20150803
|
||||
<https://openmprtl.org/sites/default/files/liboffload_oss_20150803.tgz>.
|
||||
* runtime/cean_util.cpp: Likewise.
|
||||
* runtime/cean_util.h: Likewise.
|
||||
* runtime/coi/coi_client.cpp: Likewise.
|
||||
* runtime/coi/coi_client.h: Likewise.
|
||||
* runtime/coi/coi_server.cpp: Likewise.
|
||||
* runtime/coi/coi_server.h: Likewise.
|
||||
* runtime/compiler_if_host.cpp: Likewise.
|
||||
* runtime/compiler_if_host.h: Likewise.
|
||||
* runtime/compiler_if_target.cpp: Likewise.
|
||||
* runtime/compiler_if_target.h: Likewise.
|
||||
* runtime/dv_util.cpp: Likewise.
|
||||
* runtime/dv_util.h: Likewise.
|
||||
* runtime/liboffload_error.c: Likewise.
|
||||
* runtime/liboffload_error_codes.h: Likewise.
|
||||
* runtime/liboffload_msg.c: Likewise.
|
||||
* runtime/liboffload_msg.h: Likewise.
|
||||
* runtime/mic_lib.f90: Likewise.
|
||||
* runtime/offload.h: Likewise.
|
||||
* runtime/offload_common.cpp: Likewise.
|
||||
* runtime/offload_common.h: Likewise.
|
||||
* runtime/offload_engine.cpp: Likewise.
|
||||
* runtime/offload_engine.h: Likewise.
|
||||
* runtime/offload_env.cpp: Likewise.
|
||||
* runtime/offload_env.h: Likewise.
|
||||
* runtime/offload_host.cpp: Likewise.
|
||||
* runtime/offload_host.h: Likewise.
|
||||
* runtime/offload_iterator.h: Likewise.
|
||||
* runtime/offload_myo_host.cpp: Likewise.
|
||||
* runtime/offload_myo_host.h: Likewise.
|
||||
* runtime/offload_myo_target.cpp: Likewise.
|
||||
* runtime/offload_myo_target.h: Likewise.
|
||||
* runtime/offload_omp_host.cpp: Likewise.
|
||||
* runtime/offload_omp_target.cpp: Likewise.
|
||||
* runtime/offload_orsl.cpp: Likewise.
|
||||
* runtime/offload_orsl.h: Likewise.
|
||||
* runtime/offload_table.cpp: Likewise.
|
||||
* runtime/offload_table.h: Likewise.
|
||||
* runtime/offload_target.cpp: Likewise.
|
||||
* runtime/offload_target.h: Likewise.
|
||||
* runtime/offload_target_main.cpp: Likewise.
|
||||
* runtime/offload_timer.h: Likewise.
|
||||
* runtime/offload_timer_host.cpp: Likewise.
|
||||
* runtime/offload_timer_target.cpp: Likewise.
|
||||
* runtime/offload_trace.cpp: Likewise.
|
||||
* runtime/offload_trace.h: Likewise.
|
||||
* runtime/offload_util.cpp: Likewise.
|
||||
* runtime/offload_util.h: Likewise.
|
||||
* runtime/ofldbegin.cpp: Likewise.
|
||||
* runtime/ofldend.cpp: Likewise.
|
||||
* runtime/orsl-lite/include/orsl-lite.h: Likewise.
|
||||
* runtime/orsl-lite/lib/orsl-lite.c: Likewise.
|
||||
* runtime/use_mpss2.txt: Likewise.
|
||||
* include/coi/common/COIEngine_common.h: Merge from upstream, MPSS
|
||||
version 3.5
|
||||
<http://registrationcenter.intel.com/irc_nas/7445/mpss-src-3.5.tar>.
|
||||
* include/coi/common/COIEvent_common.h: Likewise.
|
||||
* include/coi/common/COIMacros_common.h: Likewise.
|
||||
* include/coi/common/COIPerf_common.h: Likewise.
|
||||
* include/coi/common/COIResult_common.h: Likewise.
|
||||
* include/coi/common/COISysInfo_common.h: Likewise.
|
||||
* include/coi/common/COITypes_common.h: Likewise.
|
||||
* include/coi/sink/COIBuffer_sink.h: Likewise.
|
||||
* include/coi/sink/COIPipeline_sink.h: Likewise.
|
||||
* include/coi/sink/COIProcess_sink.h: Likewise.
|
||||
* include/coi/source/COIBuffer_source.h: Likewise.
|
||||
* include/coi/source/COIEngine_source.h: Likewise.
|
||||
* include/coi/source/COIEvent_source.h: Likewise.
|
||||
* include/coi/source/COIPipeline_source.h: Likewise.
|
||||
* include/coi/source/COIProcess_source.h: Likewise.
|
||||
* include/myo/myo.h: Likewise.
|
||||
* include/myo/myoimpl.h: Likewise.
|
||||
* include/myo/myotypes.h: Likewise.
|
||||
* plugin/Makefile.am (myo_inc_dir): Remove.
|
||||
(libgomp_plugin_intelmic_la_CPPFLAGS): Do not define MYO_SUPPORT.
|
||||
(AM_CPPFLAGS): Likewise for offload_target_main.
|
||||
* plugin/Makefile.in: Regenerate.
|
||||
* runtime/emulator/coi_common.h: Update copyright years.
|
||||
(OFFLOAD_EMUL_KNC_NUM_ENV): Replace with ...
|
||||
(OFFLOAD_EMUL_NUM_ENV): ... this.
|
||||
(enum cmd_t): Add CMD_CLOSE_LIBRARY.
|
||||
* runtime/emulator/coi_device.cpp: Update copyright years.
|
||||
(COIProcessWaitForShutdown): Add space between string constants.
|
||||
Return handle to host in CMD_OPEN_LIBRARY.
|
||||
Support CMD_CLOSE_LIBRARY.
|
||||
* runtime/emulator/coi_device.h: Update copyright years.
|
||||
* runtime/emulator/coi_host.cpp: Update copyright years.
|
||||
(knc_engines_num): Replace with ...
|
||||
(num_engines): ... this.
|
||||
(init): Replace OFFLOAD_EMUL_KNC_NUM_ENV with OFFLOAD_EMUL_NUM_ENV.
|
||||
(COIEngineGetCount): Replace COI_ISA_KNC with COI_ISA_MIC, and
|
||||
knc_engines_num with num_engines.
|
||||
(COIEngineGetHandle): Likewise.
|
||||
(COIProcessCreateFromMemory): Add space between string constants.
|
||||
(COIProcessCreateFromFile): New function.
|
||||
(COIProcessLoadLibraryFromMemory): Rename arguments according to
|
||||
COIProcess_source.h. Return handle, received from target.
|
||||
(COIProcessUnloadLibrary): New function.
|
||||
(COIPipelineClearCPUMask): New function.
|
||||
(COIPipelineSetCPUMask): New function.
|
||||
(COIEngineGetInfo): New function.
|
||||
* runtime/emulator/coi_host.h: Update copyright years.
|
||||
* runtime/emulator/coi_version_asm.h: Regenerate.
|
||||
* runtime/emulator/coi_version_linker_script.map: Regenerate.
|
||||
* runtime/emulator/myo_client.cpp: Update copyright years.
|
||||
* runtime/emulator/myo_service.cpp: Update copyright years.
|
||||
(myoArenaRelease): New function.
|
||||
(myoArenaAcquire): New function.
|
||||
(myoArenaAlignedFree): New function.
|
||||
(myoArenaAlignedMalloc): New function.
|
||||
* runtime/emulator/myo_service.h: Update copyright years.
|
||||
* runtime/emulator/myo_version_asm.h: Regenerate.
|
||||
* runtime/emulator/myo_version_linker_script.map: Regenerate.
|
||||
|
||||
2015-08-24 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_version): New.
|
||||
@ -17,11 +137,11 @@
|
||||
* configure: Reflects renaming of configure.in to configure.ac
|
||||
|
||||
2015-07-17 Nathan Sidwell <nathan@acm.org>
|
||||
Ilya Verbin <iverbin@gmail.com>
|
||||
Ilya Verbin <ilya.verbin@intel.com>
|
||||
|
||||
* plugin/libgomp-plugin-intelmic.cpp (ImgDevAddrMap): Constify.
|
||||
(offload_image, GOMP_OFFLOAD_load_image,
|
||||
OMP_OFFLOAD_unload_image): Constify target data.
|
||||
GOMP_OFFLOAD_unload_image): Constify target data.
|
||||
|
||||
2015-07-08 Thomas Schwinge <thomas@codesourcery.com>
|
||||
|
||||
|
@ -84,8 +84,6 @@ liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
|
||||
|
||||
liboffloadmic_host_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=1
|
||||
liboffloadmic_host_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
|
||||
liboffloadmic_host_la_LIBADD = libcoi_host.la libmyo-client.la
|
||||
liboffloadmic_host_la_DEPENDENCIES = $(liboffloadmic_host_la_LIBADD)
|
||||
|
||||
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
|
||||
runtime/coi/coi_server.cpp \
|
||||
|
@ -165,6 +165,7 @@ libmyo_service_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
|
||||
$(CXXFLAGS) $(libmyo_service_la_LDFLAGS) $(LDFLAGS) -o $@
|
||||
@LIBOFFLOADMIC_HOST_FALSE@am_libmyo_service_la_rpath = -rpath \
|
||||
@LIBOFFLOADMIC_HOST_FALSE@ $(toolexeclibdir)
|
||||
liboffloadmic_host_la_LIBADD =
|
||||
am__objects_1 = liboffloadmic_host_la-dv_util.lo \
|
||||
liboffloadmic_host_la-liboffload_error.lo \
|
||||
liboffloadmic_host_la-liboffload_msg.lo \
|
||||
@ -445,8 +446,6 @@ liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
|
||||
|
||||
liboffloadmic_host_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=1
|
||||
liboffloadmic_host_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
|
||||
liboffloadmic_host_la_LIBADD = libcoi_host.la libmyo-client.la
|
||||
liboffloadmic_host_la_DEPENDENCIES = $(liboffloadmic_host_la_LIBADD)
|
||||
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
|
||||
runtime/coi/coi_server.cpp \
|
||||
runtime/compiler_if_target.cpp \
|
||||
|
@ -82,7 +82,7 @@ Notice revision \#20110804
|
||||
|
||||
Intel, Xeon, and Intel Xeon Phi are trademarks of Intel Corporation in the U.S. and/or other countries.
|
||||
|
||||
This document is Copyright \textcopyright 2014, Intel Corporation. All rights reserved.
|
||||
This document is Copyright \textcopyright 2014-2015, Intel Corporation. All rights reserved.
|
||||
|
||||
\pagenumbering{roman}
|
||||
\tableofcontents
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -64,7 +64,7 @@ extern "C" {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// List of ISA types of supported engines.
|
||||
/// List of ISA types of supported engines.
|
||||
///
|
||||
typedef enum
|
||||
{
|
||||
@ -89,7 +89,7 @@ typedef enum
|
||||
/// [out] The zero-based index of this engine in the collection of
|
||||
/// engines of the ISA returned in out_pType.
|
||||
///
|
||||
/// @return COI_INVALID_POINTER if the any of the parameters are NULL.
|
||||
/// @return COI_INVALID_POINTER if any of the parameters are NULL.
|
||||
///
|
||||
/// @return COI_SUCCESS
|
||||
///
|
||||
|
84
liboffloadmic/include/coi/common/COIEvent_common.h
Normal file
84
liboffloadmic/include/coi/common/COIEvent_common.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, version 2.1.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* Disclaimer: The codes contained in these modules may be specific
|
||||
* to the Intel Software Development Platform codenamed Knights Ferry,
|
||||
* and the Intel product codenamed Knights Corner, and are not backward
|
||||
* compatible with other Intel products. Additionally, Intel will NOT
|
||||
* support the codes or instruction set in future products.
|
||||
*
|
||||
* Intel offers no warranty of any kind regarding the code. This code is
|
||||
* licensed on an "AS IS" basis and Intel is not obligated to provide
|
||||
* any support, assistance, installation, training, or other services
|
||||
* of any kind. Intel is also not obligated to provide any updates,
|
||||
* enhancements or extensions. Intel specifically disclaims any warranty
|
||||
* of merchantability, non-infringement, fitness for any particular
|
||||
* purpose, and any other warranty.
|
||||
*
|
||||
* Further, Intel disclaims all liability of any kind, including but
|
||||
* not limited to liability for infringement of any proprietary rights,
|
||||
* relating to the use of the code, even if Intel is notified of the
|
||||
* possibility of such liability. Except as expressly stated in an Intel
|
||||
* license agreement provided with this code and agreed upon with Intel,
|
||||
* no license, express or implied, by estoppel or otherwise, to any
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
#ifndef _COIEVENT_COMMON_H
|
||||
#define _COIEVENT_COMMON_H
|
||||
|
||||
/** @ingroup COIEvent
|
||||
* @addtogroup COIEventcommon
|
||||
@{
|
||||
* @file common/COIEvent_common.h
|
||||
*/
|
||||
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
||||
|
||||
#include "../common/COITypes_common.h"
|
||||
#include "../common/COIResult_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#endif // DOXYGEN_SHOULD_SKIP_THIS
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Signal one shot user event. User events created on source can be
|
||||
/// signaled from both sink and source. This fires the event and wakes up
|
||||
/// threads waiting on COIEventWait.
|
||||
///
|
||||
/// Note: For events that are not registered or already signaled this call
|
||||
/// will behave as a NOP. Users need to make sure that they pass valid
|
||||
/// events on the sink side.
|
||||
///
|
||||
/// @param in_Event
|
||||
/// Event Handle to be signaled.
|
||||
///
|
||||
/// @return COI_INVALID_HANDLE if in_Event was not a User event.
|
||||
///
|
||||
/// @return COI_ERROR if the signal fails to be sent from the sink.
|
||||
///
|
||||
/// @return COI_SUCCESS if the event was successfully signaled or ignored.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT COIEventSignalUserEvent(COIEVENT in_Event);
|
||||
///
|
||||
///
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _COIEVENT_COMMON_H */
|
||||
|
||||
/*! @} */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -41,12 +41,17 @@
|
||||
#ifndef _COIMACROS_COMMON_H
|
||||
#define _COIMACROS_COMMON_H
|
||||
|
||||
#include <string.h>
|
||||
#include "../source/COIPipeline_source.h"
|
||||
#include "../common/COITypes_common.h"
|
||||
|
||||
/// @file common/COIMacros_common.h
|
||||
/// Commonly used macros
|
||||
|
||||
// Note that UNUSED_ATTR means that it is "possibly" unused, not "definitely".
|
||||
// This should compile out in release mode if indeed it is unused.
|
||||
#define UNUSED_ATTR __attribute__((unused))
|
||||
#include <sched.h>
|
||||
#ifndef UNREFERENCED_CONST_PARAM
|
||||
#define UNREFERENCED_CONST_PARAM(P) { void* x UNUSED_ATTR = \
|
||||
(void*)(uint64_t)P; \
|
||||
@ -66,4 +71,150 @@
|
||||
|
||||
#endif
|
||||
|
||||
/* The following are static inline definitions of functions used for manipulating
|
||||
COI_CPU_MASK info (The COI_CPU_MASK type is declared as an array of 16 uint64_t's
|
||||
in COITypes_common.h "typedef uint64_t COI_CPU_MASK[16]").
|
||||
|
||||
These static inline functions are intended to be roughly the same as the Linux
|
||||
CPU_* macros defined in sched.h - with the important difference being a different
|
||||
fundamental type: cpu_set_t versus COI_CPU_MASK.
|
||||
|
||||
The motivation for writing this code was to ease portability on the host side of COI
|
||||
applications to both Windows and Linux.
|
||||
*/
|
||||
|
||||
/* Roughly equivalent to CPU_ISSET(). */
|
||||
static inline uint64_t COI_CPU_MASK_ISSET(int bitNumber, const COI_CPU_MASK cpu_mask)
|
||||
{
|
||||
if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
|
||||
return ((cpu_mask)[bitNumber/64] & (((uint64_t)1) << (bitNumber%64)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_SET(). */
|
||||
static inline void COI_CPU_MASK_SET(int bitNumber, COI_CPU_MASK cpu_mask)
|
||||
{
|
||||
if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
|
||||
((cpu_mask)[bitNumber/64] |= (((uint64_t)1) << (bitNumber%64)));
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_ZERO(). */
|
||||
static inline void COI_CPU_MASK_ZERO(COI_CPU_MASK cpu_mask)
|
||||
{
|
||||
memset(cpu_mask,0,sizeof(COI_CPU_MASK));
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_AND(). */
|
||||
static inline void COI_CPU_MASK_AND(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
|
||||
{
|
||||
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
|
||||
|
||||
for(unsigned int i=0;i<loopIterations;++i)
|
||||
dst[i] = src1[i] & src2[i];
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_XOR(). */
|
||||
static inline void COI_CPU_MASK_XOR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
|
||||
{
|
||||
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
|
||||
|
||||
for(unsigned int i=0;i<loopIterations;++i)
|
||||
dst[i] = src1[i] ^ src2[i];
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_OR(). */
|
||||
static inline void COI_CPU_MASK_OR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
|
||||
{
|
||||
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
|
||||
|
||||
for(unsigned int i=0;i<loopIterations;++i)
|
||||
dst[i] = src1[i] | src2[i];
|
||||
}
|
||||
|
||||
/* Helper for COI_CPU_MASK_COUNT(): return the number of 1-bits in n. */
static inline int __COI_CountBits(uint64_t n)
{
    int setBits = 0;

    /* n & (n-1) clears the lowest set bit, so the loop body runs exactly
       once per 1-bit in the original value. */
    while (n != 0)
    {
        n &= n - 1;
        ++setBits;
    }

    return setBits;
}
|
||||
|
||||
/* Roughly equivalent to CPU_COUNT(). */
|
||||
static inline int COI_CPU_MASK_COUNT(const COI_CPU_MASK cpu_mask)
|
||||
{
|
||||
int cnt=0;
|
||||
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask[0]);
|
||||
|
||||
for(unsigned int i=0;i < loopIterations;++i)
|
||||
{
|
||||
cnt += __COI_CountBits(cpu_mask[i]);
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/* Roughly equivalent to CPU_EQUAL(). */
|
||||
static inline int COI_CPU_MASK_EQUAL(const COI_CPU_MASK cpu_mask1,const COI_CPU_MASK cpu_mask2)
|
||||
{
|
||||
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask1[0]);
|
||||
|
||||
for(unsigned int i=0;i < loopIterations;++i)
|
||||
{
|
||||
if (cpu_mask1[i] != cpu_mask2[i])
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Utility function to translate from cpu_set * to COI_CPU_MASK. */
|
||||
static inline void COI_CPU_MASK_XLATE(COI_CPU_MASK dest,const cpu_set_t *src)
|
||||
{
|
||||
COI_CPU_MASK_ZERO(dest);
|
||||
#if 0
|
||||
/* Slightly slower version than the following #else/#endif block. Left here only to
|
||||
document the intent of the code. */
|
||||
for(unsigned int i=0;i < sizeof(cpu_set_t)*8;++i)
|
||||
if (CPU_ISSET(i,src))
|
||||
COI_CPU_MASK_SET(i,dest);
|
||||
#else
|
||||
for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(dest[0]);++i)
|
||||
{
|
||||
for(unsigned int j=0;j < 64;++j)
|
||||
{
|
||||
if (CPU_ISSET(i*64+j,src))
|
||||
dest[i] |= ((uint64_t)1) << j;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Utility function to translate from COI_CPU_MASK to cpu_set *. */
|
||||
static inline void COI_CPU_MASK_XLATE_EX(cpu_set_t *dest,const COI_CPU_MASK src)
|
||||
{
|
||||
CPU_ZERO(dest);
|
||||
#if 0
|
||||
/* Slightly slower version than the following #else/#endif block. Left here only to
|
||||
document the intent of the code. */
|
||||
for(unsigned int i=0;i < sizeof(COI_CPU_MASK)*8;++i)
|
||||
if (COI_CPU_MASK_ISSET(i,src))
|
||||
CPU_SET(i,dest);
|
||||
#else
|
||||
for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(src[0]);++i)
|
||||
{
|
||||
const uint64_t cpu_mask = src[i];
|
||||
|
||||
for(unsigned int j=0;j < 64;++j)
|
||||
{
|
||||
const uint64_t bit = ((uint64_t)1) << j;
|
||||
|
||||
if (bit & cpu_mask)
|
||||
CPU_SET(i*64+j,dest);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#endif /* _COIMACROS_COMMON_H */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -110,12 +110,13 @@ typedef enum COIRESULT
|
||||
///< Offload Infrastructure on the host
|
||||
///< is not compatible with the version
|
||||
///< on the device.
|
||||
COI_BAD_PORT, ///< The port that the host is set to
|
||||
COI_BAD_PORT, ///< The port that the host is set to
|
||||
///< connect to is invalid.
|
||||
COI_AUTHENTICATION_FAILURE, ///< The daemon was unable to authenticate
|
||||
///< the user that requested an engine.
|
||||
///< Only reported if daemon is set up for
|
||||
///< authorization.
|
||||
///< authorization. Is also reported in
|
||||
///< Windows if host can not find user.
|
||||
COI_NUM_RESULTS ///< Reserved, do not use.
|
||||
}
|
||||
COIRESULT;
|
||||
|
126
liboffloadmic/include/coi/common/COISysInfo_common.h
Normal file
126
liboffloadmic/include/coi/common/COISysInfo_common.h
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, version 2.1.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* Disclaimer: The codes contained in these modules may be specific
|
||||
* to the Intel Software Development Platform codenamed Knights Ferry,
|
||||
* and the Intel product codenamed Knights Corner, and are not backward
|
||||
* compatible with other Intel products. Additionally, Intel will NOT
|
||||
* support the codes or instruction set in future products.
|
||||
*
|
||||
* Intel offers no warranty of any kind regarding the code. This code is
|
||||
* licensed on an "AS IS" basis and Intel is not obligated to provide
|
||||
* any support, assistance, installation, training, or other services
|
||||
* of any kind. Intel is also not obligated to provide any updates,
|
||||
* enhancements or extensions. Intel specifically disclaims any warranty
|
||||
* of merchantability, non-infringement, fitness for any particular
|
||||
* purpose, and any other warranty.
|
||||
*
|
||||
* Further, Intel disclaims all liability of any kind, including but
|
||||
* not limited to liability for infringement of any proprietary rights,
|
||||
* relating to the use of the code, even if Intel is notified of the
|
||||
* possibility of such liability. Except as expressly stated in an Intel
|
||||
* license agreement provided with this code and agreed upon with Intel,
|
||||
* no license, express or implied, by estoppel or otherwise, to any
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
#ifndef _COISYSINFO_COMMON_H
|
||||
#define _COISYSINFO_COMMON_H
|
||||
|
||||
/** @ingroup COISysInfo
|
||||
* @addtogroup COISysInfoCommon
|
||||
@{
|
||||
* @file common/COISysInfo_common.h
|
||||
* This interface allows developers to query the platform for system level
|
||||
* information. */
|
||||
|
||||
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
||||
#include "../common/COITypes_common.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#endif // DOXYGEN_SHOULD_SKIP_THIS
|
||||
|
||||
#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] unique APIC ID
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/// \fn uint32_t COISysGetAPICID(void)
|
||||
/// @return The Advanced Programmable Interrupt Controller (APIC) ID of
|
||||
/// the hardware thread on which the caller is running.
|
||||
///
|
||||
/// @warning APIC IDs are unique to each hardware thread within a processor,
|
||||
/// but may not be sequential.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetAPICID(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The number of cores exposed by the processor on which the caller is
|
||||
/// running. Returns 0 if there is an error loading the processor info.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetCoreCount(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The number of hardware threads exposed by the processor on which
|
||||
/// the caller is running. Returns 0 if there is an error loading processor
|
||||
/// info.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetHardwareThreadCount(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The index of the hardware thread on which the caller is running.
|
||||
///
|
||||
/// The indexes of neighboring hardware threads will differ by a value of one
|
||||
/// and are within the range zero through COISysGetHardwareThreadCount()-1.
|
||||
/// Returns ((uint32_t)-1) if there was an error loading processor info.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetHardwareThreadIndex(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The index of the core on which the caller is running.
|
||||
///
|
||||
/// The indexes of neighboring cores will differ by a value of one and are
|
||||
/// within the range zero through COISysGetCoreCount()-1. Returns ((uint32_t)-1)
|
||||
/// if there was an error loading processor info.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetCoreIndex(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The number of level 2 caches within the processor on which the
|
||||
/// caller is running. Returns ((uint32_t)-1) if there was an error loading
|
||||
/// processor info.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetL2CacheCount(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// @return The index of the level 2 cache on which the caller is running.
|
||||
/// Returns ((uint32_t)-1) if there was an error loading processor info.
|
||||
///
|
||||
/// The indexes of neighboring L2 caches will differ by a value of one and are
|
||||
/// within the range zero through COISysGetL2CacheCount()-1.
|
||||
COIACCESSAPI
|
||||
uint32_t COISysGetL2CacheIndex(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
/*! @} */
|
||||
|
||||
#endif /* _COISYSINFO_COMMON_H */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -73,8 +73,8 @@ typedef struct coimapinst * COIMAPINSTANCE;
|
||||
|
||||
typedef uint64_t COI_CPU_MASK[16];
|
||||
|
||||
/**
|
||||
* On Windows, coi_wchar_t is a uint32_t. On Windows, wchar_t is 16 bits wide, and on Linux it is 32 bits wide, so uint32_t is used for portability.
|
||||
/**
|
||||
* On Windows, coi_wchar_t is a uint32_t. On Windows, wchar_t is 16 bits wide, and on Linux it is 32 bits wide, so uint32_t is used for portability.
|
||||
*/
|
||||
typedef wchar_t coi_wchar_t;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -45,7 +45,7 @@
|
||||
* @addtogroup COIBufferSink
|
||||
@{
|
||||
|
||||
* @file sink\COIBuffer_sink.h
|
||||
* @file sink\COIBuffer_sink.h
|
||||
*/
|
||||
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
||||
#include "../common/COITypes_common.h"
|
||||
@ -54,29 +54,29 @@
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Adds a reference to the memory of a buffer. The memory of the buffer
|
||||
/// will remain on the device until both a corresponding COIBufferReleaseRef()
|
||||
/// Adds a reference to the memory of a buffer. The memory of the buffer
|
||||
/// will remain on the device until both a corresponding COIBufferReleaseRef()
|
||||
/// call is made and the run function that delivered the buffer returns.
|
||||
///
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI) streaming buffers should not be AddRef'd. Doing so may result in
|
||||
/// unpredictable results or may cause the sink process to crash.
|
||||
/// Running this API in a thread spawned within the run function is not
|
||||
/// supported and will cause unpredictable results and may cause data corruption.
|
||||
///
|
||||
/// @warning 1.It is possible for enqueued run functions to be unable to
|
||||
/// execute due to all card memory being occupied by addref'ed
|
||||
/// @warning 1.It is possible for enqueued run functions to be unable to
|
||||
/// execute due to all card memory being occupied by AddRef'd
|
||||
/// buffers. As such, it is important that whenever a buffer is
|
||||
/// addref'd that there be no dependencies on future run functions
|
||||
/// AddRef'd that there be no dependencies on future run functions
|
||||
/// for progress to be made towards releasing the buffer.
|
||||
/// 2.It is important that AddRef is called within the scope of
|
||||
/// run function that carries the buffer to be addref'ed.
|
||||
/// 2.It is important that AddRef is called within the scope of
|
||||
/// run function that carries the buffer to be AddRef'd.
|
||||
///
|
||||
/// @param in_pBuffer
|
||||
/// [in] Pointer to the start of a buffer being addref'ed, that was
|
||||
/// [in] Pointer to the start of a buffer being AddRef'd, that was
|
||||
/// passed in at the start of the run function.
|
||||
///
|
||||
///
|
||||
/// @return COI_SUCCESS if the buffer ref count was successfully incremented.
|
||||
///
|
||||
/// @return COI_INVALID_POINTER if the buffer pointer is NULL.
|
||||
@ -90,30 +90,33 @@ COIBufferAddRef(
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Removes a reference to the memory of a buffer. The memory of the buffer
|
||||
/// Removes a reference to the memory of a buffer. The memory of the buffer
|
||||
/// will be eligible for being freed on the device when the following
|
||||
/// conditions are met: the run function that delivered the buffer
|
||||
/// returns, and the number of calls to COIBufferReleaseRef() matches the
|
||||
/// returns, and the number of calls to COIBufferReleaseRef() matches the
|
||||
/// number of calls to COIBufferAddRef().
|
||||
///
|
||||
/// Running this API in a thread spawned within the run function is not
|
||||
/// supported and will cause unpredictable results and may cause data corruption.
|
||||
///
|
||||
/// @warning When a buffer is addref'ed it is assumed that it is in use and all
|
||||
/// @warning When a buffer is AddRef'd it is assumed that it is in use and all
|
||||
/// other operations on that buffer wait for ReleaseRef() to happen.
|
||||
/// So you cannot pass the addref'ed buffer's handle to RunFunction
|
||||
/// that calls ReleaseRef(). This is a circular dependency and will
|
||||
/// cause a deadlock. Buffer's pointer (buffer's sink side
|
||||
/// So you cannot pass the AddRef'd buffer's handle to RunFunction
|
||||
/// that calls ReleaseRef(). This is a circular dependency and will
|
||||
/// cause a deadlock. Buffer's pointer (buffer's sink side
|
||||
/// address/pointer which is different than source side BUFFER handle)
|
||||
/// needs to be stored somewhere to retrieve it later to use in
|
||||
/// needs to be stored somewhere to retrieve it later to use in
|
||||
/// ReleaseRef.
|
||||
///
|
||||
/// @param in_pBuffer
|
||||
/// [in] Pointer to the start of a buffer previously addref'ed, that
|
||||
/// [in] Pointer to the start of a buffer previously AddRef'd, that
|
||||
/// was passed in at the start of the run function.
|
||||
///
|
||||
///
|
||||
/// @return COI_SUCCESS if the buffer refcount was successfully decremented.
|
||||
///
|
||||
/// @return COI_INVALID_POINTER if the buffer pointer was invalid.
|
||||
///
|
||||
/// @return COI_INVALID_HANDLE if the buffer did not have COIBufferAddRef()
|
||||
/// @return COI_INVALID_HANDLE if the buffer did not have COIBufferAddRef()
|
||||
/// previously called on it.
|
||||
///
|
||||
COIRESULT
|
||||
@ -123,7 +126,7 @@ COIBufferReleaseRef(
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* _COIBUFFER_SINK_H */
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -63,10 +63,11 @@ extern "C" {
|
||||
/// main() function from exiting until it is directed to by the source. When
|
||||
/// the shutdown message is received this function will stop any future run
|
||||
/// functions from executing but will wait for any current run functions to
|
||||
/// complete. All Intel® Coprocessor Offload Infrastructure (Intel® COI) resources will be cleaned up and no additional Intel® Coprocessor Offload Infrastructure (Intel® COI) APIs
|
||||
/// should be called after this function returns. This function does not
|
||||
/// invoke exit() so the application can perform any of its own cleanup once
|
||||
/// this call returns.
|
||||
/// complete. All Intel® Coprocessor Offload Infrastructure (Intel® COI)
|
||||
/// resources will be cleaned up and no additional Intel® Coprocessor Offload
|
||||
/// Infrastructure (Intel® COI) APIs should be called after this function
|
||||
/// returns. This function does not invoke exit() so the application
|
||||
/// can perform any of its own cleanup once this call returns.
|
||||
///
|
||||
/// @return COI_SUCCESS once the process receives the shutdown message.
|
||||
///
|
||||
@ -86,8 +87,9 @@ COIProcessWaitForShutdown();
|
||||
/// from this call.
|
||||
///
|
||||
/// @return COI_SUCCESS once the proxy output has been flushed to and
|
||||
/// written by the host. Note that Intel® Coprocessor Offload Infrastructure (Intel® COI) on the source writes to stdout
|
||||
/// and stderr, but does not flush this output.
|
||||
/// written by the host. Note that Intel® Coprocessor Offload
|
||||
/// Infrastructure (Intel® COI) on the source writes to stdout and
|
||||
/// stderr, but does not flush this output.
|
||||
/// @return COI_SUCCESS if the process was created without enabling
|
||||
/// proxy IO this function.
|
||||
///
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -75,7 +75,7 @@ typedef enum
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/// This structure returns information about an Intel(r) Xeon Phi(tm)
|
||||
/// This structure returns information about an Intel(R) Xeon Phi(TM)
|
||||
/// coprocessor.
|
||||
/// A pointer to this structure is passed into the COIGetEngineInfo() function,
|
||||
/// which fills in the data before returning to the caller.
|
||||
@ -101,6 +101,7 @@ typedef struct COI_ENGINE_INFO
|
||||
uint32_t CoreMaxFrequency;
|
||||
|
||||
/// The load percentage for each of the hardware threads on the engine.
|
||||
/// Currently this is limited to reporting out a maximum of 1024 HW threads
|
||||
uint32_t Load[COI_MAX_HW_THREADS];
|
||||
|
||||
/// The amount of physical memory managed by the OS.
|
||||
@ -133,9 +134,9 @@ typedef struct COI_ENGINE_INFO
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Returns information related to a specified engine. Note that if Intel® Coprocessor Offload Infrastructure (Intel® COI) is
|
||||
/// unable to query a value it will be returned as zero but the call will
|
||||
/// still succeed.
|
||||
/// Returns information related to a specified engine. Note that if Intel(R)
|
||||
/// Coprocessor Offload Infrastructure (Intel(R) COI) is unable to query
|
||||
/// a value it will be returned as zero but the call will still succeed.
|
||||
///
|
||||
///
|
||||
/// @param in_EngineHandle
|
||||
@ -173,14 +174,15 @@ COIEngineGetInfo(
|
||||
///
|
||||
/// Returns the number of engines in the system that match the provided ISA.
|
||||
///
|
||||
/// Note that while it is possible to enumerate different types of Intel(r)
|
||||
/// Xeon Phi(tm) coprocessors on a single host this is not currently
|
||||
/// supported. Intel® Coprocessor Offload Infrastructure (Intel® COI) makes an assumption that all Intel(r) Xeon Phi(tm)
|
||||
/// coprocessors found in the system are the same architecture as the first
|
||||
/// coprocessor device.
|
||||
/// Note that while it is possible to enumerate different types of Intel(R)
|
||||
/// Xeon Phi(TM) coprocessors on a single host this is not currently
|
||||
/// supported. Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
|
||||
/// makes an assumption that all Intel(R) Xeon Phi(TM) coprocessors found
|
||||
/// in the system are the same architecture as the first coprocessor device.
|
||||
///
|
||||
/// Also, note that this function returns the number of engines that Intel® Coprocessor Offload Infrastructure (Intel® COI)
|
||||
/// is able to detect. Not all of them may be online.
|
||||
/// Also, note that this function returns the number of engines that Intel(R)
|
||||
/// Coprocessor Offload Infrastructure (Intel(R) COI) is able to detect. Not
|
||||
/// all of them may be online.
|
||||
///
|
||||
/// @param in_ISA
|
||||
/// [in] Specifies the ISA type of the engine requested.
|
||||
@ -211,7 +213,7 @@ COIEngineGetCount(
|
||||
///
|
||||
/// @param in_EngineIndex
|
||||
/// [in] An unsigned integer which specifies the zero-based position of
|
||||
/// the engine in a collection of engines. The makeup of this
|
||||
/// the engine in a collection of engines. The makeup of this
|
||||
/// collection is defined by the in_ISA parameter.
|
||||
///
|
||||
/// @param out_pEngineHandle
|
||||
@ -226,7 +228,8 @@ COIEngineGetCount(
|
||||
///
|
||||
/// @return COI_INVALID_POINTER if the out_pEngineHandle parameter is NULL.
|
||||
///
|
||||
/// @return COI_VERSION_MISMATCH if the version of Intel® Coprocessor Offload Infrastructure (Intel® COI) on the host is not
|
||||
/// @return COI_VERSION_MISMATCH if the version of Intel(R) Coprocessor Offload
|
||||
/// Infrastructure (Intel(R) COI) on the host is not
|
||||
/// compatible with the version on the device.
|
||||
///
|
||||
/// @return COI_NOT_INITIALIZED if the engine requested exists but is offline.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -59,12 +59,10 @@ extern "C" {
|
||||
///
|
||||
/// Special case event values which can be passed in to APIs to specify
|
||||
/// how the API should behave. In COIBuffer APIs passing in NULL for the
|
||||
/// completion event is the equivalent of passing COI_EVENT_SYNC. For
|
||||
/// COIPipelineRunFunction passing in NULL is the equivalent of
|
||||
/// COI_EVENT_ASYNC.
|
||||
/// completion event is the equivalent of passing COI_EVENT_SYNC.
|
||||
/// Note that passing COI_EVENT_ASYNC can be used when the caller wishes the
|
||||
/// operation to be performed asynchronously but does not care when the
|
||||
/// operation completes. This can be useful for opertions that by definition
|
||||
/// operation completes. This can be useful for operations that by definition
|
||||
/// must complete in order (DMAs, run functions on a single pipeline). If
|
||||
/// the caller does care when the operation completes then they should pass
|
||||
/// in a valid completion event which they can later wait on.
|
||||
@ -72,6 +70,16 @@ extern "C" {
|
||||
#define COI_EVENT_ASYNC ((COIEVENT*)1)
|
||||
#define COI_EVENT_SYNC ((COIEVENT*)2)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// This can be used to initialize a COIEVENT to a known invalid state.
|
||||
/// This is not required to use, but can be useful in some cases
|
||||
/// if a program is unsure if the event will be initialized by the runtime.
|
||||
/// Simply set the event to this value: COIEVENT event = COI_EVENT_INITIALIZER;
|
||||
///
|
||||
#define COI_EVENT_INITIALIZER { { 0, -1 } }
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Wait for an arbitrary number of COIEVENTs to be signaled as completed,
|
||||
@ -94,17 +102,17 @@ extern "C" {
|
||||
/// and returns immediately, -1 blocks indefinitely.
|
||||
///
|
||||
/// @param in_WaitForAll
|
||||
/// [in] Boolean value specifying behavior. If true, wait for all
|
||||
/// [in] Boolean value specifying behavior. If true, wait for all
|
||||
/// events to be signaled, or for timeout, whichever happens first.
|
||||
/// If false, return when any event is signaled, or at timeout.
|
||||
///
|
||||
/// @param out_pNumSignaled
|
||||
/// [out] The number of events that were signaled. If in_NumEvents
|
||||
/// [out] The number of events that were signaled. If in_NumEvents
|
||||
/// is 1 or in_WaitForAll = True, this parameter is optional.
|
||||
///
|
||||
/// @param out_pSignaledIndices
|
||||
/// [out] Pointer to an array of indicies into the original event
|
||||
/// array. Those denoted have been signaled. The user must provide an
|
||||
/// [out] Pointer to an array of indices into the original event
|
||||
/// array. Those denoted have been signaled. The user must provide an
|
||||
/// array that is no smaller than the in_Events array. If in_NumEvents
|
||||
/// is 1 or in_WaitForAll = True, this parameter is optional.
|
||||
///
|
||||
@ -132,6 +140,10 @@ extern "C" {
|
||||
/// @return COI_PROCESS_DIED if the remote process died. See COIProcessDestroy
|
||||
/// for more details.
|
||||
///
|
||||
/// @return COI_<REAL ERROR> if only a single event is passed in, and that event
|
||||
/// failed, COI will attempt to return the real error code that caused
|
||||
/// the original operation to fail, otherwise COI_PROCESS_DIED is reported.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT
|
||||
COIEventWait(
|
||||
@ -183,6 +195,103 @@ COIRESULT
|
||||
COIEventUnregisterUserEvent(
|
||||
COIEVENT in_Event);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// A callback that will be invoked to notify the user of an internal
|
||||
/// runtime event completion.
|
||||
///
|
||||
/// As with any callback mechanism it is up to the user to make sure that
|
||||
/// there are no possible deadlocks due to reentrancy (i.e. the callback being
|
||||
/// invoked in the same context that triggered the notification) and also
|
||||
/// that the callback does not slow down overall processing. If the user
|
||||
/// performs too much work within the callback it could delay further
|
||||
/// processing. The callback will be invoked prior to the signaling of
|
||||
/// the corresponding COIEvent. For example, if a user is waiting
|
||||
/// for a COIEvent associated with a run function completing they will
|
||||
/// receive the callback before the COIEvent is marked as signaled.
|
||||
///
|
||||
/// @param in_Event
|
||||
/// [in] The completion event that is associated with the
|
||||
/// operation that is being notified.
|
||||
///
|
||||
/// @param in_Result
|
||||
/// [in] The COIRESULT of the operation.
|
||||
///
|
||||
/// @param in_UserData
|
||||
/// [in] Opaque data that was provided when the callback was
|
||||
/// registered. Intel(R) Coprocessor Offload Infrastructure
|
||||
/// (Intel(R) COI) simply passes this back to the user so that
|
||||
/// they can interpret it as they choose.
|
||||
///
|
||||
typedef void (*COI_EVENT_CALLBACK)(
|
||||
COIEVENT in_Event,
|
||||
const COIRESULT in_Result,
|
||||
const void* in_UserData);
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Registers any COIEVENT to receive a one time callback, when the event
|
||||
/// is marked complete in the offload runtime. If the event has completed
|
||||
/// before the COIEventRegisterCallback() is called then the callback will
|
||||
/// immediately be invoked by the calling thread. When the event is
|
||||
/// registered before the event completes, the runtime guarantees that
|
||||
/// the callback will be invoked before COIEventWait() is notified of
|
||||
/// the same event completing. In well written user code, this may provide
|
||||
/// a slight performance advantage.
|
||||
///
|
||||
/// Users should treat the callback much like an interrupt routine with regard
|
||||
/// to performance, specifically designing the callback to be as short and
|
||||
/// non-blocking as possible. Since the thread that runs the callback is
|
||||
/// non-deterministic, blocking or stalling of the callback may have severe
|
||||
/// performance impacts on the offload runtime. Thus, it is important to not
|
||||
/// create deadlocks between the callback and other signaling/waiting
|
||||
/// mechanisms. It is recommended to never invoke COIEventWait() inside
|
||||
/// a callback function, as this could lead to immediate deadlocks.
|
||||
///
|
||||
/// It is important to note that the runtime cannot distinguish between
|
||||
/// already triggered events and invalid events. Thus the user needs to pass
|
||||
/// in a valid event, or the callback will be invoked immediately.
|
||||
/// Failed events will still receive a callback and the user can query
|
||||
/// COIEventWait() after the callback for the failed return code.
|
||||
///
|
||||
/// If more than one callback is registered for the same event, only the
|
||||
/// single most current callback will be used, i.e. the older one will
|
||||
/// be replaced.
|
||||
///
|
||||
/// @param in_Event
|
||||
/// [in] A valid single event handle to be registered to receive a callback.
|
||||
///
|
||||
/// @param in_Callback
|
||||
/// [in] Pointer to a user function used to signal an
|
||||
/// event completion.
|
||||
///
|
||||
/// @param in_UserData
|
||||
/// [in] Opaque data to pass to the callback when it is invoked.
|
||||
///
|
||||
/// @param in_Flags
|
||||
/// [in] Reserved parameter for future expansion, required to be zero for now.
|
||||
///
|
||||
/// @return COI_INVALID_HANDLE if in_Event is not a valid COIEVENT
|
||||
///
|
||||
/// @return COI_INVALID_HANDLE if in_Callback is not a valid pointer.
|
||||
///
|
||||
/// @return COI_ARGUMENT_MISMATCH if the in_Flags is not zero.
|
||||
///
|
||||
/// @return COI_SUCCESS if the event is successfully registered.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT
|
||||
COIEventRegisterCallback(
|
||||
const COIEVENT in_Event,
|
||||
COI_EVENT_CALLBACK in_Callback,
|
||||
const void* in_UserData,
|
||||
const uint64_t in_Flags);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -59,12 +59,13 @@ extern "C" {
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// These flags specify how a buffer will be used within a run function. They
|
||||
/// allow Intel® Coprocessor Offload Infrastructure (Intel® COI) to make optimizations in how it moves data around the system.
|
||||
/// These flags specify how a buffer will be used within a run function. They
|
||||
/// allow the runtime to make optimizations in how it moves the data around.
|
||||
/// These flags can affect the correctness of an application, so they must be
|
||||
/// set properly. For example, if a buffer is used in a run function with the
|
||||
/// COI_SINK_READ flag and then mapped on the source, Intel® Coprocessor Offload Infrastructure (Intel® COI) may use a previously
|
||||
/// cached version of the buffer instead of retrieving data from the sink.
|
||||
/// set properly. For example, if a buffer is used in a run function with the
|
||||
/// COI_SINK_READ flag and then mapped on the source, the runtime may use a
|
||||
/// previously cached version of the buffer instead of retrieving data from
|
||||
/// the sink.
|
||||
typedef enum COI_ACCESS_FLAGS
|
||||
{
|
||||
/// Specifies that the run function will only read the associated buffer.
|
||||
@ -76,7 +77,23 @@ typedef enum COI_ACCESS_FLAGS
|
||||
/// Specifies that the run function will overwrite the entire associated
|
||||
/// buffer and therefore the buffer will not be synchronized with the
|
||||
/// source before execution.
|
||||
COI_SINK_WRITE_ENTIRE
|
||||
COI_SINK_WRITE_ENTIRE,
|
||||
|
||||
/// Specifies that the run function will only read the associated buffer
|
||||
/// and will maintain the reference count on the buffer after
|
||||
/// run function exit.
|
||||
COI_SINK_READ_ADDREF,
|
||||
|
||||
/// Specifies that the run function will write to the associated buffer
|
||||
/// and will maintain the reference count on the buffer after
|
||||
/// run function exit.
|
||||
COI_SINK_WRITE_ADDREF,
|
||||
|
||||
/// Specifies that the run function will overwrite the entire associated
|
||||
/// buffer and therefore the buffer will not be synchronized with the
|
||||
/// source before execution and will maintain the reference count on the
|
||||
/// buffer after run function exit.
|
||||
COI_SINK_WRITE_ENTIRE_ADDREF
|
||||
} COI_ACCESS_FLAGS;
|
||||
|
||||
#define COI_PIPELINE_MAX_PIPELINES 512
|
||||
@ -86,7 +103,7 @@ typedef enum COI_ACCESS_FLAGS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Create a pipeline assoiated with a remote process. This pipeline can
|
||||
/// Create a pipeline associated with a remote process. This pipeline can
|
||||
/// then be used to execute remote functions and to share data using
|
||||
/// COIBuffers.
|
||||
///
|
||||
@ -133,8 +150,8 @@ typedef enum COI_ACCESS_FLAGS
|
||||
/// @return COI_TIME_OUT_REACHED if establishing the communication channel with
|
||||
/// the remote pipeline timed out.
|
||||
///
|
||||
/// @return COI_RETRY if the pipeline cannot be created due to the number of
|
||||
/// source-to-sink connections in use. A subsequent call to
|
||||
/// @return COI_RETRY if the pipeline cannot be created due to the number of
|
||||
/// source-to-sink connections in use. A subsequent call to
|
||||
/// COIPipelineCreate may succeed if resources are freed up.
|
||||
///
|
||||
/// @return COI_PROCESS_DIED if in_Process died.
|
||||
@ -149,7 +166,7 @@ COIPipelineCreate(
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Destroys the inidicated pipeline, releasing its resources.
|
||||
/// Destroys the indicated pipeline, releasing its resources.
|
||||
///
|
||||
/// @param in_Pipeline
|
||||
/// [in] Pipeline to destroy.
|
||||
@ -175,22 +192,21 @@ COIPipelineDestroy(
|
||||
///
|
||||
/// 1. Proper care has to be taken while setting the input dependencies for
|
||||
/// RunFunctions. Setting it incorrectly can lead to cyclic dependencies
|
||||
/// and can cause the respective pipeline (as a result Intel® Coprocessor Offload Infrastructure (Intel® COI) Runtime) to
|
||||
/// stall.
|
||||
/// and can cause the respective pipeline to stall.
|
||||
/// 2. RunFunctions can also segfault if enough memory space is not available
|
||||
/// on the sink for the buffers passed in. Pinned buffers and buffers that
|
||||
/// are AddRef'd need to be accounted for available memory space. In other
|
||||
/// words, this memory is not available for use until it is freed up.
|
||||
/// 3. Unexpected segmentation faults or erroneous behaviour can occur if
|
||||
/// handles or data passed in to Runfunction gets destroyed before the
|
||||
/// 3. Unexpected segmentation faults or erroneous behavior can occur if
|
||||
/// handles or data passed in to Runfunction gets destroyed before the
|
||||
/// RunFunction finishes.
|
||||
/// For example, if a variable passed in as Misc data or the buffer gets
|
||||
/// destroyed before the Intel® Coprocessor Offload Infrastructure (Intel® COI) runtime receives the completion notification
|
||||
/// of the Runfunction, it can cause unexpected behaviour. So it is always
|
||||
/// destroyed before the runtime receives the completion notification
|
||||
/// of the Runfunction, it can cause unexpected behavior. So it is always
|
||||
/// recommended to wait for RunFunction completion event before any related
|
||||
/// destroy event occurs.
|
||||
///
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI) Runtime expects users to handle such scenarios. COIPipelineRunFunction
|
||||
/// The runtime expects users to handle such scenarios. COIPipelineRunFunction
|
||||
/// returns COI_SUCCESS for above cases because it was queued up successfully.
|
||||
/// Also if you try to destroy a pipeline with a stalled function then the
|
||||
/// destroy call will hang. COIPipelineDestroy waits until all the functions
|
||||
@ -240,7 +256,7 @@ COIPipelineDestroy(
|
||||
/// [in] Pointer to user defined data, typically used to pass
|
||||
/// parameters to Sink side functions. Should only be used for small
|
||||
/// amounts of data since the data will be placed directly in the
|
||||
/// Driver's command buffer. COIBuffers should be used to pass large
|
||||
/// Driver's command buffer. COIBuffers should be used to pass large
|
||||
/// amounts of data.
|
||||
///
|
||||
/// @param in_MiscDataLen
|
||||
@ -250,8 +266,8 @@ COIPipelineDestroy(
|
||||
///
|
||||
/// @param out_pAsyncReturnValue
|
||||
/// [out] Pointer to user-allocated memory where the return value from
|
||||
/// the run function will be placed. This memory should not be read
|
||||
/// until out_pCompletion has been signalled.
|
||||
/// the run function will be placed. This memory should not be read
|
||||
/// until out_pCompletion has been signaled.
|
||||
///
|
||||
/// @param in_AsyncReturnValueLen
|
||||
/// [in] Size of the out_pAsyncReturnValue in bytes.
|
||||
@ -259,11 +275,14 @@ COIPipelineDestroy(
|
||||
/// @param out_pCompletion
|
||||
/// [out] An optional pointer to a COIEVENT object
|
||||
/// that will be signaled when this run function has completed
|
||||
/// execution. The user may pass in NULL if they do not wish to signal
|
||||
/// any COIEVENTs when this run function completes.
|
||||
/// execution. The user may pass in NULL if they wish for this function
|
||||
/// to be synchronous, otherwise if a COIEVENT object is passed in the
|
||||
/// function is then asynchronous and closes after enqueuing the
|
||||
/// RunFunction and passes back the COIEVENT that will be signaled
|
||||
/// once the RunFunction has completed.
|
||||
///
|
||||
/// @return COI_SUCCESS if the function was successfully placed in a
|
||||
/// pipeline for future execution. Note that the actual
|
||||
/// pipeline for future execution. Note that the actual
|
||||
/// execution of the function will occur in the future.
|
||||
///
|
||||
/// @return COI_OUT_OF_RANGE if in_NumBuffers is greater than
|
||||
@ -303,18 +322,10 @@ COIPipelineDestroy(
|
||||
/// @return COI_ARGUMENT_MISMATCH if in_pReturnValue is non-NULL but
|
||||
/// in_ReturnValueLen is zero.
|
||||
///
|
||||
/// @return COI_ARGUMENT_MISMATCH if a COI_BUFFER_STREAMING_TO_SOURCE buffer
|
||||
/// is not passed with COI_SINK_WRITE_ENTIRE access flag.
|
||||
///
|
||||
/// @return COI_RESOURCE_EXHAUSTED if could not create a version for TO_SOURCE
|
||||
/// streaming buffer. It can fail if enough memory is not available to
|
||||
/// register. This call will succeed eventually when the registered
|
||||
/// memory becomes available.
|
||||
///
|
||||
/// @return COI_RETRY if any input buffers, which are not pinned buffers,
|
||||
/// are still mapped when passed to the run function.
|
||||
///
|
||||
/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
|
||||
/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
|
||||
/// associated with the pipeline that was passed in.
|
||||
///
|
||||
/// @return COI_OUT_OF_RANGE if any of the access flags in
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -61,10 +61,16 @@ extern "C" {
|
||||
/// This is a special COIPROCESS handle that can be used to indicate that
|
||||
/// the source process should be used for an operation.
|
||||
///
|
||||
#define COI_PROCESS_SOURCE ((COIPROCESS)-1)
|
||||
#define COI_PROCESS_SOURCE ((COIPROCESS)-1)
|
||||
|
||||
#define COI_MAX_FILE_NAME_LENGTH 256
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/// This is a flag for COIProcessCreateFromMemory that indicates the passed in
|
||||
/// memory pointer is a fat binary file and should not have regular validation.
|
||||
///
|
||||
#define COI_FAT_BINARY ((uint64_t)-1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Create a remote process on the Sink and start executing its main()
|
||||
@ -74,14 +80,14 @@ extern "C" {
|
||||
///
|
||||
/// @param in_Engine
|
||||
/// [in] A handle retrieved via a call to COIEngineGetHandle() that
|
||||
/// indicates which device to create the process on. This is
|
||||
/// indicates which device to create the process on. This is
|
||||
/// necessary because there can be more than one device
|
||||
/// within the system.
|
||||
///
|
||||
/// @param in_pBinaryName
|
||||
/// [in] Pointer to a null-terminated string that contains the
|
||||
/// path to the program binary to be instantiated as a process on
|
||||
/// the sink device. The file name will be accessed via
|
||||
/// the sink device. The file name will be accessed via
|
||||
/// fopen and fread, as such, the passed in binary name must
|
||||
/// be locatable via these commands. Also, the file name (without
|
||||
/// directory information) will be used automatically by the system
|
||||
@ -121,8 +127,8 @@ extern "C" {
|
||||
/// @param in_InitialBufferSpace
|
||||
/// [in] The initial memory (in bytes) that will be pre-allocated at
|
||||
/// process creation for use by buffers associated with this remote
|
||||
/// process. In addition to allocating, Intel® Coprocessor Offload
|
||||
/// Infrastructure (Intel® COI) will also fault in the
|
||||
/// process. In addition to allocating, Intel(R) Coprocessor Offload
|
||||
/// Infrastructure (Intel(R) COI) will also fault in the
|
||||
/// memory during process creation. If the total size of the buffers
|
||||
/// in use by this process exceed this initial size, memory on the
|
||||
/// sink may continue to be allocated on demand, as needed, subject
|
||||
@ -186,7 +192,7 @@ COIProcessCreateFromFile(
|
||||
///
|
||||
/// @param in_Engine
|
||||
/// [in] A handle retrieved via a call to COIEngineGetHandle() that
|
||||
/// indicates which device to create the process on. This is
|
||||
/// indicates which device to create the process on. This is
|
||||
/// necessary because there can be more than one device
|
||||
/// within the system.
|
||||
///
|
||||
@ -236,8 +242,8 @@ COIProcessCreateFromFile(
|
||||
/// @param in_InitialBufferSpace
|
||||
/// [in] The initial memory (in bytes) that will be pre-allocated at
|
||||
/// process creation for use by buffers associated with this remote
|
||||
/// process. In addition to allocating, Intel® Coprocessor
|
||||
/// Offload Infrastructure (Intel® COI) will also fault in the
|
||||
/// process. In addition to allocating, Intel(R) Coprocessor
|
||||
/// Offload Infrastructure (Intel(R) COI) will also fault in the
|
||||
/// memory during process creation. If the total size of the buffers
|
||||
/// in use by this process exceed this initial size, memory on the
|
||||
/// sink may continue to be allocated on demand, as needed, subject
|
||||
@ -314,8 +320,8 @@ COIProcessCreateFromFile(
|
||||
/// @return COI_PROCESS_DIED if at some point during the loading of the remote
|
||||
/// process the remote process terminated abnormally.
|
||||
///
|
||||
/// @return COI_VERSION_MISMATCH if the version of Intel® Coprocessor
|
||||
/// Offload Infrastructure (Intel® COI) on the host is not
|
||||
/// @return COI_VERSION_MISMATCH if the version of Intel(R) Coprocessor
|
||||
/// Offload Infrastructure (Intel(R) COI) on the host is not
|
||||
/// compatible with the version on the device.
|
||||
///
|
||||
COIACCESSAPI
|
||||
@ -354,7 +360,7 @@ COIProcessCreateFromMemory(
|
||||
/// [in] If this flag is set to true, then the sink process will be
|
||||
/// forcibly terminated after the timeout has been reached. A timeout
|
||||
/// value of 0 will kill the process immediately, while a timeout of
|
||||
/// -1 is invalid. If the flag is set to false then a message will
|
||||
/// -1 is invalid. If the flag is set to false then a message will
|
||||
/// be sent to the sink process requesting a clean shutdown. A value
|
||||
/// of false along with a timeout of 0 does not send a shutdown
|
||||
/// message, instead simply polls the process to see if it is alive.
|
||||
@ -374,8 +380,8 @@ COIProcessCreateFromMemory(
|
||||
/// be 0 if the remote process exited cleanly. If the remote process
|
||||
/// exited abnormally this will contain the termination code given
|
||||
/// by the operating system of the remote process. This is an optional
|
||||
/// parameter and the caller may pass in NULL if they are not
|
||||
/// interested in the termination code. The output value of this
|
||||
/// parameter and the caller may pass in NULL if they are not
|
||||
/// interested in the termination code. The output value of this
|
||||
/// pointer is only meaningful if COI_SUCCESS is returned.
|
||||
///
|
||||
/// @return COI_SUCCESS if the process was destroyed.
|
||||
@ -390,8 +396,8 @@ COIProcessCreateFromMemory(
|
||||
///
|
||||
/// @return COI_TIME_OUT_REACHED if the sink process is still running after
|
||||
/// waiting in_WaitForMainTimeout milliseconds and in_ForceDestroy
|
||||
/// is false. This is true even if in_WaitForMainTimeout was 0.
|
||||
/// In this case, out_pProcessReturn and out_pTerminationCode
|
||||
/// is false. This is true even if in_WaitForMainTimeout was 0.
|
||||
/// In this case, out_pProcessReturn and out_pTerminationCode
|
||||
/// are undefined.
|
||||
///
|
||||
COIACCESSAPI
|
||||
@ -410,10 +416,10 @@ COIProcessDestroy(
|
||||
///
|
||||
/// Given a loaded native process, gets an array of function handles that can
|
||||
/// be used to schedule run functions on a pipeline associated with that
|
||||
/// process. See the documentation for COIPipelineRunFunction() for
|
||||
/// additional information. All functions that are to be retrieved in this
|
||||
/// fashion must have the define COINATIVEPROCESSEXPORT preceeding their type
|
||||
/// specification. For functions that are written in C++, either the entries
|
||||
/// process. See the documentation for COIPipelineRunFunction() for
|
||||
/// additional information. All functions that are to be retrieved in this
|
||||
/// fashion must have the define COINATIVEPROCESSEXPORT preceding their type
|
||||
/// specification. For functions that are written in C++, either the entries
|
||||
/// in in_ppFunctionNameArray must be pre-mangled, or the functions must be
|
||||
/// declared as extern "C". It is also necessary to link the binary containing
|
||||
/// the exported functions with the -rdynamic linker flag.
|
||||
@ -432,7 +438,7 @@ COIProcessDestroy(
|
||||
/// @param in_ppFunctionNameArray
|
||||
/// [in] Pointer to an array of null-terminated strings that match
|
||||
/// the name of functions present in the code of the binary
|
||||
/// previously loaded via COIProcessCreate(). Note that if a C++
|
||||
/// previously loaded via COIProcessCreate(). Note that if a C++
|
||||
/// function is used, then the string passed in must already be
|
||||
/// properly name-mangled, or extern "C" must be used for where
|
||||
/// the function is declared.
|
||||
@ -462,7 +468,7 @@ COIProcessDestroy(
|
||||
/// the null.
|
||||
///
|
||||
/// @warning This operation can take several milliseconds so it is recommended
|
||||
/// that it only be be done at load time.
|
||||
/// that it only be done at load time.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT
|
||||
@ -486,7 +492,7 @@ COIProcessGetFunctionHandles(
|
||||
#define COI_LOADLIBRARY_DEEPBIND 0x00008
|
||||
#define COI_LOADLIBRARY_NODELETE 0x01000
|
||||
|
||||
/// Flags to replicate the behavior of the original version of
|
||||
/// Flags to replicate the behaviour of the original version of
|
||||
/// COIProcessLoadLibrary* APIs.
|
||||
#define COI_LOADLIBRARY_V1_FLAGS (COI_LOADLIBRARY_GLOBAL|COI_LOADLIBRARY_NOW)
|
||||
|
||||
@ -796,13 +802,13 @@ COIProcessRegisterLibraries(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// The user can choose to have notifications for these internal events
|
||||
/// so that they can build their own profiling and performance layer on
|
||||
/// top of Intel® Coprocessor Offload Infrastructure (Intel® COI) .
|
||||
/// top of Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI).
|
||||
///
|
||||
typedef enum COI_NOTIFICATIONS
|
||||
{
|
||||
/// This event occurs when all explicit and implicit dependencies are
|
||||
/// satisified and Intel® Coprocessor Offload Infrastructure
|
||||
/// (Intel® COI) schedules the run function to begin execution.
|
||||
/// satisfied and Intel(R) Coprocessor Offload Infrastructure
|
||||
/// (Intel(R) COI) schedules the run function to begin execution.
|
||||
RUN_FUNCTION_READY = 0,
|
||||
|
||||
/// This event occurs just before the run function actually starts
|
||||
@ -835,20 +841,17 @@ typedef enum COI_NOTIFICATIONS
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// A callback that will be invoked to notify the user of an internal
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
|
||||
/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
|
||||
/// event. Note that the callback is registered per process so any of the
|
||||
/// above notifications that happen on the registered process will receive
|
||||
/// the callback.
|
||||
/// As with any callback mechanism it is up to the user to make sure that
|
||||
/// there are no possible deadlocks due to reentrancy (ie the callback being
|
||||
/// there are no possible deadlocks due to reentrancy (i.e. the callback being
|
||||
/// invoked in the same context that triggered the notification) and also
|
||||
/// that the callback does not slow down overall processing. If the user
|
||||
/// performs too much work within the callback it could delay further
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
|
||||
/// processing.
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
|
||||
/// promises to invoke the callback for an internal event prior to
|
||||
/// signaling the corresponding COIEvent. For example, if a user is waiting
|
||||
/// processing. The callback will be invoked prior to the signaling of
|
||||
/// the corresponding COIEvent. For example, if a user is waiting
|
||||
/// for a COIEvent associated with a run function completing they will
|
||||
/// receive the callback before the COIEvent is marked as signaled.
|
||||
///
|
||||
@ -865,11 +868,12 @@ typedef enum COI_NOTIFICATIONS
|
||||
///
|
||||
/// @param in_UserData
|
||||
/// [in] Opaque data that was provided when the callback was
|
||||
/// registered. Intel® Coprocessor Offload Infrastructure (Intel® COI) simply passes this back to the user so that
|
||||
/// registered. Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
|
||||
/// simply passes this back to the user so that
|
||||
/// they can interpret it as they choose.
|
||||
///
|
||||
typedef void (*COI_NOTIFICATION_CALLBACK)(
|
||||
COI_NOTIFICATIONS in_Type,
|
||||
COI_NOTIFICATIONS in_Type,
|
||||
COIPROCESS in_Process,
|
||||
COIEVENT in_Event,
|
||||
const void* in_UserData);
|
||||
@ -878,7 +882,7 @@ typedef void (*COI_NOTIFICATION_CALLBACK)(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Register a callback to be invoked to notify that an internal
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI) event
|
||||
/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI) event
|
||||
/// has occurred on the process that is associated with the callback.
|
||||
/// Note that it is legal to have more than one callback registered with
|
||||
/// a given process but those must all be unique callback pointers.
|
||||
@ -942,13 +946,13 @@ COIRESULT COIUnregisterNotificationCallback(
|
||||
///
|
||||
/// Set the user data that will be returned in the notification callback.
|
||||
/// This data is sticky and per thread so must be set prior to the
|
||||
/// Intel® Coprocessor Offload Infrastructure (Intel® COI) //
|
||||
/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
|
||||
/// operation being invoked. If you wish to set the context to be returned
|
||||
/// for a specific instance of a user event notification then the context
|
||||
/// must be set using this API prior to registering that user event with
|
||||
/// COIEventRegisterUserEvent.
|
||||
/// The value may be set prior to each Intel® Coprocessor Offload
|
||||
/// Infrastructure (Intel® COI) operation being called to
|
||||
/// The value may be set prior to each Intel(R) Coprocessor Offload
|
||||
/// Infrastructure (Intel(R) COI) operation being called to
|
||||
/// effectively have a unique UserData per callback.
|
||||
/// Setting this value overrides any value that was set when the
|
||||
/// callback was registered and will also override any future registrations
|
||||
@ -962,6 +966,266 @@ COIACCESSAPI
|
||||
void COINotificationCallbackSetContext(
|
||||
const void* in_UserData);
|
||||
|
||||
|
||||
/// @name COIProcessSetCacheSize flags.
|
||||
/// Flags are divided into two categories: _MODE_ and _ACTION_
|
||||
/// only one of each is valid with each call.
|
||||
/// _ACTIONS_ and _MODES_ should be bitwise OR'ed together, i.e. |
|
||||
//@{
|
||||
|
||||
/// Current set of DEFINED bits for _MODE_, can be used
|
||||
/// to clear or check fields, not useful to pass into APIs. Used internally.
|
||||
#define COI_CACHE_MODE_MASK 0x00000007
|
||||
|
||||
/// Flag to indicate to keep the previous mode of operation. By default
|
||||
/// this would be COI_CACHE_MODE_ONDEMAND_SYNC. As of this release
|
||||
/// this is the only mode available. This mode is valid with _ACTION_
|
||||
/// flags.
|
||||
#define COI_CACHE_MODE_NOCHANGE 0x00000001
|
||||
|
||||
/// Mode of operation that indicates that COI will allocate physical
|
||||
/// cache memory exactly when it is needed. COIPipeline execution in
|
||||
/// the given process will momentarily block until the allocation request
|
||||
/// is completed. This is and has been the default mode.
|
||||
#define COI_CACHE_MODE_ONDEMAND_SYNC 0x00000002
|
||||
|
||||
/// Not yet implemented. Future mode that will not stall a COIPipeline
|
||||
/// but prefer eviction/paging if possible as to immediately execute pipeline.
|
||||
/// At the same time, enqueue background requests to allocate extra cache
|
||||
/// so as to provide optimized behavior on subsequent runs.
|
||||
#define COI_CACHE_MODE_ONDEMAND_ASYNC 0x00000004
|
||||
|
||||
|
||||
/// Current set of DEFINED bits for _ACTION_ can be used
|
||||
/// to clear fields, but not useful to pass into API's. Used internally.
|
||||
#define COI_CACHE_ACTION_MASK 0x00070000
|
||||
|
||||
/// No action requested. With this flag specified
|
||||
/// it is recommended to NOT provide an out_pCompletion event,
|
||||
/// as with this flag, modes and values are immediately set.
|
||||
/// This is valid with _MODE_ flags.
|
||||
#define COI_CACHE_ACTION_NONE 0x00010000
|
||||
|
||||
/// This _ACTION_ flag will immediately attempt to increase the cache
|
||||
/// physical memory size to the current set pool size(s). Used to
|
||||
/// pre-allocate memory on remote processes, so that runfunction will
|
||||
/// enqueue faster. Also may prevent unused buffer eviction from process
|
||||
/// reducing overhead in trade for memory allocation cost.
|
||||
#define COI_CACHE_ACTION_GROW_NOW 0x00020000
|
||||
|
||||
/// Not yet implemented. Future _ACTION_ that will attempt to find unused
|
||||
/// allocated cache and free it, with the express goal of reducing the
|
||||
/// footprint on the remote process down to the value of the currently set
|
||||
/// pool size(s).
|
||||
#define COI_CACHE_ACTION_FREE_UNUSED 0x00040000
|
||||
|
||||
//@}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Set the minimum preferred COIProcess cache size. By default these values
|
||||
/// are set to 1GB. With the default size of 1GB, Intel(R) COI will only
|
||||
/// grow the cache with each new buffer up until the set limit is consumed,
|
||||
/// after which it will grow only as required to accommodate additional buffers.
|
||||
/// This means that after the cache preference is met, a process will act
|
||||
/// as conservative as possible for memory consumption.
|
||||
/// This API will allow users to adjust memory consumption aggressiveness.
|
||||
///
|
||||
/// Additional performance may be gained if the user sets a value higher than
|
||||
/// default. With high memory consumption user can choose to trade performance
|
||||
/// between memory allocation cost and transfer speeds to and from the
|
||||
/// remote process. A last consideration is that if buffers are used only
|
||||
/// once, it may be best to keep a small cache size, or ensure buffers are
|
||||
/// fully destroyed after their use.
|
||||
///
|
||||
/// Adjusting this value too high may result in out of resource conditions.
|
||||
///
|
||||
/// @param in_pProcess
|
||||
/// [in] Handle to uniquely identify the process for which the cache
|
||||
/// is to be adjusted.
|
||||
///
|
||||
/// @param in_HugePagePoolSize
|
||||
/// [in] The suggested size of the remote huge page cache in bytes.
|
||||
/// This value defaults to 1GB. A process will only allocate cache
|
||||
/// memory if the current cache is smaller than this limit, or it is
|
||||
/// absolutely necessary to fulfill a request, but preferring to
|
||||
/// re-use existing memory and paging unused buffers back to the host.
|
||||
/// Increasing this value will cause a process to
|
||||
/// aggressively allocate memory on demand up to this value, before
|
||||
/// evicting/paging memory from the remote process back to the host
|
||||
/// process.
|
||||
///
|
||||
/// The net result is that memory consumption is increased, but the
|
||||
/// user can 'cache' more buffers on the remote process. More time
|
||||
/// may be spent during first use of run functions as more memory
|
||||
/// may be allocated, but subsequent run functions will likely
|
||||
/// see an increase in queueing performance as the data is already
|
||||
/// valid in the remote process.
|
||||
///
|
||||
/// Users should tune this value for optimum performance balanced
|
||||
/// against memory consumption. This value does not affect 4K page
|
||||
/// cache. Please use in_SmallPagePoolSize for 4K pages.
|
||||
///
|
||||
/// @param in_HugeFlags
|
||||
/// [in] Flags to select mode or action for huge page cache. One _MODE_
|
||||
/// and one _ACTION_ flag are specified together. Default _MODE_ is
|
||||
/// COI_CACHE_MODE_ONDEMAND_SYNC. See all COI_CACHE_MODE_* and
|
||||
/// COI_CACHE_ACTION_* for other modes and actions. Default _ACTION_
|
||||
/// is COI_CACHE_ACTION_NONE.
|
||||
///
|
||||
/// @param in_SmallPagePoolSize
|
||||
/// [in] The suggested size of the remote 4K cache in bytes. Same
|
||||
/// function as in_HugePagePoolSize but affecting only 4K page cache.
|
||||
/// Defaults to 1GB.
|
||||
///
|
||||
/// @param in_SmallFlags
|
||||
/// [in] Flags to select mode or action for 4K page cache. One _MODE_
|
||||
/// and one _ACTION_ flag are specified together. Default _MODE_ is
|
||||
/// COI_CACHE_MODE_ONDEMAND_SYNC. See all COI_CACHE_MODE_* and
|
||||
/// COI_CACHE_ACTION_* for other modes and actions.
|
||||
///
|
||||
/// @param in_NumDependencies
|
||||
/// [in] The number of dependencies specified in the in_pDependencies
|
||||
/// array. This may be 0 if the caller does not want the call to
|
||||
/// wait for any events to be signaled.
|
||||
///
|
||||
/// @param in_pDependencies
|
||||
/// [in] An optional array of handles to previously created COIEVENT
|
||||
/// objects that this operation will wait for before starting.
|
||||
/// This allows the user to create dependencies between asynchronous
|
||||
/// calls and other operations such as run functions. The user may
|
||||
/// pass in NULL if they do not wish to wait for any dependencies.
|
||||
/// Only useful with _ACTION_ flags, otherwise there is no action
|
||||
/// to wait on. All _MODE_ changes happen immediately.
|
||||
///
|
||||
/// @param out_pCompletion
|
||||
/// [out] An optional pointer to a COIEVENT object that will be
|
||||
/// signaled when the operation is complete. The user may pass in
|
||||
/// NULL if the user wants the operation to block until completed.
|
||||
/// Note: This flag is not useful unless paired with a
|
||||
/// valid _ACTION_ flag.
|
||||
///
|
||||
/// @return COI_SUCCESS if the cache was successfully adjusted. In case of
|
||||
/// valid flags including _ACTION_, if out_pCompletion was specified,
|
||||
/// this does not indicate the operation succeeded, but rather only
|
||||
/// that it was successfully queued. For further information see
|
||||
/// COIEventWait() for getting return values.
|
||||
///
|
||||
/// @return COI_INVALID_HANDLE if the in_Process handle passed in was invalid.
|
||||
///
|
||||
/// @return COI_RESOURCE_EXHAUSTED if no more cache can be created,
|
||||
/// possibly, but not necessarily because a pool size was set too large
|
||||
/// and COI_CACHE_ACTION_GROW_NOW was specified.
|
||||
///
|
||||
/// @return COI_NOT_SUPPORTED if more than one _MODE_ or _ACTION_ was
|
||||
/// specified.
|
||||
///
|
||||
/// @return COI_NOT_SUPPORTED if an invalid _MODE_ or _ACTION_ was
|
||||
/// specified.
|
||||
///
|
||||
/// @return COI_ARGUMENT_MISMATCH if in_NumDependencies is non-zero while
|
||||
/// in_pDependencies was passed in as NULL.
|
||||
///
|
||||
/// @return COI_OUT_OF_RANGE if one of the pool sizes was invalid.
|
||||
///
|
||||
/// @return COI_PROCESS_DIED if at some point during the mode or action the
|
||||
/// remote process terminated abnormally. Possibly due to an out of
|
||||
/// memory condition.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT COIProcessSetCacheSize(
|
||||
const COIPROCESS in_Process,
|
||||
const uint64_t in_HugePagePoolSize,
|
||||
const uint32_t in_HugeFlags,
|
||||
const uint64_t in_SmallPagePoolSize,
|
||||
const uint32_t in_SmallFlags,
|
||||
uint32_t in_NumDependencies,
|
||||
const COIEVENT* in_pDependencies,
|
||||
COIEVENT* out_pCompletion);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// These are the different modes of operation that can be selected for
|
||||
/// the COI_DMA_MODE by the API COIProcessConfigureDMA. They allow the user
|
||||
/// to customize the DMA layer behaviour.
|
||||
///
|
||||
typedef enum COI_DMA_MODE
|
||||
{
|
||||
/// This mode will use one common logical channel for all DMA operations.
|
||||
/// Using this mode requires a channel count of one.
|
||||
COI_DMA_MODE_SINGLE = 0,
|
||||
|
||||
/// This mode will dedicate one logical channel for write operations
|
||||
/// and one logical channel for read operations. Requires a minimum of
|
||||
/// two logical channels, if more than two are used they are ignored
|
||||
/// in the current implementation.
|
||||
COI_DMA_MODE_READ_WRITE,
|
||||
|
||||
/// This mode is not yet implemented and is a placeholder for future
|
||||
/// releases. Check here for updates when it is implemented.
|
||||
/// Will require a minimum of two logical channels and a maximum
|
||||
/// of four channels.
|
||||
COI_DMA_MODE_ROUND_ROBIN,
|
||||
|
||||
/// Reserved for internal use.
|
||||
COI_DMA_RESERVED
|
||||
} COI_DMA_MODE;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Set the number and mode of the physical DMA channels that each COIProcess
|
||||
/// will establish during COIProcess creation.
|
||||
///
|
||||
/// By default the runtime will operate in COI_DMA_MODE_SINGLE mode.
|
||||
/// This API is intended to be called before COIProcessCreateFromFile() or
|
||||
/// COIProcessCreateFromMemory(). The values are stored globally and will
|
||||
/// be used by the creation API's. It is possible to call this API once
|
||||
/// before each new COIPROCESS is created and thus have each COIPROCESS
|
||||
/// run in different modes. It is not possible to change the mode on an
|
||||
/// existing COIPROCESS.
|
||||
///
|
||||
/// The larger number of logical connections requested will impose a
|
||||
/// performance penalty on the COIBUFFER creation API's, but unlock better
|
||||
/// parallelism for DMA transfers during runtime.
|
||||
///
|
||||
/// A maximum value of four (4) channels is available today, but current
|
||||
/// implementation will only take advantage of two DMA channels. The option
|
||||
/// is left available for programmers to use in case future implementations
|
||||
/// provide performance advantages.
|
||||
///
|
||||
/// It is important to note that, for some operations, enabling this
|
||||
/// option may increase parallelism and require the user to enforce
|
||||
/// explicit dependencies for operations on the same buffers. See documentation
|
||||
/// for COIBufferRead/Write/Copy operations for more details.
|
||||
///
|
||||
/// @param in_Channels
|
||||
/// [in] Number of logical connections to the remote COIProcess that
|
||||
/// the runtime will establish and use for DMA transfer requests.
|
||||
/// Will be ignored if in_Mode is set to COI_DMA_MODE_SINGLE.
|
||||
///
|
||||
/// @param in_Mode
|
||||
/// [in] The mode of operation in which the runtime will use the
|
||||
/// logical connections to the remote COIProcess.
|
||||
///
|
||||
/// @return COI_SUCCESS if the mode and number of DMA channels requested
|
||||
/// is valid. The actual creation of channels and modes is
|
||||
/// done during COIProcessCreateFromFile() and
|
||||
/// COIProcessCreateFromMemory().
|
||||
///
|
||||
/// @return COI_NOT_SUPPORTED if an invalid value for in_Channels or
|
||||
/// in_Mode was requested.
|
||||
///
|
||||
/// @return COI_ARGUMENT_MISMATCH if an invalid combination of in_Channels and
|
||||
/// in_Mode was requested. Example could be 2 channels with
|
||||
/// COI_DMA_MODE_SINGLE, or 1 channel with COI_DMA_MODE_READ_WRITE.
|
||||
///
|
||||
COIACCESSAPI
|
||||
COIRESULT COIProcessConfigureDMA(
|
||||
const uint64_t in_Channels,
|
||||
const COI_DMA_MODE in_Mode);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -459,11 +459,37 @@ extern MyoError myoiTargetSharedMallocTableRegister(
|
||||
* return -1;
|
||||
* }
|
||||
* @endcode
|
||||
* This intialization is required only in the client/host side
|
||||
* of the application. The server/card side executable should be
|
||||
* executed only on the second card in this case.
|
||||
* This initialization is required only in the client/host side
|
||||
* of the application. The server/card side executable should be
|
||||
* executed only on the second card in this case.
|
||||
*
|
||||
* @param userInitFunc Shared variables and remote funtions are
|
||||
* Another capability for the MyoiUserParams structure in MYO is specifying
|
||||
* a remote procedure call to be executed on the host or card, immediately after
|
||||
* myoiLibInit() completes. This capability is useful because some calls in
|
||||
* MYO return immediately, but do not actually complete until after the MYO
|
||||
* library is completely initialized on all peers. An example follows,
|
||||
* showing how to cause MYO to execute the registered function named
|
||||
* "PostMyoLibInitFunction" on the first card only:
|
||||
* @code
|
||||
* MyoiUserParams UserParas[64];
|
||||
* UserParas[0].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
|
||||
* UserParas[0].nodeid = 1;
|
||||
* SetPostLibInitFuncName(UserParas[1], "PostMyoLibInitFunction");
|
||||
* UserParas[2].type = MYOI_USERPARAMS_LAST_MSG;
|
||||
* if(MYO_SUCCESS != myoiLibInit(&UserParas, (void*)&myoiUserInit)) {
|
||||
* printf("Failed to initialize MYO runtime\n");
|
||||
* return -1;
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* Note, to cause PostMyoLibInitFunction to be executed on ALL cards,
|
||||
* specify: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES for the nodeid.
|
||||
* That is:
|
||||
* @code
|
||||
* UserParas[0].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
|
||||
* @endcode
|
||||
*
|
||||
* @param userInitFunc Shared variables and remote functions are
|
||||
* registered in this routine, which is called by the runtime during
|
||||
* library initialization.
|
||||
* @return
|
||||
@ -473,6 +499,22 @@ extern MyoError myoiTargetSharedMallocTableRegister(
|
||||
MYOACCESSAPI
|
||||
MyoError myoiLibInit(void * in_args, void *userInitFunc /*userInitFunc must be: MyoError (*userInitFunc)(void) */);
|
||||
|
||||
/** @fn extern MyoError myoiSupportsFeature(MyoFeatureType myoFeature)
|
||||
* @brief Supports runtime query to determine whether a feature is supported
|
||||
* by the myo that is installed on the system. This function is intended to
|
||||
* support client code to query the myo library to determine whether its set
|
||||
* of capabilities are able to support the client's needs.
|
||||
*
|
||||
* @param myoFeature The feature that is to be inquired about.
|
||||
* @return
|
||||
* MYO_SUCCESS if the feature is supported.
|
||||
* MYO_FEATURE_NOT_IMPLEMENTED if the feature is not supported.
|
||||
*
|
||||
* (For more information, please also see the declaration of the MyoFeatureType enum.)
|
||||
**/
|
||||
MYOACCESSAPI
|
||||
MyoError myoiSupportsFeature(MyoFeatureType myoFeature);
|
||||
|
||||
/** @fn void myoiLibFini()
|
||||
* @brief Finalize the MYO library, all resources held by the runtime are
|
||||
* released by this routine.
|
||||
@ -519,17 +561,56 @@ MyoError myoiSetMemConsistent(void *in_pAddr, size_t in_Size);
|
||||
EXTERN_C MYOACCESSAPI unsigned int myoiMyId; /* MYO_MYID if on accelerators */
|
||||
EXTERN_C MYOACCESSAPI volatile int myoiInitFlag;
|
||||
|
||||
|
||||
//! Structure of the array element that is passed to myoiLibInit() to initialize a subset of the available cards.
|
||||
typedef struct{
|
||||
//!type = MYOI_USERPARAMS_DEVID for each element in the array except the last element ; type = MYOI_USERPARAMS_LAST_MSG for the last element in the array.
|
||||
//! Structure of the array element that is passed to myoiLibInit() to initialize a subset of the available cards, or
|
||||
//! to specify a remote call function to be called after successful myo library initialization:
|
||||
typedef struct {
|
||||
//!type = MYOI_USERPARAMS_DEVID or MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC for each element in the array except
|
||||
//!the last element, type should be: MYOI_USERPARAMS_LAST_MSG.
|
||||
int type;
|
||||
//!nodeid refers to the card index.
|
||||
//! nodeid refers to the 'one-based' card index: 1 represents the first card (mic0), 2 represents the
|
||||
//! second card (mic1), 3 represents the third card (mic2), and so on.
|
||||
// NOTE: for type == MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, specifying MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES
|
||||
// for nodeid, will execute the named function, on each card in the system, mic0, mic1, mic2, .... micn.
|
||||
int nodeid;
|
||||
}MyoiUserParams;
|
||||
} MyoiUserParams;
|
||||
|
||||
#define MYOI_USERPARAMS_DEVID 1
|
||||
#define MYOI_USERPARAMS_LAST_MSG -1
|
||||
//!The following two types are dealt with entirely with just one MyoiUserParams structure:
|
||||
//!MYOI_USERPARAMS_DEVID maps node ids.
|
||||
#define MYOI_USERPARAMS_DEVID 1
|
||||
//!MYOI_USERPARAMS_LAST_MSG terminates the array of MyoiUserParams.
|
||||
#define MYOI_USERPARAMS_LAST_MSG -1
|
||||
|
||||
//!The following type requires setting the node id in a MyoiUserParams structure, and then following the struct
|
||||
//!with a MyoiUserParamsPostLibInit union:
|
||||
#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC 2
|
||||
//!nodeid can be one of the following macros, or a number >=1, corresponding to the card number (1 == mic0,
|
||||
//!2 == mic1, 3 == mic2, ....)
|
||||
//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES causes the function to be called on all
|
||||
//!cards:
|
||||
#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES 0
|
||||
//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE causes the function to be called on the
|
||||
//!host instead of the card:
|
||||
#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE -1
|
||||
|
||||
//!The postLibInit union contains two members that serve two different purposes:
|
||||
//!1. It can be used to stipulate the name of the function to be remotely called from host to card, on successful
|
||||
//!myo library initialization, (member postLibInitRemoveFuncName) using the type:
|
||||
//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC. OR
|
||||
//!2. It can be an actual function pointer (member name: postLibInitHostFuncAddress) that will be called on the host,
|
||||
//!on successful myo library initialization, using the type: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, with nodeid:
|
||||
//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE
|
||||
typedef union {
|
||||
const char *postLibInitRemoveFuncName;
|
||||
void (*postLibInitHostFuncAddress)(void);
|
||||
} MyoiUserParamsPostLibInit;
|
||||
|
||||
/* These are two macros to help get the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
|
||||
#define GetPostLibInitFuncName(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitRemoveFuncName
|
||||
#define GetPostLibInitFuncAddr(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitHostFuncAddress
|
||||
|
||||
/* These are two macros to help set the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
|
||||
#define SetPostLibInitFuncName(USERPARAMS,FUNC_NAME) GetPostLibInitFuncName(USERPARAMS) = FUNC_NAME
|
||||
#define SetPostLibInitFuncAddr(USERPARAMS,FUNC_ADDR) GetPostLibInitFuncAddr(USERPARAMS) = FUNC_ADDR
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -74,7 +74,8 @@ typedef enum {
|
||||
|
||||
MYO_ALREADY_EXISTS, /*!< Already Exists */
|
||||
|
||||
MYO_EOF, /*!< EOF */
|
||||
MYO_EOF, /*!< EOF */
|
||||
MYO_FEATURE_NOT_IMPLEMENTED = -1, /*!< Feature not implemented (see myoiSupportsFeature(). */
|
||||
} MyoError;
|
||||
|
||||
|
||||
@ -84,6 +85,40 @@ typedef enum {
|
||||
MYO_ARENA_OURS, /*!< Arena OURS Ownership */
|
||||
} MyoOwnershipType;
|
||||
|
||||
/*! MYO Features */
|
||||
typedef enum {
|
||||
/*!< EVERY VALUE that is less than MYO_FEATURE_BEGIN is not implemented. */
|
||||
MYO_FEATURE_BEGIN = 1, /*!< The first feature that is supported. */
|
||||
MYO_FEATURE_POST_LIB_INIT = MYO_FEATURE_BEGIN, /*!< Allows specifying a function to be executed immediately */
|
||||
/* after myoiLibInit() completes. This feature was implemented in version */
|
||||
/* 3.3 of MPSS. */
|
||||
/* MYO_FEATURE_FUTURE_CAPABILITY = 2, at some time in the future, as new features are added to MYO, new enumeration constants */
|
||||
/* will be added to the MyoFeatureType, and the value of the new enumeration constant will be greater */
|
||||
/* than the current value of MYO_FEATURE_LAST constant, and then the MYO_FEATURE_LAST constant too, */
|
||||
/* will be changed to be the value of the new enumeration constant. For example, in April, 2014, */
|
||||
/* the POST_LIB_INIT feature was implemented in version 3.3 of MPSS, and the MYO_FEATURE_BEGIN */
|
||||
/* enumeration constant is the same as the MYO_FEATURE_LAST enumeration constant, and both are equal */
|
||||
/* to 1. */
|
||||
/* Suppose in December, 2014, a new feature is added to the MYO library, for version 3.4 of MPSS. */
|
||||
/* Then, MYO_FEATURE_BEGIN enumeration constant will be still the value 1, but the MYO_FEATURE_LAST */
|
||||
/* enumeration constant will be set to 2. */
|
||||
/* At runtime, one client binary can determine if the MYO that is installed is capable of any */
|
||||
/* capability. For example, suppose a future client binary queries version 3.3 of MYO if it is */
|
||||
/* capable of some future feature. Version 3.3 of MYO will indicate that the feature is not */
|
||||
/* implemented to the client. But, conversely, suppose the future client queries version 3.4 of MYO */
|
||||
/* if it is capable of some future feature. Version 3.4 of MYO will indicate that the feature is */
|
||||
/* supported. */
|
||||
/* */
|
||||
/* Date: | MYO_FEATURE_BEGIN: | MYO_FEATURE_LAST: | MPSS VERSION: | myoiSupportsFeature(MYO_FEATURE_FUTURE_CAPABILITY) */
|
||||
/* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
|
||||
/* April, 2014 | 1 | 1 | 3.3 | MYO_FEATURE_NOT_IMPLEMENTED */
|
||||
/* December, 2014 | 1 | 2 | 3.4 | MYO_SUCCESS */
|
||||
/* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
|
||||
MYO_FEATURE_LAST = MYO_FEATURE_POST_LIB_INIT, /*!< The last feature that is supported. */
|
||||
/*!< EVERY VALUE that is greater than MYO_FEATURE_LAST is not implemented. */
|
||||
/*!< EVERY VALUE that is greater than or equal to MYO_FEATURE_BEGIN AND less than or equal to MYO_FEATURE_LAST is implemented. */
|
||||
} MyoFeatureType; /* (For more information, please also see myoiSupportsFeature() function declaration.) */
|
||||
|
||||
/*************************************************************
|
||||
* define the property of MYO Arena
|
||||
***********************************************************/
|
||||
|
@ -35,7 +35,6 @@ ACLOCAL_AMFLAGS = -I ../.. -I ../../config
|
||||
build_dir = $(top_builddir)
|
||||
source_dir = $(top_srcdir)
|
||||
coi_inc_dir = $(top_srcdir)/../include/coi
|
||||
myo_inc_dir = $(top_srcdir)/../include/myo
|
||||
include_src_dir = $(top_srcdir)/../../include
|
||||
libgomp_src_dir = $(top_srcdir)/../../libgomp
|
||||
libgomp_dir = $(build_dir)/../../libgomp
|
||||
@ -53,12 +52,12 @@ target_install_dir = $(accel_search_dir)/lib/gcc/$(accel_target)/$(gcc_version)$
|
||||
if PLUGIN_HOST
|
||||
toolexeclib_LTLIBRARIES = libgomp-plugin-intelmic.la
|
||||
libgomp_plugin_intelmic_la_SOURCES = libgomp-plugin-intelmic.cpp
|
||||
libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
|
||||
libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
|
||||
libgomp_plugin_intelmic_la_LDFLAGS = -L$(liboffload_dir)/.libs -loffloadmic_host -version-info 1:0:0
|
||||
else # PLUGIN_TARGET
|
||||
plugin_includedir = $(libsubincludedir)
|
||||
plugin_include_HEADERS = main_target_image.h
|
||||
AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
|
||||
AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
|
||||
AM_CXXFLAGS = $(CXXFLAGS)
|
||||
AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
|
||||
endif
|
||||
|
@ -305,7 +305,6 @@ ACLOCAL_AMFLAGS = -I ../.. -I ../../config
|
||||
build_dir = $(top_builddir)
|
||||
source_dir = $(top_srcdir)
|
||||
coi_inc_dir = $(top_srcdir)/../include/coi
|
||||
myo_inc_dir = $(top_srcdir)/../include/myo
|
||||
include_src_dir = $(top_srcdir)/../../include
|
||||
libgomp_src_dir = $(top_srcdir)/../../libgomp
|
||||
libgomp_dir = $(build_dir)/../../libgomp
|
||||
@ -321,11 +320,11 @@ target_build_dir = $(accel_search_dir)/$(accel_target)$(MULTISUBDIR)/liboffloadm
|
||||
target_install_dir = $(accel_search_dir)/lib/gcc/$(accel_target)/$(gcc_version)$(MULTISUBDIR)
|
||||
@PLUGIN_HOST_TRUE@toolexeclib_LTLIBRARIES = libgomp-plugin-intelmic.la
|
||||
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_SOURCES = libgomp-plugin-intelmic.cpp
|
||||
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
|
||||
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
|
||||
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_LDFLAGS = -L$(liboffload_dir)/.libs -loffloadmic_host -version-info 1:0:0
|
||||
@PLUGIN_HOST_FALSE@plugin_includedir = $(libsubincludedir)
|
||||
@PLUGIN_HOST_FALSE@plugin_include_HEADERS = main_target_image.h
|
||||
@PLUGIN_HOST_FALSE@AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
|
||||
@PLUGIN_HOST_FALSE@AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
|
||||
@PLUGIN_HOST_FALSE@AM_CXXFLAGS = $(CXXFLAGS)
|
||||
@PLUGIN_HOST_FALSE@AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -34,7 +34,7 @@
|
||||
// 1. allocate element of CeanReadRanges type
|
||||
// 2. initialize it for reading consecutive contiguous ranges
|
||||
// described by "ap" argument
|
||||
CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
|
||||
CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
|
||||
{
|
||||
CeanReadRanges * res;
|
||||
|
||||
@ -57,6 +57,8 @@ CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
|
||||
(ap->rank - rank) * sizeof(CeanReadDim));
|
||||
if (res == NULL)
|
||||
LIBOFFLOAD_ERROR(c_malloc);
|
||||
|
||||
res->arr_desc = const_cast<Arr_Desc*>(ap);
|
||||
res->current_number = 0;
|
||||
res->range_size = length;
|
||||
res->last_noncont_ind = rank;
|
||||
@ -82,7 +84,7 @@ CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
|
||||
return res;
|
||||
}
|
||||
|
||||
// check if ranges described by 1 argument could be transfered into ranges
|
||||
// check if ranges described by 1 argument could be transferred into ranges
|
||||
// described by 2-nd one
|
||||
bool cean_ranges_match(
|
||||
CeanReadRanges * read_rng1,
|
||||
@ -118,7 +120,7 @@ bool get_next_range(
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_arr_desc_contiguous(const arr_desc *ap)
|
||||
bool is_arr_desc_contiguous(const Arr_Desc *ap)
|
||||
{
|
||||
int64_t rank = ap->rank - 1;
|
||||
int64_t length = ap->dim[rank].size;
|
||||
@ -146,14 +148,22 @@ int64_t cean_get_transf_size(CeanReadRanges * read_rng)
|
||||
}
|
||||
|
||||
static uint64_t last_left, last_right;
|
||||
typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize);
|
||||
|
||||
typedef void (*fpp)(
|
||||
const char *spaces,
|
||||
uint64_t low,
|
||||
uint64_t high,
|
||||
int esize,
|
||||
bool print_values
|
||||
);
|
||||
|
||||
static void generate_one_range(
|
||||
const char *spaces,
|
||||
uint64_t lrange,
|
||||
uint64_t rrange,
|
||||
fpp fp,
|
||||
int esize
|
||||
int esize,
|
||||
bool print_values
|
||||
)
|
||||
{
|
||||
OFFLOAD_TRACE(3,
|
||||
@ -168,20 +178,35 @@ static void generate_one_range(
|
||||
// Extend previous range, don't print
|
||||
}
|
||||
else {
|
||||
(*fp)(spaces, last_left, last_right, esize);
|
||||
(*fp)(spaces, last_left, last_right, esize, print_values);
|
||||
last_left = lrange;
|
||||
}
|
||||
}
|
||||
last_right = rrange;
|
||||
}
|
||||
|
||||
static bool element_is_contiguous(
|
||||
uint64_t rank,
|
||||
const struct Dim_Desc *ddp
|
||||
)
|
||||
{
|
||||
if (rank == 1) {
|
||||
return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1);
|
||||
}
|
||||
else {
|
||||
return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
|
||||
element_is_contiguous(rank-1, ddp++));
|
||||
}
|
||||
}
|
||||
|
||||
static void generate_mem_ranges_one_rank(
|
||||
const char *spaces,
|
||||
uint64_t base,
|
||||
uint64_t rank,
|
||||
const struct dim_desc *ddp,
|
||||
const struct Dim_Desc *ddp,
|
||||
fpp fp,
|
||||
int esize
|
||||
int esize,
|
||||
bool print_values
|
||||
)
|
||||
{
|
||||
uint64_t lindex = ddp->lindex;
|
||||
@ -194,35 +219,40 @@ static void generate_mem_ranges_one_rank(
|
||||
"generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
|
||||
"lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
|
||||
spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
|
||||
if (rank == 1) {
|
||||
|
||||
if (element_is_contiguous(rank, ddp)) {
|
||||
uint64_t lrange, rrange;
|
||||
if (stride == 1) {
|
||||
lrange = base + (lower-lindex)*size;
|
||||
rrange = lrange + (upper-lower+1)*size - 1;
|
||||
generate_one_range(spaces, lrange, rrange, fp, esize);
|
||||
lrange = base + (lower-lindex)*size;
|
||||
rrange = lrange + (upper-lower+1)*size - 1;
|
||||
generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
|
||||
}
|
||||
else {
|
||||
if (rank == 1) {
|
||||
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
|
||||
uint64_t lrange, rrange;
|
||||
lrange = base + i*size;
|
||||
rrange = lrange + size - 1;
|
||||
generate_one_range(spaces, lrange, rrange,
|
||||
fp, esize, print_values);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
|
||||
lrange = base + i*size;
|
||||
rrange = lrange + size - 1;
|
||||
generate_one_range(spaces, lrange, rrange, fp, esize);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
|
||||
generate_mem_ranges_one_rank(
|
||||
spaces, base+i*size, rank-1, ddp+1, fp, esize);
|
||||
generate_mem_ranges_one_rank(
|
||||
spaces, base+i*size, rank-1, ddp+1,
|
||||
fp, esize, print_values);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void generate_mem_ranges(
|
||||
const char *spaces,
|
||||
const arr_desc *adp,
|
||||
const Arr_Desc *adp,
|
||||
bool deref,
|
||||
fpp fp
|
||||
fpp fp,
|
||||
bool print_values
|
||||
)
|
||||
{
|
||||
uint64_t esize;
|
||||
@ -241,13 +271,13 @@ static void generate_mem_ranges(
|
||||
// For c_cean_var the base addr is the address of the data
|
||||
// For c_cean_var_ptr the base addr is dereferenced to get to the data
|
||||
spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
|
||||
adp->rank, &adp->dim[0], fp, esize);
|
||||
(*fp)(spaces, last_left, last_right, esize);
|
||||
adp->rank, &adp->dim[0], fp, esize, print_values);
|
||||
(*fp)(spaces, last_left, last_right, esize, print_values);
|
||||
}
|
||||
|
||||
// returns offset and length of the data to be transferred
|
||||
void __arr_data_offset_and_length(
|
||||
const arr_desc *adp,
|
||||
const Arr_Desc *adp,
|
||||
int64_t &offset,
|
||||
int64_t &length
|
||||
)
|
||||
@ -284,11 +314,12 @@ void __arr_data_offset_and_length(
|
||||
|
||||
#if OFFLOAD_DEBUG > 0
|
||||
|
||||
void print_range(
|
||||
static void print_range(
|
||||
const char *spaces,
|
||||
uint64_t low,
|
||||
uint64_t high,
|
||||
int esize
|
||||
int esize,
|
||||
bool print_values
|
||||
)
|
||||
{
|
||||
char buffer[1024];
|
||||
@ -297,7 +328,7 @@ void print_range(
|
||||
OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
|
||||
spaces, (void*)low, (void*)high, esize);
|
||||
|
||||
if (console_enabled < 4) {
|
||||
if (console_enabled < 4 || !print_values) {
|
||||
return;
|
||||
}
|
||||
OFFLOAD_TRACE(4, "%s values:\n", spaces);
|
||||
@ -340,8 +371,9 @@ void print_range(
|
||||
void __arr_desc_dump(
|
||||
const char *spaces,
|
||||
const char *name,
|
||||
const arr_desc *adp,
|
||||
bool deref
|
||||
const Arr_Desc *adp,
|
||||
bool deref,
|
||||
bool print_values
|
||||
)
|
||||
{
|
||||
OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
|
||||
@ -360,7 +392,7 @@ void __arr_desc_dump(
|
||||
}
|
||||
// For c_cean_var the base addr is the address of the data
|
||||
// For c_cean_var_ptr the base addr is dereferenced to get to the data
|
||||
generate_mem_ranges(spaces, adp, deref, &print_range);
|
||||
generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
|
||||
}
|
||||
}
|
||||
#endif // OFFLOAD_DEBUG
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -32,9 +32,10 @@
|
||||
#define CEAN_UTIL_H_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
#include "offload_util.h"
|
||||
|
||||
// CEAN expression representation
|
||||
struct dim_desc {
|
||||
struct Dim_Desc {
|
||||
int64_t size; // Length of data type
|
||||
int64_t lindex; // Lower index
|
||||
int64_t lower; // Lower section bound
|
||||
@ -42,10 +43,10 @@ struct dim_desc {
|
||||
int64_t stride; // Stride
|
||||
};
|
||||
|
||||
struct arr_desc {
|
||||
struct Arr_Desc {
|
||||
int64_t base; // Base address
|
||||
int64_t rank; // Rank of array
|
||||
dim_desc dim[1];
|
||||
Dim_Desc dim[1];
|
||||
};
|
||||
|
||||
struct CeanReadDim {
|
||||
@ -55,6 +56,7 @@ struct CeanReadDim {
|
||||
};
|
||||
|
||||
struct CeanReadRanges {
|
||||
Arr_Desc* arr_desc;
|
||||
void * ptr;
|
||||
int64_t current_number; // the number of ranges read
|
||||
int64_t range_max_number; // number of contiguous ranges
|
||||
@ -66,23 +68,23 @@ struct CeanReadRanges {
|
||||
|
||||
// array descriptor length
|
||||
#define __arr_desc_length(rank) \
|
||||
(sizeof(int64_t) + sizeof(dim_desc) * (rank))
|
||||
(sizeof(int64_t) + sizeof(Dim_Desc) * (rank))
|
||||
|
||||
// returns offset and length of the data to be transferred
|
||||
void __arr_data_offset_and_length(const arr_desc *adp,
|
||||
DLL_LOCAL void __arr_data_offset_and_length(const Arr_Desc *adp,
|
||||
int64_t &offset,
|
||||
int64_t &length);
|
||||
|
||||
// determine whether the data array described by the argument is contiguous
|
||||
bool is_arr_desc_contiguous(const arr_desc *ap);
|
||||
DLL_LOCAL bool is_arr_desc_contiguous(const Arr_Desc *ap);
|
||||
|
||||
// allocate element of CeanReadRanges type initialized
|
||||
// to read consecutive contiguous ranges described by "ap" argument
|
||||
CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap);
|
||||
DLL_LOCAL CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap);
|
||||
|
||||
// check if ranges described by 1 argument could be transfered into ranges
|
||||
// check if ranges described by 1 argument could be transferred into ranges
|
||||
// described by 2-nd one
|
||||
bool cean_ranges_match(
|
||||
DLL_LOCAL bool cean_ranges_match(
|
||||
CeanReadRanges * read_rng1,
|
||||
CeanReadRanges * read_rng2
|
||||
);
|
||||
@ -90,27 +92,27 @@ bool cean_ranges_match(
|
||||
// first argument - returned value by call to init_read_ranges_arr_desc.
|
||||
// returns true if offset and length of next range is set successfully.
|
||||
// returns false when there are no more ranges.
|
||||
bool get_next_range(
|
||||
DLL_LOCAL bool get_next_range(
|
||||
CeanReadRanges * read_rng,
|
||||
int64_t *offset
|
||||
);
|
||||
|
||||
// returns number of transfered bytes
|
||||
int64_t cean_get_transf_size(CeanReadRanges * read_rng);
|
||||
// returns number of transferred bytes
|
||||
DLL_LOCAL int64_t cean_get_transf_size(CeanReadRanges * read_rng);
|
||||
|
||||
#if OFFLOAD_DEBUG > 0
|
||||
// prints array descriptor contents to stderr
|
||||
void __arr_desc_dump(
|
||||
DLL_LOCAL void __arr_desc_dump(
|
||||
const char *spaces,
|
||||
const char *name,
|
||||
const arr_desc *adp,
|
||||
bool dereference);
|
||||
const Arr_Desc *adp,
|
||||
bool dereference,
|
||||
bool print_values);
|
||||
#define ARRAY_DESC_DUMP(spaces, name, adp, dereference, print_values) \
|
||||
if (console_enabled >= 2) \
|
||||
__arr_desc_dump(spaces, name, adp, dereference, print_values);
|
||||
#else
|
||||
#define __arr_desc_dump(
|
||||
spaces,
|
||||
name,
|
||||
adp,
|
||||
dereference)
|
||||
#define ARRAY_DESC_DUMP(spaces, name, adp, dereference, print_values)
|
||||
#endif // OFFLOAD_DEBUG
|
||||
|
||||
#endif // CEAN_UTIL_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -50,6 +50,13 @@ COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*,
|
||||
const char**, uint8_t, const char*,
|
||||
uint64_t, const char*, const char*,
|
||||
uint64_t, COIPROCESS*);
|
||||
COIRESULT (*ProcessCreateFromFile)(COIENGINE, const char*,
|
||||
int, const char**, uint8_t,
|
||||
const char**, uint8_t, const char*,
|
||||
uint64_t, const char*,COIPROCESS*);
|
||||
COIRESULT (*ProcessSetCacheSize)(COIPROCESS, uint64_t, uint32_t,
|
||||
uint64_t, uint32_t, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*);
|
||||
COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**,
|
||||
COIFUNCTION*);
|
||||
@ -57,6 +64,8 @@ COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t,
|
||||
const char*, const char*,
|
||||
const char*, uint64_t, uint32_t,
|
||||
COILIBRARY*);
|
||||
COIRESULT (*ProcessUnloadLibrary)(COIPROCESS,
|
||||
COILIBRARY);
|
||||
COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*,
|
||||
const char**, const uint64_t*);
|
||||
|
||||
@ -80,6 +89,13 @@ COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
|
||||
COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE,
|
||||
uint32_t, const COIEVENT*, COIEVENT*);
|
||||
COIRESULT (*BufferReadMultiD)(COIBUFFER, uint64_t,
|
||||
void *, void *, COI_COPY_TYPE,
|
||||
uint32_t, const COIEVENT*, COIEVENT*);
|
||||
COIRESULT (*BufferWriteMultiD)(COIBUFFER, const COIPROCESS,
|
||||
uint64_t, void *, void *,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
|
||||
|
||||
COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
|
||||
COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
|
||||
@ -92,6 +108,20 @@ COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*,
|
||||
|
||||
uint64_t (*PerfGetCycleFrequency)(void);
|
||||
|
||||
COIRESULT (*PipelineClearCPUMask) (COI_CPU_MASK);
|
||||
|
||||
COIRESULT (*PipelineSetCPUMask) (COIPROCESS, uint32_t,
|
||||
uint8_t, COI_CPU_MASK);
|
||||
COIRESULT (*EngineGetInfo)(COIENGINE, uint32_t, COI_ENGINE_INFO*);
|
||||
|
||||
COIRESULT (*EventRegisterCallback)(
|
||||
const COIEVENT,
|
||||
void (*)(COIEVENT, const COIRESULT, const void*),
|
||||
const void*,
|
||||
const uint64_t);
|
||||
|
||||
COIRESULT (*ProcessConfigureDMA)(const uint64_t, const int);
|
||||
|
||||
bool init(void)
|
||||
{
|
||||
#ifndef TARGET_WINNT
|
||||
@ -140,6 +170,32 @@ bool init(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
ProcessSetCacheSize =
|
||||
(COIRESULT (*)(COIPROCESS, uint64_t, uint32_t,
|
||||
uint64_t, uint32_t, uint32_t,
|
||||
const COIEVENT*, COIEVENT*))
|
||||
DL_sym(lib_handle, "COIProcessSetCacheSize", COI_VERSION1);
|
||||
if (ProcessSetCacheSize == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIProcessSetCacheSize");
|
||||
#if 0 // for now disable as ProcessSetCacheSize is not available on < MPSS 3.4
|
||||
fini();
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
ProcessCreateFromFile =
|
||||
(COIRESULT (*)(COIENGINE, const char*, int, const char**, uint8_t,
|
||||
const char**, uint8_t, const char*, uint64_t,
|
||||
const char*, COIPROCESS*))
|
||||
DL_sym(lib_handle, "COIProcessCreateFromFile", COI_VERSION1);
|
||||
if (ProcessCreateFromFile == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIProcessCreateFromFile");
|
||||
fini();
|
||||
return false;
|
||||
}
|
||||
|
||||
ProcessDestroy =
|
||||
(COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*,
|
||||
uint32_t*))
|
||||
@ -173,6 +229,17 @@ bool init(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
ProcessUnloadLibrary =
|
||||
(COIRESULT (*)(COIPROCESS,
|
||||
COILIBRARY))
|
||||
DL_sym(lib_handle, "COIProcessUnloadLibrary", COI_VERSION1);
|
||||
if (ProcessUnloadLibrary == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIProcessUnloadLibrary");
|
||||
fini();
|
||||
return false;
|
||||
}
|
||||
|
||||
ProcessRegisterLibraries =
|
||||
(COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**,
|
||||
const uint64_t*))
|
||||
@ -295,6 +362,22 @@ bool init(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
BufferReadMultiD =
|
||||
(COIRESULT (*)(COIBUFFER, uint64_t,
|
||||
void *, void *, COI_COPY_TYPE,
|
||||
uint32_t, const COIEVENT*, COIEVENT*))
|
||||
DL_sym(lib_handle, "COIBufferReadMultiD", COI_VERSION1);
|
||||
// We accept that coi library has no COIBufferReadMultiD routine.
|
||||
// So there is no check for zero value
|
||||
|
||||
BufferWriteMultiD =
|
||||
(COIRESULT (*)(COIBUFFER, const COIPROCESS,
|
||||
uint64_t, void *, void *,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*))
|
||||
DL_sym(lib_handle, "COIBufferWriteMultiD", COI_VERSION1);
|
||||
// We accept that coi library has no COIBufferWriteMultiD routine.
|
||||
// So there is no check for zero value
|
||||
|
||||
BufferCopy =
|
||||
(COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*,
|
||||
@ -350,6 +433,47 @@ bool init(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
PipelineClearCPUMask =
|
||||
(COIRESULT (*)(COI_CPU_MASK))
|
||||
DL_sym(lib_handle, "COIPipelineClearCPUMask", COI_VERSION1);
|
||||
if (PipelineClearCPUMask == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIPipelineClearCPUMask");
|
||||
fini();
|
||||
return false;
|
||||
}
|
||||
|
||||
PipelineSetCPUMask =
|
||||
(COIRESULT (*)(COIPROCESS, uint32_t,uint8_t, COI_CPU_MASK))
|
||||
DL_sym(lib_handle, "COIPipelineSetCPUMask", COI_VERSION1);
|
||||
if (PipelineSetCPUMask == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIPipelineSetCPUMask");
|
||||
fini();
|
||||
return false;
|
||||
}
|
||||
|
||||
EngineGetInfo =
|
||||
(COIRESULT (*)(COIENGINE, uint32_t, COI_ENGINE_INFO*))
|
||||
DL_sym(lib_handle, "COIEngineGetInfo", COI_VERSION1);
|
||||
if (COIEngineGetInfo == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
|
||||
"COIEngineGetInfo");
|
||||
fini();
|
||||
return false;
|
||||
}
|
||||
|
||||
EventRegisterCallback =
|
||||
(COIRESULT (*)(COIEVENT,
|
||||
void (*)(COIEVENT, const COIRESULT, const void*),
|
||||
const void*,
|
||||
const uint64_t))
|
||||
DL_sym(lib_handle, "COIEventRegisterCallback", COI_VERSION1);
|
||||
|
||||
ProcessConfigureDMA =
|
||||
(COIRESULT (*)(const uint64_t, const int))
|
||||
DL_sym(lib_handle, "COIProcessConfigureDMA", COI_VERSION1);
|
||||
|
||||
is_available = true;
|
||||
|
||||
return true;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -28,7 +28,7 @@
|
||||
*/
|
||||
|
||||
|
||||
// The interface betwen offload library and the COI API on the host
|
||||
// The interface between offload library and the COI API on the host
|
||||
|
||||
#ifndef COI_CLIENT_H_INCLUDED
|
||||
#define COI_CLIENT_H_INCLUDED
|
||||
@ -54,16 +54,16 @@
|
||||
// COI library interface
|
||||
namespace COI {
|
||||
|
||||
extern bool init(void);
|
||||
extern void fini(void);
|
||||
DLL_LOCAL extern bool init(void);
|
||||
DLL_LOCAL extern void fini(void);
|
||||
|
||||
extern bool is_available;
|
||||
DLL_LOCAL extern bool is_available;
|
||||
|
||||
// pointers to functions from COI library
|
||||
extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
|
||||
extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
|
||||
DLL_LOCAL extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
|
||||
DLL_LOCAL extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
|
||||
|
||||
extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
|
||||
DLL_LOCAL extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
|
||||
const void*, uint64_t, int,
|
||||
const char**, uint8_t,
|
||||
const char**, uint8_t,
|
||||
@ -71,12 +71,23 @@ extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
|
||||
const char*,
|
||||
const char*, uint64_t,
|
||||
COIPROCESS*);
|
||||
extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
|
||||
DLL_LOCAL extern COIRESULT (*ProcessCreateFromFile)(COIENGINE, const char*, int,
|
||||
const char**, uint8_t,
|
||||
const char**,
|
||||
uint8_t,
|
||||
const char*,
|
||||
uint64_t,
|
||||
const char*,
|
||||
COIPROCESS*);
|
||||
DLL_LOCAL extern COIRESULT (*ProcessSetCacheSize)(COIPROCESS, uint64_t, uint32_t,
|
||||
uint64_t, uint32_t, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
DLL_LOCAL extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
|
||||
int8_t*, uint32_t*);
|
||||
extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
|
||||
DLL_LOCAL extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
|
||||
const char**,
|
||||
COIFUNCTION*);
|
||||
extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
|
||||
DLL_LOCAL extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
|
||||
const void*,
|
||||
uint64_t,
|
||||
const char*,
|
||||
@ -85,54 +96,80 @@ extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
|
||||
uint64_t,
|
||||
uint32_t,
|
||||
COILIBRARY*);
|
||||
extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
|
||||
|
||||
DLL_LOCAL extern COIRESULT (*ProcessUnloadLibrary)(COIPROCESS,
|
||||
COILIBRARY);
|
||||
|
||||
DLL_LOCAL extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
|
||||
const void**,
|
||||
const uint64_t*,
|
||||
const char**,
|
||||
const uint64_t*);
|
||||
|
||||
extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
|
||||
DLL_LOCAL extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
|
||||
COIPIPELINE*);
|
||||
extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
|
||||
extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
|
||||
DLL_LOCAL extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
|
||||
DLL_LOCAL extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
|
||||
uint32_t, const COIBUFFER*,
|
||||
const COI_ACCESS_FLAGS*,
|
||||
uint32_t, const COIEVENT*,
|
||||
const void*, uint16_t, void*,
|
||||
uint16_t, COIEVENT*);
|
||||
|
||||
extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
|
||||
DLL_LOCAL extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
|
||||
const void*, uint32_t,
|
||||
const COIPROCESS*, COIBUFFER*);
|
||||
extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
|
||||
DLL_LOCAL extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
|
||||
uint32_t, void*,
|
||||
uint32_t, const COIPROCESS*,
|
||||
COIBUFFER*);
|
||||
extern COIRESULT (*BufferDestroy)(COIBUFFER);
|
||||
extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
|
||||
DLL_LOCAL extern COIRESULT (*BufferDestroy)(COIBUFFER);
|
||||
DLL_LOCAL extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
|
||||
COI_MAP_TYPE, uint32_t, const COIEVENT*,
|
||||
COIEVENT*, COIMAPINSTANCE*, void**);
|
||||
extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
|
||||
DLL_LOCAL extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
|
||||
DLL_LOCAL extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
|
||||
uint64_t, COI_COPY_TYPE, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
|
||||
DLL_LOCAL extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
|
||||
COI_COPY_TYPE, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
|
||||
DLL_LOCAL extern COIRESULT (*BufferReadMultiD)(COIBUFFER, uint64_t,
|
||||
void *, void *, COI_COPY_TYPE,
|
||||
uint32_t, const COIEVENT*, COIEVENT*);
|
||||
DLL_LOCAL extern COIRESULT (*BufferWriteMultiD)(COIBUFFER, const COIPROCESS,
|
||||
uint64_t, void *, void *,
|
||||
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
|
||||
|
||||
DLL_LOCAL extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
|
||||
uint64_t, COI_COPY_TYPE, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
|
||||
extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
|
||||
DLL_LOCAL extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
|
||||
DLL_LOCAL extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
|
||||
COI_BUFFER_MOVE_FLAG, uint32_t,
|
||||
const COIEVENT*, COIEVENT*);
|
||||
|
||||
extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
|
||||
DLL_LOCAL extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
|
||||
uint8_t, uint32_t*, uint32_t*);
|
||||
|
||||
extern uint64_t (*PerfGetCycleFrequency)(void);
|
||||
DLL_LOCAL extern uint64_t (*PerfGetCycleFrequency)(void);
|
||||
|
||||
DLL_LOCAL extern COIRESULT (*ProcessConfigureDMA)(const uint64_t, const int);
|
||||
|
||||
extern COIRESULT (*PipelineClearCPUMask)(COI_CPU_MASK);
|
||||
|
||||
extern COIRESULT (*PipelineSetCPUMask)(COIPROCESS, uint32_t,
|
||||
uint8_t, COI_CPU_MASK);
|
||||
extern COIRESULT (*EngineGetInfo)(COIENGINE, uint32_t, COI_ENGINE_INFO*);
|
||||
|
||||
extern COIRESULT (*EventRegisterCallback)(
|
||||
const COIEVENT,
|
||||
void (*)(COIEVENT, const COIRESULT, const void*),
|
||||
const void*,
|
||||
const uint64_t);
|
||||
|
||||
const int DMA_MODE_READ_WRITE = 1;
|
||||
} // namespace COI
|
||||
|
||||
#endif // COI_CLIENT_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -38,6 +38,22 @@
|
||||
#include "../offload_myo_target.h" // for __offload_myoLibInit/Fini
|
||||
#endif // MYO_SUPPORT
|
||||
|
||||
#if !defined(CPU_COUNT)
|
||||
// if CPU_COUNT is not defined count number of CPUs manually
|
||||
static
|
||||
int my_cpu_count(cpu_set_t const *cpu_set)
|
||||
{
|
||||
int res = 0;
|
||||
for (int i = 0; i < sizeof(cpu_set_t) / sizeof(__cpu_mask); ++i) {
|
||||
res += __builtin_popcountl(cpu_set->__bits[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
// Map CPU_COUNT to our function
|
||||
#define CPU_COUNT(x) my_cpu_count(x)
|
||||
|
||||
#endif
|
||||
|
||||
COINATIVELIBEXPORT
|
||||
void server_compute(
|
||||
uint32_t buffer_count,
|
||||
@ -118,6 +134,20 @@ void server_var_table_copy(
|
||||
__offload_vars.table_copy(buffers[0], *static_cast<int64_t*>(misc_data));
|
||||
}
|
||||
|
||||
COINATIVELIBEXPORT
|
||||
void server_set_stream_affinity(
|
||||
uint32_t buffer_count,
|
||||
void** buffers,
|
||||
uint64_t* buffers_len,
|
||||
void* misc_data,
|
||||
uint16_t misc_data_len,
|
||||
void* return_data,
|
||||
uint16_t return_data_len
|
||||
)
|
||||
{
|
||||
/* kmp affinity is not supported by GCC. */
|
||||
}
|
||||
|
||||
#ifdef MYO_SUPPORT
|
||||
// temporary workaround for blocking behavior of myoiLibInit/Fini calls
|
||||
COINATIVELIBEXPORT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -28,7 +28,7 @@
|
||||
*/
|
||||
|
||||
|
||||
//The interface betwen offload library and the COI API on the target.
|
||||
// The interface between offload library and the COI API on the target
|
||||
|
||||
#ifndef COI_SERVER_H_INCLUDED
|
||||
#define COI_SERVER_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -35,7 +35,7 @@
|
||||
#include <alloca.h>
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
// Global counter on host.
|
||||
// Global counter on host.
|
||||
// This variable is used if P2OPT_offload_do_data_persistence == 2.
|
||||
// The variable used to identify offload constructs contained in one procedure.
|
||||
// Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with
|
||||
@ -72,7 +72,7 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
|
||||
|
||||
OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
||||
|
||||
// initalize all devices is init_type is on_offload_all
|
||||
// initialize all devices is init_type is on_offload_all
|
||||
if (retval && __offload_init_type == c_init_on_offload_all) {
|
||||
for (int i = 0; i < mic_engines_total; i++) {
|
||||
mic_engines[i].init();
|
||||
@ -241,7 +241,128 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
|
||||
return ofld;
|
||||
}
|
||||
|
||||
int offload_offload_wrap(
|
||||
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
|
||||
TARGET_TYPE target_type,
|
||||
int target_number,
|
||||
int is_optional,
|
||||
_Offload_status* status,
|
||||
const char* file,
|
||||
uint64_t line,
|
||||
const void** stream
|
||||
)
|
||||
{
|
||||
bool retval;
|
||||
OFFLOAD ofld;
|
||||
|
||||
// initialize status
|
||||
if (status != 0) {
|
||||
status->result = OFFLOAD_UNAVAILABLE;
|
||||
status->device_number = -1;
|
||||
status->data_sent = 0;
|
||||
status->data_received = 0;
|
||||
}
|
||||
|
||||
// make sure libray is initialized
|
||||
retval = __offload_init_library();
|
||||
// OFFLOAD_TIMER_INIT must follow call to __offload_init_library
|
||||
OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
|
||||
|
||||
OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
|
||||
|
||||
OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
||||
|
||||
// initalize all devices if init_type is on_offload_all
|
||||
if (retval && __offload_init_type == c_init_on_offload_all) {
|
||||
for (int i = 0; i < mic_engines_total; i++) {
|
||||
mic_engines[i].init();
|
||||
}
|
||||
}
|
||||
OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
||||
|
||||
OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
|
||||
|
||||
if (target_type == TARGET_HOST) {
|
||||
// Host always available
|
||||
retval = true;
|
||||
}
|
||||
else if (target_type == TARGET_MIC) {
|
||||
_Offload_stream handle = *(reinterpret_cast<_Offload_stream*>(stream));
|
||||
Stream * stream = handle ? Stream::find_stream(handle, false) : NULL;
|
||||
if (target_number >= -1) {
|
||||
if (retval) {
|
||||
// device number is defined by stream
|
||||
if (stream) {
|
||||
target_number = stream->get_device();
|
||||
target_number = target_number % mic_engines_total;
|
||||
}
|
||||
|
||||
// reserve device in ORSL
|
||||
if (target_number != -1) {
|
||||
if (is_optional) {
|
||||
if (!ORSL::try_reserve(target_number)) {
|
||||
target_number = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!ORSL::reserve(target_number)) {
|
||||
target_number = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// initialize device
|
||||
if (target_number >= 0 &&
|
||||
__offload_init_type == c_init_on_offload) {
|
||||
OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
||||
mic_engines[target_number].init();
|
||||
OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// fallback to CPU
|
||||
target_number = -1;
|
||||
}
|
||||
if (!(target_number == -1 && handle == 0)) {
|
||||
if (target_number < 0 || !retval) {
|
||||
if (!is_optional && status == 0) {
|
||||
LIBOFFLOAD_ERROR(c_device_is_not_available);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
retval = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
LIBOFFLOAD_ERROR(c_invalid_device_number);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (retval) {
|
||||
ofld = new OffloadDescriptor(target_number, status,
|
||||
!is_optional, false, timer_data);
|
||||
OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
|
||||
Offload_Report_Prolog(timer_data);
|
||||
OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
|
||||
"Starting offload: target_type = %d, "
|
||||
"number = %d, is_optional = %d\n",
|
||||
target_type, target_number, is_optional);
|
||||
|
||||
OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
||||
}
|
||||
else {
|
||||
ofld = NULL;
|
||||
|
||||
OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
||||
OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
|
||||
offload_report_free_data(timer_data);
|
||||
}
|
||||
|
||||
return ofld;
|
||||
}
|
||||
|
||||
static int offload_offload_wrap(
|
||||
OFFLOAD ofld,
|
||||
const char *name,
|
||||
int is_empty,
|
||||
@ -252,12 +373,15 @@ int offload_offload_wrap(
|
||||
const void **waits,
|
||||
const void **signal,
|
||||
int entry_id,
|
||||
const void *stack_addr
|
||||
const void *stack_addr,
|
||||
OffloadFlags offload_flags
|
||||
)
|
||||
{
|
||||
bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
|
||||
waits, num_waits, signal, entry_id, stack_addr);
|
||||
if (!ret || signal == 0) {
|
||||
waits, num_waits, signal, entry_id,
|
||||
stack_addr, offload_flags);
|
||||
if (!ret || (signal == 0 && ofld->get_stream() == 0 &&
|
||||
!offload_flags.bits.omp_async)) {
|
||||
delete ofld;
|
||||
}
|
||||
return ret;
|
||||
@ -278,7 +402,7 @@ extern "C" int OFFLOAD_OFFLOAD1(
|
||||
return offload_offload_wrap(ofld, name, is_empty,
|
||||
num_vars, vars, vars2,
|
||||
num_waits, waits,
|
||||
signal, NULL, NULL);
|
||||
signal, 0, NULL, {0});
|
||||
}
|
||||
|
||||
extern "C" int OFFLOAD_OFFLOAD2(
|
||||
@ -298,7 +422,35 @@ extern "C" int OFFLOAD_OFFLOAD2(
|
||||
return offload_offload_wrap(ofld, name, is_empty,
|
||||
num_vars, vars, vars2,
|
||||
num_waits, waits,
|
||||
signal, entry_id, stack_addr);
|
||||
signal, entry_id, stack_addr, {0});
|
||||
}
|
||||
|
||||
extern "C" int OFFLOAD_OFFLOAD3(
|
||||
OFFLOAD ofld,
|
||||
const char *name,
|
||||
int is_empty,
|
||||
int num_vars,
|
||||
VarDesc *vars,
|
||||
VarDesc2 *vars2,
|
||||
int num_waits,
|
||||
const void** waits,
|
||||
const void** signal,
|
||||
int entry_id,
|
||||
const void *stack_addr,
|
||||
OffloadFlags offload_flags,
|
||||
const void** stream
|
||||
)
|
||||
{
|
||||
// 1. if the source is compiled with -traceback then stream is 0
|
||||
// 2. if offload has a stream clause then stream is address of stream value
|
||||
if (stream) {
|
||||
ofld->set_stream(*(reinterpret_cast<_Offload_stream *>(stream)));
|
||||
}
|
||||
|
||||
return offload_offload_wrap(ofld, name, is_empty,
|
||||
num_vars, vars, vars2,
|
||||
num_waits, waits,
|
||||
signal, entry_id, stack_addr, offload_flags);
|
||||
}
|
||||
|
||||
extern "C" int OFFLOAD_OFFLOAD(
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -39,9 +39,11 @@
|
||||
|
||||
#define OFFLOAD_TARGET_ACQUIRE OFFLOAD_PREFIX(target_acquire)
|
||||
#define OFFLOAD_TARGET_ACQUIRE1 OFFLOAD_PREFIX(target_acquire1)
|
||||
#define OFFLOAD_TARGET_ACQUIRE2 OFFLOAD_PREFIX(target_acquire2)
|
||||
#define OFFLOAD_OFFLOAD OFFLOAD_PREFIX(offload)
|
||||
#define OFFLOAD_OFFLOAD1 OFFLOAD_PREFIX(offload1)
|
||||
#define OFFLOAD_OFFLOAD2 OFFLOAD_PREFIX(offload2)
|
||||
#define OFFLOAD_OFFLOAD3 OFFLOAD_PREFIX(offload3)
|
||||
#define OFFLOAD_CALL_COUNT OFFLOAD_PREFIX(offload_call_count)
|
||||
|
||||
|
||||
@ -75,6 +77,26 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
|
||||
uint64_t line
|
||||
);
|
||||
|
||||
/*! \fn OFFLOAD_TARGET_ACQUIRE2
|
||||
\brief Attempt to acquire the target.
|
||||
\param target_type The type of target.
|
||||
\param target_number The device number.
|
||||
\param is_optional Whether CPU fall-back is allowed.
|
||||
\param status Address of variable to hold offload status.
|
||||
\param file Filename in which this offload occurred.
|
||||
\param line Line number in the file where this offload occurred.
|
||||
\param stream Pointer to stream value.
|
||||
*/
|
||||
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
|
||||
TARGET_TYPE target_type,
|
||||
int target_number,
|
||||
int is_optional,
|
||||
_Offload_status* status,
|
||||
const char* file,
|
||||
uint64_t line,
|
||||
const void** stream
|
||||
);
|
||||
|
||||
/*! \fn OFFLOAD_OFFLOAD1
|
||||
\brief Run function on target using interface for old data persistence.
|
||||
\param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
|
||||
@ -127,6 +149,40 @@ extern "C" int OFFLOAD_OFFLOAD2(
|
||||
const void *stack_addr
|
||||
);
|
||||
|
||||
|
||||
/*! \fn OFFLOAD_OFFLOAD3
|
||||
\brief Run function on target, API introduced in 15.0 Update 1
|
||||
\brief when targetptr, preallocated feature was introduced.
|
||||
\param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
|
||||
\param name Name of offload entry point.
|
||||
\param is_empty If no code to execute (e.g. offload_transfer)
|
||||
\param num_vars Number of variable descriptors.
|
||||
\param vars Pointer to VarDesc array.
|
||||
\param vars2 Pointer to VarDesc2 array.
|
||||
\param num_waits Number of "wait" values.
|
||||
\param waits Pointer to array of wait values.
|
||||
\param signal Pointer to signal value or NULL.
|
||||
\param entry_id A signature for the function doing the offload.
|
||||
\param stack_addr The stack frame address of the function doing offload.
|
||||
\param offload_flags Flags to indicate Fortran traceback, OpenMP async.
|
||||
\param stream Pointer to stream value or NULL.
|
||||
*/
|
||||
extern "C" int OFFLOAD_OFFLOAD3(
|
||||
OFFLOAD ofld,
|
||||
const char *name,
|
||||
int is_empty,
|
||||
int num_vars,
|
||||
VarDesc *vars,
|
||||
VarDesc2 *vars2,
|
||||
int num_waits,
|
||||
const void** waits,
|
||||
const void** signal,
|
||||
int entry_id,
|
||||
const void *stack_addr,
|
||||
OffloadFlags offload_flags,
|
||||
const void** stream
|
||||
);
|
||||
|
||||
// Run function on target (obsolete).
|
||||
// @param o OFFLOAD object
|
||||
// @param name function name
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -102,8 +102,8 @@ CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
|
||||
}
|
||||
res = (CeanReadRanges *)malloc(
|
||||
sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
|
||||
if (res == NULL)
|
||||
LIBOFFLOAD_ERROR(c_malloc);
|
||||
if (res == NULL)
|
||||
LIBOFFLOAD_ERROR(c_malloc);
|
||||
res -> last_noncont_ind = rank - i - 1;
|
||||
count = 1;
|
||||
for (; i < rank; i++) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -32,6 +32,7 @@
|
||||
#define DV_UTIL_H_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
#include "offload_util.h"
|
||||
|
||||
// Dope vector declarations
|
||||
#define ArrDescMaxArrayRank 31
|
||||
@ -64,18 +65,18 @@ typedef struct ArrDesc {
|
||||
|
||||
typedef ArrDesc* pArrDesc;
|
||||
|
||||
bool __dv_is_contiguous(const ArrDesc *dvp);
|
||||
DLL_LOCAL bool __dv_is_contiguous(const ArrDesc *dvp);
|
||||
|
||||
bool __dv_is_allocated(const ArrDesc *dvp);
|
||||
DLL_LOCAL bool __dv_is_allocated(const ArrDesc *dvp);
|
||||
|
||||
uint64_t __dv_data_length(const ArrDesc *dvp);
|
||||
DLL_LOCAL uint64_t __dv_data_length(const ArrDesc *dvp);
|
||||
|
||||
uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
|
||||
DLL_LOCAL uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
|
||||
|
||||
CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
|
||||
DLL_LOCAL CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
|
||||
|
||||
#if OFFLOAD_DEBUG > 0
|
||||
void __dv_desc_dump(const char *name, const ArrDesc *dvp);
|
||||
DLL_LOCAL void __dv_desc_dump(const char *name, const ArrDesc *dvp);
|
||||
#else // OFFLOAD_DEBUG
|
||||
#define __dv_desc_dump(name, dvp)
|
||||
#endif // OFFLOAD_DEBUG
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -62,8 +62,8 @@
|
||||
/* Environment variable for target executable run command. */
|
||||
#define OFFLOAD_EMUL_RUN_ENV "OFFLOAD_EMUL_RUN"
|
||||
|
||||
/* Environment variable for number ok KNC devices. */
|
||||
#define OFFLOAD_EMUL_KNC_NUM_ENV "OFFLOAD_EMUL_KNC_NUM"
|
||||
/* Environment variable for number of emulated devices. */
|
||||
#define OFFLOAD_EMUL_NUM_ENV "OFFLOAD_EMUL_NUM"
|
||||
|
||||
|
||||
/* Path to engine directory. */
|
||||
@ -133,6 +133,7 @@ typedef enum
|
||||
CMD_BUFFER_UNMAP,
|
||||
CMD_GET_FUNCTION_HANDLE,
|
||||
CMD_OPEN_LIBRARY,
|
||||
CMD_CLOSE_LIBRARY,
|
||||
CMD_RUN_FUNCTION,
|
||||
CMD_SHUTDOWN
|
||||
} cmd_t;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -109,8 +109,8 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
|
||||
strlen (PIPE_HOST_PATH) + strlen (mic_dir) + 1);
|
||||
MALLOC (char *, pipe_target_path,
|
||||
strlen (PIPE_TARGET_PATH) + strlen (mic_dir) + 1);
|
||||
sprintf (pipe_host_path, "%s"PIPE_HOST_PATH, mic_dir);
|
||||
sprintf (pipe_target_path, "%s"PIPE_TARGET_PATH, mic_dir);
|
||||
sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, mic_dir);
|
||||
sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, mic_dir);
|
||||
pipe_host = open (pipe_host_path, O_CLOEXEC | O_WRONLY);
|
||||
if (pipe_host < 0)
|
||||
COIERROR ("Cannot open target-to-host pipe.");
|
||||
@ -237,6 +237,7 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
|
||||
{
|
||||
char *lib_path;
|
||||
size_t len;
|
||||
void *handle;
|
||||
|
||||
/* Receive data from host. */
|
||||
READ (pipe_target, &len, sizeof (size_t));
|
||||
@ -244,12 +245,26 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
|
||||
READ (pipe_target, lib_path, len);
|
||||
|
||||
/* Open library. */
|
||||
if (dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL) == 0)
|
||||
handle = dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL);
|
||||
if (handle == NULL)
|
||||
COIERROR ("Cannot load %s: %s", lib_path, dlerror ());
|
||||
|
||||
/* Send data to host. */
|
||||
WRITE (pipe_host, &handle, sizeof (void *));
|
||||
|
||||
/* Clean up. */
|
||||
free (lib_path);
|
||||
|
||||
break;
|
||||
}
|
||||
case CMD_CLOSE_LIBRARY:
|
||||
{
|
||||
/* Receive data from host. */
|
||||
void *handle;
|
||||
READ (pipe_target, &handle, sizeof (void *));
|
||||
|
||||
dlclose (handle);
|
||||
|
||||
break;
|
||||
}
|
||||
case CMD_RUN_FUNCTION:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -40,8 +40,8 @@ extern char **environ;
|
||||
char **tmp_dirs;
|
||||
unsigned tmp_dirs_num = 0;
|
||||
|
||||
/* Number of KNC engines. */
|
||||
long knc_engines_num;
|
||||
/* Number of emulated MIC engines. */
|
||||
long num_engines;
|
||||
|
||||
/* Mutex to sync parallel execution. */
|
||||
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
@ -116,8 +116,7 @@ __attribute__((constructor))
|
||||
static void
|
||||
init ()
|
||||
{
|
||||
if (read_long_env (OFFLOAD_EMUL_KNC_NUM_ENV, &knc_engines_num, 1)
|
||||
== COI_ERROR)
|
||||
if (read_long_env (OFFLOAD_EMUL_NUM_ENV, &num_engines, 1) == COI_ERROR)
|
||||
exit (0);
|
||||
}
|
||||
|
||||
@ -665,10 +664,10 @@ SYMBOL_VERSION (COIEngineGetCount, 1) (COI_ISA_TYPE isa,
|
||||
COITRACE ("COIEngineGetCount");
|
||||
|
||||
/* Features of liboffload. */
|
||||
assert (isa == COI_ISA_KNC);
|
||||
assert (isa == COI_ISA_MIC);
|
||||
|
||||
/* Prepare output arguments. */
|
||||
*count = knc_engines_num;
|
||||
*count = num_engines;
|
||||
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
@ -684,10 +683,10 @@ SYMBOL_VERSION (COIEngineGetHandle, 1) (COI_ISA_TYPE isa,
|
||||
Engine *engine;
|
||||
|
||||
/* Features of liboffload. */
|
||||
assert (isa == COI_ISA_KNC);
|
||||
assert (isa == COI_ISA_MIC);
|
||||
|
||||
/* Check engine index. */
|
||||
if (index >= knc_engines_num)
|
||||
if (index >= num_engines)
|
||||
COIERROR ("Wrong engine index.");
|
||||
|
||||
/* Create engine handle. */
|
||||
@ -889,7 +888,7 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
|
||||
|
||||
/* Create directory for pipes to prevent names collision. */
|
||||
MALLOC (char *, pipes_path, strlen (PIPES_PATH) + strlen (eng->dir) + 1);
|
||||
sprintf (pipes_path, "%s"PIPES_PATH, eng->dir);
|
||||
sprintf (pipes_path, "%s" PIPES_PATH, eng->dir);
|
||||
if (mkdir (pipes_path, S_IRWXU) < 0)
|
||||
COIERROR ("Cannot create folder %s.", pipes_path);
|
||||
|
||||
@ -900,8 +899,8 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
|
||||
strlen (PIPE_TARGET_PATH) + strlen (eng->dir) + 1);
|
||||
if (pipe_target_path == NULL)
|
||||
COIERROR ("Cannot allocate memory.");
|
||||
sprintf (pipe_host_path, "%s"PIPE_HOST_PATH, eng->dir);
|
||||
sprintf (pipe_target_path, "%s"PIPE_TARGET_PATH, eng->dir);
|
||||
sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, eng->dir);
|
||||
sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, eng->dir);
|
||||
if (mkfifo (pipe_host_path, S_IRUSR | S_IWUSR) < 0)
|
||||
COIERROR ("Cannot create pipe %s.", pipe_host_path);
|
||||
if (mkfifo (pipe_target_path, S_IRUSR | S_IWUSR) < 0)
|
||||
@ -1018,6 +1017,27 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
|
||||
}
|
||||
|
||||
|
||||
COIRESULT
|
||||
SYMBOL_VERSION (COIProcessCreateFromFile, 1) (COIENGINE in_Engine,
|
||||
const char *in_pBinaryName,
|
||||
int in_Argc,
|
||||
const char **in_ppArgv,
|
||||
uint8_t in_DupEnv,
|
||||
const char **in_ppAdditionalEnv,
|
||||
uint8_t in_ProxyActive,
|
||||
const char *in_Reserved,
|
||||
uint64_t in_BufferSpace,
|
||||
const char *in_LibrarySearchPath,
|
||||
COIPROCESS *out_pProcess)
|
||||
{
|
||||
COITRACE ("COIProcessCreateFromFile");
|
||||
|
||||
/* liboffloadmic with GCC compiled binaries should never go here. */
|
||||
assert (false);
|
||||
return COI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
COIRESULT
|
||||
SYMBOL_VERSION (COIProcessDestroy, 1) (COIPROCESS process,
|
||||
int32_t wait_timeout, // Ignored
|
||||
@ -1129,38 +1149,39 @@ SYMBOL_VERSION (COIProcessGetFunctionHandles, 1) (COIPROCESS process,
|
||||
|
||||
|
||||
COIRESULT
|
||||
SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
|
||||
const void *lib_buffer,
|
||||
uint64_t lib_buffer_len,
|
||||
const char *lib_name,
|
||||
const char *lib_search_path,
|
||||
const char *file_of_origin, // Ignored
|
||||
uint64_t file_from_origin_offset, // Ignored
|
||||
uint32_t flags, // Ignored
|
||||
COILIBRARY *library) // Ignored
|
||||
SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS in_Process,
|
||||
const void *in_pLibraryBuffer,
|
||||
uint64_t in_LibraryBufferLength,
|
||||
const char *in_pLibraryName,
|
||||
const char *in_LibrarySearchPath, // Ignored
|
||||
const char *in_FileOfOrigin, // Ignored
|
||||
uint64_t in_FileOfOriginOffset, // Ignored
|
||||
uint32_t in_Flags, // Ignored
|
||||
COILIBRARY *out_pLibrary)
|
||||
{
|
||||
COITRACE ("COIProcessLoadLibraryFromMemory");
|
||||
|
||||
const cmd_t cmd = CMD_OPEN_LIBRARY;
|
||||
char *lib_path;
|
||||
cmd_t cmd = CMD_OPEN_LIBRARY;
|
||||
int fd;
|
||||
FILE *file;
|
||||
size_t len;
|
||||
|
||||
/* Convert input arguments. */
|
||||
Process *proc = (Process *) process;
|
||||
Process *proc = (Process *) in_Process;
|
||||
|
||||
/* Create target library file. */
|
||||
MALLOC (char *, lib_path,
|
||||
strlen (proc->engine->dir) + strlen (lib_name) + 2);
|
||||
sprintf (lib_path, "%s/%s", proc->engine->dir, lib_name);
|
||||
strlen (proc->engine->dir) + strlen (in_pLibraryName) + 2);
|
||||
sprintf (lib_path, "%s/%s", proc->engine->dir, in_pLibraryName);
|
||||
fd = open (lib_path, O_CLOEXEC | O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
||||
if (fd < 0)
|
||||
COIERROR ("Cannot create file %s.", lib_path);
|
||||
file = fdopen (fd, "wb");
|
||||
if (file == NULL)
|
||||
COIERROR ("Cannot associate stream with file descriptor.");
|
||||
if (fwrite (lib_buffer, 1, lib_buffer_len, file) != lib_buffer_len)
|
||||
if (fwrite (in_pLibraryBuffer, 1, in_LibraryBufferLength, file)
|
||||
!= in_LibraryBufferLength)
|
||||
COIERROR ("Cannot write in file %s.", lib_path);
|
||||
if (fclose (file) != 0)
|
||||
COIERROR ("Cannot close file %s.", lib_path);
|
||||
@ -1176,6 +1197,10 @@ SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
|
||||
WRITE (proc->pipeline->pipe_target, &len, sizeof (size_t));
|
||||
WRITE (proc->pipeline->pipe_target, lib_path, len);
|
||||
|
||||
/* Receive data from target. */
|
||||
void *handle;
|
||||
READ (proc->pipeline->pipe_host, &handle, sizeof (void *));
|
||||
|
||||
/* Finish critical section. */
|
||||
if (pthread_mutex_unlock (&mutex) != 0)
|
||||
COIERROR ("Cannot unlock mutex.");
|
||||
@ -1183,6 +1208,7 @@ SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
|
||||
/* Clean up. */
|
||||
free (lib_path);
|
||||
|
||||
*out_pLibrary = (COILIBRARY) handle;
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1202,6 +1228,33 @@ SYMBOL_VERSION (COIProcessRegisterLibraries, 1) (uint32_t libraries_num,
|
||||
}
|
||||
|
||||
|
||||
/* Ask the target side of IN_PROCESS to close a previously loaded library.

   The request is two writes on proc->pipeline->pipe_target: the command
   code CMD_CLOSE_LIBRARY, then the raw value of IN_LIBRARY.  Both writes
   happen between pthread_mutex_lock and pthread_mutex_unlock on `mutex'.
   Nothing is read back from the target in this function, so the return
   value is always COI_SUCCESS once the code reaches the end.

   NOTE(review): what COIERROR and WRITE do on failure is not visible in
   this function -- confirm against their definitions.  */
COIRESULT
|
||||
SYMBOL_VERSION (COIProcessUnloadLibrary, 1) (COIPROCESS in_Process,
|
||||
COILIBRARY in_Library)
|
||||
{
|
||||
COITRACE ("COIProcessUnloadLibrary");
|
||||
|
||||
/* Command code sent first so the target knows which request follows. */
const cmd_t cmd = CMD_CLOSE_LIBRARY;
|
||||
|
||||
/* Convert input arguments. */
|
||||
/* The opaque COIPROCESS handle is used as a pointer to a Process. */
Process *proc = (Process *) in_Process;
|
||||
|
||||
/* Start critical section. */
|
||||
if (pthread_mutex_lock (&mutex) != 0)
|
||||
COIERROR ("Cannot lock mutex.");
|
||||
|
||||
/* Make target close library. */
|
||||
WRITE (proc->pipeline->pipe_target, &cmd, sizeof (cmd_t));
|
||||
/* The handle value itself is sent (sizeof (void *) bytes of in_Library);
   presumably it is the handle the target returned at load time --
   TODO confirm against the load path.  */
WRITE (proc->pipeline->pipe_target, &in_Library, sizeof (void *));
|
||||
|
||||
/* Finish critical section. */
|
||||
if (pthread_mutex_unlock (&mutex) != 0)
|
||||
COIERROR ("Cannot unlock mutex.");
|
||||
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
uint64_t
|
||||
SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
|
||||
{
|
||||
@ -1210,5 +1263,51 @@ SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
|
||||
return (uint64_t) CYCLE_FREQUENCY;
|
||||
}
|
||||
|
||||
|
||||
/* Stub implementation: emits the COITRACE record and returns COI_SUCCESS.
   IN_MASK is neither read nor written.  */
COIRESULT
|
||||
SYMBOL_VERSION (COIPipelineClearCPUMask, 1) (COI_CPU_MASK *in_Mask)
|
||||
{
|
||||
COITRACE ("COIPipelineClearCPUMask");
|
||||
|
||||
/* Looks like we have nothing to do here. */
|
||||
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* Stub implementation: emits the COITRACE record and returns COI_SUCCESS.
   IN_PROCESS, IN_COREID and IN_THREADID are ignored, and OUT_PMASK is left
   unmodified, so callers must not rely on its contents after the call.  */
COIRESULT
|
||||
SYMBOL_VERSION (COIPipelineSetCPUMask, 1) (COIPROCESS in_Process,
|
||||
uint32_t in_CoreID,
|
||||
uint8_t in_ThreadID,
|
||||
COI_CPU_MASK *out_pMask)
|
||||
{
|
||||
COITRACE ("COIPipelineSetCPUMask");
|
||||
|
||||
/* Looks like we have nothing to do here. */
|
||||
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* Fill *OUT_PENGINEINFO with a fixed description of the engine and return
   COI_SUCCESS.

   IN_ENGINEHANDLE and IN_ENGINEINFOSIZE are not consulted: every field set
   below is a constant, except CoreMaxFrequency, which is derived from
   COIPerfGetCycleFrequency.  OUT_PENGINEINFO is dereferenced without a
   check, so it must be non-null.  Only the fields assigned here are
   written; any other members of COI_ENGINE_INFO are left untouched.  */
COIRESULT
|
||||
SYMBOL_VERSION (COIEngineGetInfo, 1) (COIENGINE in_EngineHandle,
|
||||
uint32_t in_EngineInfoSize,
|
||||
COI_ENGINE_INFO *out_pEngineInfo)
|
||||
{
|
||||
COITRACE ("COIEngineGetInfo");
|
||||
|
||||
out_pEngineInfo->ISA = COI_ISA_x86_64;
|
||||
out_pEngineInfo->NumCores = 1;
|
||||
out_pEngineInfo->NumThreads = 8;
|
||||
/* Cycle frequency scaled down by 10^6 (presumably Hz to MHz -- TODO
   confirm the unit expected for CoreMaxFrequency).  */
out_pEngineInfo->CoreMaxFrequency = SYMBOL_VERSION(COIPerfGetCycleFrequency,1)() / 1000000;
|
||||
/* NOTE(review): the unit of the 1024 memory figures below is not visible
   here -- verify against the COI_ENGINE_INFO declaration.  */
out_pEngineInfo->PhysicalMemory = 1024;
|
||||
out_pEngineInfo->PhysicalMemoryFree = 1024;
|
||||
out_pEngineInfo->SwapMemory = 1024;
|
||||
out_pEngineInfo->SwapMemoryFree = 1024;
|
||||
out_pEngineInfo->MiscFlags = COI_ENG_ECC_DISABLED;
|
||||
|
||||
return COI_SUCCESS;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -38,31 +38,54 @@
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
__asm__ (".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0");
|
||||
__asm__ (".symver COIBufferCopy1,COIBufferCopy@@COI_1.0");
|
||||
__asm__ (".symver COIBufferCreate1,COIBufferCreate@@COI_1.0");
|
||||
__asm__ (".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0");
|
||||
__asm__ (".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0");
|
||||
__asm__ (".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0");
|
||||
__asm__ (".symver COIBufferMap1,COIBufferMap@@COI_1.0");
|
||||
__asm__ (".symver COIBufferRead1,COIBufferRead@@COI_1.0");
|
||||
__asm__ (".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0");
|
||||
__asm__ (".symver COIBufferSetState1,COIBufferSetState@@COI_1.0");
|
||||
__asm__ (".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0");
|
||||
__asm__ (".symver COIBufferWrite1,COIBufferWrite@@COI_1.0");
|
||||
__asm__ (".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0");
|
||||
__asm__ (".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0");
|
||||
__asm__ (".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0");
|
||||
__asm__ (".symver COIEventWait1,COIEventWait@@COI_1.0");
|
||||
__asm__ (".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0");
|
||||
__asm__ (".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0");
|
||||
__asm__ (".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0");
|
||||
__asm__ (".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0");
|
||||
__asm__ (".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0");
|
||||
__asm__ (".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0");
|
||||
__asm__ (".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0");
|
||||
__asm__ (".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0");
|
||||
__asm__ (".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0");
|
||||
__asm__ (".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0");
|
||||
__asm__ (".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0");
|
||||
|
||||
// Originally generated via:
|
||||
// cd include;
|
||||
// ctags -x --c-kinds=fp -R sink/ source/ common/ | grep -v COIX | awk '{print "__asm__(\".symver "$1"1,"$1"@@COI_1.0\");"}'
|
||||
//
|
||||
// These directives must have an associated linker script with VERSION stuff.
|
||||
// See coi_version_linker_script.map
|
||||
// Passed in as
|
||||
// -Wl,--version-script coi_version_linker_script.map
|
||||
// when building Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
|
||||
//
|
||||
// See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info
|
||||
//
|
||||
// This is not strictly a .h file, so no need to #pragma once or anything.
|
||||
// You must include these asm directives in the same translation unit as the
|
||||
// one where the function body is.
|
||||
// Otherwise we'd have to add this file to the list of files needed to build
|
||||
// libcoi*, instead of including it in each of the api/*/*cpp files.
|
||||
//
|
||||
__asm__(".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0");
|
||||
__asm__(".symver COIBufferCopy1,COIBufferCopy@@COI_1.0");
|
||||
__asm__(".symver COIBufferCreate1,COIBufferCreate@@COI_1.0");
|
||||
__asm__(".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0");
|
||||
__asm__(".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0");
|
||||
__asm__(".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0");
|
||||
__asm__(".symver COIBufferMap1,COIBufferMap@@COI_1.0");
|
||||
__asm__(".symver COIBufferRead1,COIBufferRead@@COI_1.0");
|
||||
__asm__(".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0");
|
||||
__asm__(".symver COIBufferSetState1,COIBufferSetState@@COI_1.0");
|
||||
__asm__(".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0");
|
||||
__asm__(".symver COIBufferWrite1,COIBufferWrite@@COI_1.0");
|
||||
__asm__(".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0");
|
||||
__asm__(".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0");
|
||||
__asm__(".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0");
|
||||
__asm__(".symver COIEngineGetInfo1,COIEngineGetInfo@@COI_1.0");
|
||||
__asm__(".symver COIEventRegisterCallback1,COIEventRegisterCallback@@COI_1.0");
|
||||
__asm__(".symver COIEventWait1,COIEventWait@@COI_1.0");
|
||||
__asm__(".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0");
|
||||
__asm__(".symver COIPipelineClearCPUMask1,COIPipelineClearCPUMask@@COI_1.0");
|
||||
__asm__(".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0");
|
||||
__asm__(".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0");
|
||||
__asm__(".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0");
|
||||
__asm__(".symver COIPipelineSetCPUMask1,COIPipelineSetCPUMask@@COI_1.0");
|
||||
__asm__(".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0");
|
||||
__asm__(".symver COIProcessCreateFromFile1,COIProcessCreateFromFile@@COI_1.0");
|
||||
__asm__(".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0");
|
||||
__asm__(".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0");
|
||||
__asm__(".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0");
|
||||
__asm__(".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0");
|
||||
__asm__(".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0");
|
||||
__asm__(".symver COIProcessUnloadLibrary1,COIProcessUnloadLibrary@@COI_1.0");
|
||||
__asm__(".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -38,6 +38,12 @@
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
/***
|
||||
* See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info.
|
||||
* Use this in conjunction with coi_version_asm.h.
|
||||
* // Comments don't work in this file.
|
||||
***/
|
||||
|
||||
COI_1.0
|
||||
{
|
||||
global:
|
||||
@ -56,17 +62,23 @@ COI_1.0
|
||||
COIEngineGetCount;
|
||||
COIEngineGetHandle;
|
||||
COIEngineGetIndex;
|
||||
COIEngineGetInfo;
|
||||
COIEventWait;
|
||||
COIEventRegisterCallback;
|
||||
COIPerfGetCycleFrequency;
|
||||
COIPipelineClearCPUMask;
|
||||
COIPipelineCreate;
|
||||
COIPipelineDestroy;
|
||||
COIPipelineRunFunction;
|
||||
COIPipelineSetCPUMask;
|
||||
COIPipelineStartExecutingRunFunctions;
|
||||
COIProcessCreateFromFile;
|
||||
COIProcessCreateFromMemory;
|
||||
COIProcessDestroy;
|
||||
COIProcessGetFunctionHandles;
|
||||
COIProcessLoadLibraryFromMemory;
|
||||
COIProcessRegisterLibraries;
|
||||
COIProcessUnloadLibrary;
|
||||
COIProcessWaitForShutdown;
|
||||
local:
|
||||
*;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -155,5 +155,49 @@ SYMBOL_VERSION (myoiTargetFptrTableRegister, 1) (void *table,
|
||||
return MYO_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Unimplemented placeholder: traces the call, then hits assert (false).
   IN_ARENA is not used.  If the assertion is compiled out (e.g. NDEBUG with
   the standard assert macro), the function returns MYO_ERROR.  */
MYOACCESSAPI MyoError
|
||||
SYMBOL_VERSION (myoArenaRelease, 1) (MyoArena in_Arena)
|
||||
{
|
||||
MYOTRACE ("myoArenaRelease");
|
||||
|
||||
assert (false);
|
||||
|
||||
return MYO_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Unimplemented placeholder: traces the call, then hits assert (false).
   IN_ARENA is not used.  If the assertion is compiled out (e.g. NDEBUG with
   the standard assert macro), the function returns MYO_ERROR.  */
MYOACCESSAPI MyoError
|
||||
SYMBOL_VERSION (myoArenaAcquire, 1) (MyoArena in_Arena)
|
||||
{
|
||||
MYOTRACE ("myoArenaAcquire");
|
||||
|
||||
assert (false);
|
||||
|
||||
return MYO_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Unimplemented placeholder: traces the call, then hits assert (false).
   Neither IN_ARENA nor IN_PPTR is used, and nothing is freed.  If the
   assertion is compiled out, the function simply returns.  */
MYOACCESSAPI void
|
||||
SYMBOL_VERSION (myoArenaAlignedFree, 1) (MyoArena in_Arena, void *in_pPtr)
|
||||
{
|
||||
MYOTRACE ("myoArenaAlignedFree");
|
||||
|
||||
assert (false);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Unimplemented placeholder: traces the call, then hits assert (false).
   IN_ARENA, IN_SIZE and IN_ALIGNMENT are not used and no memory is
   allocated.  If the assertion is compiled out, a null pointer (0) is
   returned.  */
MYOACCESSAPI void *
|
||||
SYMBOL_VERSION (myoArenaAlignedMalloc, 1) (MyoArena in_Arena, size_t in_Size,
|
||||
size_t in_Alignment)
|
||||
{
|
||||
MYOTRACE ("myoArenaAlignedMalloc");
|
||||
|
||||
assert (false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -38,16 +38,24 @@
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
__asm__ (".symver myoAcquire1,myoAcquire@@MYO_1.0");
|
||||
__asm__ (".symver myoRelease1,myoRelease@@MYO_1.0");
|
||||
__asm__ (".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
|
||||
__asm__ (".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
|
||||
__asm__ (".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
|
||||
__asm__ (".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
|
||||
/* Symbol versioning (only functions are currently versioned).
|
||||
Only the Linux host-side code is versioned at present.  */
|
||||
#if (! defined MYO_MIC_CARD) && (! defined _WIN32)
|
||||
|
||||
__asm__ (".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
|
||||
__asm__ (".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
|
||||
__asm__ (".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
|
||||
__asm__ (".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
|
||||
__asm__ (".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
|
||||
__asm__(".symver myoArenaAlignedMalloc1,myoArenaAlignedMalloc@@MYO_1.0");
|
||||
__asm__(".symver myoArenaAlignedFree1,myoArenaAlignedFree@@MYO_1.0");
|
||||
__asm__(".symver myoArenaAcquire1,myoArenaAcquire@@MYO_1.0");
|
||||
__asm__(".symver myoArenaRelease1,myoArenaRelease@@MYO_1.0");
|
||||
__asm__(".symver myoAcquire1,myoAcquire@@MYO_1.0");
|
||||
__asm__(".symver myoRelease1,myoRelease@@MYO_1.0");
|
||||
__asm__(".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
|
||||
__asm__(".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
|
||||
__asm__(".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
|
||||
__asm__(".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
|
||||
__asm__(".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
|
||||
__asm__(".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
|
||||
__asm__(".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
|
||||
__asm__(".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
|
||||
__asm__(".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2013 Intel Corporation.
|
||||
* Copyright 2010-2015 Intel Corporation.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
@ -38,9 +38,17 @@
|
||||
* intellectual property rights is granted herein.
|
||||
*/
|
||||
|
||||
/***
|
||||
* See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info.
|
||||
***/
|
||||
|
||||
MYO_1.0
|
||||
{
|
||||
global:
|
||||
myoArenaAlignedMalloc;
|
||||
myoArenaAlignedFree;
|
||||
myoArenaAcquire;
|
||||
myoArenaRelease;
|
||||
myoAcquire;
|
||||
myoRelease;
|
||||
myoSharedAlignedFree;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -144,6 +144,9 @@ void __liboffload_error_support(error_types input_tag, ...)
|
||||
case c_process_create:
|
||||
write_message(stderr, msg_c_process_create, args);
|
||||
break;
|
||||
case c_process_set_cache_size:
|
||||
write_message(stderr, msg_c_process_set_cache_size, args);
|
||||
break;
|
||||
case c_process_wait_shutdown:
|
||||
write_message(stderr, msg_c_process_wait_shutdown, args);
|
||||
break;
|
||||
@ -216,6 +219,9 @@ void __liboffload_error_support(error_types input_tag, ...)
|
||||
case c_zero_or_neg_transfer_size:
|
||||
write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
|
||||
break;
|
||||
case c_bad_ptr_mem_alloc:
|
||||
write_message(stderr, msg_c_bad_ptr_mem_alloc, args);
|
||||
break;
|
||||
case c_bad_ptr_mem_range:
|
||||
write_message(stderr, msg_c_bad_ptr_mem_range, args);
|
||||
break;
|
||||
@ -258,6 +264,39 @@ void __liboffload_error_support(error_types input_tag, ...)
|
||||
case c_report_unknown_trace_node:
|
||||
write_message(stderr, msg_c_report_unknown_trace_node, args);
|
||||
break;
|
||||
case c_incorrect_affinity:
|
||||
write_message(stderr, msg_c_incorrect_affinity, args);
|
||||
break;
|
||||
case c_cannot_set_affinity:
|
||||
write_message(stderr, msg_c_cannot_set_affinity, args);
|
||||
break;
|
||||
case c_in_with_preallocated:
|
||||
write_message(stderr, msg_c_in_with_preallocated, args);
|
||||
break;
|
||||
case c_report_no_host_exe:
|
||||
write_message(stderr, msg_c_report_no_host_exe, args);
|
||||
break;
|
||||
case c_report_path_buff_overflow:
|
||||
write_message(stderr, msg_c_report_path_buff_overflow, args);
|
||||
break;
|
||||
case c_create_pipeline_for_stream:
|
||||
write_message(stderr, msg_c_create_pipeline_for_stream, args);
|
||||
break;
|
||||
case c_offload_no_stream:
|
||||
write_message(stderr, msg_c_offload_no_stream, args);
|
||||
break;
|
||||
case c_get_engine_info:
|
||||
write_message(stderr, msg_c_get_engine_info, args);
|
||||
break;
|
||||
case c_clear_cpu_mask:
|
||||
write_message(stderr, msg_c_clear_cpu_mask, args);
|
||||
break;
|
||||
case c_set_cpu_mask:
|
||||
write_message(stderr, msg_c_set_cpu_mask, args);
|
||||
break;
|
||||
case c_unload_library:
|
||||
write_message(stderr, msg_c_unload_library, args);
|
||||
break;
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
@ -374,6 +413,10 @@ char const * report_get_message_str(error_types input_tag)
|
||||
return (offload_get_message_str(msg_c_report_unregister));
|
||||
case c_report_var:
|
||||
return (offload_get_message_str(msg_c_report_var));
|
||||
case c_report_stream:
|
||||
return (offload_get_message_str(msg_c_report_stream));
|
||||
case c_report_state_stream:
|
||||
return (offload_get_message_str(msg_c_report_state_stream));
|
||||
|
||||
default:
|
||||
LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -68,6 +68,7 @@ typedef enum
|
||||
c_get_engine_handle,
|
||||
c_get_engine_index,
|
||||
c_process_create,
|
||||
c_process_set_cache_size,
|
||||
c_process_get_func_handles,
|
||||
c_process_wait_shutdown,
|
||||
c_process_proxy_flush,
|
||||
@ -91,6 +92,7 @@ typedef enum
|
||||
c_event_wait,
|
||||
c_zero_or_neg_ptr_len,
|
||||
c_zero_or_neg_transfer_size,
|
||||
c_bad_ptr_mem_alloc,
|
||||
c_bad_ptr_mem_range,
|
||||
c_different_src_and_dstn_sizes,
|
||||
c_ranges_dont_match,
|
||||
@ -103,6 +105,8 @@ typedef enum
|
||||
c_unknown_binary_type,
|
||||
c_multiple_target_exes,
|
||||
c_no_target_exe,
|
||||
c_incorrect_affinity,
|
||||
c_cannot_set_affinity,
|
||||
c_report_host,
|
||||
c_report_target,
|
||||
c_report_title,
|
||||
@ -159,7 +163,24 @@ typedef enum
|
||||
c_report_myosharedalignedfree,
|
||||
c_report_myoacquire,
|
||||
c_report_myorelease,
|
||||
c_coipipe_max_number
|
||||
c_report_myosupportsfeature,
|
||||
c_report_myosharedarenacreate,
|
||||
c_report_myosharedalignedarenamalloc,
|
||||
c_report_myosharedalignedarenafree,
|
||||
c_report_myoarenaacquire,
|
||||
c_report_myoarenarelease,
|
||||
c_coipipe_max_number,
|
||||
c_in_with_preallocated,
|
||||
c_report_no_host_exe,
|
||||
c_report_path_buff_overflow,
|
||||
c_create_pipeline_for_stream,
|
||||
c_offload_no_stream,
|
||||
c_get_engine_info,
|
||||
c_clear_cpu_mask,
|
||||
c_set_cpu_mask,
|
||||
c_report_state_stream,
|
||||
c_report_stream,
|
||||
c_unload_library
|
||||
} error_types;
|
||||
|
||||
enum OffloadHostPhase {
|
||||
@ -260,15 +281,21 @@ enum OffloadTargetPhase {
|
||||
c_offload_target_max_phase
|
||||
};
|
||||
|
||||
/* DLL_LOCAL is a declaration prefix.  When TARGET_WINNT is defined it
   expands to nothing; otherwise it expands to the GCC attribute that gives
   the declared symbol hidden visibility.  */
#ifdef TARGET_WINNT
|
||||
#define DLL_LOCAL
|
||||
#else
|
||||
#define DLL_LOCAL __attribute__((visibility("hidden")))
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void __liboffload_error_support(error_types input_tag, ...);
|
||||
void __liboffload_report_support(error_types input_tag, ...);
|
||||
char const *offload_get_message_str(int msgCode);
|
||||
char const * report_get_message_str(error_types input_tag);
|
||||
char const * report_get_host_stage_str(int i);
|
||||
char const * report_get_target_stage_str(int i);
|
||||
DLL_LOCAL void __liboffload_error_support(error_types input_tag, ...);
|
||||
DLL_LOCAL void __liboffload_report_support(error_types input_tag, ...);
|
||||
DLL_LOCAL char const *offload_get_message_str(int msgCode);
|
||||
DLL_LOCAL char const * report_get_message_str(error_types input_tag);
|
||||
DLL_LOCAL char const * report_get_host_stage_str(int i);
|
||||
DLL_LOCAL char const * report_get_target_stage_str(int i);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -281,7 +308,7 @@ char const * report_get_target_stage_str(int i);
|
||||
fprintf(stderr, "\t TEST for %s \n \t", nm); \
|
||||
__liboffload_error_support(msg, __VA_ARGS__);
|
||||
|
||||
void write_message(FILE * file, int msgCode, va_list args_p);
|
||||
DLL_LOCAL void write_message(FILE * file, int msgCode, va_list args_p);
|
||||
|
||||
#define LIBOFFLOAD_ERROR __liboffload_error_support
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -28,7 +28,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -55,7 +54,7 @@
|
||||
va_copy(args, args_p);
|
||||
buf[0] = '\n';
|
||||
vsnprintf(buf + 1, sizeof(buf) - 2,
|
||||
MESSAGE_TABLE_NAME[ msgCode ], args);
|
||||
MESSAGE_TABLE_NAME[ msgCode ], args);
|
||||
strcat(buf, "\n");
|
||||
va_end(args);
|
||||
fputs(buf, file);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -62,6 +62,7 @@ enum {
|
||||
msg_c_get_engine_handle,
|
||||
msg_c_get_engine_index,
|
||||
msg_c_process_create,
|
||||
msg_c_process_set_cache_size,
|
||||
msg_c_process_get_func_handles,
|
||||
msg_c_process_wait_shutdown,
|
||||
msg_c_process_proxy_flush,
|
||||
@ -85,6 +86,7 @@ enum {
|
||||
msg_c_event_wait,
|
||||
msg_c_zero_or_neg_ptr_len,
|
||||
msg_c_zero_or_neg_transfer_size,
|
||||
msg_c_bad_ptr_mem_alloc,
|
||||
msg_c_bad_ptr_mem_range,
|
||||
msg_c_different_src_and_dstn_sizes,
|
||||
msg_c_non_contiguous_dope_vector,
|
||||
@ -95,6 +97,8 @@ enum {
|
||||
msg_c_no_target_exe,
|
||||
msg_c_report_unknown_timer_node,
|
||||
msg_c_report_unknown_trace_node,
|
||||
msg_c_incorrect_affinity,
|
||||
msg_c_cannot_set_affinity,
|
||||
msg_c_report_host,
|
||||
msg_c_report_mic,
|
||||
msg_c_report_title,
|
||||
@ -148,6 +152,12 @@ enum {
|
||||
msg_c_report_myosharedalignedfree,
|
||||
msg_c_report_myoacquire,
|
||||
msg_c_report_myorelease,
|
||||
msg_c_report_myosupportsfeature,
|
||||
msg_c_report_myosharedarenacreate,
|
||||
msg_c_report_myosharedalignedarenamalloc,
|
||||
msg_c_report_myosharedalignedarenafree,
|
||||
msg_c_report_myoarenaacquire,
|
||||
msg_c_report_myoarenarelease,
|
||||
msg_c_report_host_total_offload_time,
|
||||
msg_c_report_host_initialize,
|
||||
msg_c_report_host_target_acquire,
|
||||
@ -182,7 +192,18 @@ enum {
|
||||
msg_c_destination_is_over,
|
||||
msg_c_slice_of_noncont_array,
|
||||
msg_c_pointer_array_mismatch,
|
||||
lastMsg = 153,
|
||||
msg_c_in_with_preallocated,
|
||||
msg_c_report_no_host_exe,
|
||||
msg_c_report_path_buff_overflow,
|
||||
msg_c_create_pipeline_for_stream,
|
||||
msg_c_offload_no_stream,
|
||||
msg_c_get_engine_info,
|
||||
msg_c_clear_cpu_mask,
|
||||
msg_c_set_cpu_mask,
|
||||
msg_c_report_state_stream,
|
||||
msg_c_report_stream,
|
||||
msg_c_unload_library,
|
||||
lastMsg = 174,
|
||||
firstMsg = 1
|
||||
};
|
||||
|
||||
@ -192,157 +213,178 @@ enum {
|
||||
#endif
|
||||
|
||||
static char const * MESSAGE_TABLE_NAME[] = {
|
||||
/* 0 __dummy__ */ "Un-used message",
|
||||
/* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
|
||||
/* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
|
||||
/* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
|
||||
/* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
|
||||
/* 5 msg_c_malloc */ "offload error: memory allocation failed",
|
||||
/* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
|
||||
/* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
|
||||
/* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
|
||||
/* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
|
||||
/* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
|
||||
/* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
|
||||
/* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
|
||||
/* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
|
||||
/* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
|
||||
/* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
|
||||
/* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
|
||||
/* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
|
||||
/* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
|
||||
/* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
|
||||
/* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with \'=\'",
|
||||
/* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
|
||||
/* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
|
||||
/* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
|
||||
/* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
|
||||
/* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
|
||||
/* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
|
||||
/* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
|
||||
/* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
|
||||
/* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
|
||||
/* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
|
||||
/* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
|
||||
/* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
|
||||
/* 33 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
|
||||
/* 34 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
|
||||
/* 35 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
|
||||
/* 36 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
|
||||
/* 37 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
|
||||
/* 38 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
|
||||
/* 39 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
|
||||
/* 40 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
|
||||
/* 41 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
|
||||
/* 42 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
|
||||
/* 43 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
|
||||
/* 44 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
|
||||
/* 45 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
|
||||
/* 46 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
|
||||
/* 47 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
|
||||
/* 48 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
|
||||
/* 49 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
|
||||
/* 50 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
|
||||
/* 51 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
|
||||
/* 52 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
|
||||
/* 53 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
|
||||
/* 54 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of negative length is not supported",
|
||||
/* 55 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
|
||||
/* 56 msg_c_bad_ptr_mem_range */ "offload error: address range partially overlaps with existing allocation",
|
||||
/* 57 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
|
||||
/* 58 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
|
||||
/* 59 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
|
||||
/* 60 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
|
||||
/* 61 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
|
||||
/* 62 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
|
||||
/* 63 msg_c_no_target_exe */ "offload error: target executable is not available",
|
||||
/* 64 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
|
||||
/* 65 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
|
||||
/* 66 msg_c_report_host */ "HOST",
|
||||
/* 67 msg_c_report_mic */ "MIC",
|
||||
/* 68 msg_c_report_title */ "timer data (sec)",
|
||||
/* 69 msg_c_report_seconds */ "(seconds)",
|
||||
/* 70 msg_c_report_bytes */ "(bytes)",
|
||||
/* 71 msg_c_report_cpu_time */ "CPU Time",
|
||||
/* 72 msg_c_report_mic_time */ "MIC Time",
|
||||
/* 73 msg_c_report_tag */ "Tag",
|
||||
/* 74 msg_c_report_from_file */ "Offload from file",
|
||||
/* 75 msg_c_report_file */ "File",
|
||||
/* 76 msg_c_report_line */ "Line",
|
||||
/* 77 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
|
||||
/* 78 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
|
||||
/* 79 msg_c_report_offload */ "Offload",
|
||||
/* 80 msg_c_report_w_tag */ "Tag %d",
|
||||
/* 81 msg_c_report_state */ "State",
|
||||
/* 82 msg_c_report_start */ "Start target",
|
||||
/* 83 msg_c_report_init */ "Initialize",
|
||||
/* 84 msg_c_report_logical_card */ "logical card",
|
||||
/* 85 msg_c_report_physical_card */ "physical card",
|
||||
/* 86 msg_c_report_register */ "Register static data tables",
|
||||
/* 87 msg_c_report_init_func */ "Setup target entry",
|
||||
/* 88 msg_c_report_create_buf_host */ "Create host buffer",
|
||||
/* 89 msg_c_report_create_buf_mic */ "Create target buffer",
|
||||
/* 90 msg_c_report_send_pointer_data */ "Send pointer data",
|
||||
/* 91 msg_c_report_sent_pointer_data */ "Host->target pointer data",
|
||||
/* 92 msg_c_report_gather_copyin_data */ "Gather copyin data",
|
||||
/* 93 msg_c_report_copyin_data */ "Host->target copyin data",
|
||||
/* 94 msg_c_report_state_signal */ "Signal",
|
||||
/* 95 msg_c_report_signal */ "signal :",
|
||||
/* 96 msg_c_report_wait */ "waits :",
|
||||
/* 97 msg_c_report_compute */ "Execute task on target",
|
||||
/* 98 msg_c_report_receive_pointer_data */ "Receive pointer data",
|
||||
/* 99 msg_c_report_received_pointer_data */ "Target->host pointer data",
|
||||
/* 100 msg_c_report_start_target_func */ "Start target entry",
|
||||
/* 101 msg_c_report_var */ "Var",
|
||||
/* 102 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
|
||||
/* 103 msg_c_report_gather_copyout_data */ "Gather copyout data",
|
||||
/* 104 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
|
||||
/* 105 msg_c_report_copyout_data */ "Target->host copyout data",
|
||||
/* 106 msg_c_report_unregister */ "Unregister data tables",
|
||||
/* 107 msg_c_report_destroy */ "Destroy",
|
||||
/* 108 msg_c_report_myoinit */ "Initialize MYO",
|
||||
/* 109 msg_c_report_myoregister */ "Register MYO tables",
|
||||
/* 110 msg_c_report_myofini */ "Finalize MYO",
|
||||
/* 111 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
|
||||
/* 112 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
|
||||
/* 113 msg_c_report_myosharedmalloc */ "MYO shared malloc",
|
||||
/* 114 msg_c_report_myosharedfree */ "MYO shared free",
|
||||
/* 115 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
|
||||
/* 116 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
|
||||
/* 117 msg_c_report_myoacquire */ "MYO acquire",
|
||||
/* 118 msg_c_report_myorelease */ "MYO release",
|
||||
/* 119 msg_c_report_host_total_offload_time */ "host: total offload time",
|
||||
/* 120 msg_c_report_host_initialize */ "host: initialize target",
|
||||
/* 121 msg_c_report_host_target_acquire */ "host: acquire target",
|
||||
/* 122 msg_c_report_host_wait_deps */ "host: wait dependencies",
|
||||
/* 123 msg_c_report_host_setup_buffers */ "host: setup buffers",
|
||||
/* 124 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
|
||||
/* 125 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
|
||||
/* 126 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
|
||||
/* 127 msg_c_report_host_send_pointers */ "host: send pointers",
|
||||
/* 128 msg_c_report_host_gather_inputs */ "host: gather inputs",
|
||||
/* 129 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
|
||||
/* 130 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
|
||||
/* 131 msg_c_report_host_start_compute */ "host: initiate compute",
|
||||
/* 132 msg_c_report_host_wait_compute */ "host: wait compute",
|
||||
/* 133 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
|
||||
/* 134 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
|
||||
/* 135 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
|
||||
/* 136 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
|
||||
/* 137 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
|
||||
/* 138 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
|
||||
/* 139 msg_c_report_target_total_time */ "target: total time",
|
||||
/* 140 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
|
||||
/* 141 msg_c_report_target_func_lookup */ "target: entry lookup",
|
||||
/* 142 msg_c_report_target_func_time */ "target: entry time",
|
||||
/* 143 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
|
||||
/* 144 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
|
||||
/* 145 msg_c_report_target_compute */ "target: compute",
|
||||
/* 146 msg_c_report_target_gather_outputs */ "target: gather outputs",
|
||||
/* 147 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
|
||||
/* 148 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
|
||||
/* 149 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
|
||||
/* 150 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
|
||||
/* 151 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
|
||||
/* 152 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
|
||||
/* 0 __dummy__ */ "Un-used message",
|
||||
/* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
|
||||
/* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
|
||||
/* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
|
||||
/* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
|
||||
/* 5 msg_c_malloc */ "offload error: memory allocation failed",
|
||||
/* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
|
||||
/* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
|
||||
/* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
|
||||
/* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
|
||||
/* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
|
||||
/* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
|
||||
/* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
|
||||
/* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
|
||||
/* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
|
||||
/* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
|
||||
/* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
|
||||
/* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
|
||||
/* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
|
||||
/* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
|
||||
/* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with '='",
|
||||
/* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
|
||||
/* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
|
||||
/* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
|
||||
/* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
|
||||
/* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
|
||||
/* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
|
||||
/* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
|
||||
/* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
|
||||
/* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
|
||||
/* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
|
||||
/* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
|
||||
/* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
|
||||
/* 33 msg_c_process_set_cache_size */ "offload error: cannot reserve buffer on the device %d (error code %d)",
|
||||
/* 34 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
|
||||
/* 35 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
|
||||
/* 36 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
|
||||
/* 37 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
|
||||
/* 38 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
|
||||
/* 39 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
|
||||
/* 40 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
|
||||
/* 41 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
|
||||
/* 42 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
|
||||
/* 43 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
|
||||
/* 44 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
|
||||
/* 45 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
|
||||
/* 46 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
|
||||
/* 47 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
|
||||
/* 48 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
|
||||
/* 49 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
|
||||
/* 50 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
|
||||
/* 51 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
|
||||
/* 52 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
|
||||
/* 53 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
|
||||
/* 54 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
|
||||
/* 55 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of zero or negative length is not supported",
|
||||
/* 56 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
|
||||
/* 57 msg_c_bad_ptr_mem_alloc */ "offload error: allocation (base=%p, size=%d) overlaps with existing allocation (base=%p, size=%d)",
|
||||
/* 58 msg_c_bad_ptr_mem_range */ "offload error: data transfer (base=%p, size=%d) not subset of existing allocation (base=%p, size=%d)",
|
||||
/* 59 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
|
||||
/* 60 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
|
||||
/* 61 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
|
||||
/* 62 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
|
||||
/* 63 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
|
||||
/* 64 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
|
||||
/* 65 msg_c_no_target_exe */ "offload error: target executable is not available",
|
||||
/* 66 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
|
||||
/* 67 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
|
||||
/* 68 msg_c_incorrect_affinity */ "offload error: unknow affinity type %s, specify compact, scatter or balanced",
|
||||
/* 69 msg_c_cannot_set_affinity */ "offload_error: unable to set affinity",
|
||||
/* 70 msg_c_report_host */ "HOST",
|
||||
/* 71 msg_c_report_mic */ "MIC",
|
||||
/* 72 msg_c_report_title */ "timer data (sec)",
|
||||
/* 73 msg_c_report_seconds */ "(seconds)",
|
||||
/* 74 msg_c_report_bytes */ "(bytes)",
|
||||
/* 75 msg_c_report_cpu_time */ "CPU Time",
|
||||
/* 76 msg_c_report_mic_time */ "MIC Time",
|
||||
/* 77 msg_c_report_tag */ "Tag",
|
||||
/* 78 msg_c_report_from_file */ "Offload from file",
|
||||
/* 79 msg_c_report_file */ "File",
|
||||
/* 80 msg_c_report_line */ "Line",
|
||||
/* 81 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
|
||||
/* 82 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
|
||||
/* 83 msg_c_report_offload */ "Offload",
|
||||
/* 84 msg_c_report_w_tag */ "Tag %d",
|
||||
/* 85 msg_c_report_state */ "State",
|
||||
/* 86 msg_c_report_start */ "Start target",
|
||||
/* 87 msg_c_report_init */ "Initialize",
|
||||
/* 88 msg_c_report_logical_card */ "logical card",
|
||||
/* 89 msg_c_report_physical_card */ "physical card",
|
||||
/* 90 msg_c_report_register */ "Register static data tables",
|
||||
/* 91 msg_c_report_init_func */ "Setup target entry",
|
||||
/* 92 msg_c_report_create_buf_host */ "Create host buffer",
|
||||
/* 93 msg_c_report_create_buf_mic */ "Create target buffer",
|
||||
/* 94 msg_c_report_send_pointer_data */ "Send pointer data",
|
||||
/* 95 msg_c_report_sent_pointer_data */ "Host->target pointer data",
|
||||
/* 96 msg_c_report_gather_copyin_data */ "Gather copyin data",
|
||||
/* 97 msg_c_report_copyin_data */ "Host->target copyin data",
|
||||
/* 98 msg_c_report_state_signal */ "Signal",
|
||||
/* 99 msg_c_report_signal */ "signal :",
|
||||
/* 100 msg_c_report_wait */ "waits :",
|
||||
/* 101 msg_c_report_compute */ "Execute task on target",
|
||||
/* 102 msg_c_report_receive_pointer_data */ "Receive pointer data",
|
||||
/* 103 msg_c_report_received_pointer_data */ "Target->host pointer data",
|
||||
/* 104 msg_c_report_start_target_func */ "Start target entry",
|
||||
/* 105 msg_c_report_var */ "Var",
|
||||
/* 106 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
|
||||
/* 107 msg_c_report_gather_copyout_data */ "Gather copyout data",
|
||||
/* 108 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
|
||||
/* 109 msg_c_report_copyout_data */ "Target->host copyout data",
|
||||
/* 110 msg_c_report_unregister */ "Unregister data tables",
|
||||
/* 111 msg_c_report_destroy */ "Destroy",
|
||||
/* 112 msg_c_report_myoinit */ "Initialize MYO",
|
||||
/* 113 msg_c_report_myoregister */ "Register MYO tables",
|
||||
/* 114 msg_c_report_myofini */ "Finalize MYO",
|
||||
/* 115 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
|
||||
/* 116 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
|
||||
/* 117 msg_c_report_myosharedmalloc */ "MYO shared malloc",
|
||||
/* 118 msg_c_report_myosharedfree */ "MYO shared free",
|
||||
/* 119 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
|
||||
/* 120 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
|
||||
/* 121 msg_c_report_myoacquire */ "MYO acquire",
|
||||
/* 122 msg_c_report_myorelease */ "MYO release",
|
||||
/* 123 msg_c_report_myosupportsfeature */ "MYO supports feature",
|
||||
/* 124 msg_c_report_myosharedarenacreate */ "MYO shared arena create",
|
||||
/* 125 msg_c_report_myosharedalignedarenamalloc */ "MYO shared aligned arena malloc",
|
||||
/* 126 msg_c_report_myosharedalignedarenafree */ "MYO shared aligned arena free",
|
||||
/* 127 msg_c_report_myoarenaacquire */ "MYO arena acquire",
|
||||
/* 128 msg_c_report_myoarenarelease */ "MYO arena release",
|
||||
/* 129 msg_c_report_host_total_offload_time */ "host: total offload time",
|
||||
/* 130 msg_c_report_host_initialize */ "host: initialize target",
|
||||
/* 131 msg_c_report_host_target_acquire */ "host: acquire target",
|
||||
/* 132 msg_c_report_host_wait_deps */ "host: wait dependencies",
|
||||
/* 133 msg_c_report_host_setup_buffers */ "host: setup buffers",
|
||||
/* 134 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
|
||||
/* 135 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
|
||||
/* 136 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
|
||||
/* 137 msg_c_report_host_send_pointers */ "host: send pointers",
|
||||
/* 138 msg_c_report_host_gather_inputs */ "host: gather inputs",
|
||||
/* 139 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
|
||||
/* 140 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
|
||||
/* 141 msg_c_report_host_start_compute */ "host: initiate compute",
|
||||
/* 142 msg_c_report_host_wait_compute */ "host: wait compute",
|
||||
/* 143 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
|
||||
/* 144 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
|
||||
/* 145 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
|
||||
/* 146 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
|
||||
/* 147 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
|
||||
/* 148 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
|
||||
/* 149 msg_c_report_target_total_time */ "target: total time",
|
||||
/* 150 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
|
||||
/* 151 msg_c_report_target_func_lookup */ "target: entry lookup",
|
||||
/* 152 msg_c_report_target_func_time */ "target: entry time",
|
||||
/* 153 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
|
||||
/* 154 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
|
||||
/* 155 msg_c_report_target_compute */ "target: compute",
|
||||
/* 156 msg_c_report_target_gather_outputs */ "target: gather outputs",
|
||||
/* 157 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
|
||||
/* 158 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
|
||||
/* 159 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
|
||||
/* 160 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
|
||||
/* 161 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
|
||||
/* 162 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
|
||||
/* 163 msg_c_in_with_preallocated */ "offload error: preallocated targetptr alloc_if(1) may not be used with an in clause",
|
||||
/* 164 msg_c_report_no_host_exe */ "offload error: Cannot find host executable",
|
||||
/* 165 msg_c_report_path_buff_overflow */ "offload error: Size of host executable path exceeded 4KB",
|
||||
/* 166 msg_c_create_pipeline_for_stream */ "offload error: number of cpus exceeds maximum of %d",
|
||||
/* 167 msg_c_offload_no_stream */ "offload error: the stream isn't found on device %d",
|
||||
/* 168 msg_c_get_engine_info */ "offload error: cannot get device %d info (error code %d)",
|
||||
/* 169 msg_c_clear_cpu_mask */ "offload error: cannot clear cpu mask (error code %d)",
|
||||
/* 170 msg_c_set_cpu_mask */ "offload error: cannot set cpu mask (error code %d)",
|
||||
/* 171 msg_c_report_state_stream */ "Stream",
|
||||
/* 172 msg_c_report_stream */ "stream :",
|
||||
/* 173 msg_c_unload_library */ "offload error: cannot unload library from the device %d (error code %d)",
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
!
|
||||
! Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
! Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -42,6 +42,13 @@
|
||||
#include <stddef.h>
|
||||
#include <omp.h>
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
// <stdint.h> is not compatible with Windows
|
||||
typedef unsigned long long int uint64_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -86,6 +93,8 @@ typedef struct {
|
||||
size_t data_received; /* number of bytes received by host */
|
||||
} _Offload_status;
|
||||
|
||||
typedef uint64_t _Offload_stream;
|
||||
|
||||
#define OFFLOAD_STATUS_INIT(x) \
|
||||
((x).result = OFFLOAD_DISABLED)
|
||||
|
||||
@ -98,14 +107,57 @@ extern int _Offload_number_of_devices(void);
|
||||
extern int _Offload_get_device_number(void);
|
||||
extern int _Offload_get_physical_device_number(void);
|
||||
|
||||
/* Offload stream runtime interfaces */
|
||||
|
||||
extern _Offload_stream _Offload_stream_create(
|
||||
int device, // MIC device number
|
||||
int number_of_cpus // Cores allocated to the stream
|
||||
);
|
||||
|
||||
extern int _Offload_stream_destroy(
|
||||
int device, // MIC device number
|
||||
_Offload_stream stream // stream handle
|
||||
);
|
||||
|
||||
extern int _Offload_stream_completed(
|
||||
int device, // MIC device number
|
||||
_Offload_stream handle // stream handle
|
||||
);
|
||||
|
||||
/*
|
||||
* _Offload_shared_malloc/free are only supported when offload is enabled
|
||||
* else they are defined to malloc and free
|
||||
*/
|
||||
#ifdef __INTEL_OFFLOAD
|
||||
extern void* _Offload_shared_malloc(size_t size);
|
||||
extern void _Offload_shared_free(void *ptr);
|
||||
|
||||
extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
|
||||
extern void _Offload_shared_aligned_free(void *ptr);
|
||||
#else
|
||||
#include <malloc.h>
|
||||
#define _Offload_shared_malloc(size) malloc(size)
|
||||
/* Fallback when offload is disabled: plain free().  The expansion has no
   trailing ';' (the caller supplies it), so the macro behaves like the
   function declared under __INTEL_OFFLOAD and is safe in if/else. */
#define _Offload_shared_free(ptr) free(ptr)
|
||||
#if defined(_WIN32)
|
||||
#define _Offload_shared_aligned_malloc(size, align) _aligned_malloc(size, align)
|
||||
/* Windows fallback when offload is disabled: release with _aligned_free().
   The expansion has no trailing ';' (the caller supplies it), so the macro
   is usable as an ordinary expression statement, including in if/else. */
#define _Offload_shared_aligned_free(ptr) _aligned_free(ptr)
|
||||
#else
|
||||
#define _Offload_shared_aligned_malloc(size, align) memalign(align, size)
|
||||
/* Non-Windows fallback when offload is disabled: memalign() memory is
   released with free().  The expansion has no trailing ';' (the caller
   supplies it), so the macro is usable in if/else like a function call. */
#define _Offload_shared_aligned_free(ptr) free(ptr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
extern int _Offload_signaled(int index, void *signal);
|
||||
extern void _Offload_report(int val);
|
||||
extern int _Offload_find_associated_mic_memory(
|
||||
int target,
|
||||
const void* cpu_addr,
|
||||
void** cpu_base_addr,
|
||||
uint64_t* buf_length,
|
||||
void** mic_addr,
|
||||
uint64_t* mic_buf_start_offset,
|
||||
int* is_static
|
||||
);
|
||||
|
||||
/* OpenMP API */
|
||||
|
||||
@ -343,7 +395,11 @@ namespace __offload {
|
||||
shared_allocator<void>::const_pointer) {
|
||||
/* Allocate from shared memory. */
|
||||
void *ptr = _Offload_shared_malloc(s*sizeof(T));
|
||||
if (ptr == 0) std::__throw_bad_alloc();
|
||||
#if (defined(_WIN32) || defined(_WIN64)) /* Windows */
|
||||
if (ptr == 0) throw std::bad_alloc();
|
||||
#else
|
||||
if (ptr == 0) std::__throw_bad_alloc();
|
||||
#endif
|
||||
return static_cast<pointer>(ptr);
|
||||
} /* allocate */
|
||||
|
||||
@ -355,13 +411,13 @@ namespace __offload {
|
||||
} /* deallocate */
|
||||
|
||||
template <typename _T1, typename _T2>
|
||||
inline bool operator==(const shared_allocator<_T1> &,
|
||||
inline bool operator==(const shared_allocator<_T1> &,
|
||||
const shared_allocator<_T2> &) throw() {
|
||||
return true;
|
||||
} /* operator== */
|
||||
|
||||
template <typename _T1, typename _T2>
|
||||
inline bool operator!=(const shared_allocator<_T1> &,
|
||||
inline bool operator!=(const shared_allocator<_T1> &,
|
||||
const shared_allocator<_T2> &) throw() {
|
||||
return false;
|
||||
} /* operator!= */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -40,10 +40,6 @@
|
||||
#include <string.h>
|
||||
#include <memory.h>
|
||||
|
||||
#if (defined(LINUX) || defined(FREEBSD)) && !defined(__INTEL_COMPILER)
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
#include "offload.h"
|
||||
#include "offload_table.h"
|
||||
#include "offload_trace.h"
|
||||
@ -65,22 +61,24 @@
|
||||
// The debug routines
|
||||
|
||||
// Host console and file logging
|
||||
extern int console_enabled;
|
||||
extern int offload_report_level;
|
||||
DLL_LOCAL extern int console_enabled;
|
||||
DLL_LOCAL extern int offload_report_level;
|
||||
|
||||
#define OFFLOAD_DO_TRACE (offload_report_level == 3)
|
||||
|
||||
extern const char *prefix;
|
||||
extern int offload_number;
|
||||
DLL_LOCAL extern const char *prefix;
|
||||
DLL_LOCAL extern int offload_number;
|
||||
#if !HOST_LIBRARY
|
||||
extern int mic_index;
|
||||
DLL_LOCAL extern int mic_index;
|
||||
#define OFFLOAD_DO_TRACE (offload_report_level == 3)
|
||||
#else
|
||||
#define OFFLOAD_DO_TRACE (offload_report_enabled && (offload_report_level == 3))
|
||||
#endif
|
||||
|
||||
#if HOST_LIBRARY
|
||||
void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
|
||||
void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
|
||||
void offload_report_free_data(OffloadHostTimerData * timer_data);
|
||||
void Offload_Timer_Print(void);
|
||||
DLL_LOCAL void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
|
||||
DLL_LOCAL void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
|
||||
DLL_LOCAL void offload_report_free_data(OffloadHostTimerData * timer_data);
|
||||
DLL_LOCAL void Offload_Timer_Print(void);
|
||||
|
||||
#ifndef TARGET_WINNT
|
||||
#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
|
||||
@ -130,7 +128,7 @@ void Offload_Timer_Print(void);
|
||||
#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
|
||||
__dump_bytes(level, a, b)
|
||||
|
||||
extern void __dump_bytes(
|
||||
DLL_LOCAL extern void __dump_bytes(
|
||||
int level,
|
||||
const void *data,
|
||||
int len
|
||||
@ -156,6 +154,17 @@ extern void *OFFLOAD_MALLOC(size_t size, size_t align);
|
||||
|
||||
// The Marshaller
|
||||
|
||||
// Flags describing an offload
|
||||
|
||||
//! Per-offload flag word, accessible either as a whole or bit by bit.
union OffloadFlags {
    //! All flags viewed as one 32-bit value
    uint32_t flags;

    //! Individual flag bits
    struct {
        //! Fortran traceback requested
        uint32_t fortran_traceback : 1;
        //! OpenMP asynchronous offload
        uint32_t omp_async         : 1;
    } bits;
};
|
||||
|
||||
//! \enum Indicator for the type of entry on an offload item list.
|
||||
enum OffloadItemType {
|
||||
c_data = 1, //!< Plain data
|
||||
@ -203,6 +212,44 @@ enum OffloadParameterType {
|
||||
c_parameter_inout //!< Variable listed in "inout" clause
|
||||
};
|
||||
|
||||
|
||||
//! Flag word attached to an offloaded variable.
//! The anonymous struct exposes each flag by name; `bits` exposes the
//! same storage as a single 32-bit value.
union varDescFlags {
    struct {
        uint32_t is_static      : 1;  //!< source variable has persistent storage
        uint32_t is_static_dstn : 1;  //!< destination variable has persistent storage
        uint32_t has_length     : 1;  //!< has length for c_dv && c_dv_ptr
        uint32_t is_stack_buf   : 1;  //!< persisted local scalar is in stack buffer
        uint32_t targetptr      : 1;  //!< "targetptr" modifier used
        uint32_t preallocated   : 1;  //!< "preallocated" modifier used
        uint32_t is_pointer     : 1;  //!< Needs documentation

        uint32_t sink_addr      : 1;  //!< buffer address is sent in data
        uint32_t alloc_disp     : 1;  //!< alloc displacement is sent in data
        uint32_t is_noncont_src : 1;  //!< source data is noncontiguous
        uint32_t is_noncont_dst : 1;  //!< destination data is noncontiguous

        uint32_t always_copy    : 1;  //!< "OpenMP always" modifier used
        uint32_t always_delete  : 1;  //!< "OpenMP delete" modifier used
        uint32_t pin            : 1;  //!< CPU memory pinning/unpinning operation
    };
    //! All flags viewed as one 32-bit value
    uint32_t bits;
};
|
||||
|
||||
//! An Offload Variable descriptor
|
||||
struct VarDesc {
|
||||
//! OffloadItemTypes of source and destination
|
||||
@ -230,27 +277,7 @@ struct VarDesc {
|
||||
/*! Used by runtime as offset to data from start of MIC buffer */
|
||||
uint32_t mic_offset;
|
||||
//! Flags describing this variable
|
||||
union {
|
||||
struct {
|
||||
//! source variable has persistent storage
|
||||
uint32_t is_static : 1;
|
||||
//! destination variable has persistent storage
|
||||
uint32_t is_static_dstn : 1;
|
||||
//! has length for c_dv && c_dv_ptr
|
||||
uint32_t has_length : 1;
|
||||
//! persisted local scalar is in stack buffer
|
||||
uint32_t is_stack_buf : 1;
|
||||
//! buffer address is sent in data
|
||||
uint32_t sink_addr : 1;
|
||||
//! alloc displacement is sent in data
|
||||
uint32_t alloc_disp : 1;
|
||||
//! source data is noncontiguous
|
||||
uint32_t is_noncont_src : 1;
|
||||
//! destination data is noncontiguous
|
||||
uint32_t is_noncont_dst : 1;
|
||||
};
|
||||
uint32_t bits;
|
||||
} flags;
|
||||
varDescFlags flags;
|
||||
//! Not used by compiler; set to 0
|
||||
/*! Used by runtime as offset to base from data stored in a buffer */
|
||||
int64_t offset;
|
||||
@ -472,4 +499,16 @@ struct FunctionDescriptor
|
||||
// Pointer to OffloadDescriptor.
|
||||
typedef struct OffloadDescriptor *OFFLOAD;
|
||||
|
||||
// Thread placement policies that can be requested for a stream
enum affinity_type {
    affinity_compact = 0,
    affinity_scatter = 1
};
|
||||
// Affinity request for a stream: a 16-word CPU mask, the affinity type
// and the core/thread counts.
struct affinity_spec {
    uint64_t sink_mask[16];   // flat CPU mask, 16 x 64 bits
    int      affinity_type;   // holds an affinity_compact/affinity_scatter value
    int      num_cores;       // number of cores
    int      num_threads;     // number of threads
};
|
||||
|
||||
#endif // OFFLOAD_COMMON_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -37,6 +37,14 @@
|
||||
|
||||
#include "offload_host.h"
|
||||
#include "offload_table.h"
|
||||
#include "offload_iterator.h"
|
||||
|
||||
// Static members of Stream class must be described somewhere.
|
||||
// These members describe the list of all streams defined in the program
|
||||
// via call to _Offload_stream_create.
|
||||
uint64_t Stream::m_streams_count = 0;
|
||||
StreamMap Stream::all_streams;
|
||||
mutex_t Stream::m_stream_lock;
|
||||
|
||||
const char* Engine::m_func_names[Engine::c_funcs_total] =
|
||||
{
|
||||
@ -47,7 +55,8 @@ const char* Engine::m_func_names[Engine::c_funcs_total] =
|
||||
#endif // MYO_SUPPORT
|
||||
"server_init",
|
||||
"server_var_table_size",
|
||||
"server_var_table_copy"
|
||||
"server_var_table_copy",
|
||||
"server_set_stream_affinity"
|
||||
};
|
||||
|
||||
// Symbolic representation of system signals. Fix for CQ233593
|
||||
@ -115,6 +124,7 @@ void Engine::init_process(void)
|
||||
COIENGINE engine;
|
||||
COIRESULT res;
|
||||
const char **environ;
|
||||
char buf[4096]; // For exe path name
|
||||
|
||||
// create environment for the target process
|
||||
environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
|
||||
@ -127,39 +137,147 @@ void Engine::init_process(void)
|
||||
// Create execution context in the specified device
|
||||
OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
|
||||
m_physical_index);
|
||||
res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
|
||||
res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine);
|
||||
check_result(res, c_get_engine_handle, m_index, res);
|
||||
|
||||
// Target executable should be available by the time when we
|
||||
// attempt to initialize the device
|
||||
if (__target_exe == 0) {
|
||||
LIBOFFLOAD_ERROR(c_no_target_exe);
|
||||
exit(1);
|
||||
// Get engine info on threads and cores.
|
||||
// The values of core number and thread number will be used later at stream
|
||||
// creation by call to _Offload_stream_create(device,number_of_cpus).
|
||||
|
||||
COI_ENGINE_INFO engine_info;
|
||||
|
||||
res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
|
||||
check_result(res, c_get_engine_info, m_index, res);
|
||||
|
||||
// m_cpus bitset has 1 for available thread. At the beginning all threads
|
||||
// are available and m_cpus(i) is set to
|
||||
// 1 for i = [0...engine_info.NumThreads].
|
||||
m_cpus.reset();
|
||||
for (int i = 0; i < engine_info.NumThreads; i++) {
|
||||
m_cpus.set(i);
|
||||
}
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Loading target executable \"%s\" from %p, size %lld\n",
|
||||
__target_exe->name, __target_exe->data, __target_exe->size);
|
||||
// The following values will be used at pipeline creation for streams
|
||||
m_num_cores = engine_info.NumCores;
|
||||
m_num_threads = engine_info.NumThreads;
|
||||
|
||||
res = COI::ProcessCreateFromMemory(
|
||||
engine, // in_Engine
|
||||
__target_exe->name, // in_pBinaryName
|
||||
__target_exe->data, // in_pBinaryBuffer
|
||||
__target_exe->size, // in_BinaryBufferLength,
|
||||
0, // in_Argc
|
||||
0, // in_ppArgv
|
||||
environ == 0, // in_DupEnv
|
||||
environ, // in_ppAdditionalEnv
|
||||
mic_proxy_io, // in_ProxyActive
|
||||
mic_proxy_fs_root, // in_ProxyfsRoot
|
||||
mic_buffer_size, // in_BufferSpace
|
||||
mic_library_path, // in_LibrarySearchPath
|
||||
__target_exe->origin, // in_FileOfOrigin
|
||||
__target_exe->offset, // in_FileOfOriginOffset
|
||||
&m_process // out_pProcess
|
||||
);
|
||||
// Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
|
||||
// Only the value 2 is supported in 16.0
|
||||
if (mic_dma_channel_count == 2) {
|
||||
if (COI::ProcessConfigureDMA) {
|
||||
// Set DMA channels using COI API
|
||||
COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE);
|
||||
}
|
||||
else {
|
||||
// Set environment variable COI_DMA_CHANNEL_COUNT
|
||||
// use putenv instead of setenv as Windows has no setenv.
|
||||
// Note: putenv requires its argument can't be freed or modified.
|
||||
// So no free after call to putenv or elsewhere.
|
||||
char * env_var = (char*) malloc(sizeof("COI_DMA_CHANNEL_COUNT=2" + 1));
|
||||
sprintf(env_var, "COI_DMA_CHANNEL_COUNT=2");
|
||||
putenv(env_var);
|
||||
}
|
||||
}
|
||||
|
||||
// Target executable is not available then use compiler provided offload_main
|
||||
if (__target_exe == 0) {
|
||||
if (mic_device_main == 0)
|
||||
LIBOFFLOAD_ERROR(c_report_no_host_exe);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Loading target executable %s\n",mic_device_main);
|
||||
|
||||
res = COI::ProcessCreateFromFile(
|
||||
engine, // in_Engine
|
||||
mic_device_main, // in_pBinaryName
|
||||
0, // in_Argc
|
||||
0, // in_ppArgv
|
||||
environ == 0, // in_DupEnv
|
||||
environ, // in_ppAdditionalEnv
|
||||
mic_proxy_io, // in_ProxyActive
|
||||
mic_proxy_fs_root, // in_ProxyfsRoot
|
||||
mic_buffer_size, // in_BufferSpace
|
||||
mic_library_path, // in_LibrarySearchPath
|
||||
&m_process // out_pProcess
|
||||
);
|
||||
}
|
||||
else {
|
||||
// Target executable should be available by the time when we
|
||||
// attempt to initialize the device
|
||||
|
||||
// Need the full path of the FAT exe for VTUNE
|
||||
{
|
||||
#ifndef TARGET_WINNT
|
||||
ssize_t len = readlink("/proc/self/exe", buf,1000);
|
||||
#else
|
||||
int len = GetModuleFileName(NULL, buf,1000);
|
||||
#endif // TARGET_WINNT
|
||||
if (len == -1) {
|
||||
LIBOFFLOAD_ERROR(c_report_no_host_exe);
|
||||
exit(1);
|
||||
}
|
||||
else if (len > 999) {
|
||||
LIBOFFLOAD_ERROR(c_report_path_buff_overflow);
|
||||
exit(1);
|
||||
}
|
||||
buf[len] = '\0';
|
||||
}
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Loading target executable \"%s\" from %p, size %lld, host file %s\n",
|
||||
__target_exe->name, __target_exe->data, __target_exe->size,
|
||||
buf);
|
||||
|
||||
res = COI::ProcessCreateFromMemory(
|
||||
engine, // in_Engine
|
||||
__target_exe->name, // in_pBinaryName
|
||||
__target_exe->data, // in_pBinaryBuffer
|
||||
__target_exe->size, // in_BinaryBufferLength,
|
||||
0, // in_Argc
|
||||
0, // in_ppArgv
|
||||
environ == 0, // in_DupEnv
|
||||
environ, // in_ppAdditionalEnv
|
||||
mic_proxy_io, // in_ProxyActive
|
||||
mic_proxy_fs_root, // in_ProxyfsRoot
|
||||
mic_buffer_size, // in_BufferSpace
|
||||
mic_library_path, // in_LibrarySearchPath
|
||||
buf, // in_FileOfOrigin
|
||||
-1, // in_FileOfOriginOffset use -1 to indicate to
|
||||
// COI that it is a FAT binary
|
||||
&m_process // out_pProcess
|
||||
);
|
||||
}
|
||||
check_result(res, c_process_create, m_index, res);
|
||||
|
||||
if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) {
|
||||
// available only in MPSS 4.2 and greater
|
||||
if (COI::ProcessSetCacheSize != 0 ) {
|
||||
int flags;
|
||||
// Need compiler to use MPSS 3.2 or greater to get these
|
||||
// definition so currently hardcoding it
|
||||
// COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC;
|
||||
flags = 0x00020002;
|
||||
res = COI::ProcessSetCacheSize(
|
||||
m_process, // in_Process
|
||||
mic_2m_buffer_size, // in_HugePagePoolSize
|
||||
flags, // inHugeFlags
|
||||
mic_4k_buffer_size, // in_SmallPagePoolSize
|
||||
flags, // inSmallFlags
|
||||
0, // in_NumDependencies
|
||||
0, // in_pDependencies
|
||||
0 // out_PCompletion
|
||||
);
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Reserve target buffers 4K pages = %d 2M pages = %d\n",
|
||||
mic_4k_buffer_size, mic_2m_buffer_size);
|
||||
check_result(res, c_process_set_cache_size, m_index, res);
|
||||
}
|
||||
else {
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Reserve target buffers not supported in current MPSS\n");
|
||||
}
|
||||
}
|
||||
|
||||
// get function handles
|
||||
res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
|
||||
m_func_names, m_funcs);
|
||||
@ -226,8 +344,9 @@ void Engine::load_libraries()
|
||||
// load libraries collected so far
|
||||
for (TargetImageList::iterator it = m_images.begin();
|
||||
it != m_images.end(); it++) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
|
||||
it->name, it->data, it->size);
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Loading library \"%s\" from %p, size %llu, host file %s\n",
|
||||
it->name, it->data, it->size, it->origin);
|
||||
|
||||
// load library to the device
|
||||
COILIBRARY lib;
|
||||
@ -238,9 +357,10 @@ void Engine::load_libraries()
|
||||
it->name,
|
||||
mic_library_path,
|
||||
it->origin,
|
||||
it->offset,
|
||||
(it->origin) ? -1 : 0,
|
||||
COI_LOADLIBRARY_V1_FLAGS,
|
||||
&lib);
|
||||
m_dyn_libs.push_front(DynLib(it->name, it->data, lib));
|
||||
|
||||
if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
|
||||
check_result(res, c_load_library, m_index, res);
|
||||
@ -249,6 +369,27 @@ void Engine::load_libraries()
|
||||
m_images.clear();
|
||||
}
|
||||
|
||||
void Engine::unload_library(const void *data, const char *name)
|
||||
{
|
||||
if (m_process == 0) {
|
||||
return;
|
||||
}
|
||||
for (DynLibList::iterator it = m_dyn_libs.begin();
|
||||
it != m_dyn_libs.end(); it++) {
|
||||
if (it->data == data) {
|
||||
COIRESULT res;
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Unloading library \"%s\"\n",name);
|
||||
res = COI::ProcessUnloadLibrary(m_process,it->lib);
|
||||
m_dyn_libs.erase(it);
|
||||
if (res != COI_SUCCESS) {
|
||||
check_result(res, c_unload_library, m_index, res);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool target_entry_cmp(
|
||||
const VarList::BufEntry &l,
|
||||
const VarList::BufEntry &r
|
||||
@ -273,8 +414,9 @@ void Engine::init_ptr_data(void)
|
||||
COIEVENT event;
|
||||
|
||||
// Prepare table of host entries
|
||||
std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
|
||||
__offload_vars.end());
|
||||
std::vector<const VarTable::Entry*> host_table(
|
||||
Iterator(__offload_vars.get_head()),
|
||||
Iterator());
|
||||
|
||||
// no need to do anything further if host table is empty
|
||||
if (host_table.size() <= 0) {
|
||||
@ -348,17 +490,16 @@ void Engine::init_ptr_data(void)
|
||||
while (hi != he && ti != te) {
|
||||
int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
|
||||
if (res == 0) {
|
||||
bool is_new;
|
||||
// add matching entry to var map
|
||||
std::pair<PtrSet::iterator, bool> res =
|
||||
m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
|
||||
PtrData *ptr = insert_ptr_data((*hi)->addr, (*hi)->size, is_new);
|
||||
|
||||
// store address for new entries
|
||||
if (res.second) {
|
||||
PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
|
||||
if (is_new) {
|
||||
ptr->mic_addr = ti->addr;
|
||||
ptr->is_static = true;
|
||||
}
|
||||
|
||||
ptr->alloc_ptr_data_lock.unlock();
|
||||
hi++;
|
||||
ti++;
|
||||
}
|
||||
@ -379,6 +520,7 @@ void Engine::init_ptr_data(void)
|
||||
}
|
||||
|
||||
COIRESULT Engine::compute(
|
||||
_Offload_stream stream,
|
||||
const std::list<COIBUFFER> &buffers,
|
||||
const void* data,
|
||||
uint16_t data_size,
|
||||
@ -413,9 +555,11 @@ COIRESULT Engine::compute(
|
||||
bufs = 0;
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
COIPIPELINE pipeline = (stream == no_stream) ?
|
||||
get_pipeline() :
|
||||
get_pipeline(stream);
|
||||
// start computation
|
||||
res = COI::PipelineRunFunction(get_pipeline(),
|
||||
res = COI::PipelineRunFunction(pipeline,
|
||||
m_funcs[c_func_compute],
|
||||
num_bufs, bufs, flags,
|
||||
num_deps, deps,
|
||||
@ -528,12 +672,214 @@ COIPIPELINE Engine::get_pipeline(void)
|
||||
// create pipeline for this thread
|
||||
res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
|
||||
check_result(res, c_pipeline_create, m_index, res);
|
||||
|
||||
thread->set_pipeline(m_index, pipeline);
|
||||
}
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Look up the stream registered under `handle` in all_streams while
// holding m_stream_lock. Returns the stored Stream pointer, or 0 when the
// handle is unknown. With `remove` set, a found entry is also erased
// from the map (the Stream object itself is not deleted here).
Stream* Stream::find_stream(uint64_t handle, bool remove)
{
    m_stream_lock.lock();

    StreamMap::iterator pos = all_streams.find(handle);
    const bool present = (pos != all_streams.end());
    Stream *result = present ? pos->second : 0;
    if (present && remove) {
        all_streams.erase(pos);
    }

    m_stream_lock.unlock();
    return result;
}
|
||||
|
||||
// Return the COI pipeline bound to the stream identified by `handle`,
// creating it on first use.
//
// On first use this:
//   - reports an error and aborts if the handle is unknown or the pipeline
//     limit COI_PIPELINE_MAX_PIPELINES is exceeded;
//   - starts the device process if it is not running yet;
//   - reserves the stream's requested number of hardware threads from
//     m_cpus (thread 0 is never handed out) and builds the CPU mask;
//   - creates the pipeline with that mask and runs the
//     c_func_set_stream_affinity function on it, waiting for completion.
// The affinity type is "compact" unless OFFLOAD_STREAM_AFFINITY selects
// "scatter" (compared case-insensitively).
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
{
    Stream * stream = Stream::find_stream(handle, false);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }

    COIPIPELINE pipeline = stream->get_pipeline();

    if (pipeline == 0) {
        COIRESULT     res;
        int           proc_num;
        COI_CPU_MASK  in_Mask ;

#ifndef TARGET_WINNT
        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
#else // TARGET_WINNT
        proc_num = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
            LIBOFFLOAD_ABORT;
        }

        m_stream_lock.lock();

        // start process if not done yet
        if (m_process == 0) {
            init_process();
        }

        // create CPUmask
        res = COI::PipelineClearCPUMask(in_Mask);
        check_result(res, c_clear_cpu_mask, m_index, res);

        int stream_cpu_num = stream->get_cpu_number();

        stream->m_stream_cpus.reset();

        // NOTE(review): assumes m_num_cores is non-zero once init_process
        // has run -- confirm.
        int threads_per_core = m_num_threads / m_num_cores;

        // The "stream_cpu_num" available threads is set in mask.
        // Available threads are defined by examining of m_cpus bitset.
        // We skip thread 0 .
        for (int i = 1; i < m_num_threads; i++) {
            // for available thread i m_cpus[i] is equal to 1
            if (m_cpus[i]) {
                res = COI::PipelineSetCPUMask(m_process,
                                              i / threads_per_core,
                                              i % threads_per_core,
                                              in_Mask);

                check_result(res, c_set_cpu_mask, res);
                // mark thread i as nonavailable
                m_cpus.set(i,0);
                // Mark thread i as given for the stream.
                // In case of stream destroying by call to
                // _Offload_stream_destroy we can mark the thread i as
                // available.
                stream->m_stream_cpus.set(i);
                if (--stream_cpu_num <= 0) {
                    break;
                }
            }
        }

        // if stream_cpu_num is greater than 0 there are not enough
        // available threads
        if (stream_cpu_num > 0) {
            LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
            LIBOFFLOAD_ABORT;
        }
        // create pipeline for this thread
        OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
            in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
            in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
            in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
            in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
        res = COI::PipelineCreate(m_process, in_Mask,
                                  mic_stack_size, &pipeline);
        check_result(res, c_pipeline_create, m_index, res);

        // Set stream's affinities
        {
            struct affinity_spec affinity_spec;
            char* affinity_type;
            int i;

            // "compact" by default
            affinity_spec.affinity_type = affinity_compact;

            // Check if user has specified type of affinity
            if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
                                        NULL)
            {
                char affinity_str[16];
                int affinity_str_len;

                OFFLOAD_DEBUG_TRACE(2,
                    "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
                    affinity_type);

                // Set type of affinity requested
                affinity_str_len = strlen(affinity_type);
                for (i=0; i<affinity_str_len && i<15; i++)
                {
                    // tolower() is only defined for EOF and values
                    // representable as unsigned char; environment text may
                    // contain bytes that are negative as plain char.
                    affinity_str[i] = (char) tolower(
                        (unsigned char) affinity_type[i]);
                }
                affinity_str[i] = '\0';
                if (strcmp(affinity_str, "compact") == 0) {
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                } else if (strcmp(affinity_str, "scatter") == 0) {
                    affinity_spec.affinity_type = affinity_scatter;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
                } else {
                    LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                }
            }
            // Make flat copy of sink mask because COI's mask is opaque
            for (i=0; i<16; i++) {
                affinity_spec.sink_mask[i] = in_Mask[i];
            }
            // Set number of cores and threads
            affinity_spec.num_cores = m_num_cores;
            affinity_spec.num_threads = m_num_threads;

            COIEVENT event;
            res = COI::PipelineRunFunction(pipeline,
                                   m_funcs[c_func_set_stream_affinity],
                                   0, 0, 0,
                                   0, 0,
                                   &affinity_spec, sizeof(affinity_spec),
                                   0, 0,
                                   &event);
            check_result(res, c_pipeline_run_func, m_index, res);

            res = COI::EventWait(1, &event, -1, 1, 0, 0);
            check_result(res, c_event_wait, res);
        }

        m_stream_lock.unlock();
        stream->set_pipeline(pipeline);
    }
    return pipeline;
}
|
||||
|
||||
// Destroy the stream identified by `handle`: remove it from the stream
// registry, return the hardware threads it held to m_cpus, and delete the
// Stream object. An unknown handle is reported as an error followed by
// LIBOFFLOAD_ABORT.
void Engine::stream_destroy(_Offload_stream handle)
{
    // look the stream up and unregister it in one step
    Stream * victim = Stream::find_stream(handle, true);

    if (!victim) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }
    else {
        // make the stream's cpus available again
        int cpu = 0;
        while (cpu < m_num_threads) {
            if (victim->m_stream_cpus.test(cpu)) {
                m_cpus.set(cpu);
            }
            cpu++;
        }
        delete victim;
    }
}
|
||||
|
||||
uint64_t Engine::get_thread_id(void)
|
||||
{
|
||||
Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
|
||||
if (thread == 0) {
|
||||
thread = new Thread(&m_proc_number);
|
||||
thread_setspecific(mic_thread_key, thread);
|
||||
}
|
||||
|
||||
return reinterpret_cast<uint64_t>(thread);
|
||||
}
|
||||
|
||||
AutoSet& Engine::get_auto_vars(void)
|
||||
{
|
||||
Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -32,13 +32,16 @@
|
||||
#define OFFLOAD_ENGINE_H_INCLUDED
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include "offload_common.h"
|
||||
#include "coi/coi_client.h"
|
||||
|
||||
#define SIGNAL_IS_REMOVED ((OffloadDescriptor *)-1)
|
||||
const int64_t no_stream = -1;
|
||||
|
||||
// Address range
|
||||
class MemRange {
|
||||
public:
|
||||
@ -157,6 +160,50 @@ private:
|
||||
|
||||
typedef std::list<PtrData*> PtrDataList;
|
||||
|
||||
class PtrDataTable {
|
||||
public:
|
||||
typedef std::set<PtrData> PtrSet;
|
||||
|
||||
PtrData* find_ptr_data(const void *ptr) {
|
||||
m_ptr_lock.lock();
|
||||
PtrSet::iterator res = list.find(PtrData(ptr, 0));
|
||||
|
||||
m_ptr_lock.unlock();
|
||||
if (res == list.end()) {
|
||||
return 0;
|
||||
}
|
||||
return const_cast<PtrData*>(res.operator->());
|
||||
}
|
||||
|
||||
PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
|
||||
m_ptr_lock.lock();
|
||||
std::pair<PtrSet::iterator, bool> res =
|
||||
list.insert(PtrData(ptr, len));
|
||||
|
||||
PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
|
||||
m_ptr_lock.unlock();
|
||||
|
||||
is_new = res.second;
|
||||
if (is_new) {
|
||||
// It's necessary to lock as soon as possible.
|
||||
// unlock must be done at call site of insert_ptr_data at
|
||||
// branch for is_new
|
||||
ptr_data->alloc_ptr_data_lock.lock();
|
||||
}
|
||||
return ptr_data;
|
||||
}
|
||||
|
||||
void remove_ptr_data(const void *ptr) {
|
||||
m_ptr_lock.lock();
|
||||
list.erase(PtrData(ptr, 0));
|
||||
m_ptr_lock.unlock();
|
||||
}
|
||||
private:
|
||||
|
||||
PtrSet list;
|
||||
mutex_t m_ptr_lock;
|
||||
};
|
||||
|
||||
// Data associated with automatic variable
|
||||
class AutoData {
|
||||
public:
|
||||
@ -186,7 +233,15 @@ public:
|
||||
return _InterlockedDecrement(&ref_count);
|
||||
#endif // TARGET_WINNT
|
||||
}
|
||||
|
||||
|
||||
long nullify_reference() {
|
||||
#ifndef TARGET_WINNT
|
||||
return __sync_lock_test_and_set(&ref_count, 0);
|
||||
#else // TARGET_WINNT
|
||||
return _InterlockedExchange(&ref_count,0);
|
||||
#endif // TARGET_WINNT
|
||||
}
|
||||
|
||||
long get_reference() const {
|
||||
return ref_count;
|
||||
}
|
||||
@ -226,18 +281,39 @@ struct TargetImage
|
||||
|
||||
typedef std::list<TargetImage> TargetImageList;
|
||||
|
||||
// dynamic library and Image associated with lib
|
||||
struct DynLib
|
||||
{
|
||||
DynLib(const char *_name, const void *_data,
|
||||
COILIBRARY _lib) :
|
||||
name(_name), data(_data), lib(_lib)
|
||||
{}
|
||||
// library name
|
||||
const char* name;
|
||||
|
||||
// contents
|
||||
const void* data;
|
||||
|
||||
COILIBRARY lib;
|
||||
};
|
||||
typedef std::list<DynLib> DynLibList;
|
||||
|
||||
// Data associated with persistent auto objects
|
||||
struct PersistData
|
||||
{
|
||||
PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
|
||||
stack_cpu_addr(addr), routine_id(routine_num)
|
||||
PersistData(const void *addr, uint64_t routine_num,
|
||||
uint64_t size, uint64_t thread) :
|
||||
stack_cpu_addr(addr), routine_id(routine_num), thread_id(thread)
|
||||
{
|
||||
stack_ptr_data = new PtrData(0, size);
|
||||
}
|
||||
// 1-st key value - begining of the stack at CPU
|
||||
// 1-st key value - beginning of the stack at CPU
|
||||
const void * stack_cpu_addr;
|
||||
// 2-nd key value - identifier of routine invocation at CPU
|
||||
uint64_t routine_id;
|
||||
// 3-rd key value - thread identifier
|
||||
uint64_t thread_id;
|
||||
|
||||
// corresponded PtrData; only stack_ptr_data->mic_buf is used
|
||||
PtrData * stack_ptr_data;
|
||||
// used to get offset of the variable in stack buffer
|
||||
@ -246,6 +322,75 @@ struct PersistData
|
||||
|
||||
typedef std::list<PersistData> PersistDataList;
|
||||
|
||||
// Data associated with stream
|
||||
struct Stream
|
||||
{
|
||||
Stream(int device, int num_of_cpus) :
|
||||
m_number_of_cpus(num_of_cpus), m_pipeline(0), m_last_offload(0),
|
||||
m_device(device)
|
||||
{}
|
||||
~Stream() {
|
||||
if (m_pipeline) {
|
||||
COI::PipelineDestroy(m_pipeline);
|
||||
}
|
||||
}
|
||||
|
||||
COIPIPELINE get_pipeline(void) {
|
||||
return(m_pipeline);
|
||||
}
|
||||
|
||||
int get_device(void) {
|
||||
return(m_device);
|
||||
}
|
||||
|
||||
int get_cpu_number(void) {
|
||||
return(m_number_of_cpus);
|
||||
}
|
||||
|
||||
void set_pipeline(COIPIPELINE pipeline) {
|
||||
m_pipeline = pipeline;
|
||||
}
|
||||
|
||||
OffloadDescriptor* get_last_offload(void) {
|
||||
return(m_last_offload);
|
||||
}
|
||||
|
||||
void set_last_offload(OffloadDescriptor* last_offload) {
|
||||
m_last_offload = last_offload;
|
||||
}
|
||||
|
||||
static Stream* find_stream(uint64_t handle, bool remove);
|
||||
|
||||
static _Offload_stream add_stream(int device, int number_of_cpus) {
|
||||
m_stream_lock.lock();
|
||||
all_streams[++m_streams_count] = new Stream(device, number_of_cpus);
|
||||
m_stream_lock.unlock();
|
||||
return(m_streams_count);
|
||||
}
|
||||
|
||||
typedef std::map<uint64_t, Stream*> StreamMap;
|
||||
|
||||
static uint64_t m_streams_count;
|
||||
static StreamMap all_streams;
|
||||
static mutex_t m_stream_lock;
|
||||
|
||||
int m_device;
|
||||
|
||||
// number of cpus
|
||||
int m_number_of_cpus;
|
||||
|
||||
// The pipeline associated with the stream
|
||||
COIPIPELINE m_pipeline;
|
||||
|
||||
// The last offload occured via the stream
|
||||
OffloadDescriptor* m_last_offload;
|
||||
|
||||
// Cpus used by the stream
|
||||
std::bitset<COI_MAX_HW_THREADS> m_stream_cpus;
|
||||
};
|
||||
|
||||
typedef std::map<uint64_t, Stream*> StreamMap;
|
||||
|
||||
// class representing a single engine
|
||||
struct Engine {
|
||||
friend void __offload_init_library_once(void);
|
||||
@ -275,9 +420,14 @@ struct Engine {
|
||||
return m_process;
|
||||
}
|
||||
|
||||
uint64_t get_thread_id(void);
|
||||
|
||||
// initialize device
|
||||
void init(void);
|
||||
|
||||
// unload library
|
||||
void unload_library(const void *data, const char *name);
|
||||
|
||||
// add new library
|
||||
void add_lib(const TargetImage &lib)
|
||||
{
|
||||
@ -288,6 +438,7 @@ struct Engine {
|
||||
}
|
||||
|
||||
COIRESULT compute(
|
||||
_Offload_stream stream,
|
||||
const std::list<COIBUFFER> &buffers,
|
||||
const void* data,
|
||||
uint16_t data_size,
|
||||
@ -323,36 +474,28 @@ struct Engine {
|
||||
// Memory association table
|
||||
//
|
||||
PtrData* find_ptr_data(const void *ptr) {
|
||||
m_ptr_lock.lock();
|
||||
PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
|
||||
m_ptr_lock.unlock();
|
||||
if (res == m_ptr_set.end()) {
|
||||
return 0;
|
||||
}
|
||||
return const_cast<PtrData*>(res.operator->());
|
||||
return m_ptr_set.find_ptr_data(ptr);
|
||||
}
|
||||
|
||||
// Looks ptr up in the target-pointer table (m_targetptr_set) and returns
// the result of its find_ptr_data() unchanged.
PtrData* find_targetptr_data(const void *ptr)
{
    PtrData *entry = m_targetptr_set.find_ptr_data(ptr);
    return entry;
}
|
||||
|
||||
PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
|
||||
m_ptr_lock.lock();
|
||||
std::pair<PtrSet::iterator, bool> res =
|
||||
m_ptr_set.insert(PtrData(ptr, len));
|
||||
PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
|
||||
m_ptr_lock.unlock();
|
||||
return m_ptr_set.insert_ptr_data(ptr, len, is_new);
|
||||
}
|
||||
|
||||
is_new = res.second;
|
||||
if (is_new) {
|
||||
// It's necessary to lock as soon as possible.
|
||||
// unlock must be done at call site of insert_ptr_data at
|
||||
// branch for is_new
|
||||
ptr_data->alloc_ptr_data_lock.lock();
|
||||
}
|
||||
return ptr_data;
|
||||
// Forwards to m_targetptr_set.insert_ptr_data(); is_new is passed through
// by reference and is filled in by that call.
PtrData* insert_targetptr_data(const void *ptr, uint64_t len, bool &is_new)
{
    PtrData *entry = m_targetptr_set.insert_ptr_data(ptr, len, is_new);
    return entry;
}
|
||||
|
||||
void remove_ptr_data(const void *ptr) {
|
||||
m_ptr_lock.lock();
|
||||
m_ptr_set.erase(PtrData(ptr, 0));
|
||||
m_ptr_lock.unlock();
|
||||
m_ptr_set.remove_ptr_data(ptr);
|
||||
}
|
||||
|
||||
// Removes the entry for ptr from the target-pointer table by delegating
// to m_targetptr_set.remove_ptr_data().
void remove_targetptr_data(const void *ptr)
{
    m_targetptr_set.remove_ptr_data(ptr);
}
|
||||
|
||||
//
|
||||
@ -396,7 +539,7 @@ struct Engine {
|
||||
if (it != m_signal_map.end()) {
|
||||
desc = it->second;
|
||||
if (remove) {
|
||||
m_signal_map.erase(it);
|
||||
it->second = SIGNAL_IS_REMOVED;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -405,6 +548,14 @@ struct Engine {
|
||||
return desc;
|
||||
}
|
||||
|
||||
void stream_destroy(_Offload_stream handle);
|
||||
|
||||
COIPIPELINE get_pipeline(_Offload_stream stream);
|
||||
|
||||
// Returns m_stream_map by value, i.e. the caller receives its own copy of
// the handle -> Stream* map.
StreamMap get_stream_map()
{
    return StreamMap(m_stream_map);
}
|
||||
|
||||
// stop device process
|
||||
void fini_process(bool verbose);
|
||||
|
||||
@ -417,6 +568,11 @@ private:
|
||||
{}
|
||||
|
||||
~Engine() {
|
||||
for (StreamMap::iterator it = m_stream_map.begin();
|
||||
it != m_stream_map.end(); it++) {
|
||||
Stream * stream = it->second;
|
||||
delete stream;
|
||||
}
|
||||
if (m_process != 0) {
|
||||
fini_process(false);
|
||||
}
|
||||
@ -469,14 +625,24 @@ private:
|
||||
// List of libraries to be loaded
|
||||
TargetImageList m_images;
|
||||
|
||||
// var table
|
||||
PtrSet m_ptr_set;
|
||||
mutex_t m_ptr_lock;
|
||||
// var tables
|
||||
PtrDataTable m_ptr_set;
|
||||
PtrDataTable m_targetptr_set;
|
||||
|
||||
// signals
|
||||
SignalMap m_signal_map;
|
||||
mutex_t m_signal_lock;
|
||||
|
||||
// streams
|
||||
StreamMap m_stream_map;
|
||||
mutex_t m_stream_lock;
|
||||
int m_num_cores;
|
||||
int m_num_threads;
|
||||
std::bitset<COI_MAX_HW_THREADS> m_cpus;
|
||||
|
||||
// List of dynamic libraries to be registered
|
||||
DynLibList m_dyn_libs;
|
||||
|
||||
// constants for accessing device function handles
|
||||
enum {
|
||||
c_func_compute = 0,
|
||||
@ -487,6 +653,7 @@ private:
|
||||
c_func_init,
|
||||
c_func_var_table_size,
|
||||
c_func_var_table_copy,
|
||||
c_func_set_stream_affinity,
|
||||
c_funcs_total
|
||||
};
|
||||
static const char* m_func_names[c_funcs_total];
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -146,7 +146,7 @@ void MicEnvVar::add_env_var(
|
||||
else {
|
||||
card = get_card(card_number);
|
||||
if (!card) {
|
||||
// definition for new card occured
|
||||
// definition for new card occurred
|
||||
card = new CardEnvVars(card_number);
|
||||
card_spec_list.push_back(card);
|
||||
}
|
||||
@ -321,7 +321,7 @@ void MicEnvVar::mic_parse_env_var_list(
|
||||
// Collect all definitions for the card with number "card_num".
|
||||
// The returned result is vector of string pointers defining one
|
||||
// environment variable. The vector is terminated by NULL pointer.
|
||||
// In the begining of the vector there are env vars defined as
|
||||
// In the beginning of the vector there are env vars defined as
|
||||
// <mic-prefix>_<card-number>_<var>=<value>
|
||||
// or
|
||||
// <mic-prefix>_<card-number>_ENV=<env-vars>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -32,6 +32,7 @@
|
||||
#define OFFLOAD_ENV_H_INCLUDED
|
||||
|
||||
#include <list>
|
||||
#include "offload_util.h"
|
||||
|
||||
// data structure and routines to parse MIC user environment and pass to MIC
|
||||
|
||||
@ -43,7 +44,7 @@ enum MicEnvVarKind
|
||||
c_mic_card_env // for <mic-prefix>_<card-number>_ENV
|
||||
};
|
||||
|
||||
struct MicEnvVar {
|
||||
struct DLL_LOCAL MicEnvVar {
|
||||
public:
|
||||
MicEnvVar() : prefix(0) {}
|
||||
~MicEnvVar();
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -46,8 +46,12 @@
|
||||
#include "coi/coi_client.h"
|
||||
|
||||
// MIC engines.
|
||||
extern Engine* mic_engines;
|
||||
extern uint32_t mic_engines_total;
|
||||
DLL_LOCAL extern Engine* mic_engines;
|
||||
DLL_LOCAL extern uint32_t mic_engines_total;
|
||||
|
||||
// DMA channel count used by COI and set via
|
||||
// OFFLOAD_DMA_CHANNEL_COUNT environment variable
|
||||
DLL_LOCAL extern uint32_t mic_dma_channel_count;
|
||||
|
||||
//! The target image is packed as follows.
|
||||
/*! 1. 8 bytes containing the size of the target binary */
|
||||
@ -64,6 +68,13 @@ struct Image {
|
||||
class OffloadDescriptor
|
||||
{
|
||||
public:
|
||||
enum OmpAsyncLastEventType {
|
||||
c_last_not, // not last event
|
||||
c_last_write, // the last event that is write
|
||||
c_last_read, // the last event that is read
|
||||
c_last_runfunc // the last event that is runfunction
|
||||
};
|
||||
|
||||
OffloadDescriptor(
|
||||
int index,
|
||||
_Offload_status *status,
|
||||
@ -71,7 +82,7 @@ public:
|
||||
bool is_openmp,
|
||||
OffloadHostTimerData * timer_data
|
||||
) :
|
||||
m_device(mic_engines[index % mic_engines_total]),
|
||||
m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]),
|
||||
m_is_mandatory(is_mandatory),
|
||||
m_is_openmp(is_openmp),
|
||||
m_inout_buf(0),
|
||||
@ -79,13 +90,22 @@ public:
|
||||
m_func_desc_size(0),
|
||||
m_in_deps(0),
|
||||
m_in_deps_total(0),
|
||||
m_in_deps_allocated(0),
|
||||
m_out_deps(0),
|
||||
m_out_deps_total(0),
|
||||
m_out_deps_allocated(0),
|
||||
m_vars(0),
|
||||
m_vars_extra(0),
|
||||
m_status(status),
|
||||
m_timer_data(timer_data)
|
||||
{}
|
||||
m_timer_data(timer_data),
|
||||
m_out_with_preallocated(false),
|
||||
m_preallocated_alloc(false),
|
||||
m_traceback_called(false),
|
||||
m_stream(-1),
|
||||
m_omp_async_last_event_type(c_last_not)
|
||||
{
|
||||
m_wait_all_devices = index == -1;
|
||||
}
|
||||
|
||||
~OffloadDescriptor()
|
||||
{
|
||||
@ -107,8 +127,10 @@ public:
|
||||
bool offload(const char *name, bool is_empty,
|
||||
VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
||||
const void **waits, int num_waits, const void **signal,
|
||||
int entry_id, const void *stack_addr);
|
||||
bool offload_finish();
|
||||
int entry_id, const void *stack_addr,
|
||||
OffloadFlags offload_flags);
|
||||
|
||||
bool offload_finish(bool is_traceback);
|
||||
|
||||
bool is_signaled();
|
||||
|
||||
@ -116,36 +138,60 @@ public:
|
||||
return m_timer_data;
|
||||
}
|
||||
|
||||
// Stores the stream handle in m_stream.
void set_stream(_Offload_stream stream)
{
    this->m_stream = stream;
}
|
||||
|
||||
// Returns the stream handle held in m_stream.
_Offload_stream get_stream()
{
    return m_stream;
}
|
||||
|
||||
private:
|
||||
bool wait_dependencies(const void **waits, int num_waits);
|
||||
bool offload_wrap(const char *name, bool is_empty,
|
||||
VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
||||
const void **waits, int num_waits, const void **signal,
|
||||
int entry_id, const void *stack_addr,
|
||||
OffloadFlags offload_flags);
|
||||
bool wait_dependencies(const void **waits, int num_waits,
|
||||
_Offload_stream stream);
|
||||
bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
||||
int entry_id, const void *stack_addr);
|
||||
bool setup_misc_data(const char *name);
|
||||
bool send_pointer_data(bool is_async);
|
||||
bool send_pointer_data(bool is_async, void* info);
|
||||
bool send_noncontiguous_pointer_data(
|
||||
int i,
|
||||
PtrData* src_buf,
|
||||
PtrData* dst_buf,
|
||||
COIEVENT *event);
|
||||
bool recieve_noncontiguous_pointer_data(
|
||||
COIEVENT *event,
|
||||
uint64_t &sent_data,
|
||||
uint32_t in_deps_amount,
|
||||
COIEVENT *in_deps
|
||||
);
|
||||
bool receive_noncontiguous_pointer_data(
|
||||
int i,
|
||||
char* src_data,
|
||||
COIBUFFER dst_buf,
|
||||
COIEVENT *event);
|
||||
COIEVENT *event,
|
||||
uint64_t &received_data,
|
||||
uint32_t in_deps_amount,
|
||||
COIEVENT *in_deps
|
||||
);
|
||||
|
||||
bool gather_copyin_data();
|
||||
|
||||
bool compute();
|
||||
bool compute(void *);
|
||||
|
||||
bool receive_pointer_data(bool is_async);
|
||||
bool receive_pointer_data(bool is_async, bool first_run, void * info);
|
||||
bool scatter_copyout_data();
|
||||
|
||||
void cleanup();
|
||||
|
||||
bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
|
||||
int64_t length, bool error_does_not_exist = true);
|
||||
int64_t length, bool is_targptr,
|
||||
bool error_does_not_exist = true);
|
||||
bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
|
||||
int64_t length, int64_t alloc_disp, int align);
|
||||
int64_t length, int64_t alloc_disp, int align,
|
||||
bool is_targptr, bool is_prealloc, bool pin);
|
||||
bool create_preallocated_buffer(PtrData* ptr_data, void *base);
|
||||
bool init_static_ptr_data(PtrData *ptr_data);
|
||||
bool init_mic_address(PtrData *ptr_data);
|
||||
bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
|
||||
@ -154,9 +200,15 @@ private:
|
||||
|
||||
bool gen_var_descs_for_pointer_array(int i);
|
||||
|
||||
void get_stream_in_dependencies(uint32_t &in_deps_amount,
|
||||
COIEVENT* &in_deps);
|
||||
|
||||
void report_coi_error(error_types msg, COIRESULT res);
|
||||
_Offload_result translate_coi_error(COIRESULT res) const;
|
||||
|
||||
|
||||
void setup_omp_async_info();
|
||||
void register_omp_event_call_back(const COIEVENT *event, const void *info);
|
||||
|
||||
private:
|
||||
typedef std::list<COIBUFFER> BufferList;
|
||||
|
||||
@ -167,10 +219,12 @@ private:
|
||||
AutoData* auto_data;
|
||||
int64_t cpu_disp;
|
||||
int64_t cpu_offset;
|
||||
void *alloc;
|
||||
CeanReadRanges *read_rng_src;
|
||||
CeanReadRanges *read_rng_dst;
|
||||
int64_t ptr_arr_offset;
|
||||
bool is_arr_ptr_el;
|
||||
OmpAsyncLastEventType omp_last_event_type;
|
||||
};
|
||||
|
||||
template<typename T> class ReadArrElements {
|
||||
@ -230,6 +284,9 @@ private:
|
||||
// Engine
|
||||
Engine& m_device;
|
||||
|
||||
// true for offload_wait target(mic) stream(0)
|
||||
bool m_wait_all_devices;
|
||||
|
||||
// if true offload is mandatory
|
||||
bool m_is_mandatory;
|
||||
|
||||
@ -266,8 +323,13 @@ private:
|
||||
// Dependencies
|
||||
COIEVENT *m_in_deps;
|
||||
uint32_t m_in_deps_total;
|
||||
uint32_t m_in_deps_allocated;
|
||||
COIEVENT *m_out_deps;
|
||||
uint32_t m_out_deps_total;
|
||||
uint32_t m_out_deps_allocated;
|
||||
|
||||
// Stream
|
||||
_Offload_stream m_stream;
|
||||
|
||||
// Timer data
|
||||
OffloadHostTimerData *m_timer_data;
|
||||
@ -279,6 +341,25 @@ private:
|
||||
// a boolean value calculated in setup_descriptors. If true we need to do
|
||||
// a run function on the target. Otherwise it may be optimized away.
|
||||
bool m_need_runfunction;
|
||||
|
||||
// initialized value of m_need_runfunction;
|
||||
// is used to recognize offload_transfer
|
||||
bool m_initial_need_runfunction;
|
||||
|
||||
// a Boolean value set to true when OUT clauses with preallocated targetptr
|
||||
// is encountered to indicate that call receive_pointer_data needs to be
|
||||
// invoked again after call to scatter_copyout_data.
|
||||
bool m_out_with_preallocated;
|
||||
|
||||
// a Boolean value set to true if an alloc_if(1) is used with preallocated
|
||||
// targetptr to indicate the need to scatter_copyout_data even for
|
||||
// async offload
|
||||
bool m_preallocated_alloc;
|
||||
|
||||
// a Boolean value set to true if traceback routine is called
|
||||
bool m_traceback_called;
|
||||
|
||||
OmpAsyncLastEventType m_omp_async_last_event_type;
|
||||
};
|
||||
|
||||
// Initialization types for MIC
|
||||
@ -288,46 +369,60 @@ enum OffloadInitType {
|
||||
c_init_on_offload_all // all devices before starting the first offload
|
||||
};
|
||||
|
||||
// Determines if MIC code is an executable or a shared library
|
||||
extern "C" bool __offload_target_image_is_executable(const void *target_image);
|
||||
|
||||
// Initializes library and registers specified offload image.
|
||||
extern "C" void __offload_register_image(const void* image);
|
||||
extern "C" bool __offload_register_image(const void* image);
|
||||
extern "C" void __offload_unregister_image(const void* image);
|
||||
|
||||
// Initializes offload runtime library.
|
||||
extern int __offload_init_library(void);
|
||||
DLL_LOCAL extern int __offload_init_library(void);
|
||||
|
||||
// thread data for associating pipelines with threads
|
||||
extern pthread_key_t mic_thread_key;
|
||||
DLL_LOCAL extern pthread_key_t mic_thread_key;
|
||||
|
||||
// location of offload_main executable
|
||||
// To be used if the main application has no offload and is not built
|
||||
// with -offload but dynamic library linked in has offload pragma
|
||||
DLL_LOCAL extern char* mic_device_main;
|
||||
|
||||
// Environment variables for devices
|
||||
extern MicEnvVar mic_env_vars;
|
||||
DLL_LOCAL extern MicEnvVar mic_env_vars;
|
||||
|
||||
// CPU frequency
|
||||
extern uint64_t cpu_frequency;
|
||||
DLL_LOCAL extern uint64_t cpu_frequency;
|
||||
|
||||
// LD_LIBRARY_PATH for MIC libraries
|
||||
extern char* mic_library_path;
|
||||
DLL_LOCAL extern char* mic_library_path;
|
||||
|
||||
// stack size for target
|
||||
extern uint32_t mic_stack_size;
|
||||
DLL_LOCAL extern uint32_t mic_stack_size;
|
||||
|
||||
// Preallocated memory size for buffers on MIC
|
||||
extern uint64_t mic_buffer_size;
|
||||
DLL_LOCAL extern uint64_t mic_buffer_size;
|
||||
|
||||
// Preallocated 4K page memory size for buffers on MIC
|
||||
DLL_LOCAL extern uint64_t mic_4k_buffer_size;
|
||||
|
||||
// Preallocated 2M page memory size for buffers on MIC
|
||||
DLL_LOCAL extern uint64_t mic_2m_buffer_size;
|
||||
|
||||
// Setting controlling inout proxy
|
||||
extern bool mic_proxy_io;
|
||||
extern char* mic_proxy_fs_root;
|
||||
DLL_LOCAL extern bool mic_proxy_io;
|
||||
DLL_LOCAL extern char* mic_proxy_fs_root;
|
||||
|
||||
// Threshold for creating buffers with large pages
|
||||
extern uint64_t __offload_use_2mb_buffers;
|
||||
DLL_LOCAL extern uint64_t __offload_use_2mb_buffers;
|
||||
|
||||
// offload initialization type
|
||||
extern OffloadInitType __offload_init_type;
|
||||
DLL_LOCAL extern OffloadInitType __offload_init_type;
|
||||
|
||||
// Device number to offload to when device is not explicitly specified.
|
||||
extern int __omp_device_num;
|
||||
DLL_LOCAL extern int __omp_device_num;
|
||||
|
||||
// target executable
|
||||
extern TargetImage* __target_exe;
|
||||
DLL_LOCAL extern TargetImage* __target_exe;
|
||||
|
||||
// IDB support
|
||||
|
||||
|
103
liboffloadmic/runtime/offload_iterator.h
Normal file
103
liboffloadmic/runtime/offload_iterator.h
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/*! \file
|
||||
\brief Iterator of Variable tables list used by the runtime library
|
||||
*/
|
||||
|
||||
#ifndef OFFLOAD_ITERATOR_H_INCLUDED
|
||||
#define OFFLOAD_ITERATOR_H_INCLUDED
|
||||
|
||||
#include <iterator>
|
||||
#include "offload_table.h"
|
||||
|
||||
// The following class is for iteration over var table.
|
||||
// It was extracted and moved to this offload_iterator.h file from offload_table.h
|
||||
// to solve the problem with compiling with VS 2010. The problem was in incompatibility
|
||||
// of STL objects in VS 2010 with ones in later VS versions.
|
||||
|
||||
// var table list iterator
|
||||
class Iterator : public std::iterator<std::input_iterator_tag,
|
||||
VarTable::Entry> {
|
||||
public:
|
||||
Iterator() : m_node(0), m_entry(0) {}
|
||||
|
||||
explicit Iterator(TableList<VarTable>::Node *node) {
|
||||
new_node(node);
|
||||
}
|
||||
|
||||
Iterator& operator++() {
|
||||
if (m_entry != 0) {
|
||||
m_entry++;
|
||||
while (m_entry->name == 0) {
|
||||
m_entry++;
|
||||
}
|
||||
if (m_entry->name == reinterpret_cast<const char*>(-1)) {
|
||||
new_node(m_node->next);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const Iterator &other) const {
|
||||
return m_entry == other.m_entry;
|
||||
}
|
||||
|
||||
bool operator!=(const Iterator &other) const {
|
||||
return m_entry != other.m_entry;
|
||||
}
|
||||
|
||||
const VarTable::Entry* operator*() const {
|
||||
return m_entry;
|
||||
}
|
||||
|
||||
private:
|
||||
void new_node(TableList<VarTable>::Node *node) {
|
||||
m_node = node;
|
||||
m_entry = 0;
|
||||
while (m_node != 0) {
|
||||
m_entry = m_node->table.entries;
|
||||
while (m_entry->name == 0) {
|
||||
m_entry++;
|
||||
}
|
||||
if (m_entry->name != reinterpret_cast<const char*>(-1)) {
|
||||
break;
|
||||
}
|
||||
m_node = m_node->next;
|
||||
m_entry = 0;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
TableList<VarTable>::Node *m_node;
|
||||
const VarTable::Entry *m_entry;
|
||||
};
|
||||
|
||||
#endif // OFFLOAD_ITERATOR_H_INCLUDED
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -28,14 +28,15 @@
|
||||
*/
|
||||
|
||||
|
||||
#if defined(LINUX) || defined(FREEBSD)
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
#include "offload_myo_host.h"
|
||||
#include <errno.h>
|
||||
#include <malloc.h>
|
||||
#include "offload_host.h"
|
||||
|
||||
#if defined(LINUX) || defined(FREEBSD)
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
//#include "offload_util.h"
|
||||
|
||||
#define MYO_VERSION1 "MYO_1.0"
|
||||
|
||||
@ -47,11 +48,7 @@ extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
|
||||
#pragma weak __cilkrts_cilk_for_64
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
|
||||
#else // TARGET_WINNT
|
||||
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
|
||||
#endif // TARGET_WINNT
|
||||
static void __offload_myoProcessDeferredTables();
|
||||
|
||||
class MyoWrapper {
|
||||
public:
|
||||
@ -140,7 +137,7 @@ public:
|
||||
CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
|
||||
}
|
||||
|
||||
MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const {
|
||||
MyoiRFuncCallHandle RemoteCall(const char *func, void *args, int device) const {
|
||||
OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
|
||||
device);
|
||||
return m_remote_call(func, args, device);
|
||||
@ -151,6 +148,73 @@ public:
|
||||
CheckResult(__func__, m_get_result(handle));
|
||||
}
|
||||
|
||||
bool PostInitFuncSupported() const {
|
||||
OFFLOAD_DEBUG_TRACE(4, "%s()\n", __func__);
|
||||
if (m_feature_available) {
|
||||
return m_feature_available(MYO_FEATURE_POST_LIB_INIT) ==
|
||||
MYO_SUCCESS;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void CreateVtableArena();
|
||||
|
||||
// Returns the arena handle stored in m_vtable_arena.
MyoArena GetVtableArena() const
{
    return m_vtable_arena;
}
|
||||
|
||||
// Traces the call, invokes the resolved m_arena_create entry point and
// hands its MyoError result to CheckResult().
void ArenaCreate(MyoOwnershipType ownership, int consistency,
                 MyoArena* arena) const
{
    OFFLOAD_DEBUG_TRACE(4, "%s(%d, %d, %p)\n",
                        __func__, ownership, consistency, arena);
    const MyoError status = m_arena_create(ownership, consistency, arena);
    CheckResult(__func__, status);
}
|
||||
|
||||
// Traces the request and returns the result of the resolved
// m_arena_aligned_malloc entry point for (arena, size, align).
void* SharedAlignedArenaMalloc(
    MyoArena arena,
    size_t size,
    size_t align
) const
{
    // The trace format uses %lld for size and align; cast the size_t
    // values so the argument types match the conversion specifiers on
    // every platform, not only where size_t happens to be 64 bits wide.
    OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenamalloc,
                          "%s(%u, %lld, %lld)\n",
                          __func__, arena,
                          (long long) size, (long long) align);
    return m_arena_aligned_malloc(arena, size, align);
}
|
||||
|
||||
// Traces the request and returns whatever the resolved
// m_arena_aligned_free entry point returns for (arena, ptr).
void* SharedAlignedArenaFree(MyoArena arena, void* ptr) const
{
    OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenafree,
                          "%s(%u, %p)\n", __func__, arena, ptr);
    void *result = m_arena_aligned_free(arena, ptr);
    return result;
}
|
||||
|
||||
// Traces the call, invokes m_arena_acquire on the arena and passes the
// resulting MyoError to CheckResult().
void ArenaAcquire(MyoArena arena) const
{
    OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenaacquire,
                          "%s()\n", __func__);
    const MyoError status = m_arena_acquire(arena);
    CheckResult(__func__, status);
}
|
||||
|
||||
// Traces the call, invokes m_arena_release on the arena and passes the
// resulting MyoError to CheckResult().
void ArenaRelease(MyoArena arena) const
{
    OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenarelease,
                          "%s()\n", __func__);
    const MyoError status = m_arena_release(arena);
    CheckResult(__func__, status);
}
|
||||
|
||||
private:
|
||||
void CheckResult(const char *func, MyoError error) const {
|
||||
if (error != MYO_SUCCESS) {
|
||||
@ -160,8 +224,10 @@ private:
|
||||
}
|
||||
|
||||
private:
|
||||
void* m_lib_handle;
|
||||
bool m_is_available;
|
||||
void* m_lib_handle;
|
||||
bool m_is_available;
|
||||
int m_post_init_func;
|
||||
MyoArena m_vtable_arena;
|
||||
|
||||
// pointers to functions from myo library
|
||||
MyoError (*m_lib_init)(void*, void*);
|
||||
@ -175,11 +241,18 @@ private:
|
||||
MyoError (*m_host_var_table_propagate)(void*, int);
|
||||
MyoError (*m_host_fptr_table_register)(void*, int, int);
|
||||
MyoError (*m_remote_thunk_call)(void*, void*, int);
|
||||
MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int);
|
||||
MyoiRFuncCallHandle (*m_remote_call)(const char*, void*, int);
|
||||
MyoError (*m_get_result)(MyoiRFuncCallHandle);
|
||||
MyoError (*m_arena_create)(MyoOwnershipType, int, MyoArena*);
|
||||
void* (*m_arena_aligned_malloc)(MyoArena, size_t, size_t);
|
||||
void* (*m_arena_aligned_free)(MyoArena, void*);
|
||||
MyoError (*m_arena_acquire)(MyoArena);
|
||||
MyoError (*m_arena_release)(MyoArena);
|
||||
// Placeholder until MYO headers support enum type for feature
|
||||
MyoError (*m_feature_available)(int feature);
|
||||
};
|
||||
|
||||
bool MyoWrapper::LoadLibrary(void)
|
||||
DLL_LOCAL bool MyoWrapper::LoadLibrary(void)
|
||||
{
|
||||
#ifndef TARGET_WINNT
|
||||
const char *lib_name = "libmyo-client.so";
|
||||
@ -295,7 +368,7 @@ bool MyoWrapper::LoadLibrary(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
m_remote_call = (MyoiRFuncCallHandle (*)(char*, void*, int))
|
||||
m_remote_call = (MyoiRFuncCallHandle (*)(const char*, void*, int))
|
||||
DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1);
|
||||
if (m_remote_call == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
@ -313,8 +386,66 @@ bool MyoWrapper::LoadLibrary(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
m_arena_create = (MyoError (*)(MyoOwnershipType, int, MyoArena*))
|
||||
DL_sym(m_lib_handle, "myoArenaCreate", MYO_VERSION1);
|
||||
if (m_arena_create == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoArenaCreate");
|
||||
UnloadLibrary();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_arena_aligned_malloc = (void* (*)(MyoArena, size_t, size_t))
|
||||
DL_sym(m_lib_handle, "myoArenaAlignedMalloc", MYO_VERSION1);
|
||||
if (m_arena_aligned_malloc == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoArenaAlignedMalloc");
|
||||
UnloadLibrary();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_arena_aligned_free = (void* (*)(MyoArena, void*))
|
||||
DL_sym(m_lib_handle, "myoArenaAlignedFree", MYO_VERSION1);
|
||||
if (m_arena_aligned_free == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoArenaAlignedFree");
|
||||
UnloadLibrary();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_arena_acquire = (MyoError (*)(MyoArena))
|
||||
DL_sym(m_lib_handle, "myoArenaAcquire", MYO_VERSION1);
|
||||
if (m_acquire == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoArenaAcquire");
|
||||
UnloadLibrary();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_arena_release = (MyoError (*)(MyoArena))
|
||||
DL_sym(m_lib_handle, "myoArenaRelease", MYO_VERSION1);
|
||||
if (m_release == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoArenaRelease");
|
||||
UnloadLibrary();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for "feature-available" API added in MPSS 3.3.
|
||||
// Not finding it is not an error.
|
||||
m_feature_available = (MyoError (*)(int))
|
||||
DL_sym(m_lib_handle, "myoiSupportsFeature", MYO_VERSION1);
|
||||
if (m_feature_available == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
|
||||
"myoiSupportsFeature");
|
||||
}
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
|
||||
|
||||
// Create arena if supported
|
||||
CreateVtableArena();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Vtable arena created\n");
|
||||
|
||||
m_is_available = true;
|
||||
|
||||
return true;
|
||||
@ -323,6 +454,23 @@ bool MyoWrapper::LoadLibrary(void)
|
||||
static bool myo_is_available;
|
||||
static MyoWrapper myo_wrapper;
|
||||
|
||||
void MyoWrapper::CreateVtableArena()
|
||||
{
|
||||
MyoArena* vtable_arena;
|
||||
|
||||
// Check if this MYO supports arenas for vtables
|
||||
if (myo_wrapper.PostInitFuncSupported()) {
|
||||
// Create arena for vtables
|
||||
vtable_arena = (MyoArena *)myo_wrapper.SharedMalloc(sizeof(MyoArena));
|
||||
myo_wrapper.ArenaCreate(
|
||||
MYO_ARENA_OURS, MYO_NO_CONSISTENCY, vtable_arena);
|
||||
m_vtable_arena = *vtable_arena;
|
||||
OFFLOAD_DEBUG_TRACE(4, "created arena = %d\n", m_vtable_arena);
|
||||
} else {
|
||||
m_vtable_arena = 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct MyoTable
|
||||
{
|
||||
MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len)
|
||||
@ -337,9 +485,11 @@ static MyoTableList __myo_table_list;
|
||||
static mutex_t __myo_table_lock;
|
||||
static bool __myo_tables = false;
|
||||
|
||||
static void __offload_myo_shared_table_register(SharedTableEntry *entry);
|
||||
static void __offload_myo_shared_init_table_register(InitTableEntry* entry);
|
||||
static void __offload_myo_fptr_table_register(FptrTableEntry *entry);
|
||||
static void __offload_myo_shared_vtable_process(SharedTableEntry *entry);
|
||||
static void __offload_myo_shared_table_process(SharedTableEntry *entry);
|
||||
static void __offload_myo_shared_init_table_process(InitTableEntry* entry);
|
||||
static void __offload_myo_fptr_table_process(FptrTableEntry *entry);
|
||||
static void __offload_propagate_shared_vars();
|
||||
|
||||
static void __offload_myoLoadLibrary_once(void)
|
||||
{
|
||||
@ -350,6 +500,7 @@ static void __offload_myoLoadLibrary_once(void)
|
||||
|
||||
static bool __offload_myoLoadLibrary(void)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(4, "__offload_myoLoadLibrary\n");
|
||||
static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
|
||||
__offload_run_once(&ctrl, __offload_myoLoadLibrary_once);
|
||||
|
||||
@ -371,17 +522,71 @@ static void __offload_myoInit_once(void)
|
||||
OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
|
||||
|
||||
COIEVENT events[MIC_ENGINES_MAX];
|
||||
MyoiUserParams params[MIC_ENGINES_MAX+1];
|
||||
|
||||
// load target library to all devices
|
||||
// One entry per device +
|
||||
// A pair of entries for the Host postInit func +
|
||||
// A pair of entries for the MIC postInit func +
|
||||
// end marker
|
||||
MyoiUserParams params[MIC_ENGINES_MAX+5];
|
||||
|
||||
// Load target library to all devices and
|
||||
// create libinit parameters for all devices
|
||||
for (int i = 0; i < mic_engines_total; i++) {
|
||||
mic_engines[i].init_myo(&events[i]);
|
||||
|
||||
params[i].type = MYOI_USERPARAMS_DEVID;
|
||||
params[i].nodeid = mic_engines[i].get_physical_index() + 1;
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
|
||||
i, params[i].type, params[i].nodeid);
|
||||
}
|
||||
|
||||
params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
|
||||
// Check if V2 myoLibInit is available
|
||||
if (myo_wrapper.PostInitFuncSupported()) {
|
||||
// Set the host post libInit function indicator
|
||||
params[mic_engines_total].type =
|
||||
MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
|
||||
params[mic_engines_total].nodeid =
|
||||
MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE;
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
|
||||
mic_engines_total,
|
||||
params[mic_engines_total].type, params[mic_engines_total].nodeid);
|
||||
|
||||
// Set the host post libInit host function address
|
||||
((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
|
||||
postLibInitHostFuncAddress =
|
||||
(void (*)())&__offload_propagate_shared_vars;
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n",
|
||||
mic_engines_total+1,
|
||||
((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
|
||||
postLibInitHostFuncAddress);
|
||||
|
||||
// Set the target post libInit function indicator
|
||||
params[mic_engines_total+2].type =
|
||||
MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
|
||||
params[mic_engines_total+2].nodeid =
|
||||
MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
|
||||
|
||||
// Set the target post libInit target function name
|
||||
((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+3]))->
|
||||
postLibInitRemoveFuncName = "--vtable_initializer--";
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n",
|
||||
mic_engines_total+3,
|
||||
((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
|
||||
postLibInitRemoveFuncName);
|
||||
|
||||
params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG;
|
||||
params[mic_engines_total+4].nodeid = 0;
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
|
||||
mic_engines_total+4,
|
||||
params[mic_engines_total+4].type,
|
||||
params[mic_engines_total+4].nodeid);
|
||||
} else {
|
||||
params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
|
||||
params[mic_engines_total].nodeid = 0;
|
||||
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
|
||||
mic_engines_total,
|
||||
params[mic_engines_total].type, params[mic_engines_total].nodeid);
|
||||
}
|
||||
|
||||
// initialize myo runtime on host
|
||||
myo_wrapper.LibInit(params, 0);
|
||||
@ -395,6 +600,7 @@ static void __offload_myoInit_once(void)
|
||||
}
|
||||
|
||||
myo_is_available = true;
|
||||
OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
|
||||
}
|
||||
@ -404,12 +610,22 @@ static bool __offload_myoInit(void)
|
||||
static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
|
||||
__offload_run_once(&ctrl, __offload_myoInit_once);
|
||||
|
||||
// register pending shared var tables
|
||||
if (myo_is_available && __myo_tables) {
|
||||
// Check if using V1 myoLibInit
|
||||
if (!myo_wrapper.PostInitFuncSupported()) {
|
||||
__offload_propagate_shared_vars();
|
||||
}
|
||||
|
||||
return myo_is_available;
|
||||
}
|
||||
|
||||
static void __offload_propagate_shared_vars()
|
||||
{
|
||||
// Propagate pending shared var tables
|
||||
if (__myo_tables) {
|
||||
mutex_locker_t locker(__myo_table_lock);
|
||||
|
||||
if (__myo_tables) {
|
||||
// Register tables with MYO so it can propagate to target.
|
||||
// Hand the tables to MYO so it can propagate them to the target
|
||||
for(MyoTableList::const_iterator it = __myo_table_list.begin();
|
||||
it != __myo_table_list.end(); ++it) {
|
||||
#ifdef TARGET_WINNT
|
||||
@ -419,6 +635,8 @@ static bool __offload_myoInit(void)
|
||||
continue;
|
||||
}
|
||||
myo_wrapper.HostVarTablePropagate(entry, 1);
|
||||
OFFLOAD_DEBUG_TRACE(2, "HostVarTablePropagate(%s, 1)\n",
|
||||
entry->varName);
|
||||
}
|
||||
#else // TARGET_WINNT
|
||||
myo_wrapper.HostVarTablePropagate(it->var_tab,
|
||||
@ -430,8 +648,6 @@ static bool __offload_myoInit(void)
|
||||
__myo_tables = false;
|
||||
}
|
||||
}
|
||||
|
||||
return myo_is_available;
|
||||
}
|
||||
|
||||
static bool shared_table_entries(
|
||||
@ -485,13 +701,164 @@ extern "C" void __offload_myoRegisterTables(
|
||||
__offload_myoLoadLibrary();
|
||||
|
||||
// register tables
|
||||
__offload_myo_shared_table_register(shared_table);
|
||||
__offload_myo_fptr_table_register(fptr_table);
|
||||
__offload_myo_shared_init_table_register(init_table);
|
||||
__offload_myo_shared_table_process(shared_table);
|
||||
__offload_myo_fptr_table_process(fptr_table);
|
||||
__offload_myo_shared_init_table_process(init_table);
|
||||
}
|
||||
}
|
||||
|
||||
void __offload_myoFini(void)
|
||||
extern "C" bool __offload_myoProcessTables(
|
||||
const void* image,
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
|
||||
|
||||
// Collect the tables in this .dll/.so
|
||||
__offload_myoRegisterTables1(
|
||||
init_table, shared_table, shared_vtable, fptr_table);
|
||||
|
||||
// Now check what type of module we are dealing with
|
||||
if (__offload_target_image_is_executable(image)) {
|
||||
OFFLOAD_DEBUG_TRACE(2, "Main encountered\n");
|
||||
OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n");
|
||||
// MYO tables across dlls have been collected
|
||||
// Now init MYO and process the tables
|
||||
__offload_myoProcessDeferredTables();
|
||||
// Return true to indicate that atexit needs to be calld by ofldbegin
|
||||
return true;
|
||||
} else {
|
||||
// This is a shared library, either auto-loaded or dynamically loaded
|
||||
// If __target_exe is set, then main has started running
|
||||
if (__target_exe != 0) {
|
||||
// Main is running: this is a dynamic load of a shared library
|
||||
// Finish processing the tables in this library
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Dynamically loaded shared library encountered\n");
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"MYO initialization not deferred\n");
|
||||
__offload_myoProcessDeferredTables();
|
||||
} else {
|
||||
// Main is not running: this is an auto-loaded shared library
|
||||
// Tables have been collected, nothing else to do
|
||||
OFFLOAD_DEBUG_TRACE(2,
|
||||
"Auto-loaded shared library encountered\n");
|
||||
OFFLOAD_DEBUG_TRACE(2, "Deferring initialization of MYO\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Process contents of all Var tables
|
||||
void MYOVarTableList::process()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Process MYO Var tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
__offload_myo_shared_table_process(
|
||||
(SharedTableEntry*)n->table.entries);
|
||||
}
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
remove_table(n);
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// Process contents of all Var tables
|
||||
void MYOVarTableList::process_vtable()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Process MYO Vtable tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
__offload_myo_shared_vtable_process(
|
||||
(SharedTableEntry*)n->table.entries);
|
||||
}
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
remove_table(n);
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// Process contents of all Func tables
|
||||
void MYOFuncTableList::process()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Process MYO Func tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
__offload_myo_fptr_table_process(
|
||||
(FptrTableEntry*)n->table.entries);
|
||||
}
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
remove_table(n);
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// Process contents of all Init tables
|
||||
void MYOInitTableList::process()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Process MYO Init tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
__offload_myo_shared_init_table_process(
|
||||
(InitTableEntry*)n->table.entries);
|
||||
}
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
remove_table(n);
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
static void __offload_myoProcessDeferredTables()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
|
||||
|
||||
// Debug dumps of MYO tables
|
||||
if (console_enabled >= 2) {
|
||||
__offload_myo_var_tables.dump();
|
||||
__offload_myo_vtable_tables.dump();
|
||||
__offload_myo_func_tables.dump();
|
||||
__offload_myo_init_tables.dump();
|
||||
}
|
||||
|
||||
if (!__offload_myo_var_tables.is_empty() ||
|
||||
!__offload_myo_vtable_tables.is_empty() ||
|
||||
!__offload_myo_func_tables.is_empty() ||
|
||||
!__offload_myo_init_tables.is_empty())
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "MYO usage detected in program\n");
|
||||
|
||||
// Make sure myo library is loaded
|
||||
__offload_myoLoadLibrary();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Initialized MYO\n");
|
||||
|
||||
__offload_myo_var_tables.process();
|
||||
__offload_myo_vtable_tables.process_vtable();
|
||||
__offload_myo_func_tables.process();
|
||||
__offload_myo_init_tables.process();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Finished processing MYO tables\n");
|
||||
} else {
|
||||
OFFLOAD_DEBUG_TRACE(3,
|
||||
"MYO tables are empty; Will not initialize MYO\n");
|
||||
}
|
||||
}
|
||||
|
||||
DLL_LOCAL void __offload_myoFini(void)
|
||||
{
|
||||
if (myo_is_available) {
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
|
||||
@ -516,7 +883,7 @@ void __offload_myoFini(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void __offload_myo_shared_table_register(
|
||||
static void __offload_myo_shared_table_process(
|
||||
SharedTableEntry *entry
|
||||
)
|
||||
{
|
||||
@ -529,7 +896,8 @@ static void __offload_myo_shared_table_register(
|
||||
for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (entry->varName == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n");
|
||||
OFFLOAD_DEBUG_TRACE(4,
|
||||
"skip registering a NULL MyoSharedTable entry\n");
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
@ -550,29 +918,69 @@ static void __offload_myo_shared_table_register(
|
||||
}
|
||||
}
|
||||
|
||||
static void __offload_myo_shared_init_table_register(InitTableEntry* entry)
|
||||
// Walk a host-side shared vtable table up to MYO_TABLE_END_MARKER().
// For each entry the routine stored in the entry's sharedAddr field is
// called with the arena returned by myo_wrapper.GetVtableArena().
// If at least one entry was handled, the table is appended to
// __myo_table_list and __myo_tables is set, under __myo_table_lock.
static void __offload_myo_shared_vtable_process(
    SharedTableEntry *entry
)
{
    // Function type that sharedAddr is cast to before it is called.
    typedef void (*VtableAllocFn)(MyoArena);

    int handled = 0;

    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

    // allocate shared memory for vtables
    for (SharedTableEntry *cur = entry;
         cur->varName != MYO_TABLE_END_MARKER();
         ++cur) {
#ifdef TARGET_WINNT
        // Entries with a NULL name are skipped and not counted.
        if (cur->varName == 0) {
            OFFLOAD_DEBUG_TRACE(4,
                "skip registering a NULL MyoSharedVTable entry\n");
            continue;
        }
#endif // TARGET_WINNT

        OFFLOAD_DEBUG_TRACE(4,
            "registering MyoSharedVTable entry for %s @%p\n",
            cur->varName, cur);

        // Invoke the function to create shared memory
        VtableAllocFn create = reinterpret_cast<VtableAllocFn>(cur->sharedAddr);
        create(myo_wrapper.GetVtableArena());
        ++handled;
    }

    // Nothing to record for an empty table.
    if (handled <= 0) {
        return;
    }

    // add table to the list
    mutex_locker_t locker(__myo_table_lock);
    __myo_table_list.push_back(MyoTable(entry, handled));
    __myo_tables = true;
}
|
||||
|
||||
void __offload_myo_shared_init_table_process(InitTableEntry* entry)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
|
||||
if (entry->funcName == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n");
|
||||
OFFLOAD_DEBUG_TRACE(4,
|
||||
"skip registering a NULL MyoSharedInit entry\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Invoke the function to init the shared memory
|
||||
entry->func();
|
||||
OFFLOAD_DEBUG_TRACE(4, "execute MyoSharedInit routine for %s\n",
|
||||
entry->funcName);
|
||||
entry->func(myo_wrapper.GetVtableArena());
|
||||
}
|
||||
#else // TARGET_WINNT
|
||||
for (; entry->func != 0; entry++) {
|
||||
// Invoke the function to init the shared memory
|
||||
entry->func();
|
||||
entry->func(myo_wrapper.GetVtableArena());
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
}
|
||||
|
||||
static void __offload_myo_fptr_table_register(
|
||||
static void __offload_myo_fptr_table_process(
|
||||
FptrTableEntry *entry
|
||||
)
|
||||
{
|
||||
@ -584,7 +992,8 @@ static void __offload_myo_fptr_table_register(
|
||||
for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (entry->funcName == 0) {
|
||||
OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n");
|
||||
OFFLOAD_DEBUG_TRACE(4,
|
||||
"skip registering a NULL MyoFptrTable entry\n");
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
@ -722,6 +1131,80 @@ extern "C" void _Offload_shared_aligned_free(void *ptr)
|
||||
}
|
||||
}
|
||||
|
||||
// Forward an arena-creation request to myo_wrapper.ArenaCreate() with the
// given ownership, consistency and output arena pointer. Nothing is done
// when __offload_myoLoadLibrary() returns false.
extern "C" void _Offload_shared_arena_create(
    MyoOwnershipType ownership,
    int consistency,
    MyoArena* arena
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%d, %d, %p)\n",
                        __func__, ownership, consistency, arena);

    if (!__offload_myoLoadLibrary()) {
        return;
    }

    myo_wrapper.ArenaCreate(ownership, consistency, arena);
}
|
||||
|
||||
// Allocate `size` bytes aligned to `align` for the arena `arena`.
//
// When __offload_myoLoadLibrary() returns true the request is forwarded to
// myo_wrapper.SharedAlignedArenaMalloc(); otherwise the memory is obtained
// from _mm_malloc(), with the alignment raised to at least sizeof(void*).
// Returns the pointer produced by whichever allocator was used.
extern "C" void* _Offload_shared_aligned_arena_malloc(
    MyoArena arena,
    size_t size,
    size_t align
)
{
    // size_t does not match the %lld conversion (different signedness, and
    // a different width where size_t is narrower than long long), so the
    // values are converted explicitly and printed with %llu.
    // NOTE(review): assumes OFFLOAD_DEBUG_TRACE takes printf-style
    // conversions, as its uses in this file suggest - confirm.
    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %llu, %llu)\n",
                        __func__, arena,
                        static_cast<unsigned long long>(size),
                        static_cast<unsigned long long>(align));

    if (__offload_myoLoadLibrary()) {
        void *p = myo_wrapper.SharedAlignedArenaMalloc(arena, size, align);
        OFFLOAD_DEBUG_TRACE(3, "%s(%u, %llu, %llu)->%p\n",
                            __func__, arena,
                            static_cast<unsigned long long>(size),
                            static_cast<unsigned long long>(align), p);
        return p;
    }
    else {
        // Fallback path: never request less than pointer alignment.
        if (align < sizeof(void*)) {
            align = sizeof(void*);
        }
        return _mm_malloc(size, align);
    }
}
|
||||
|
||||
// Release `ptr` for the arena `arena`: through
// myo_wrapper.SharedAlignedArenaFree() when __offload_myoLoadLibrary()
// returns true, through _mm_free() otherwise.
extern "C" void _Offload_shared_aligned_arena_free(
    MyoArena arena,
    void *ptr
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);

    if (!__offload_myoLoadLibrary()) {
        // Counterpart of the _mm_malloc() fallback used by the arena malloc.
        _mm_free(ptr);
        return;
    }

    myo_wrapper.SharedAlignedArenaFree(arena, ptr);
}
|
||||
|
||||
// Forward an acquire request for `arena` to myo_wrapper.ArenaAcquire().
// Nothing is done when __offload_myoLoadLibrary() returns false.
extern "C" void _Offload_shared_arena_acquire(MyoArena arena)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);

    if (!__offload_myoLoadLibrary()) {
        return;
    }

    myo_wrapper.ArenaAcquire(arena);
}
|
||||
|
||||
// Forward a release request for `arena` to myo_wrapper.ArenaRelease().
// Nothing is done when __offload_myoLoadLibrary() returns false.
extern "C" void _Offload_shared_arena_release(MyoArena arena)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);

    if (!__offload_myoLoadLibrary()) {
        return;
    }

    myo_wrapper.ArenaRelease(arena);
}
|
||||
|
||||
extern "C" void __intel_cilk_for_32_offload(
|
||||
int size,
|
||||
void (*copy_constructor)(void*, void*),
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -34,67 +34,35 @@
|
||||
#include <myotypes.h>
|
||||
#include <myoimpl.h>
|
||||
#include <myo.h>
|
||||
|
||||
#include "offload.h"
|
||||
// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
|
||||
// is not defined which is the case when building the offload library
|
||||
#undef _Offload_shared_malloc
|
||||
#undef _Offload_shared_free
|
||||
#undef _Offload_shared_aligned_malloc
|
||||
#undef _Offload_shared_aligned_free
|
||||
#include "offload_table.h"
|
||||
|
||||
typedef MyoiSharedVarEntry SharedTableEntry;
|
||||
//typedef MyoiHostSharedFptrEntry FptrTableEntry;
|
||||
typedef struct {
|
||||
//! Function Name
|
||||
const char *funcName;
|
||||
//! Function Address
|
||||
void *funcAddr;
|
||||
//! Local Thunk Address
|
||||
void *localThunkAddr;
|
||||
#ifdef TARGET_WINNT
|
||||
// Dummy to pad up to 32 bytes
|
||||
void *dummy;
|
||||
#endif // TARGET_WINNT
|
||||
} FptrTableEntry;
|
||||
|
||||
struct InitTableEntry {
|
||||
#ifdef TARGET_WINNT
|
||||
// Dummy to pad up to 16 bytes
|
||||
// Function Name
|
||||
const char *funcName;
|
||||
#endif // TARGET_WINNT
|
||||
void (*func)(void);
|
||||
};
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
|
||||
#else // TARGET_WINNT
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable."
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable."
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
|
||||
|
||||
// This function retained for compatibility with 15.0
|
||||
extern "C" void __offload_myoRegisterTables(
|
||||
InitTableEntry *init_table,
|
||||
SharedTableEntry *shared_table,
|
||||
FptrTableEntry *fptr_table
|
||||
);
|
||||
|
||||
// Process shared variable, shared vtable and function and init routine tables.
|
||||
// In .dlls/.sos these will be collected together.
|
||||
// In the main program, all collected tables will be processed.
|
||||
extern "C" bool __offload_myoProcessTables(
|
||||
const void* image,
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
);
|
||||
|
||||
extern void __offload_myoFini(void);
|
||||
extern bool __offload_myo_init_is_deferred(const void *image);
|
||||
|
||||
#endif // OFFLOAD_MYO_HOST_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -44,7 +44,7 @@ static void CheckResult(const char *func, MyoError error) {
|
||||
}
|
||||
}
|
||||
|
||||
static void __offload_myo_shared_table_register(SharedTableEntry *entry)
|
||||
static void __offload_myo_shared_table_process(SharedTableEntry *entry)
|
||||
{
|
||||
int entries = 0;
|
||||
SharedTableEntry *t_start;
|
||||
@ -68,7 +68,32 @@ static void __offload_myo_shared_table_register(SharedTableEntry *entry)
|
||||
}
|
||||
}
|
||||
|
||||
static void __offload_myo_fptr_table_register(
|
||||
// Target-side handling of a shared vtable table. The entries are counted
// (and traced) up to the first one whose varName is 0; a non-empty table is
// then passed to myoiMicVarTableRegister(), whose result goes to
// CheckResult().
static void __offload_myo_shared_vtable_process(SharedTableEntry *entry)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

    // Count the entries in front of the terminating NULL-name entry.
    int count = 0;
    for (SharedTableEntry *cur = entry; cur->varName != 0; ++cur) {
        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
                              "myo shared vtable entry name"
                              " = \"%s\" addr = %p\n",
                              cur->varName, cur->sharedAddr);
        ++count;
    }

    // An empty table is not registered.
    if (count <= 0) {
        return;
    }

    OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
                        count);
    CheckResult("myoiMicVarTableRegister",
                myoiMicVarTableRegister(entry, count));
}
|
||||
|
||||
static void __offload_myo_fptr_table_process(
|
||||
FptrTableEntry *entry
|
||||
)
|
||||
{
|
||||
@ -94,9 +119,22 @@ static void __offload_myo_fptr_table_register(
|
||||
}
|
||||
}
|
||||
|
||||
// Call, in table order, every init routine stored in the table that starts
// at `entry`. The walk stops at the first entry whose func member is 0.
void __offload_myo_shared_init_table_process(InitTableEntry* entry)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

    InitTableEntry *cur = entry;
    while (cur->func != 0) {
        // The trace is emitted just before the routine is called.
        OFFLOAD_DEBUG_TRACE(3, "Invoked a shared init function @%p\n",
                            (void *)(cur->func));
        // Invoke the function to init the shared memory
        cur->func();
        ++cur;
    }
}
|
||||
|
||||
extern "C" void __offload_myoAcquire(void)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
|
||||
|
||||
CheckResult("myoAcquire", myoAcquire());
|
||||
}
|
||||
|
||||
@ -162,8 +200,35 @@ extern "C" void __offload_myoRegisterTables(
|
||||
return;
|
||||
}
|
||||
|
||||
__offload_myo_shared_table_register(shared_table);
|
||||
__offload_myo_fptr_table_register(fptr_table);
|
||||
__offload_myo_shared_table_process(shared_table);
|
||||
__offload_myo_fptr_table_process(fptr_table);
|
||||
}
|
||||
|
||||
// Target-side entry point for a module's MYO tables.
//
// __offload_myo_once_init is run once via pthread_once(). If the shared
// variable table, the shared vtable table and the function table are all
// empty the call ends there; otherwise the three tables are processed in
// that order. init_table is not referenced in this function.
extern "C" void __offload_myoProcessTables(
    InitTableEntry* init_table,
    SharedTableEntry *shared_table,
    SharedTableEntry *shared_vtable,
    FptrTableEntry *fptr_table
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    // one time registration of Intel(R) Cilk(TM) language entries
    static pthread_once_t once_control = PTHREAD_ONCE_INIT;
    pthread_once(&once_control, __offload_myo_once_init);

    // The function table is checked at slot 1 because slot 0 is
    // predefined with --vtable_initializer--.
    const bool module_is_empty =
        shared_table->varName == 0 &&
        shared_vtable->varName == 0 &&
        fptr_table[1].funcName == 0;

    // register module's tables
    if (!module_is_empty) {
        __offload_myo_shared_table_process(shared_table);
        __offload_myo_shared_vtable_process(shared_vtable);
        __offload_myo_fptr_table_process(fptr_table);
    }
}
|
||||
|
||||
extern "C" void* _Offload_shared_malloc(size_t size)
|
||||
@ -190,6 +255,46 @@ extern "C" void _Offload_shared_aligned_free(void *ptr)
|
||||
myoSharedAlignedFree(ptr);
|
||||
}
|
||||
|
||||
// Target-side arena allocation: forwards `arena`, `size` and `align` to
// myoArenaAlignedMalloc() and returns its result.
extern "C" void* _Offload_shared_aligned_arena_malloc(
    MyoArena arena,
    size_t size,
    size_t align
)
{
    // size_t does not match the %lld conversion (different signedness, and
    // a different width where size_t is narrower than long long), so the
    // values are converted explicitly and printed with %llu.
    // NOTE(review): assumes OFFLOAD_DEBUG_TRACE takes printf-style
    // conversions, as its uses in this file suggest - confirm.
    OFFLOAD_DEBUG_TRACE(
        3, "%s(%u, %llu, %llu)\n", __func__, arena,
        static_cast<unsigned long long>(size),
        static_cast<unsigned long long>(align));

    return myoArenaAlignedMalloc(arena, size, align);
}
|
||||
|
||||
// Target-side arena free: forwards `arena` and `ptr` to
// myoArenaAlignedFree().
extern "C" void _Offload_shared_aligned_arena_free(MyoArena arena, void *ptr)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);

    myoArenaAlignedFree(arena, ptr);
}
|
||||
|
||||
// Target-side arena acquire: forwards `arena` to myoArenaAcquire().
extern "C" void _Offload_shared_arena_acquire(MyoArena arena)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);

    myoArenaAcquire(arena);
}
|
||||
|
||||
// Target-side arena release: forwards `arena` to myoArenaRelease().
extern "C" void _Offload_shared_arena_release(MyoArena arena)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);

    myoArenaRelease(arena);
}
|
||||
|
||||
// temporary workaround for blocking behavior of myoiLibInit/Fini calls
|
||||
extern "C" void __offload_myoLibInit()
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -31,42 +31,38 @@
|
||||
#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED
|
||||
#define OFFLOAD_MYO_TARGET_H_INCLUDED
|
||||
|
||||
#include <myotypes.h>
|
||||
#include <myoimpl.h>
|
||||
#include <myo.h>
|
||||
|
||||
#include "offload.h"
|
||||
// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
|
||||
// is not defined which is the case when building the offload library
|
||||
#undef _Offload_shared_malloc
|
||||
#undef _Offload_shared_free
|
||||
#undef _Offload_shared_aligned_malloc
|
||||
#undef _Offload_shared_aligned_free
|
||||
#include "offload_table.h"
|
||||
|
||||
typedef MyoiSharedVarEntry SharedTableEntry;
|
||||
typedef MyoiTargetSharedFptrEntry FptrTableEntry;
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
|
||||
#else // TARGET_WINNT
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
|
||||
|
||||
// This function retained for compatibility with 15.0
|
||||
extern "C" void __offload_myoRegisterTables(
|
||||
SharedTableEntry *shared_table,
|
||||
FptrTableEntry *fptr_table
|
||||
);
|
||||
|
||||
// Process shared variable, shared vtable and function and init routine tables.
|
||||
// On the target side the contents of the tables are registered with MYO.
|
||||
extern "C" void __offload_myoProcessTables(
|
||||
InitTableEntry* init_table,
|
||||
SharedTableEntry *shared_table,
|
||||
SharedTableEntry *shared_vtable,
|
||||
FptrTableEntry *fptr_table
|
||||
);
|
||||
|
||||
extern "C" void __offload_myoAcquire(void);
|
||||
extern "C" void __offload_myoRelease(void);
|
||||
|
||||
// Call the compiler-generated routines for initializing shared variables.
|
||||
// This can only be done after shared memory allocation has been done.
|
||||
extern void __offload_myo_shared_init_table_process(InitTableEntry* entry);
|
||||
|
||||
// temporary workaround for blocking behavior for myoiLibInit/Fini calls
|
||||
extern "C" void __offload_myoLibInit();
|
||||
extern "C" void __offload_myoLibFini();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -86,7 +86,7 @@ static int omp_get_int_from_host(
|
||||
return setting;
|
||||
}
|
||||
|
||||
void omp_set_num_threads_lrb(
|
||||
DLL_LOCAL void omp_set_num_threads_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -96,7 +96,7 @@ void omp_set_num_threads_lrb(
|
||||
omp_set_num_threads(num_threads);
|
||||
}
|
||||
|
||||
void omp_get_max_threads_lrb(
|
||||
DLL_LOCAL void omp_get_max_threads_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -106,7 +106,7 @@ void omp_get_max_threads_lrb(
|
||||
omp_send_int_to_host(ofld, num_threads);
|
||||
}
|
||||
|
||||
void omp_get_num_procs_lrb(
|
||||
DLL_LOCAL void omp_get_num_procs_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -116,7 +116,7 @@ void omp_get_num_procs_lrb(
|
||||
omp_send_int_to_host(ofld, num_procs);
|
||||
}
|
||||
|
||||
void omp_set_dynamic_lrb(
|
||||
DLL_LOCAL void omp_set_dynamic_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -126,7 +126,7 @@ void omp_set_dynamic_lrb(
|
||||
omp_set_dynamic(dynamic);
|
||||
}
|
||||
|
||||
void omp_get_dynamic_lrb(
|
||||
DLL_LOCAL void omp_get_dynamic_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -136,7 +136,7 @@ void omp_get_dynamic_lrb(
|
||||
omp_send_int_to_host(ofld, dynamic);
|
||||
}
|
||||
|
||||
void omp_set_nested_lrb(
|
||||
DLL_LOCAL void omp_set_nested_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -146,7 +146,7 @@ void omp_set_nested_lrb(
|
||||
omp_set_nested(nested);
|
||||
}
|
||||
|
||||
void omp_get_nested_lrb(
|
||||
DLL_LOCAL void omp_get_nested_lrb(
|
||||
void *ofld
|
||||
)
|
||||
{
|
||||
@ -156,7 +156,7 @@ void omp_get_nested_lrb(
|
||||
omp_send_int_to_host(ofld, nested);
|
||||
}
|
||||
|
||||
void omp_set_schedule_lrb(
|
||||
DLL_LOCAL void omp_set_schedule_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -180,7 +180,7 @@ void omp_set_schedule_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_get_schedule_lrb(
|
||||
DLL_LOCAL void omp_get_schedule_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -206,7 +206,7 @@ void omp_get_schedule_lrb(
|
||||
|
||||
// lock API functions
|
||||
|
||||
void omp_init_lock_lrb(
|
||||
DLL_LOCAL void omp_init_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -224,7 +224,7 @@ void omp_init_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_destroy_lock_lrb(
|
||||
DLL_LOCAL void omp_destroy_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -242,7 +242,7 @@ void omp_destroy_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_set_lock_lrb(
|
||||
DLL_LOCAL void omp_set_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -260,7 +260,7 @@ void omp_set_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_unset_lock_lrb(
|
||||
DLL_LOCAL void omp_unset_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -278,7 +278,7 @@ void omp_unset_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_test_lock_lrb(
|
||||
DLL_LOCAL void omp_test_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -304,7 +304,7 @@ void omp_test_lock_lrb(
|
||||
|
||||
// nested lock API functions
|
||||
|
||||
void omp_init_nest_lock_lrb(
|
||||
DLL_LOCAL void omp_init_nest_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -322,7 +322,7 @@ void omp_init_nest_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_destroy_nest_lock_lrb(
|
||||
DLL_LOCAL void omp_destroy_nest_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -340,7 +340,7 @@ void omp_destroy_nest_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_set_nest_lock_lrb(
|
||||
DLL_LOCAL void omp_set_nest_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -358,7 +358,7 @@ void omp_set_nest_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_unset_nest_lock_lrb(
|
||||
DLL_LOCAL void omp_unset_nest_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
@ -376,7 +376,7 @@ void omp_unset_nest_lock_lrb(
|
||||
OFFLOAD_TARGET_LEAVE(ofld);
|
||||
}
|
||||
|
||||
void omp_test_nest_lock_lrb(
|
||||
DLL_LOCAL void omp_test_nest_lock_lrb(
|
||||
void *ofld_
|
||||
)
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -36,7 +36,7 @@
|
||||
namespace ORSL {
|
||||
|
||||
static bool is_enabled = false;
|
||||
static const ORSLTag my_tag = "Offload";
|
||||
static const ORSLTag my_tag = (const ORSLTag) "Offload";
|
||||
|
||||
void init()
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -28,17 +28,19 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "offload_util.h"
|
||||
|
||||
#ifndef OFFLOAD_ORSL_H_INCLUDED
|
||||
#define OFFLOAD_ORSL_H_INCLUDED
|
||||
|
||||
// ORSL interface
|
||||
namespace ORSL {
|
||||
|
||||
extern void init();
|
||||
DLL_LOCAL extern void init();
|
||||
|
||||
extern bool reserve(int device);
|
||||
extern bool try_reserve(int device);
|
||||
extern void release(int device);
|
||||
DLL_LOCAL extern bool reserve(int device);
|
||||
DLL_LOCAL extern bool try_reserve(int device);
|
||||
DLL_LOCAL extern void release(int device);
|
||||
|
||||
} // namespace ORSL
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -321,6 +321,8 @@ extern "C" void __offload_unregister_tables(
|
||||
VarList::Node *var_table
|
||||
)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Unregistering offload function entry table %p\n",
|
||||
entry_table);
|
||||
__offload_entries.remove_table(entry_table);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Unregistering function table %p\n", func_table);
|
||||
@ -329,3 +331,219 @@ extern "C" void __offload_unregister_tables(
|
||||
OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table);
|
||||
__offload_vars.remove_table(var_table);
|
||||
}
|
||||
|
||||
#ifdef MYO_SUPPORT
|
||||
|
||||
MYOVarTableList __offload_myo_var_tables;
|
||||
MYOVarTableList __offload_myo_vtable_tables;
|
||||
MYOFuncTableList __offload_myo_func_tables;
|
||||
MYOInitTableList __offload_myo_init_tables;
|
||||
|
||||
// Debugging dump
|
||||
void MYOVarTableList::dump(void)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "MYO Var tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
OFFLOAD_DEBUG_TRACE(2, " MYO Var table:\n");
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
e->varName != MYO_TABLE_END_MARKER(); e++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (e->varName == 0) {
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
OFFLOAD_DEBUG_TRACE(2, " %s %p\n",
|
||||
e->varName, e->sharedAddr);
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// check if any shared variables
|
||||
bool MYOVarTableList::is_empty()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "Are MYO Var tables empty?\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
e->varName != MYO_TABLE_END_MARKER(); e++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (e->varName == 0) {
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "No\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Yes\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
void MYOFuncTableList::dump(void)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "MYO Func tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
OFFLOAD_DEBUG_TRACE(2, " MYO Func table:\n");
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
e->funcName != MYO_TABLE_END_MARKER(); e++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (e->funcName == 0) {
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
#if HOST_LIBRARY
|
||||
OFFLOAD_DEBUG_TRACE(2, " %s %p %p\n",
|
||||
e->funcName, e->funcAddr, e->localThunkAddr);
|
||||
#else // HOST_LIBRARY
|
||||
OFFLOAD_DEBUG_TRACE(2, " %s %p %p %p\n",
|
||||
e->funcName, e->funcAddr, e->wrapFuncAddr, e->localThunkAddr);
|
||||
#endif // HOST_LIBRARY
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// check if any shared functions
|
||||
bool MYOFuncTableList::is_empty()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "Are MYO Func tables empty?\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
int count = 0;
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
e->funcName != MYO_TABLE_END_MARKER(); e++) {
|
||||
#ifdef TARGET_WINNT
|
||||
if (e->funcName == 0) {
|
||||
continue;
|
||||
}
|
||||
#endif // TARGET_WINNT
|
||||
count++;
|
||||
if (count > 1) {
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "No\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Yes\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
void MYOInitTableList::dump(void)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "MYO Init tables:\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
OFFLOAD_DEBUG_TRACE(2, " MYO Init table:\n");
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
#ifdef TARGET_WINNT
|
||||
e->funcName != MYO_TABLE_END_MARKER(); e++) {
|
||||
if (e->funcName == 0) {
|
||||
continue;
|
||||
}
|
||||
OFFLOAD_DEBUG_TRACE(2, " %s %p\n", e->funcName, e->func);
|
||||
#else // TARGET_WINNT
|
||||
e->func != 0; e++) {
|
||||
OFFLOAD_DEBUG_TRACE(2, " %p\n", e->func);
|
||||
#endif // TARGET_WINNT
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
// check if any shared functions
|
||||
bool MYOInitTableList::is_empty()
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "Are MYO Init tables empty?\n");
|
||||
|
||||
m_lock.lock();
|
||||
|
||||
for (Node *n = m_head; n != 0; n = n->next) {
|
||||
for (const Table::Entry *e = n->table.entries;
|
||||
#ifdef TARGET_WINNT
|
||||
e->funcName != MYO_TABLE_END_MARKER(); e++) {
|
||||
if (e->funcName == 0) {
|
||||
continue;
|
||||
}
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "No\n");
|
||||
return false;
|
||||
#else // TARGET_WINNT
|
||||
e->func != 0; e++) {
|
||||
#endif // TARGET_WINNT
|
||||
}
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
OFFLOAD_DEBUG_TRACE(3, "Yes\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" void __offload_myoRegisterTables1(
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(2, "Registering MYO shared var table %p\n",
|
||||
shared_table);
|
||||
__offload_myo_var_tables.add_table(shared_table);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Registering MYO shared vtable table %p\n",
|
||||
shared_vtable);
|
||||
__offload_myo_vtable_tables.add_table(shared_vtable);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Registering MYO function table %p\n", fptr_table);
|
||||
__offload_myo_func_tables.add_table(fptr_table);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Registering MYO init table %p\n", init_table);
|
||||
__offload_myo_init_tables.add_table(init_table);
|
||||
}
|
||||
|
||||
extern "C" void __offload_myoRemoveTables(
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
)
|
||||
{
|
||||
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Removing MYO shared var table %p\n",
|
||||
shared_table);
|
||||
__offload_myo_var_tables.remove_table(shared_table);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Removing MYO shared vtable table %p\n",
|
||||
shared_vtable);
|
||||
__offload_myo_vtable_tables.remove_table(shared_vtable);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Removing MYO function table %p\n", fptr_table);
|
||||
__offload_myo_func_tables.remove_table(fptr_table);
|
||||
|
||||
OFFLOAD_DEBUG_TRACE(2, "Removing MYO init table %p\n", init_table);
|
||||
__offload_myo_init_tables.remove_table(init_table);
|
||||
}
|
||||
|
||||
#endif // MYO_SUPPORT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -35,7 +35,6 @@
|
||||
#ifndef OFFLOAD_TABLE_H_INCLUDED
|
||||
#define OFFLOAD_TABLE_H_INCLUDED
|
||||
|
||||
#include <iterator>
|
||||
#include "offload_util.h"
|
||||
|
||||
// Template representing double linked list of tables
|
||||
@ -56,7 +55,6 @@ public:
|
||||
|
||||
void add_table(Node *node) {
|
||||
m_lock.lock();
|
||||
|
||||
if (m_head != 0) {
|
||||
node->next = m_head;
|
||||
m_head->prev = node;
|
||||
@ -67,8 +65,6 @@ public:
|
||||
}
|
||||
|
||||
void remove_table(Node *node) {
|
||||
m_lock.lock();
|
||||
|
||||
if (node->next != 0) {
|
||||
node->next->prev = node->prev;
|
||||
}
|
||||
@ -78,8 +74,6 @@ public:
|
||||
if (m_head == node) {
|
||||
m_head = node->next;
|
||||
}
|
||||
|
||||
m_lock.unlock();
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -109,7 +103,7 @@ struct FuncTable {
|
||||
};
|
||||
|
||||
// Function table
|
||||
class FuncList : public TableList<FuncTable> {
|
||||
class DLL_LOCAL FuncList : public TableList<FuncTable> {
|
||||
public:
|
||||
explicit FuncList(Node *node = 0) : TableList<Table>(node),
|
||||
m_max_name_len(-1)
|
||||
@ -172,7 +166,7 @@ struct VarTable {
|
||||
};
|
||||
|
||||
// List of var tables
|
||||
class VarList : public TableList<VarTable> {
|
||||
class DLL_LOCAL VarList : public TableList<VarTable> {
|
||||
public:
|
||||
VarList() : TableList<Table>()
|
||||
{}
|
||||
@ -181,69 +175,9 @@ public:
|
||||
void dump();
|
||||
|
||||
public:
|
||||
// var table list iterator
|
||||
class Iterator : public std::iterator<std::input_iterator_tag,
|
||||
Table::Entry> {
|
||||
public:
|
||||
Iterator() : m_node(0), m_entry(0) {}
|
||||
|
||||
explicit Iterator(Node *node) {
|
||||
new_node(node);
|
||||
}
|
||||
|
||||
Iterator& operator++() {
|
||||
if (m_entry != 0) {
|
||||
m_entry++;
|
||||
while (m_entry->name == 0) {
|
||||
m_entry++;
|
||||
}
|
||||
if (m_entry->name == reinterpret_cast<const char*>(-1)) {
|
||||
new_node(m_node->next);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const Iterator &other) const {
|
||||
return m_entry == other.m_entry;
|
||||
}
|
||||
|
||||
bool operator!=(const Iterator &other) const {
|
||||
return m_entry != other.m_entry;
|
||||
}
|
||||
|
||||
const Table::Entry* operator*() const {
|
||||
return m_entry;
|
||||
}
|
||||
|
||||
private:
|
||||
void new_node(Node *node) {
|
||||
m_node = node;
|
||||
m_entry = 0;
|
||||
while (m_node != 0) {
|
||||
m_entry = m_node->table.entries;
|
||||
while (m_entry->name == 0) {
|
||||
m_entry++;
|
||||
}
|
||||
if (m_entry->name != reinterpret_cast<const char*>(-1)) {
|
||||
break;
|
||||
}
|
||||
m_node = m_node->next;
|
||||
m_entry = 0;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Node *m_node;
|
||||
const Table::Entry *m_entry;
|
||||
};
|
||||
|
||||
Iterator begin() const {
|
||||
return Iterator(m_head);
|
||||
}
|
||||
|
||||
Iterator end() const {
|
||||
return Iterator();
|
||||
Node * get_head() {
|
||||
return m_head;
|
||||
}
|
||||
|
||||
public:
|
||||
@ -265,9 +199,9 @@ public:
|
||||
static void table_patch_names(void *buf, int64_t nelems);
|
||||
};
|
||||
|
||||
extern FuncList __offload_entries;
|
||||
extern FuncList __offload_funcs;
|
||||
extern VarList __offload_vars;
|
||||
DLL_LOCAL extern FuncList __offload_entries;
|
||||
DLL_LOCAL extern FuncList __offload_funcs;
|
||||
DLL_LOCAL extern VarList __offload_vars;
|
||||
|
||||
// Section names where the lookup tables are stored
|
||||
#ifdef TARGET_WINNT
|
||||
@ -318,4 +252,206 @@ extern "C" void __offload_unregister_tables(
|
||||
FuncList::Node *func_table,
|
||||
VarList::Node *var_table
|
||||
);
|
||||
|
||||
|
||||
#ifdef MYO_SUPPORT
|
||||
|
||||
#include <myotypes.h>
|
||||
#include <myoimpl.h>
|
||||
#include <myo.h>
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
|
||||
#else // TARGET_WINNT
|
||||
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
// Host and Target-side MYO shared variable table entry layout
|
||||
typedef MyoiSharedVarEntry SharedTableEntry;
|
||||
|
||||
#if HOST_LIBRARY
|
||||
|
||||
// Host-side MYO function table entry layout
|
||||
typedef struct {
|
||||
//! Function Name
|
||||
const char *funcName;
|
||||
//! Function Address
|
||||
void *funcAddr;
|
||||
//! Local Thunk Address
|
||||
void *localThunkAddr;
|
||||
#ifdef TARGET_WINNT
|
||||
// Dummy to pad up to 32 bytes
|
||||
void *dummy;
|
||||
#endif // TARGET_WINNT
|
||||
} FptrTableEntry;
|
||||
|
||||
// Host-side MYO init routine table entry layout
|
||||
typedef struct {
|
||||
#ifdef TARGET_WINNT
|
||||
// Dummy to pad up to 16 bytes
|
||||
// Function Name
|
||||
const char *funcName;
|
||||
#endif // TARGET_WINNT
|
||||
void (*func)(MyoArena);
|
||||
} InitTableEntry;
|
||||
|
||||
#else // HOST_LIBRARY
|
||||
|
||||
// Target-side MYO function table entry layout
|
||||
typedef MyoiTargetSharedFptrEntry FptrTableEntry;
|
||||
|
||||
// Target-side MYO init routine table entry layout
|
||||
struct InitTableEntry {
|
||||
void (*func)(void);
|
||||
};
|
||||
|
||||
#endif // HOST_LIBRARY
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_START ".MyoSharedVTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_END ".MyoSharedVTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a"
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z"
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
|
||||
|
||||
#else // TARGET_WINNT
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
|
||||
#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_START ".MyoSharedVTable."
|
||||
#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_END ".MyoSharedVTable."
|
||||
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable."
|
||||
#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable."
|
||||
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
|
||||
#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
|
||||
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_VTABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_VTABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
|
||||
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
|
||||
#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
|
||||
|
||||
// List of MYO shared variable tables
|
||||
struct MYOVarTable {
|
||||
typedef SharedTableEntry Entry;
|
||||
const Entry *entries;
|
||||
};
|
||||
|
||||
class MYOVarTableList : public TableList<MYOVarTable> {
|
||||
public:
|
||||
MYOVarTableList() : TableList<Table>()
|
||||
{}
|
||||
|
||||
// add table to the list
|
||||
void add_table(Node *node) {
|
||||
// add table
|
||||
TableList<Table>::add_table(node);
|
||||
}
|
||||
|
||||
// debug dump
|
||||
void dump(void);
|
||||
|
||||
// check if any shared variables
|
||||
bool is_empty();
|
||||
|
||||
// process the table contents for ordinary variables
|
||||
void process();
|
||||
|
||||
// process the table contents for vtable objects
|
||||
void process_vtable();
|
||||
};
|
||||
|
||||
// List of MYO shared function tables
|
||||
struct MYOFuncTable {
|
||||
typedef FptrTableEntry Entry;
|
||||
const Entry *entries;
|
||||
};
|
||||
|
||||
class MYOFuncTableList : public TableList<MYOFuncTable> {
|
||||
public:
|
||||
MYOFuncTableList() : TableList<Table>()
|
||||
{}
|
||||
|
||||
// add table to the list
|
||||
void add_table(Node *node) {
|
||||
// add table
|
||||
TableList<Table>::add_table(node);
|
||||
}
|
||||
|
||||
// debug dump
|
||||
void dump(void);
|
||||
|
||||
// check if any shared functions
|
||||
bool is_empty();
|
||||
|
||||
// process the table contents
|
||||
void process();
|
||||
};
|
||||
|
||||
// List of MYO shared variable initialization routine tables
|
||||
struct MYOInitTable {
|
||||
typedef InitTableEntry Entry;
|
||||
const Entry *entries;
|
||||
};
|
||||
|
||||
class MYOInitTableList : public TableList<MYOInitTable> {
|
||||
public:
|
||||
MYOInitTableList() : TableList<Table>()
|
||||
{}
|
||||
|
||||
// add table to the list
|
||||
void add_table(Node *node) {
|
||||
// add table
|
||||
TableList<Table>::add_table(node);
|
||||
}
|
||||
|
||||
// debug dump
|
||||
void dump(void);
|
||||
|
||||
// check if any init routines
|
||||
bool is_empty();
|
||||
|
||||
// process the table contents
|
||||
void process();
|
||||
};
|
||||
|
||||
extern MYOVarTableList __offload_myo_var_tables;
|
||||
extern MYOVarTableList __offload_myo_vtable_tables;
|
||||
extern MYOFuncTableList __offload_myo_func_tables;
|
||||
extern MYOInitTableList __offload_myo_init_tables;
|
||||
|
||||
extern "C" void __offload_myoRegisterTables1(
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
);
|
||||
|
||||
extern "C" void __offload_myoRemoveTables(
|
||||
MYOInitTableList::Node *init_table,
|
||||
MYOVarTableList::Node *shared_table,
|
||||
MYOVarTableList::Node *shared_vtable,
|
||||
MYOFuncTableList::Node *fptr_table
|
||||
);
|
||||
|
||||
#endif // MYO_SUPPORT
|
||||
|
||||
#endif // OFFLOAD_TABLE_H_INCLUDED
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -114,6 +114,8 @@ static void BufReleaseRef(void * buf)
|
||||
if (info) {
|
||||
--info->count;
|
||||
if (info->count == 0 && info->is_added) {
|
||||
OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
|
||||
((RefInfo *) ref_data[buf])->count);
|
||||
BufferReleaseRef(buf);
|
||||
info->is_added = 0;
|
||||
}
|
||||
@ -360,7 +362,6 @@ void OffloadDescriptor::scatter_copyin_data()
|
||||
if (m_vars[i].flags.alloc_disp) {
|
||||
int64_t offset = 0;
|
||||
m_in.receive_data(&offset, sizeof(offset));
|
||||
m_vars[i].offset = -offset;
|
||||
}
|
||||
if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
|
||||
VAR_TYPE_IS_DV_DATA(type)) {
|
||||
@ -369,7 +370,6 @@ void OffloadDescriptor::scatter_copyin_data()
|
||||
*reinterpret_cast<ArrDesc**>(ptr_addr);
|
||||
ptr_addr = reinterpret_cast<void**>(&dvp->Base);
|
||||
}
|
||||
|
||||
// Set pointer values
|
||||
switch (type) {
|
||||
case c_data_ptr_array:
|
||||
@ -380,6 +380,9 @@ void OffloadDescriptor::scatter_copyin_data()
|
||||
*(reinterpret_cast<char**>(m_vars[i].ptr)) :
|
||||
reinterpret_cast<char*>(m_vars[i].into);
|
||||
|
||||
if (m_vars[i].flags.is_pointer) {
|
||||
dst_arr_ptr = *((char**)dst_arr_ptr);
|
||||
}
|
||||
for (; j < max_el; j++) {
|
||||
if (src_is_for_mic) {
|
||||
m_vars[j].ptr =
|
||||
@ -402,8 +405,8 @@ void OffloadDescriptor::scatter_copyin_data()
|
||||
case c_data_ptr:
|
||||
case c_cean_var_ptr:
|
||||
case c_dv_ptr:
|
||||
if (m_vars[i].alloc_if) {
|
||||
void *buf;
|
||||
if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
|
||||
void *buf = NULL;
|
||||
if (m_vars[i].flags.sink_addr) {
|
||||
m_in.receive_data(&buf, sizeof(buf));
|
||||
}
|
||||
@ -417,9 +420,12 @@ void OffloadDescriptor::scatter_copyin_data()
|
||||
// increment buffer reference
|
||||
OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
|
||||
BufferAddRef(buf);
|
||||
OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
|
||||
OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
|
||||
}
|
||||
add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
|
||||
OFFLOAD_TRACE(1, " AddRef count = %d\n",
|
||||
((RefInfo *) ref_data[buf])->count);
|
||||
}
|
||||
ptr = static_cast<char*>(buf) +
|
||||
m_vars[i].mic_offset +
|
||||
@ -597,6 +603,7 @@ void OffloadDescriptor::gather_copyout_data()
|
||||
case c_dv_ptr:
|
||||
if (m_vars[i].free_if &&
|
||||
src_is_for_mic &&
|
||||
!m_vars[i].flags.preallocated &&
|
||||
!m_vars[i].flags.is_static) {
|
||||
void *buf = *static_cast<char**>(m_vars[i].ptr) -
|
||||
m_vars[i].mic_offset -
|
||||
@ -610,6 +617,9 @@ void OffloadDescriptor::gather_copyout_data()
|
||||
BufReleaseRef(buf);
|
||||
OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
|
||||
}
|
||||
if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
|
||||
m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
|
||||
}
|
||||
break;
|
||||
|
||||
case c_func_ptr:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -99,16 +99,16 @@ private:
|
||||
};
|
||||
|
||||
// one time target initialization in main
|
||||
extern void __offload_target_init(void);
|
||||
DLL_LOCAL extern void __offload_target_init(void);
|
||||
|
||||
// logical device index
|
||||
extern int mic_index;
|
||||
DLL_LOCAL extern int mic_index;
|
||||
|
||||
// total number of available logical devices
|
||||
extern int mic_engines_total;
|
||||
DLL_LOCAL extern int mic_engines_total;
|
||||
|
||||
// device frequency (from COI)
|
||||
extern uint64_t mic_frequency;
|
||||
DLL_LOCAL extern uint64_t mic_frequency;
|
||||
|
||||
struct RefInfo {
|
||||
RefInfo(bool is_add, long amount):is_added(is_add),count(amount)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -36,7 +36,7 @@
|
||||
#include <stdint.h>
|
||||
#include "liboffload_error_codes.h"
|
||||
|
||||
extern int timer_enabled;
|
||||
DLL_LOCAL extern int timer_enabled;
|
||||
|
||||
#ifdef TIMING_SUPPORT
|
||||
|
||||
@ -73,8 +73,8 @@ struct OffloadHostTimerData {
|
||||
|
||||
#if HOST_LIBRARY
|
||||
|
||||
extern int offload_report_level;
|
||||
extern int offload_report_enabled;
|
||||
DLL_LOCAL extern int offload_report_level;
|
||||
DLL_LOCAL extern int offload_report_enabled;
|
||||
#define OFFLOAD_REPORT_1 1
|
||||
#define OFFLOAD_REPORT_2 2
|
||||
#define OFFLOAD_REPORT_3 3
|
||||
@ -121,18 +121,18 @@ extern int offload_report_enabled;
|
||||
offload_timer_fill_host_mic_num(timer_data, data); \
|
||||
}
|
||||
|
||||
extern void offload_timer_start(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL void offload_timer_start(OffloadHostTimerData *,
|
||||
OffloadHostPhase t_node);
|
||||
extern void offload_timer_stop(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL void offload_timer_stop(OffloadHostTimerData *,
|
||||
OffloadHostPhase t_node);
|
||||
extern OffloadHostTimerData * offload_timer_init(const char *file, int line);
|
||||
extern void offload_timer_fill_target_data(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL OffloadHostTimerData * offload_timer_init(const char *file, int line);
|
||||
extern DLL_LOCAL void offload_timer_fill_target_data(OffloadHostTimerData *,
|
||||
void *data);
|
||||
extern void offload_timer_fill_host_sdata(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL void offload_timer_fill_host_sdata(OffloadHostTimerData *,
|
||||
uint64_t sent_bytes);
|
||||
extern void offload_timer_fill_host_rdata(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL void offload_timer_fill_host_rdata(OffloadHostTimerData *,
|
||||
uint64_t sent_bytes);
|
||||
extern void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
|
||||
extern DLL_LOCAL void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
|
||||
int card_number);
|
||||
|
||||
// Utility structure for starting/stopping timer
|
||||
@ -172,10 +172,10 @@ private:
|
||||
#define OFFLOAD_TIMER_TARGET_DATA(data) \
|
||||
if (timer_enabled) offload_timer_fill_target_data(data);
|
||||
|
||||
extern void offload_timer_start(OffloadTargetPhase t_node);
|
||||
extern void offload_timer_stop(OffloadTargetPhase t_node);
|
||||
extern void offload_timer_init(void);
|
||||
extern void offload_timer_fill_target_data(void *data);
|
||||
extern DLL_LOCAL void offload_timer_start(OffloadTargetPhase t_node);
|
||||
extern DLL_LOCAL void offload_timer_stop(OffloadTargetPhase t_node);
|
||||
extern DLL_LOCAL void offload_timer_init(void);
|
||||
extern DLL_LOCAL void offload_timer_fill_target_data(void *data);
|
||||
|
||||
#endif // HOST_LIBRARY
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -73,7 +73,7 @@ static const char * offload_stage(std::stringstream &ss,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char * offload_signal(std::stringstream &ss,
|
||||
static const char * offload_message_2str(std::stringstream &ss,
|
||||
int offload_number,
|
||||
const char *tag,
|
||||
const char *text)
|
||||
@ -216,27 +216,57 @@ void offload_stage_print(int stage, int offload_number, ...)
|
||||
uint64_t *signal;
|
||||
str1 = report_get_message_str(c_report_state_signal);
|
||||
str2 = report_get_message_str(c_report_signal);
|
||||
offload_signal(ss, offload_number, str1, str2);
|
||||
signal = va_arg(va_args, uint64_t*);
|
||||
if (signal)
|
||||
ss << " 0x" << std::hex << *signal;
|
||||
offload_message_2str(ss, offload_number, str1, str2);
|
||||
signal = va_arg(va_args, uint64_t*);
|
||||
if (signal)
|
||||
ss << " 0x" << std::hex << *signal;
|
||||
else
|
||||
ss << " none";
|
||||
ss << " none";
|
||||
}
|
||||
break;
|
||||
case c_offload_stream:
|
||||
{
|
||||
int64_t stream;
|
||||
str1 = report_get_message_str(c_report_state_stream);
|
||||
str2 = report_get_message_str(c_report_stream);
|
||||
offload_message_2str(ss, offload_number, str1, str2);
|
||||
stream = va_arg(va_args, int64_t);
|
||||
if (stream)
|
||||
ss << " 0x" << std::hex << stream;
|
||||
else
|
||||
ss << " none";
|
||||
}
|
||||
break;
|
||||
case c_offload_wait:
|
||||
{
|
||||
int count;
|
||||
OffloadWaitKind kind;
|
||||
uint64_t **signal;
|
||||
str1 = report_get_message_str(c_report_state_signal);
|
||||
kind = (enum OffloadWaitKind) va_arg(va_args, int);
|
||||
// kind == c_offload_wait_signal for signal;
|
||||
// other kinds are for stream
|
||||
if (kind == c_offload_wait_signal) {
|
||||
str1 = report_get_message_str(c_report_state_signal);
|
||||
}
|
||||
else {
|
||||
str1 = report_get_message_str(c_report_state_stream);
|
||||
}
|
||||
str2 = report_get_message_str(c_report_wait);
|
||||
offload_signal(ss, offload_number, str1, str2);
|
||||
offload_message_2str(ss, offload_number, str1, str2);
|
||||
count = va_arg(va_args, int);
|
||||
signal = va_arg(va_args, uint64_t**);
|
||||
if (count) {
|
||||
while (count) {
|
||||
ss << " " << std::hex << signal[count-1];
|
||||
count--;
|
||||
if (kind == c_offload_wait_signal) {
|
||||
while (count) {
|
||||
ss << " " << std::hex << signal[count-1];
|
||||
count--;
|
||||
}
|
||||
}
|
||||
else if (kind == c_offload_wait_stream) {
|
||||
ss << signal;
|
||||
}
|
||||
else {
|
||||
ss << " all";
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -304,6 +334,7 @@ void offload_stage_print(int stage, int offload_number, ...)
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myosharedalignedfree);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
ss << " " << va_arg(va_args, size_t);
|
||||
break;
|
||||
case c_offload_myoacquire:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
@ -315,6 +346,55 @@ void offload_stage_print(int stage, int offload_number, ...)
|
||||
str2 = report_get_message_str(c_report_myorelease);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
break;
|
||||
case c_offload_myosupportsfeature:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myosupportsfeature);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
break;
|
||||
case c_offload_myosharedarenacreate:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myosharedarenacreate);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, char*);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, unsigned int);
|
||||
break;
|
||||
case c_offload_myosharedalignedarenamalloc:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myosharedalignedarenamalloc);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, char*);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, size_t);
|
||||
ss << " " << va_arg(va_args, size_t);
|
||||
break;
|
||||
case c_offload_myosharedalignedarenafree:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myosharedalignedarenafree);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, char*);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
ss << " " << va_arg(va_args, size_t);
|
||||
break;
|
||||
case c_offload_myoarenaacquire:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myoarenaacquire);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, char*);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
break;
|
||||
case c_offload_myoarenarelease:
|
||||
str1 = report_get_message_str(c_report_state);
|
||||
str2 = report_get_message_str(c_report_myoarenarelease);
|
||||
offload_stage(ss, offload_number, str1, str2, false);
|
||||
va_arg(va_args, char*);
|
||||
ss << " " << va_arg(va_args, int);
|
||||
break;
|
||||
default:
|
||||
LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
|
||||
abort();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -29,8 +29,9 @@
|
||||
|
||||
|
||||
// The parts of the offload library common to host and target
|
||||
#include "offload_util.h"
|
||||
|
||||
void offload_stage_print(int stage, int offload_number, ...);
|
||||
DLL_LOCAL void offload_stage_print(int stage, int offload_number, ...);
|
||||
|
||||
enum OffloadTraceStage {
|
||||
// Total time spent on the target
|
||||
@ -68,5 +69,18 @@ enum OffloadTraceStage {
|
||||
c_offload_myosharedalignedfree,
|
||||
c_offload_myoacquire,
|
||||
c_offload_myorelease,
|
||||
c_offload_myofini
|
||||
c_offload_myofini,
|
||||
c_offload_myosupportsfeature,
|
||||
c_offload_myosharedarenacreate,
|
||||
c_offload_myosharedalignedarenamalloc,
|
||||
c_offload_myosharedalignedarenafree,
|
||||
c_offload_myoarenaacquire,
|
||||
c_offload_myoarenarelease,
|
||||
c_offload_stream
|
||||
};
|
||||
|
||||
enum OffloadWaitKind {
|
||||
c_offload_wait_signal = 0,
|
||||
c_offload_wait_stream,
|
||||
c_offload_wait_all_streams
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -31,14 +31,23 @@
|
||||
#ifndef OFFLOAD_UTIL_H_INCLUDED
|
||||
#define OFFLOAD_UTIL_H_INCLUDED
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define DLL_LOCAL
|
||||
#else
|
||||
#define DLL_LOCAL __attribute__((visibility("hidden")))
|
||||
#endif
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
// Don't use <stdint.h> as compiling with VS2010 makes ofldbegin.obj
|
||||
// incompatible with STL library of versions older than VS2010.
|
||||
typedef unsigned long long int uint64_t;
|
||||
typedef signed long long int int64_t;
|
||||
#include <windows.h>
|
||||
#include <process.h>
|
||||
#else // TARGET_WINNT
|
||||
#include <stdint.h>
|
||||
#include <dlfcn.h>
|
||||
#include <pthread.h>
|
||||
#endif // TARGET_WINNT
|
||||
@ -143,7 +152,7 @@ int DL_addr(const void *addr, Dl_info *info);
|
||||
#define DL_addr(addr, info) dladdr((addr), (info))
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
extern void* DL_sym(void *handle, const char *name, const char *version);
|
||||
DLL_LOCAL extern void* DL_sym(void *handle, const char *name, const char *version);
|
||||
|
||||
// One-time initialization API
|
||||
#ifdef TARGET_WINNT
|
||||
@ -159,13 +168,13 @@ typedef pthread_once_t OffloadOnceControl;
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
// Parses size specification string.
|
||||
extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
|
||||
DLL_LOCAL extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
|
||||
|
||||
// Parses string with integer value
|
||||
extern bool __offload_parse_int_string(const char *str, int64_t &value);
|
||||
DLL_LOCAL extern bool __offload_parse_int_string(const char *str, int64_t &value);
|
||||
|
||||
// get value by its base, offset and size
|
||||
int64_t get_el_value(
|
||||
DLL_LOCAL int64_t get_el_value(
|
||||
char *base,
|
||||
int64_t offset,
|
||||
int64_t size
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -29,7 +29,7 @@
|
||||
|
||||
|
||||
#if HOST_LIBRARY
|
||||
#include "offload_host.h"
|
||||
#include "offload_table.h"
|
||||
#include "offload_myo_host.h"
|
||||
#else
|
||||
#include "compiler_if_target.h"
|
||||
@ -37,6 +37,14 @@
|
||||
#include "offload_myo_target.h"
|
||||
#endif
|
||||
|
||||
// Initializes library and registers specified offload image.
|
||||
// Don't use this declarations from offload_host.h as offload_table.h
|
||||
// is used instead of it. Using offload_host.h contradicts with
|
||||
// STL library compiled with VS2010.
|
||||
extern "C" bool __offload_register_image(const void* image);
|
||||
extern "C" void __offload_unregister_image(const void* image);
|
||||
extern "C" bool __offload_target_image_is_executable(const void *image);
|
||||
|
||||
#ifdef TARGET_WINNT
|
||||
#define ALLOCATE(name) __declspec(allocate(name))
|
||||
#define DLL_LOCAL
|
||||
@ -110,33 +118,127 @@ static VarList::Node __offload_var_node = {
|
||||
#ifdef MYO_SUPPORT
|
||||
|
||||
// offload myo shared var section prolog
|
||||
// first element is empty
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_START)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(SharedTableEntry)))
|
||||
#endif // TARGET_WINNT
|
||||
static SharedTableEntry __offload_myo_shared_table_start = { 0 };
|
||||
static MYOVarTable::Entry __offload_myo_shared_var_start = { 0 };
|
||||
|
||||
// list element for the current module
|
||||
// table entry pointer skips the empty first entry
|
||||
static MYOVarTableList::Node __offload_myo_shared_var_node = {
|
||||
{ &__offload_myo_shared_var_start + 1 },
|
||||
0, 0
|
||||
};
|
||||
|
||||
// offload myo shared vtable section prolog
|
||||
// first element is empty
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_VTABLE_SECTION_START)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(SharedTableEntry)))
|
||||
#endif // TARGET_WINNT
|
||||
static MYOVarTable::Entry __offload_myo_shared_vtable_start = { 0 };
|
||||
|
||||
// list element for the current module
|
||||
// table entry pointer skips the empty first entry
|
||||
static MYOVarTableList::Node __offload_myo_shared_vtable_node = {
|
||||
{ &__offload_myo_shared_vtable_start + 1 },
|
||||
0, 0
|
||||
};
|
||||
|
||||
#if HOST_LIBRARY
|
||||
// offload myo shared var init section prolog
|
||||
// first element is empty
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(InitTableEntry)))
|
||||
#endif // TARGET_WINNT
|
||||
static InitTableEntry __offload_myo_shared_init_table_start = { 0 };
|
||||
static MYOInitTable::Entry __offload_myo_init_table_start = { 0 };
|
||||
|
||||
// list element for the current module
|
||||
// table entry pointer skips the empty first entry
|
||||
static MYOInitTableList::Node __offload_myo_init_table_node = {
|
||||
{ &__offload_myo_init_table_start + 1 },
|
||||
0, 0
|
||||
};
|
||||
|
||||
// The functions and variables needed for a built-in
|
||||
// remote function entry for vtable initialization on MIC
|
||||
|
||||
#if !HOST_LIBRARY
|
||||
MyoError __offload_init_vtables(void)
|
||||
{
|
||||
SharedTableEntry *t_start;
|
||||
|
||||
//OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
|
||||
t_start = &__offload_myo_shared_vtable_start + 1;
|
||||
//OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, t_start);
|
||||
while (t_start->varName != 0) {
|
||||
//OFFLOAD_DEBUG_TRACE(4,
|
||||
// "myo shared vtable \"%s\" &myo_ptr = %p myo_ptr = %p\n",
|
||||
// t_start->varName,
|
||||
// (void *)(t_start->sharedAddr),
|
||||
// ((void **)(t_start->sharedAddr))[0]);
|
||||
t_start++;
|
||||
}
|
||||
|
||||
__offload_myo_shared_init_table_process(
|
||||
&__offload_myo_init_table_start + 1);
|
||||
return MYO_SUCCESS;
|
||||
}
|
||||
#endif // !HOST_LIBRARY
|
||||
|
||||
static void vtable_initializer()
|
||||
{
|
||||
}
|
||||
|
||||
#if !HOST_LIBRARY
|
||||
static MyoError vtable_initializer_wrapper()
|
||||
{
|
||||
__offload_myoAcquire();
|
||||
__offload_init_vtables();
|
||||
__offload_myoRelease();
|
||||
return MYO_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void* __offload_vtable_initializer_thunk_ptr = 0;
|
||||
|
||||
// offload myo fptr section prolog
|
||||
// first element is pre-initialized to the MIC vtable initializer
|
||||
ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_START)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(FptrTableEntry)))
|
||||
#endif // TARGET_WINNT
|
||||
static FptrTableEntry __offload_myo_fptr_table_start = { 0 };
|
||||
static MYOFuncTable::Entry __offload_myo_fptr_table_start = {
|
||||
#if HOST_LIBRARY
|
||||
"--vtable_initializer--",
|
||||
(void*)&vtable_initializer,
|
||||
(void*)&__offload_vtable_initializer_thunk_ptr,
|
||||
#ifdef TARGET_WINNT
|
||||
// Dummy to pad up to 32 bytes
|
||||
0
|
||||
#endif // TARGET_WINNT
|
||||
#else // HOST_LIBRARY
|
||||
"--vtable_initializer--",
|
||||
(void*)&vtable_initializer,
|
||||
(void*)&vtable_initializer_wrapper,
|
||||
&__offload_vtable_initializer_thunk_ptr,
|
||||
#endif // HOST_LIBRARY
|
||||
};
|
||||
|
||||
// list element for the current module
|
||||
static MYOFuncTableList::Node __offload_myo_fptr_table_node = {
|
||||
{ &__offload_myo_fptr_table_start },
|
||||
0, 0
|
||||
};
|
||||
|
||||
#endif // MYO_SUPPORT
|
||||
|
||||
// init/fini code which adds/removes local lookup data to/from the global list
|
||||
|
||||
static void offload_fini();
|
||||
static void offload_fini_so();
|
||||
|
||||
#ifndef TARGET_WINNT
|
||||
static void offload_init() __attribute__((constructor(101)));
|
||||
@ -150,35 +252,81 @@ static void (*addressof_offload_init)() = offload_init;
|
||||
|
||||
static void offload_init()
|
||||
{
|
||||
bool success;
|
||||
|
||||
// register offload tables
|
||||
__offload_register_tables(&__offload_entry_node,
|
||||
&__offload_func_node,
|
||||
&__offload_var_node);
|
||||
|
||||
#if HOST_LIBRARY
|
||||
__offload_register_image(&__offload_target_image);
|
||||
atexit(offload_fini);
|
||||
success = __offload_register_image(&__offload_target_image);
|
||||
if (!success)
|
||||
{
|
||||
return;
|
||||
}
|
||||
#endif // HOST_LIBRARY
|
||||
|
||||
#ifdef MYO_SUPPORT
|
||||
__offload_myoRegisterTables(
|
||||
#if HOST_LIBRARY
|
||||
&__offload_myo_shared_init_table_start + 1,
|
||||
#endif // HOST_LIBRARY
|
||||
&__offload_myo_shared_table_start + 1,
|
||||
&__offload_myo_fptr_table_start + 1
|
||||
// If this was the main program register main atexit routine
|
||||
if (__offload_myoProcessTables(
|
||||
&__offload_target_image,
|
||||
&__offload_myo_init_table_node,
|
||||
&__offload_myo_shared_var_node,
|
||||
&__offload_myo_shared_vtable_node,
|
||||
&__offload_myo_fptr_table_node))
|
||||
{
|
||||
atexit(offload_fini);
|
||||
#ifdef TARGET_WINNT
|
||||
} else {
|
||||
atexit(offload_fini_so);
|
||||
#endif
|
||||
}
|
||||
#else // HOST_LIBRARY
|
||||
__offload_myoProcessTables(
|
||||
&__offload_myo_init_table_start + 1,
|
||||
&__offload_myo_shared_var_start + 1,
|
||||
&__offload_myo_shared_vtable_start + 1,
|
||||
&__offload_myo_fptr_table_start
|
||||
);
|
||||
#endif // HOST_LIBRARY
|
||||
#endif // MYO_SUPPORT
|
||||
}
|
||||
|
||||
#ifndef TARGET_WINNT
|
||||
static void offload_fini_so() __attribute__((destructor(101)));
|
||||
#else // TARGET_WINNT
|
||||
static void offload_init_so();
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
static void offload_fini()
|
||||
{
|
||||
#if HOST_LIBRARY
|
||||
__offload_unregister_image(&__offload_target_image);
|
||||
#endif // HOST_LIBRARY
|
||||
}
|
||||
|
||||
// unregister offload tables
|
||||
static void offload_fini_so()
|
||||
{
|
||||
// Offload and MYO tables need to be removed from list
|
||||
// to prevent invalid accesses after dlclose
|
||||
// Remove offload tables
|
||||
__offload_unregister_tables(&__offload_entry_node,
|
||||
&__offload_func_node,
|
||||
&__offload_var_node);
|
||||
#if HOST_LIBRARY
|
||||
if(!__offload_target_image_is_executable(&__offload_target_image)) {
|
||||
__offload_unregister_image(&__offload_target_image);
|
||||
}
|
||||
#endif
|
||||
#ifdef MYO_SUPPORT
|
||||
#if HOST_LIBRARY
|
||||
// Remove MYO tables
|
||||
__offload_myoRemoveTables(
|
||||
&__offload_myo_init_table_node,
|
||||
&__offload_myo_shared_var_node,
|
||||
&__offload_myo_shared_vtable_node,
|
||||
&__offload_myo_fptr_table_node);
|
||||
#endif // HOST_LIBRARY
|
||||
#endif // MYO_SUPPORT
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -29,7 +29,7 @@
|
||||
|
||||
|
||||
#if HOST_LIBRARY
|
||||
#include "offload_host.h"
|
||||
#include "offload_table.h"
|
||||
#include "offload_myo_host.h"
|
||||
#else
|
||||
#include "offload_target.h"
|
||||
@ -69,29 +69,42 @@ static VarTable::Entry __offload_var_table_end = { (const char*)-1 };
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_END)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(SharedTableEntry)))
|
||||
static SharedTableEntry __offload_myo_shared_table_end = { (const char*)-1, 0 };
|
||||
static MYOVarTable::Entry __offload_myo_shared_var_end =
|
||||
{ (const char*)-1, 0 };
|
||||
#else // TARGET_WINNT
|
||||
static SharedTableEntry __offload_myo_shared_table_end = { 0 };
|
||||
static MYOVarTable::Entry __offload_myo_shared_var_end = { 0 };
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#if HOST_LIBRARY
|
||||
// offload myo shared vtable section epilog
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_VTABLE_SECTION_END)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(SharedTableEntry)))
|
||||
static MYOVarTable::Entry __offload_myo_shared_vtable_end =
|
||||
{ (const char*)-1, 0 };
|
||||
#else // TARGET_WINNT
|
||||
static MYOVarTable::Entry __offload_myo_shared_vtable_end = { 0 };
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
//#if HOST_LIBRARY
|
||||
// offload myo shared var init section epilog
|
||||
ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(InitTableEntry)))
|
||||
static InitTableEntry __offload_myo_shared_init_table_end = { (const char*)-1, 0 };
|
||||
static MYOInitTable::Entry __offload_myo_init_table_end =
|
||||
{ (const char*)-1, 0 };
|
||||
#else // TARGET_WINNT
|
||||
static InitTableEntry __offload_myo_shared_init_table_end = { 0 };
|
||||
static MYOInitTable::Entry __offload_myo_init_table_end = { 0 };
|
||||
#endif // TARGET_WINNT
|
||||
#endif // HOST_LIBRARY
|
||||
//#endif // HOST_LIBRARY
|
||||
|
||||
// offload myo fptr section epilog
|
||||
ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_END)
|
||||
#ifdef TARGET_WINNT
|
||||
__declspec(align(sizeof(FptrTableEntry)))
|
||||
static FptrTableEntry __offload_myo_fptr_table_end = { (const char*)-1, 0, 0 };
|
||||
static MYOFuncTable::Entry __offload_myo_fptr_table_end =
|
||||
{ (const char*)-1, 0, 0 };
|
||||
#else // TARGET_WINNT
|
||||
static FptrTableEntry __offload_myo_fptr_table_end = { 0 };
|
||||
static MYOFuncTable::Entry __offload_myo_fptr_table_end = { 0 };
|
||||
#endif // TARGET_WINNT
|
||||
|
||||
#endif // MYO_SUPPORT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 Intel Corporation. All Rights Reserved.
|
||||
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -1 +1 @@
|
||||
2.1.6720-13
|
||||
3.4.1
|
||||
|
Loading…
x
Reference in New Issue
Block a user