为了在 C++ 中进行特征分解,我使用例程 “zhpev”。该例程嵌入在一个大型软件的 DLL 文件中,并在运行期间被频繁调用。我每调用 “zhpev” 约 5000 次就测量一次运行时间。前 900 次测量一切正常:运行时间约为 0.7 秒,几乎没有波动。但是在 900 次测量之后,运行时间突然从 0.7 秒增加到 2.7 秒,并且波动很大。
我做了以下观察:
抱歉,由于正在处理的项目太大,我无法发布任何代码。
如果有任何能帮助我消除这种奇怪行为的提示,我将不胜感激!
编辑:例程 “zhpev” 处理的是大小为 32x32 的双精度复 Hermitian 矩阵。因此,一次处理的数据块很小。
更新:1)分页不是这里的问题。我在系统选项中禁用了页面文件,运行时间问题依然存在。2)在其他 Windows 计算机上运行该应用程序也会出现同样的运行时间问题,不过这一次运行时间是在 1400 次测量之后才开始增加的。
更新:我发现只有在线程内调用 “zhpev” 时,才会出现该运行时间问题。据此,我编写了一个能够复现同样问题的小型代码示例。
让我解释一下我的代码
这是我的代码
#include <windows.h>
#include <tchar.h>
#include <strsafe.h>
#include "stdafx.h"
#include "mkl_lapack.h"
#include "mkl_service.h"
#include <time.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <iostream>
using namespace std;
// Alignment, in bytes, requested by CACHE_ALIGN below.
#define CACHE_LINE 32
// Microsoft-specific declaration attribute that aligns a variable to CACHE_LINE bytes.
#define CACHE_ALIGN __declspec(align(CACHE_LINE))
// Number of worker threads created in each timing iteration of _tmain.
#define MAX_THREADS 2
// Element count for fixed-size text buffers.
#define BUF_SIZE 255
// Thread entry point handed to CreateThread; it runs one Eigendecomposition() batch.
DWORD WINAPI MyThreadFunction( LPVOID lpParam );
// Shows the calling thread's last Win32 error for the named API call in a message box.
void ErrorHandler(LPTSTR lpszFunction);
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// This is the critical function: the zhpev loop whose run time is being measured.
void Eigendecomposition();
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Argument record handed to each worker thread through CreateThread's
// parameter pointer. MYDATA is the value type, PMYDATA the pointer type.
struct MyData {
    int val1;
    int val2;
};
typedef MyData MYDATA;
typedef MyData* PMYDATA;
int _tmain()
{
PMYDATA pDataArray[MAX_THREADS];
DWORD dwThreadIdArray[MAX_THREADS];
HANDLE hThreadArray[MAX_THREADS];
std::ofstream ofs;
double tstart;
double tend;
double proc_time_pure;
for(int j=0;j<10000;j++){
// Start one iteration
tstart = clock();
// Create MAX_THREADS worker threads.
for( int i=0; i<MAX_THREADS; i++ )
{
pDataArray[i] = (PMYDATA) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
sizeof(MYDATA));
if( pDataArray[i] == NULL )
{
ExitProcess(2);
}
pDataArray[i]->val1 = i;
pDataArray[i]->val2 = i+100;
// Create the thread to begin execution on its own.
hThreadArray[i] = CreateThread(
NULL, // default security attributes
0, // use default stack size
MyThreadFunction, // thread function name
pDataArray[i], // argument to thread function
0, // use default creation flags
&dwThreadIdArray[i]); // returns the thread identifier
if (hThreadArray[i] == NULL)
{
ErrorHandler(TEXT("CreateThread"));
ExitProcess(3);
}
} // End of main thread creation loop.
// Wait until all threads have terminated.
WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);
for(int i=0; i<MAX_THREADS; i++)
{
CloseHandle(hThreadArray[i]);
if(pDataArray[i] != NULL)
{
HeapFree(GetProcessHeap(), 0, pDataArray[i]);
pDataArray[i] = NULL; // Ensure address is not reused.
}
}
tend = clock();
proc_time_pure = tend-tstart;
// Print processing time into console and write it into a file
printf(" Processing time: %4.3f \n", proc_time_pure/1000.0);
ofs.open ("Processing_time.txt", std::ofstream::out | std::ofstream::app);
ofs << proc_time_pure/1000.0 << " ";
ofs.close();
}
return 0;
}
// Worker thread entry point passed to CreateThread.
//
// lpParam: pointer supplied by the thread creator (a MYDATA record in this
//          program); it is not read by this function.
// Returns 1 without doing any work if the standard output handle is invalid,
// otherwise 0 after one Eigendecomposition() batch has run.
DWORD WINAPI MyThreadFunction( LPVOID lpParam )
{
    (void)lpParam;  // intentionally unused

    // Guard kept from the original so the thread's exit codes are unchanged.
    if (GetStdHandle(STD_OUTPUT_HANDLE) == INVALID_HANDLE_VALUE)
        return 1;

    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    // Critical function
    Eigendecomposition();
    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    return 0;
}
// Displays a message box describing the calling thread's last Win32 error.
//
// lpszFunction: name of the API call that failed; used only in the message
//               text. A NULL pointer is replaced by a placeholder.
//
// The message has the form "<function> failed with error <code>: <text>".
// If the system message text cannot be retrieved a placeholder is used, and
// if the display buffer cannot be allocated only the system text is shown.
void ErrorHandler(LPTSTR lpszFunction)
{
    // Capture the error code first: the API calls below may overwrite it.
    const DWORD dw = GetLastError();

    // Retrieve the system error message for the last-error code. With
    // FORMAT_MESSAGE_ALLOCATE_BUFFER the buffer argument is really the address
    // of a pointer that receives a LocalAlloc'ed string, hence the cast.
    LPTSTR lpMsgBuf = NULL;
    const DWORD cchMsg = FormatMessage(
        FORMAT_MESSAGE_ALLOCATE_BUFFER |
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        reinterpret_cast<LPTSTR>(&lpMsgBuf),
        0, NULL);

    // Never hand an unset pointer to the formatting code below.
    LPCTSTR systemText = (cchMsg != 0 && lpMsgBuf != NULL)
        ? lpMsgBuf
        : TEXT("(no system message available)");
    LPCTSTR functionText = (lpszFunction != NULL)
        ? lpszFunction
        : TEXT("(unknown function)");

    // Room for both strings plus the fixed text and the numeric error code.
    const size_t cchDisplay = static_cast<size_t>(lstrlen(systemText))
                            + static_cast<size_t>(lstrlen(functionText)) + 40;
    LPTSTR lpDisplayBuf = static_cast<LPTSTR>(
        LocalAlloc(LMEM_ZEROINIT, cchDisplay * sizeof(TCHAR)));

    // Display the error message.
    if (lpDisplayBuf != NULL) {
        StringCchPrintf(lpDisplayBuf,
                        cchDisplay,
                        TEXT("%s failed with error %d: %s"),
                        functionText, dw, systemText);
        MessageBox(NULL, lpDisplayBuf, TEXT("Error"), MB_OK);
        LocalFree(lpDisplayBuf);
    } else {
        // Out of memory: still show whatever text is available.
        MessageBox(NULL, systemText, TEXT("Error"), MB_OK);
    }

    // Free the buffer allocated by FormatMessage (a NULL handle is ignored).
    LocalFree(lpMsgBuf);
}
// Benchmark kernel: performs 500 eigendecompositions of a 32x32 complex
// Hermitian matrix held in packed storage, using the LAPACK routine zhpev
// with jobz = 'V' and uplo = 'L'.
//
// All buffers live on the calling thread's stack. If zhpev reports a
// non-zero info value, the status is written to stderr and the remaining
// iterations are skipped, because later results would not be meaningful.
void Eigendecomposition(){
    const int M = 32;
    typedef MKL_Complex16 double_complex;
    const int PACKED_SIZE = (M*M+M)/2;   // elements in one packed triangle

    const char jobz = 'V';
    const char uplo = 'L';               // lower triangular part of input matrix is used
    const MKL_INT dim = M;
    const MKL_INT ldz = M;
    // Workspace lengths passed to zhpev for an M x M problem.
    const MKL_INT LWORK = (2*M-1);
    const MKL_INT LRWORK = (3*M-2);
    MKL_INT info = 0;

    double_complex A_H_MKL[PACKED_SIZE];
    CACHE_ALIGN double_complex work[LWORK];
    CACHE_ALIGN double rwork[LRWORK];
    double D[M];
    double_complex U[M][M];

    for (int i = 0; i < 500; i++) {
        // Create the input matrix. It is refilled on every iteration because
        // the packed array is passed to zhpev as a non-const in/out buffer.
        for (int k = 0; k < PACKED_SIZE; k++) {
            A_H_MKL[k].real = 1;
            A_H_MKL[k].imag = 0;
        }

        // This is the mkl function
        zhpev(&jobz,        // const char* jobz
              &uplo,        // const char* uplo
              &dim,         // const MKL_INT* n
              A_H_MKL,      // double_complex* ap
              D,            // double* w
              &U[0][0],     // double_complex* z
              &ldz,         // const MKL_INT* ldz
              work,         // double_complex* work
              rwork,        // double* rwork
              &info);       // MKL_INT* info

        // Do not silently ignore a failed decomposition.
        if (info != 0) {
            fprintf(stderr, "zhpev failed: info = %d\n", (int)info);
            return;
        }
    }
}
我在代码中找到了这个错误。我是在用 Windows 函数 CreateThread 创建的线程中运行特征分解例程的,但是没有调用等待线程结束的函数(例如 WaitForMultipleObjects)。对于我的应用程序的其他部分,这不是问题,但特征分解却因此受到了影响。
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句