Skip to main content

Building a harness for inline .NET assembly execution, Part 1

· 7 min read

Abstract

In my previous post, I described the primary disadvantage of in-process tool execution, namely the possibility of a crash. I also presented a remedy; however, the proposed solution did not address leftover memory artifacts. Instead, I recommended migrating to another process. The goal of this series is to showcase the construction of a harness for executing .NET assemblies, a common format for post-exploitation tools, in a manner that minimizes memory artifacts. This first part focuses on the steps that are performed before we start the runtime.

To accomplish this, we will leverage the IHostMemoryManager interface, which provides methods that enable the CLR to make memory allocations through the unmanaged host. This approach enables us to track and control the virtual memory and heap allocations made by the runtime.

note

All of the interfaces mentioned here are documented in the Unmanaged API reference.

Here goes

The first step is to implement the IHostMalloc interface, which IHostMemoryManager uses for heap allocations. Redirecting allocations to a private heap is optional, although it simplifies heap encryption. It also lets us zero out every allocation before it is released.

//
// Private heap that backs every allocation served by LuciHostMalloc.
// A maximum size of zero leaves the heap growable
//
HANDLE ImplantHeap = HeapCreate(/* flOptions */ 0, /* dwInitialSize */ 0, /* dwMaximumSize */ 0);

//
// IHostMalloc implementation that serves the runtime's heap requests from the private ImplantHeap
// and wipes every block before it is handed back to the heap
//
class LuciHostMalloc final : public IHostMalloc
{
public:
    //
    // Allocates cbSize zero-initialized bytes from ImplantHeap.
    // On success *ppMem receives the block and S_OK is returned; on failure *ppMem is set to
    // nullptr and E_OUTOFMEMORY is returned. eCriticalLevel is not consulted
    //
    HRESULT STDMETHODCALLTYPE Alloc(IN SIZE_T cbSize, IN EMemoryCriticalLevel eCriticalLevel, OUT PVOID* ppMem) override
    {
        //
        // Never leave the out parameter indeterminate, even when we end up failing
        //
        *ppMem = nullptr;

        PVOID Allocation{ HeapAlloc(ImplantHeap, HEAP_ZERO_MEMORY, cbSize) };

        //
        // Every enumeration in EMemoryCriticalLevel sounds catastrophic, e.g. "If the allocation fails, the CLR is effectively disabled",
        // so a failed request is retried up to ten times, five seconds apart, before giving up.
        // Since the heap we created is growable this should basically never happen
        //
        for (BYTE Tries{ 10 }; Allocation == nullptr && Tries != 0; --Tries)
        {
            Sleep(5 * 1000);
            Allocation = HeapAlloc(ImplantHeap, HEAP_ZERO_MEMORY, cbSize);
        }

        if (Allocation == nullptr)
        {
            return E_OUTOFMEMORY;
        }

        *ppMem = Allocation;
        return S_OK;
    }

    //
    // Debug flavour of Alloc; the file name and line number are ignored and the request is forwarded to Alloc
    //
    HRESULT STDMETHODCALLTYPE DebugAlloc(IN SIZE_T cbSize, IN EMemoryCriticalLevel dwCriticalLevel, IN PCHAR pszFileName, IN INT iLineNo, OUT PVOID* ppMem) override
    {
        return Alloc(cbSize, dwCriticalLevel, ppMem);
    }

    //
    // Wipes the block pointed to by pMem and returns it to ImplantHeap. Always reports S_OK
    //
    HRESULT STDMETHODCALLTYPE Free(IN PVOID pMem) override
    {
        //
        // Nothing to wipe or free; also keeps a null pointer away from HeapSize
        //
        if (pMem == nullptr)
        {
            return S_OK;
        }

        //
        // Zero out the memory before freeing the allocation
        //
        if (CONST SIZE_T AllocationSize{ HeapSize(ImplantHeap, 0, pMem) }; AllocationSize != HEAP_SIZE_FAILED)
        {
            RtlSecureZeroMemory(pMem, AllocationSize);
        }

        HeapFree(ImplantHeap, 0, pMem);
        return S_OK;
    }

    //
    // Implement the IUnknown interface as it is inherited from IHostMalloc
    //

    //
    // Answers requests for IUnknown and IHostMalloc with this object (adding a reference);
    // any other interface yields E_NOINTERFACE
    //
    STDMETHODIMP QueryInterface(IN REFIID riid, OUT PVOID* ppvObj) override
    {
        if (ppvObj == nullptr)
        {
            return E_INVALIDARG;
        }

        *ppvObj = nullptr;

        if (riid == IID_IUnknown || riid == IID_IHostMalloc)
        {
            *ppvObj = static_cast<LPVOID>(this);
            AddRef();

            return NOERROR;
        }

        return E_NOINTERFACE;
    }

    //
    // Atomically increments the reference count and returns the new value
    //
    STDMETHODIMP_(ULONG) AddRef() override
    {
        return InterlockedIncrement(&ReferenceCount);
    }

    //
    // Atomically decrements the reference count and destroys the object once it reaches zero
    //
    STDMETHODIMP_(ULONG) Release() override
    {
        CONST ULONG NewCount{ InterlockedDecrement(&ReferenceCount) };

        if (NewCount == 0)
        {
            delete this;
        }

        return NewCount;
    }

private:
    //
    // A freshly constructed object already carries the reference owned by its creator
    //
    ULONG ReferenceCount{ 1 };
};

The CLR acquires an interface pointer to an IHostMalloc instance by invoking the IHostMemoryManager::CreateMalloc method. Consequently, our implementation of CreateMalloc must return a LuciHostMalloc. With this last piece of the puzzle, we can create a class that implements IHostMemoryManager.

//
// One allocation made by the CLR through the host memory manager interface:
// the address of the region and its size
//
struct tagCLR_ALLOCATION
{
    PVOID Address;
    SIZE_T Size;
};

using CLR_ALLOCATION = tagCLR_ALLOCATION;
using PCLR_ALLOCATION = tagCLR_ALLOCATION*;

//
// Global list of the allocations described above; one CLR_ALLOCATION per tracked region
//
std::vector<CLR_ALLOCATION> ClrAllocations{};

//
// IHostMemoryManager implementation that forwards the runtime's virtual memory requests to the
// Win32 API and records every successful VirtualAlloc in the global ClrAllocations vector
//
// NOTE(review): ClrAllocations is read and modified here without any lock; if the runtime can call
// into this interface from several threads at once the vector needs synchronization - confirm
//
class LuciMemoryManager final : public IHostMemoryManager
{
public:
    //
    // Hands the runtime a new LuciHostMalloc through *ppMalloc. dwMallocType is not consulted
    //
    HRESULT STDMETHODCALLTYPE CreateMalloc(IN DWORD dwMallocType, OUT IHostMalloc** ppMalloc) override
    {
        *ppMalloc = new LuciHostMalloc();
        return S_OK;
    }

    //
    // Forwards the request to ::VirtualAlloc and, on success, records the returned address together
    // with the requested size in ClrAllocations. On failure *ppMem is set to nullptr and
    // E_OUTOFMEMORY is returned. eCriticalLevel is not consulted
    //
    HRESULT STDMETHODCALLTYPE VirtualAlloc(IN PVOID pAddress, IN SIZE_T dwSize, IN DWORD flAllocationType, IN DWORD flProtect, IN EMemoryCriticalLevel eCriticalLevel, OUT PVOID* ppMem) override
    {
        //
        // During sleep, we will encrypt the memory blocks in ClrAllocations. You may also leverage indirect syscalls
        //

        //
        // Never leave the out parameter indeterminate, even when we end up failing
        //
        *ppMem = nullptr;

        LPVOID Allocation{ ::VirtualAlloc(pAddress, dwSize, flAllocationType, flProtect) };

        //
        // A failed request is retried up to ten times, five seconds apart, before giving up
        //
        for (BYTE Tries{ 10 }; Allocation == nullptr && Tries != 0; --Tries)
        {
            Sleep(5 * 1000);
            Allocation = ::VirtualAlloc(pAddress, dwSize, flAllocationType, flProtect);
        }

        if (Allocation == nullptr)
        {
            return E_OUTOFMEMORY;
        }

        *ppMem = Allocation;
        ClrAllocations.push_back(CLR_ALLOCATION{ Allocation, dwSize });
        return S_OK;
    }

    //
    // Thin wrapper over ::VirtualQuery; *pResult receives the number of bytes written to lpBuffer
    //
    HRESULT STDMETHODCALLTYPE VirtualQuery(IN PVOID lpAddress, IN PVOID lpBuffer, IN SIZE_T dwLength, OUT PSIZE_T pResult) override
    {
        CONST SIZE_T Result{ ::VirtualQuery(lpAddress, static_cast<PMEMORY_BASIC_INFORMATION>(lpBuffer), dwLength) };

        if (Result == 0)
        {
            return E_FAIL;
        }

        *pResult = Result;

        return S_OK;
    }

    //
    // Thin wrapper over ::VirtualProtect
    //
    HRESULT STDMETHODCALLTYPE VirtualProtect(IN PVOID lpAddress, IN SIZE_T dwSize, IN DWORD flNewProtect, IN PDWORD pflOldProtect) override
    {
        if (::VirtualProtect(lpAddress, dwSize, flNewProtect, pflOldProtect) == FALSE)
        {
            return E_FAIL;
        }

        return S_OK;
    }

    //
    // Frees memory that was previously handed out by VirtualAlloc above.
    // Returns HOST_E_INVALIDOPERATION when the region is not tracked, E_FAIL when ::VirtualFree
    // fails (the tracking entry is kept in that case) and S_OK otherwise
    //
    HRESULT STDMETHODCALLTYPE VirtualFree(IN LPVOID lpAddress, IN SIZE_T dwSize, IN DWORD dwFreeType) override
    {
        //
        // MEM_RELEASE requires dwSize to be zero, so a release can only be matched by its base
        // address. Any other free type still has to match both the address and the size
        //
        CONST bool Releasing{ (dwFreeType & MEM_RELEASE) != 0 };

        //
        // In the event that the allocation cannot be found in the vector, the memory was not allocated through the host
        //
        CONST AUTO Tracked{ std::ranges::find_if(ClrAllocations, [lpAddress, dwSize, Releasing](CONST AUTO& a) { return a.Address == lpAddress && (Releasing || a.Size == dwSize); }) };

        if (Tracked == ClrAllocations.end())
        {
            return HOST_E_INVALIDOPERATION;
        }

        //
        // When releasing, the extent of the region is the largest size recorded at this base address
        //
        SIZE_T ReleasedSize{};

        if (Releasing)
        {
            for (CONST AUTO& a : ClrAllocations)
            {
                if (a.Address == lpAddress && a.Size > ReleasedSize)
                {
                    ReleasedSize = a.Size;
                }
            }
        }

        //
        // Free first and only then update the bookkeeping, so a failed call leaves the region tracked
        //
        if (::VirtualFree(lpAddress, dwSize, dwFreeType) == FALSE)
        {
            return E_FAIL;
        }

        if (Releasing)
        {
            //
            // The whole region is gone: drop every entry that starts inside it, including
            // entries recorded for allocation requests made within the region
            //
            CONST ULONG_PTR Base{ reinterpret_cast<ULONG_PTR>(lpAddress) };

            std::erase_if(ClrAllocations, [Base, ReleasedSize](CONST AUTO& a)
            {
                CONST ULONG_PTR Start{ reinterpret_cast<ULONG_PTR>(a.Address) };
                return Start == Base || (Start > Base && Start - Base < ReleasedSize);
            });
        }
        else
        {
            ClrAllocations.erase(Tracked);
        }

        return S_OK;
    }

    //
    // Reports the system memory load and half of the available physical memory
    //
    HRESULT STDMETHODCALLTYPE GetMemoryLoad(OUT PDWORD pMemoryLoad, OUT PSIZE_T pAvailableBytes) override
    {
        //
        // The runtime uses the return value as a heuristic for the garbage collector
        //

        //
        // GlobalMemoryStatusEx rejects the structure unless dwLength holds its size
        //
        MEMORYSTATUSEX MemoryStatus{};
        MemoryStatus.dwLength = sizeof(MemoryStatus);

        if (GlobalMemoryStatusEx(&MemoryStatus) == FALSE)
        {
            return E_FAIL;
        }

        //
        // ullAvailPhys is 64 bits wide while SIZE_T may be 32 bits, so clamp instead of truncating
        //
        constexpr SIZE_T LargestSize{ static_cast<SIZE_T>(-1) };
        CONST DWORDLONG HalfAvailable{ MemoryStatus.ullAvailPhys / 2 };

        *pMemoryLoad = MemoryStatus.dwMemoryLoad;
        *pAvailableBytes = HalfAvailable > LargestSize ? LargestSize : static_cast<SIZE_T>(HalfAvailable);

        return S_OK;
    }

    //
    // We have no need for these callbacks
    //

    HRESULT STDMETHODCALLTYPE AcquiredVirtualAddressSpace(IN PVOID startAddress, IN SIZE_T size) override
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE NeedsVirtualAddressSpace(IN PVOID startAddress, IN SIZE_T size) override
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE ReleasedVirtualAddressSpace(IN PVOID startAddress) override
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE RegisterMemoryNotificationCallback(ICLRMemoryNotificationCallback* pCallback) override
    {
        return S_OK;
    }

    //
    // IHostMemoryManager also inherits IUnknown
    //

    //
    // Answers requests for IUnknown and IHostMemoryManager with this object (adding a reference);
    // any other interface yields E_NOINTERFACE
    //
    STDMETHODIMP QueryInterface(IN REFIID riid, OUT PVOID* ppvObj) override
    {
        if (ppvObj == nullptr)
        {
            return E_INVALIDARG;
        }

        *ppvObj = nullptr;

        if (riid == IID_IUnknown || riid == IID_IHostMemoryManager)
        {
            *ppvObj = static_cast<LPVOID>(this);
            AddRef();

            return NOERROR;
        }

        return E_NOINTERFACE;
    }

    //
    // Atomically increments the reference count and returns the new value
    //
    STDMETHODIMP_(ULONG) AddRef() override
    {
        return InterlockedIncrement(&ReferenceCount);
    }

    //
    // Atomically decrements the reference count and destroys the object once it reaches zero
    //
    STDMETHODIMP_(ULONG) Release() override
    {
        CONST ULONG NewCount{ InterlockedDecrement(&ReferenceCount) };

        if (NewCount == 0)
        {
            delete this;
        }

        return NewCount;
    }

private:
    //
    // A freshly constructed object already carries the reference owned by its creator
    //
    ULONG ReferenceCount{ 1 };
};

In order for the runtime to use the memory manager we defined, it is necessary to implement IHostControl. The runtime uses this interface to ascertain which hosting interfaces the unmanaged host supports, such as IHostMemoryManager.

//
// IHostControl implementation through which the runtime discovers the hosting interfaces we
// support. The only manager offered is IHostMemoryManager, backed by LuciMemoryManager
//
class LuciHostControl final : public IHostControl
{
public:
    //
    // Returns a new LuciMemoryManager through *ppv when riid is IID_IHostMemoryManager.
    // Every other interface is declined with E_NOINTERFACE and *ppv set to nullptr
    //
    HRESULT STDMETHODCALLTYPE GetHostManager(IN REFIID riid, OUT PVOID* ppv) override
    {
        if (riid != IID_IHostMemoryManager)
        {
            *ppv = nullptr;
            return E_NOINTERFACE;
        }

        //
        // When the runtime asks for our implementation of IHostMemoryManager, we return an interface pointer to LuciMemoryManager
        //
        *ppv = static_cast<IHostMemoryManager*>(new LuciMemoryManager());
        return S_OK;
    }

    //
    // The application domain manager is of no interest to us; acknowledge and ignore it.
    // Declared with STDMETHODCALLTYPE like every other interface method in this file
    //
    HRESULT STDMETHODCALLTYPE SetAppDomainManager(DWORD dwAppDomainID, IUnknown* pUnkAppDomainManager) override
    {
        return S_OK;
    }

    //
    // Answers requests for IUnknown and IHostControl with this object (adding a reference);
    // any other interface yields E_NOINTERFACE
    //
    STDMETHODIMP QueryInterface(IN REFIID riid, OUT PVOID* ppvObj) override
    {
        if (ppvObj == nullptr)
        {
            return E_INVALIDARG;
        }

        *ppvObj = nullptr;

        if (riid == IID_IUnknown || riid == IID_IHostControl)
        {
            *ppvObj = static_cast<LPVOID>(this);
            AddRef();

            return NOERROR;
        }

        return E_NOINTERFACE;
    }

    //
    // Atomically increments the reference count and returns the new value
    //
    STDMETHODIMP_(ULONG) AddRef() override
    {
        return InterlockedIncrement(&ReferenceCount);
    }

    //
    // Atomically decrements the reference count and destroys the object once it reaches zero
    //
    STDMETHODIMP_(ULONG) Release() override
    {
        CONST ULONG NewCount{ InterlockedDecrement(&ReferenceCount) };

        if (NewCount == 0)
        {
            delete this;
        }

        return NewCount;
    }

private:
    //
    // A freshly constructed object already carries the reference owned by its creator
    //
    ULONG ReferenceCount{ 1 };
};

All of the previous pieces come together once SetHostControl is called, as sketched below:


//
// Obtain an ICLRMetaHost by calling CLRCreateInstance through Instance.Mscoree
// NOTE(review): GidMetaHost and IIDMetaHost are defined outside this snippet - presumably the
// class and interface identifiers of the meta host; confirm
// NOTE(review): this is a sketch, Result is assigned after every call but never inspected, so
// the interface pointers below are dereferenced even if an earlier call failed
//
ICLRMetaHost* MetaHost{};
HRESULT Result { Instance.Mscoree.CLRCreateInstance(GidMetaHost, IIDMetaHost, reinterpret_cast<LPVOID*>(&MetaHost)) };

//
// Check if the required runtime version is loadable
//

ICLRRuntimeInfo* RuntimeInformation{};
WCHAR RequiredVersion[] = L"v2.0.50727";

Result = MetaHost->GetRuntime(RequiredVersion, IID_ICLRRuntimeInfo, reinterpret_cast<LPVOID*>(&RuntimeInformation));

//
// NOTE(review): IsLoadable is filled in here but not acted upon anywhere in this snippet
//
BOOL IsLoadable{};
RuntimeInformation->IsLoadable(&IsLoadable);

//
// Load the CLR into the current process
//

ICLRRuntimeHost* RuntimeHost{};

Result = RuntimeInformation->GetInterface(CLSID_CLRRuntimeHost, IID_ICLRRuntimeHost, reinterpret_cast<LPVOID*>(&RuntimeHost));

//
// Set the host control to a LuciHostControl so that the CLR can get our implementation of IHostMemoryManager
// The new LuciHostControl starts with a reference count of one, held by HostControl
// NOTE(review): presumably this has to happen before the runtime is started - confirm
//

CONST AUTO HostControl{ new LuciHostControl() };

Result = RuntimeHost->SetHostControl(HostControl);

References