现代 C++ 跨平台开发-内存篇:内存问题分析工具

本文是整个【现代 C++ 跨平台开发-内存篇】系列的第 9 篇,主要涉及:内存分析工具。

静态分析

clang-tidy

LLVM 提供的静态代码扫描工具,从名字也能看出,侧重于代码规范方面的校验,也能检测一部分 bug 性质的代码问题。

安装

  • VSCode 插件;

  • 安卓和鸿蒙 NDK 均有内置;

  • 也可通过 brew/apt-get/choco 等方式安装;

配置

规则按 abi、performance、bugprone、cppcoreguidelines、modernize 等分组,其中 bugprone 就是检测 bug 性质问题的。

可通过根目录创建 .clang-tidy 文件自定义配置,通过 -xxxGroup-xxxConfig 禁用某条子规则。

# clang-tidy configuration. The leading `-*` switches every check off, each
# `group-*` entry then enables a whole check group, and each `-group-check`
# entry turns one individual check of an enabled group back off.
Checks: 
  -*,
  performance-*,
  abi-*,
  cert-*,
  concurrency-*,
  android-*,
  objc-*,
  bugprone-*,
  -bugprone-easily-swappable-parameters,
  cppcoreguidelines-*,
  -cppcoreguidelines-avoid-do-while,
  modernize-*, 
  -modernize-use-trailing-return-type,
  llvm-*,
  -llvm-include-order,
  readability-*,
  -readability-qualified-auto,
  google-*,
  -google-build-using-namespace,
  misc-*,
  -misc-non-private-member-variables-in-classes,

FormatStyle: llvm

如果某些地方需要手动忽略,可在注释中使用 NOLINT 相关声明:

// Illustrative catalogue of clang-tidy suppression comments (NOLINT,
// NOLINTNEXTLINE, NOLINTBEGIN/NOLINTEND). Not meant to compile as-is: the
// same declarations (e.g. `Foo(bool param)`, `int array[10]`) are repeated so
// that every suppression form can be shown on an identical line.
class Foo {
  // Suppress all the diagnostics for the line
  Foo(int param); // NOLINT

  // Consider explaining the motivation to suppress the warning
  Foo(char param); // NOLINT: Allow implicit conversion from `char`, because <some valid reason>

  // Silence only the specified checks for the line
  Foo(double param); // NOLINT(google-explicit-constructor, google-runtime-int)

  // Silence all checks from the `google` module
  Foo(bool param); // NOLINT(google*)

  // Silence all checks ending with `-avoid-c-arrays`
  int array[10]; // NOLINT(*-avoid-c-arrays)

  // Silence only the specified diagnostics for the next line
  // NOLINTNEXTLINE(google-explicit-constructor, google-runtime-int)
  Foo(bool param);

  // Silence all checks from the `google` module for the next line
  // NOLINTNEXTLINE(google*)
  Foo(bool param);

  // Silence all checks ending with `-avoid-c-arrays` for the next line
  // NOLINTNEXTLINE(*-avoid-c-arrays)
  int array[10];

  // Silence only the specified checks for all lines between the BEGIN and END
  // NOLINTBEGIN(google-explicit-constructor, google-runtime-int)
  Foo(short param);
  Foo(long param);
  // NOLINTEND(google-explicit-constructor, google-runtime-int)

  // Silence all checks from the `google` module for all lines between the BEGIN and END
  // NOLINTBEGIN(google*)
  Foo(bool param);
  // NOLINTEND(google*)

  // Silence all checks ending with `-avoid-c-arrays` for all lines between the BEGIN and END
  // NOLINTBEGIN(*-avoid-c-arrays)
  int array[10];
  // NOLINTEND(*-avoid-c-arrays)
};

使用

这里主要看下命令工具的使用方式。

我们以下面这段代码为例:

#include <cstring>
#include <memory>
#include <vector>

void f0() {  // Deliberate defect sample: `it` is taken before push_back, which may reallocate and invalidate it.
    std::vector<int> v = {1, 2, 3};
    auto it = v.begin();
    v.push_back(4);
    *it = 999;
}

void f1() {  // Deliberate defect sample: p0 is dereferenced after its ownership was moved into p1.
    auto p0 = std::make_unique<int>(8);
    auto p1 = std::move(p0);
    auto i0 = *p0;
}

void f2() {  // Deliberate defect sample: p1 adopts the raw pointer that p0 already owns, so both will delete it.
    auto p0 = std::make_shared<int>(1);
    auto p1 = std::shared_ptr<int>(p0.get());
}

void f3() {
    char buf[32] = "hello world";  // Deliberate defect sample: the memcpy below copies between overlapping ranges of buf.
    std::memcpy(buf, buf + 2, 6);
}

void f4() {  // Deliberate defect sample: shared() wraps `this` of the stack object `t` in an owning shared_ptr.
    struct T {
        std::shared_ptr<T> shared() {
              return std::shared_ptr<T>(this);
        }
    };
    T t{};
    auto p = t.shared();
}

int main() {  // Runs the defect samples f0..f4 in order.
    f0();
    f1();
    f2();
    f3();
    f4();
    return 0;
}

执行 clang-tidy:

# 针对单个文件:
clang-tidy -checks='bugprone-*,clang-analyzer-*' x.cpp -- -std=c++17 -I.
# 针对整个项目(依赖 compile_commands.json):
run-clang-tidy -p build -checks='bugprone-*,clang-analyzer-*'

检测结果:

9891 warnings generated.
/home/i/x.cpp:15:10: warning: Value stored to 'i0' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
   15 |     auto i0 = *p0;
      |          ^~   ~~~
/home/i/x.cpp:15:10: note: Value stored to 'i0' during its initialization is never read
   15 |     auto i0 = *p0;
      |          ^~   ~~~
/home/i/x.cpp:15:15: warning: Dereference of null smart pointer 'p0' of type 'std::unique_ptr' [clang-analyzer-cplusplus.Move]
   15 |     auto i0 = *p0;
      |               ^
/home/i/x.cpp:40:5: note: Calling 'f1'
   40 |     f1();
      |     ^~~~
/home/i/x.cpp:14:15: note: Smart pointer 'p0' of type 'std::unique_ptr' is reset to null when moved from
   14 |     auto p1 = std::move(p0);
      |               ^~~~~~~~~~~~~
/home/i/x.cpp:15:15: note: Dereference of null smart pointer 'p0' of type 'std::unique_ptr'
   15 |     auto i0 = *p0;
      |               ^~~
/home/i/x.cpp:15:16: warning: 'p0' used after it was moved [bugprone-use-after-move]
   15 |     auto i0 = *p0;
      |                ^
/home/i/x.cpp:14:15: note: move occurred here
   14 |     auto p1 = std::move(p0);
      |               ^
Suppressed 9888 warnings (9888 in non-user code).
Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.

可以看到:它只能检测出【Use-after-move】这一个问题。

cppcheck

cppcheck 是一个更轻量且专注于代码安全问题检测的工具。

安装

可直接通过 brew/apt-get/choco 安装。

使用

需要注意的是,启用 --enable=all 后,它会为找不到的系统头文件逐条输出 missingIncludeSystem 提示,可添加 --suppress=missingIncludeSystem 忽略:

# 针对单个文件:
cppcheck --enable=all --suppress=missingIncludeSystem --std=c++17 x.cpp
# 针对整个项目:
cppcheck --project=build/compile_commands.json --enable=all --suppress=missingIncludeSystem --std=c++17

仍然使用前面的代码验证,运行结果:

Checking x.cpp ...
x.cpp:15:16: warning: Access of moved variable 'p0'. [accessMoved]
    auto i0 = *p0;
               ^
x.cpp:14:15: note: Calling std::move(p0)
    auto p1 = std::move(p0);
              ^
x.cpp:15:16: note: Access of moved variable 'p0'.
    auto i0 = *p0;
               ^
x.cpp:25:10: error: Overlapping read/write in memcpy() is undefined behavior [overlappingWriteFunction]
    std::memcpy(buf, buf + 2, 6);
         ^
x.cpp:9:6: error: Using iterator to local container 'v' that may be invalid. [invalidContainer]
    *it = 999;
     ^
x.cpp:7:22: note: Iterator to container is created here.
    auto it = v.begin();
                     ^
x.cpp:8:7: note: After calling 'push_back', iterators or references to the container's data may be invalid .
    v.push_back(4);
      ^
x.cpp:6:22: note: Variable created here.
    std::vector<int> v = {1, 2, 3};
                     ^
x.cpp:9:6: note: Using iterator to local container 'v' that may be invalid.
    *it = 999;
     ^
x.cpp:35:12: style: Variable 'p' is assigned a value that is never used. [unreadVariable]
    auto p = t.shared();
           ^
nofile:0:0: information: Active checkers: 161/592 (use --checkers-report=<filename> to see details) [checkersReport]

可以看到:它能额外检测出【memcpy() 内存重叠】、【迭代器失效非法访问】两个问题。

infer

Meta 开源的 infer 是对 clang-tidy 和 cppcheck 的一个强力补充:

  • 从编译环节就开始拦截,利用 LLVM 提供的 AST(抽象语法树) 和 IR (中间代码表示)等信息,深入分析潜在的内存问题;

  • 支持跨函数分析,除了常见的内存问题分析,还支持线程安全等众多安全问题分析;

  • 支持 C++、Java、Objc;

安装

infer 是 OCaml 语言编写的,需要依赖其包管理工具 opam 下载相关依赖;

安装完 opam 后,可按照官方文档从源码编译和安装 infer,其间会自动下载和编译 LLVM。

infer 官方对 Windows 支持不太好,既未提供二进制也未提供安装说明,建议通过 WSL/Docker 等方式使用。

使用

默认只启用常用的内存相关的分析,线程安全问题可通过 --racerd 开启。

# 针对单个文件:
infer run -- clang++ -c x.cpp
# 针对整个项目:
infer run --compilation-database build/compile_commands.json

同样使用前面的测试代码,检测结果:

Capturing in make/cc mode...
Found 1 source file to analyze in /home/i/infer-out
69/69 [##############################################################################] 100% 135ms

x.cpp:9: error: Vector Invalidation(VECTOR_INVALIDATION)
  accessing `it.__infer_backing_pointer` that was potentially invalidated by `std::vector::push_back` on line 8.
   7.     auto it = v.begin();
   8.     v.push_back(4);
   9.     *it = 999;
          ^
  10. }
  11.

x.cpp:15: error: Dead Store(DEAD_STORE)
  The value written to `&i0` is never used.
  13.     auto p0 = std::make_unique<int>(8);
  14.     auto p1 = std::move(p0);
  15.     auto i0 = *p0;
          ^
  16. }
  17.

x.cpp:15: error: Null Dereference(NULLPTR_DEREFERENCE)
  `p0.__infer_backing_pointer` could be null (null value originating from line 14) and is dereferenced.
  13.     auto p0 = std::make_unique<int>(8);
  14.     auto p1 = std::move(p0);
  15.     auto i0 = *p0;
                    ^
  16. }
  17.

x.cpp:21: error: Use After Delete(USE_AFTER_DELETE)
  accessing `p0.__infer_backing_pointer` that was invalidated by `delete` on line 21.
  19.     auto p0 = std::make_shared<int>(1);
  20.     auto p1 = std::shared_ptr<int>(p0.get());
  21. }
      ^
  22.
  23. void f3() {


Found 4 issues
                Issue Type(ISSUED_TYPE_ID): #
  Vector Invalidation(VECTOR_INVALIDATION): 1
        Use After Delete(USE_AFTER_DELETE): 1
     Null Dereference(NULLPTR_DEREFERENCE): 1
                    Dead Store(DEAD_STORE): 1

可以看到:错误信息输出特别清晰,而且还能检测出【Use-after-delete】等问题,但是没有检测出【memcpy() 内存重叠】问题。

所以,实践中建议将 clang-tidy、cppcheck、infer 三者配合使用。

另外,三个工具都对 f4() 这种复杂场景束手无策(栈对象构造智能指针,并且直接拿 this 构造 shared_ptr)。

因此,还需要借助动态分析工具。

动态分析

Sanitizers

最初由 Google 开源的 Sanitizers 套件,现已成为 LLVM 项目的一部分,各家编译器也做了深度支持。
可用于运行时动态检测内存问题、资源竞争、UB 等,可直接在 CMakeLists.txt 配置。

不过要注意:

  • 各平台编译器支持情况有差异;

  • 有一定开销,建议仅在 debug 构建打开。

# Enable compiler sanitizers for the Debug configuration of ${PROJECT_NAME}.
# The target must already exist when this snippet is evaluated.
set(SANITIZERS "")

# Multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) do not use
# CMAKE_BUILD_TYPE, so a plain `STREQUAL "Debug"` test would silently skip them.
# The flags are therefore attached per-configuration further down.
get_property(_sanitizers_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

if(_sanitizers_multi_config OR "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    # Hardware-assisted ASan on 64-bit ARM Android, classic ASan everywhere else.
    if(ANDROID AND "${CMAKE_ANDROID_ARCH_ABI}" STREQUAL "arm64-v8a")
        list(APPEND SANITIZERS "hwaddress")
    else()
        list(APPEND SANITIZERS "address")
    endif()

    if(NOT WIN32)
        list(APPEND SANITIZERS "undefined")
    endif()

    if(NOT WIN32 AND NOT EMSCRIPTEN AND NOT ANDROID AND NOT OHOS AND NOT APPLE)
        list(APPEND SANITIZERS "leak")
    endif()

    # TSan is mutually exclusive with ASan and LSan.
    # Enable it only if you strictly need to check for race conditions.
    if(NOT WIN32 AND NOT EMSCRIPTEN AND NOT ANDROID AND NOT OHOS)
        # list(APPEND SANITIZERS "thread")
    endif()

    if(SANITIZERS)
        list(JOIN SANITIZERS "," SANITIZERS_STR)
        if(MSVC)
            # cl.exe spelling. -fno-omit-frame-pointer and -g are GCC/Clang
            # options, and /fsanitize is a compiler switch rather than a
            # link.exe one, so neither is forwarded for MSVC.
            set(SANITIZER_FLAGS "/fsanitize=${SANITIZERS_STR}")
            set(_sanitizer_compile_flags "${SANITIZER_FLAGS}")
            set(_sanitizer_link_flags "")
        else()
            set(SANITIZER_FLAGS "-fsanitize=${SANITIZERS_STR}")
            # Frame pointers and debug info keep the sanitizer reports readable.
            set(_sanitizer_compile_flags "${SANITIZER_FLAGS}" -fno-omit-frame-pointer -g)
            set(_sanitizer_link_flags "${SANITIZER_FLAGS}")
        endif()

        # One generator expression per flag: only the Debug configuration is
        # instrumented, for single- and multi-config generators alike.
        foreach(_sanitizer_flag IN LISTS _sanitizer_compile_flags)
            target_compile_options(${PROJECT_NAME} PRIVATE
                "$<$<CONFIG:Debug>:${_sanitizer_flag}>")
        endforeach()

        # Append instead of overwrite so linker flags set elsewhere survive.
        if(NOT "${_sanitizer_link_flags}" STREQUAL "")
            set_property(TARGET ${PROJECT_NAME} APPEND_STRING PROPERTY
                LINK_FLAGS_DEBUG " ${_sanitizer_link_flags}")
        endif()
    endif()
endif()

基于软件的 ASan(AddressSanitizer)

  • 将每 8 个字节的应用内存映射到 1 个“影子字节”,记录这 8 个字节中哪些可访问(如是否已分配、是否在有效范围内);

  • 编译时插入额外检查代码,每次内存访问前,检查影子内存状态,若非法则立即中止并报告错误。

ASan 依赖对应的动态库,可通过 CMake 函数拿到当前编译任务对应架构的库位置并拷贝:

# installAsanRuntime(<dest_dir>)
#
# Resolves the AddressSanitizer runtime shared library matching the current
# target platform/architecture by querying ${CMAKE_CXX_COMPILER}, then copies
# it into <dest_dir> while CMake configures the project.
#
# Supported targets: Apple (macOS / iOS device / simulator), Android and OHOS.
# Every failure is reported with message(SEND_ERROR ...) and the function
# returns without copying anything.
function(installAsanRuntime dest_dir)
	if(APPLE)
		# CMAKE_OSX_SYSROOT can be an SDK path (".../iPhoneSimulator.sdk") or a
		# bare SDK name ("iphonesimulator"), so both spellings are accepted,
		# just like for the device and macOS SDKs below.
		if(CMAKE_OSX_SYSROOT MATCHES "[Ss]imulator")
			set(_asan_lib_name "libclang_rt.asan_iossim_dynamic.dylib")
		elseif(CMAKE_OSX_SYSROOT MATCHES "iPhoneOS|iphoneos")
			set(_asan_lib_name "libclang_rt.asan_ios_dynamic.dylib")
		elseif(CMAKE_OSX_SYSROOT MATCHES "MacOSX|macosx")
			set(_asan_lib_name "libclang_rt.asan_osx_dynamic.dylib")
		else()
			message(SEND_ERROR "installAsanRuntime: UNKNOWN CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT}")
			return()
		endif()
	elseif(CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "OHOS")
		# Prefer the Android ABI name; on OHOS fall back to the processor name
		# when that variable is not set.
		set(_abi "${CMAKE_ANDROID_ARCH_ABI}")
		if(CMAKE_SYSTEM_NAME STREQUAL "OHOS" AND NOT _abi)
			set(_abi "${CMAKE_SYSTEM_PROCESSOR}")
		endif()
		if(_abi STREQUAL "arm64-v8a" OR _abi STREQUAL "aarch64")
			set(_asan_lib_name "libclang_rt.asan-aarch64-android.so")
		elseif(_abi STREQUAL "armeabi-v7a" OR _abi STREQUAL "armv7a" OR _abi STREQUAL "arm")
			set(_asan_lib_name "libclang_rt.asan-arm-android.so")
		elseif(_abi STREQUAL "x86_64")
			set(_asan_lib_name "libclang_rt.asan-x86_64-android.so")
		elseif(_abi STREQUAL "x86" OR _abi STREQUAL "i686")
			set(_asan_lib_name "libclang_rt.asan-i686-android.so")
		else()
			message(SEND_ERROR "installAsanRuntime: UNKNOWN ABI: ${_abi}")
			return()
		endif()
	else()
		message(SEND_ERROR "installAsanRuntime: Only support APPLE / Android / OHOS (CMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME})")
		return()
	endif()

	# Ask the compiler driver where it keeps the runtime library.
	execute_process(
		COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=${_asan_lib_name}
		OUTPUT_VARIABLE _asan_lib_full_path
	)

	string(STRIP "${_asan_lib_full_path}" _asan_lib_full_path)

	# Android / OHOS fallback: a non-absolute answer is treated as "not
	# resolved", so look under <resource-dir>/lib/linux instead.
	if(NOT EXISTS "${_asan_lib_full_path}" AND (CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "OHOS"))
		if(NOT _asan_lib_full_path MATCHES "^[/\\\\]|^[A-Za-z]:")
			execute_process(
				COMMAND ${CMAKE_CXX_COMPILER} -print-resource-dir
				OUTPUT_VARIABLE _clang_resource_dir
			)
			string(STRIP "${_clang_resource_dir}" _clang_resource_dir)
			set(_asan_lib_full_path "${_clang_resource_dir}/lib/linux/${_asan_lib_name}")
		endif()
	endif()

	if(EXISTS "${_asan_lib_full_path}")
		file(COPY "${_asan_lib_full_path}" DESTINATION "${dest_dir}")
		message(STATUS "ASan runtime lib is installed: ${_asan_lib_full_path} -> ${dest_dir}/${_asan_lib_name}")
	else()
		message(SEND_ERROR "ASan lib not found: ${_asan_lib_name} (path: ${_asan_lib_full_path})")
	endif()
endfunction()
Android 的额外配置
  • 需要接管整个进程内存,并且在所有 so 加载之前,需要在 resources/lib/$abi 目录通过 wrap.sh 脚本手动加载上面的 so:
#!/system/bin/sh
LD_PRELOAD=libclang_rt.asan-aarch64-android.so exec "$@"
  • shell 脚本需确保拥有可执行权限,并且注意 Windows 编辑器下可能出现不兼容的换行符(会影响脚本执行);

  • ASan 在 Android 上依赖帧指针,所以必须加上 -fno-omit-frame-pointer 编译参数,确保错误报告可读。

Android 基于硬件的 ASan(HWASan)

  • 利用硬件特性(主要是 ARM64 的 Top Byte Ignore, TBI):

    • 在 64 位指针的高 8 位(即 tag)存储一个随机值;

    • 分配内存时,同时为内存块分配一个 tag,并存储在影子内存中;

    • 每次内存访问时,比较指针 tag 与目标地址影子内存中的 tag,不匹配则报错;

    • tag 针对的是 16 字节对齐的内存块,并且不需要全局唯一,只需要在当前所有活跃的内存块保持唯一,因此冲突概率并不高。

  • 仍使用影子内存,但粒度更粗(通常为 16 字节对齐),因此内存开销更低。

  • 仅支持具备 TBI 能力的平台:主要是 ARM64;

  • 不依赖动态链接库,也无需 hook 启动流程。

Xcode 启用 Sanitizers

Xcode 深度集成 Sanitizer 相关工具链,只需 Build Settings 勾选对应的 Sanitizer,Instruments 可直接查看报告。

鸿蒙端 Sanitizers 支持

鸿蒙及 DevEco Studio 同样深度集成 Sanitizers,并且不需要任何额外配置(只需要 CMakeLists.txt 添加配置并且确保 debug 构建)。