diff --git a/.github/issue_stale.yaml b/.github/issue_stale.yaml deleted file mode 100644 index 17948d3..0000000 --- a/.github/issue_stale.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Number of days of inactivity before an issue becomes stale -daysUntilStale: 7 -# Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 -# Issues with these labels will never be considered stale -exemptLabels: - - backlog -# Label to use when marking an issue as stale -staleLabel: stale -# Comment to post when marking an issue as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you - for your contributions. -# Comment to post when closing a stale issue. Set to `false` to disable -closeComment: false \ No newline at end of file diff --git a/.github/workflows/issue_stale.yaml b/.github/workflows/issue_stale.yaml new file mode 100644 index 0000000..7c11c2b --- /dev/null +++ b/.github/workflows/issue_stale.yaml @@ -0,0 +1,30 @@ +name: Close inactive issues +on: + schedule: + - cron: "35 11 * * 5" + +env: + DAYS_BEFORE_ISSUE_STALE: 30 + DAYS_BEFORE_ISSUE_CLOSE: 14 + +jobs: + close-issues: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v5 + with: + days-before-issue-stale: ${{ env.DAYS_BEFORE_ISSUE_STALE }} + days-before-issue-close: ${{ env.DAYS_BEFORE_ISSUE_CLOSE }} + stale-issue-label: "stale" + stale-issue-message: | + This issue is stale because it has been open for ${{ env.DAYS_BEFORE_ISSUE_STALE }} days with no activity. + It will be closed if no further activity occurs. Let us know if you still need help! + close-issue-message: | + This issue is being closed because it has been stale for ${{ env.DAYS_BEFORE_ISSUE_CLOSE }} days with no activity. + If you still need help, please feel free to leave comments. 
+ days-before-pr-stale: -1 + days-before-pr-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..0bd27a2 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "OpenSceneFlow"] + path = OpenSceneFlow + url = https://github.com/KTH-RPL/OpenSceneFlow.git diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index b80f234..0000000 --- a/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# check more: https://hub.docker.com/r/nvidia/cuda -FROM nvidia/cuda:11.7.1-devel-ubuntu20.04 -ENV DEBIAN_FRONTEND noninteractive - -RUN apt update && apt install -y --no-install-recommends \ - git curl vim rsync htop - -RUN curl -o ~/miniconda.sh -LO https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p /opt/conda && \ - rm ~/miniconda.sh && \ - /opt/conda/bin/conda clean -ya && /opt/conda/bin/conda init bash - -RUN curl -o ~/mamba.sh -LO https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ - chmod +x ~/mamba.sh && \ - ~/mamba.sh -b -p /opt/mambaforge && \ - rm ~/mamba.sh && /opt/mambaforge/bin/mamba init bash - -# install zsh and oh-my-zsh -RUN apt install -y wget git zsh tmux vim g++ -RUN sh -c "$(wget -O- https://github.com/deluan/zsh-in-docker/releases/download/v1.1.5/zsh-in-docker.sh)" -- \ - -t robbyrussell -p git \ - -p https://github.com/agkozak/zsh-z \ - -p https://github.com/zsh-users/zsh-autosuggestions \ - -p https://github.com/zsh-users/zsh-completions \ - -p https://github.com/zsh-users/zsh-syntax-highlighting - -RUN printf "y\ny\ny\n\n" | bash -c "$(curl -fsSL https://raw.githubusercontent.com/Kin-Zhang/Kin-Zhang/main/scripts/setup_ohmyzsh.sh)" -RUN /opt/conda/bin/conda init zsh && /opt/mambaforge/bin/mamba init zsh - -# change to conda env -ENV PATH /opt/conda/bin:$PATH -ENV PATH /opt/mambaforge/bin:$PATH - -RUN mkdir -p /home/kin/workspace && cd 
/home/kin/workspace && git clone https://github.com/KTH-RPL/SeFlow.git -WORKDIR /home/kin/workspace/SeFlow -RUN apt-get update && apt-get install libgl1 -y -# need read the gpu device info to compile the cuda extension -RUN cd /home/kin/workspace/SeFlow && /opt/mambaforge/bin/mamba env create -f environment.yaml -RUN cd /home/kin/workspace/SeFlow/assets/cuda/mmcv && /opt/mambaforge/envs/seflow/bin/python ./setup.py install -RUN cd /home/kin/workspace/SeFlow/assets/cuda/chamfer3D && /opt/mambaforge/envs/seflow/bin/python ./setup.py install - - diff --git a/OpenSceneFlow b/OpenSceneFlow new file mode 160000 index 0000000..6226b07 --- /dev/null +++ b/OpenSceneFlow @@ -0,0 +1 @@ +Subproject commit 6226b07784dbd6f1aa52931d419a7d3707b4e3d3 diff --git a/README.md b/README.md index d2fa38d..49fd33a 100644 --- a/README.md +++ b/README.md @@ -8,70 +8,36 @@ SeFlow: A Self-Supervised Scene Flow Method in Autonomous Driving ![](assets/docs/seflow_arch.png) -2024/11/18 16:17: Update model and demo data download link through HuggingFace, Personally I found `wget` from HuggingFace link is much faster than Zenodo. - -2024/09/26 16:24: All codes already uploaded and tested. You can to try training directly by downloading (through [HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow)/[Zenodo](https://zenodo.org/records/13744999)) demo data or pretrained weight for evaluation. - -Pre-trained weights for models are available in [Zenodo](https://zenodo.org/records/13744999)/[HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow) link. Check usage in [2. Evaluation](#2-evaluation) or [3. Visualization](#3-visualization). - Task: __Self-Supervised__ Scene Flow Estimation in Autonomous Driving. No human-label needed. Real-time inference (15-20Hz in RTX3090). -We directly follow our previous work [code structure](https://github.com/KTH-RPL/DeFlow), so you may want to start from the easier one with supervised learning first: Try [DeFlow](https://github.com/KTH-RPL/DeFlow). 
Then you will find this is simple to you (things about how to train under self-supervised). Here are **Scripts** quick view in this repo: - -- `dataprocess/extract_*.py` : pre-process data before training to speed up the whole training time. - [Dataset we included now: Argoverse 2 and Waymo. more on the way: Nuscenes, custom data.] - -- `process.py`: process data with save dufomap, cluster labels inside file. Only needed once for training. - -- `train.py`: Train the model and get model checkpoints. Pls remember to check the config. - -- `eval.py` : Evaluate the model on the validation/test set. And also output the zip file to upload to online leaderboard. - -- `save.py` : Will save result into h5py file, using [tool/visualization.py] to show results with interactive window. - -
🎁 One repository, All methods! - -You can try following methods in our code without any effort to make your own benchmark. +📜 2025/02/18: Merging all scene flow code to [OpenSceneFLow codebase](https://github.com/KTH-RPL/OpenSceneFlow) for afterward code maintenance. This repo saved README, [cluster slurm files](assets/slurm), and [quick core file](lossfunc.py) in SeFlow. The old source code branch is also [available here](https://github.com/KTH-RPL/SeFlow/tree/source). -- [x] [SeFlow](https://arxiv.org/abs/2407.01702) (Ours 🚀): ECCV 2024 -- [x] [DeFlow](https://arxiv.org/abs/2401.16122) (Ours 🚀): ICRA 2024 -- [x] [FastFlow3d](https://arxiv.org/abs/2103.01306): RA-L 2021 -- [x] [ZeroFlow](https://arxiv.org/abs/2305.10424): ICLR 2024, their pre-trained weight can covert into our format easily through [the script](tools/zerof2ours.py). -- [ ] [NSFP](https://arxiv.org/abs/2111.01253): NeurIPS 2021, faster 3x than original version because of [our CUDA speed up](assets/cuda/README.md), same (slightly better) performance. Done coding, public after review. -- [ ] [FastNSF](https://arxiv.org/abs/2304.09121): ICCV 2023. Done coding, public after review. - -- [ ] ... more on the way +2024/11/18 16:17: Update model and demo data download link through HuggingFace, Personally I found `wget` from HuggingFace link is much faster than Zenodo. -
+2024/09/26 16:24: All codes already uploaded and tested. You can to try training directly by downloading (through [HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow)/[Zenodo](https://zenodo.org/records/13744999)) demo data or pretrained weight for evaluation. -💡: Want to learn how to add your own network in this structure? Check [Contribute](assets/README.md#contribute) section and know more about the code. Fee free to pull request! +Pre-trained weights for models are available in [Zenodo](https://zenodo.org/records/13744999)/[HuggingFace](https://huggingface.co/kin-zhang/OpenSceneFlow) link. Check usage in [2. Evaluation](#2-evaluation) or [3. Visualization](#3-visualization). ## 0. Setup -**Environment**: Same to [DeFlow](https://github.com/KTH-RPL/DeFlow). And even lighter here with extracting mmcv module we needed into cuda assets. +**Environment**: Clone the repo and build the environment, check [detail installation](https://github.com/KTH-RPL/OpenSceneFlow/assets/README.md) for more information. [Conda](https://docs.conda.io/projects/miniconda/en/latest/)/[Mamba](https://github.com/mamba-org/mamba) is recommended. + ```bash -git clone --recursive https://github.com/KTH-RPL/SeFlow.git -cd SeFlow && mamba env create -f environment.yaml +git clone --recursive https://github.com/KTH-RPL/OpenSceneFlow.git +cd OpenSceneFlow +mamba env create -f environment.yaml ``` CUDA package (need install nvcc compiler), the compile time is around 1-5 minutes: ```bash -mamba activate seflow +mamba activate opensf # CUDA already install in python environment. I also tested others version like 11.3, 11.4, 11.7, 11.8 all works cd assets/cuda/mmcv && python ./setup.py install && cd ../../.. cd assets/cuda/chamfer3D && python ./setup.py install && cd ../../.. ``` -Or you always can choose [Docker](https://en.wikipedia.org/wiki/Docker_(software)) which isolated environment and free yourself from installation, you can pull it by. 
-If you have different arch, please build it by yourself `cd SeFlow && docker build -t zhangkin/seflow` by going through [build-docker-image](https://github.com/KTH-RPL/DeFlow/blob/main/assets/README.md/#build-docker-image) section. -```bash -# option 1: pull from docker hub -docker pull zhangkin/seflow - -# run container -docker run -it --gpus all -v /dev/shm:/dev/shm -v /home/kin/data:/home/kin/data --name seflow zhangkin/seflow /bin/zsh -``` +Or another environment setup choice is [Docker](https://en.wikipedia.org/wiki/Docker_(software)) which isolated environment, check more information in [OpenSceneFlow/assets/README.md](https://github.com/KTH-RPL/OpenSceneFlow/assets/README.md#docker-environment). ## 1. Run & Train @@ -87,16 +53,6 @@ wget https://huggingface.co/kin-zhang/OpenSceneFlow/resolve/main/demo_data.zip unzip demo_data.zip -p /home/kin/data/av2 ``` -#### Prepare raw data - -Checking more information (step for downloading raw data, storage size, #frame etc) in [dataprocess/README.md](dataprocess/README.md). Extract all data to unified `.h5` format. -[Runtime: Normally need 45 mins finished run following commands totally in setup mentioned in our paper] -```bash -python dataprocess/extract_av2.py --av2_type sensor --data_mode train --argo_dir /home/kin/data/av2 --output_dir /home/kin/data/av2/preprocess_v2 -python dataprocess/extract_av2.py --av2_type sensor --data_mode val --mask_dir /home/kin/data/av2/3d_scene_flow -python dataprocess/extract_av2.py --av2_type sensor --data_mode test --mask_dir /home/kin/data/av2/3d_scene_flow -``` - #### Process train data Process train data for self-supervised learning. Only training data needs this step. [Runtime: Normally need 15 hours for my desktop, 3 hours for the cluster with five available nodes parallel running.] 
diff --git a/assets/README.md b/assets/README.md deleted file mode 100644 index 07cc3cd..0000000 --- a/assets/README.md +++ /dev/null @@ -1,78 +0,0 @@ -SeFlow Assets ---- - -There are two ways to setup the environment: conda in your desktop and docker container isolate environment. - -## Docker - -Docker installation check [DeFlow Assets](https://github.com/KTH-RPL/DeFlow/blob/main/assets/README.md#docker-environment). Then you can build and run the container by: - -```bash -cd SeFlow -docker build -t zhangkin/seflow . - -docker run -it --gpus all -v /dev/shm:/dev/shm -v /home/kin/data:/home/kin/data --name seflow zhangkin/seflow /bin/zsh -``` - -## Installation - -We will use conda to manage the environment with mamba for faster package installation. - -### System -Install conda with mamba for package management and for faster package installation: -```bash -curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -bash Miniforge3-$(uname)-$(uname -m).sh -``` - -### Environment - -Create base env: [~5 mins] - -```bash -git clone https://github.com/KTH-RPL/SeFlow.git -mamba env create -f assets/environment.yml -``` - -CUDA package (nvcc compiler already installed through conda), the compile time is around 1-5 minutes: -```bash -mamba activate seflow -cd assets/cuda/mmcv && python ./setup.py install && cd ../../.. -cd assets/cuda/chamfer3D && python ./setup.py install && cd ../../.. 
-``` - - -Checking important packages in our environment now: -```bash -mamba activate seflow -python -c "import torch; print(torch.__version__); print(torch.cuda.is_available()); print(torch.version.cuda)" -python -c "import lightning.pytorch as pl; print(pl.__version__)" -python -c "from assets.cuda.mmcv import Voxelization, DynamicScatter;print('successfully import on our lite mmcv package')" -python -c "from assets.cuda.chamfer3D import nnChamferDis;print('successfully import on our chamfer3D package')" -``` - - -### Other issues - -1. looks like open3d and fire package conflict, not sure - - need install package like `pip install --ignore-installed`, ref: [pip cannot install distutils installed project](https://stackoverflow.com/questions/53807511/pip-cannot-uninstall-package-it-is-a-distutils-installed-project), my error: `ERROR: Cannot uninstall 'blinker'.` - - need specific werkzeug version for open3d 0.16.0, otherwise error: `ImportError: cannot import name 'url_quote' from 'werkzeug.urls'`. But need update to solve the problem: `pip install --upgrade Flask` [ref](https://stackoverflow.com/questions/77213053/why-did-flask-start-failing-with-importerror-cannot-import-name-url-quote-fr) - - -2. `ImportError: libtorch_cuda.so: undefined symbol: cudaGraphInstantiateWithFlags, version libcudart.so.11.0` - The cuda version: `pytorch::pytorch-cuda` and `nvidia::cudatoolkit` need be same. [Reference link](https://github.com/pytorch/pytorch/issues/90673#issuecomment-1563799299) - - -3. In cluster have error: `pandas ImportError: /lib64/libstdc++.so.6: version 'GLIBCXX_3.4.29' not found` - Solved by `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/proj/berzelius-2023-154/users/x_qinzh/mambaforge/lib` - - -## Contribute - -If you want to contribute to new model, here are tips you can follow: -1. Dataloader: we believe all data could be process to `.h5`, we named as different scene and inside a scene, the key of each data is timestamp. 
Check [dataprocess/README.md](../dataprocess/README.md#process) for more details. -2. Model: All model files can be found [here: src/models](../src/models). You can view deflow and fastflow3d to know how to implement a new model. Don't forget to add to the `__init__.py` [file to import class](../src/models/__init__.py). -3. Loss: All loss files can be found [here: src/lossfuncs.py](../src/lossfuncs.py). There are three loss functions already inside the file, you can add a new one following the same pattern. -4. Training: Once you have implemented the model, you can add the model to the config file [here: conf/model](../conf/model) and train the model using the command `python train.py model=your_model_name`. One more note here may: if your res_dict from model output is different, you may need add one pattern in `def training_step` and `def validation_step`. - -All others like eval and vis will be changed according to the model you implemented as you follow the above steps. \ No newline at end of file diff --git a/assets/cuda/README.md b/assets/cuda/README.md deleted file mode 100644 index 621bcd3..0000000 --- a/assets/cuda/README.md +++ /dev/null @@ -1,21 +0,0 @@ -My CUDA library ---- - -Faster our code in CUDA. - -- chamfer3D: 3D chamfer distance within two point cloud, by Qingwen Zhang involved when she was working on SeFlow. -- mmcv: directly from mmcv, not our code. 
- ---- - -Quick View about CUDA speed on our chamfer3D (Faster 60x than others): - -The number of points: (pc0: 88132, pc1: 88101) - -| Function | Time (ms) | -| :---: | :---: | -| Faiss | 817.698 | -| CUDA([SCOOP](https://github.com/itailang/SCOOP/tree/master/auxiliary/ChamferDistancePytorch), Batch) | 83.275 | -| Pytorch3D | 68.256 | -| CUDA([SeFlow](https://github.com/KTH-RPL/SeFlow), SharedM) | **14.308** | -| ~~mmcv~~(chamfer2D) | 651.510 | \ No newline at end of file diff --git a/assets/cuda/chamfer3D/README.md b/assets/cuda/chamfer3D/README.md deleted file mode 100644 index abf8274..0000000 --- a/assets/cuda/chamfer3D/README.md +++ /dev/null @@ -1,155 +0,0 @@ -CUDA with Torch 初步尝试 ---- - -主要参考都是mmcv里的库 and [SCOOP](https://github.com/itailang/SCOOP/blob/master/auxiliary/ChamferDistancePytorch/chamfer3D/),只是想着仅提取这一个功能试一下,然后发现几个有意思的点 -CUDA with Torch C++ Programming, [torch official ref link](https://pytorch.org/tutorials/advanced/cpp_extension.html) - -1. `.cu` 不能被`.cpp` include,否则失去cu属性 -2. `.cpp`必须命名为`xxx_cuda.cpp` 否则torch CUDAextension不会找 -3. 
`.cu` 的include和平常的cuda编程有所不同:必须先include ATen然后再正常的导入 CUDA等库 [ref link](https://blog.csdn.net/weixin_39849839/article/details/125980694) - ```cpp - #include - - #include - #include - ``` - -注意要自己多写一个lib_xxx.py 内含class以方便调用,但是class内的forward函数必须有 `ctx` 参数,否则会报错 - - -## Install -```bash -# change it if you use different cuda version -export PATH=/usr/local/cuda-11.3/bin:$PATH -export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH - -cd assets/cuda/chamfer3Dlib -python ./setup.py install - -# then you will see -Installed /home/kin/mambaforge/envs/seflow/lib/python3.8/site-packages/chamfer3D-1.0.0-py3.8-linux-x86_64.egg -Processing dependencies for chamfer3D==1.0.0 -Finished processing dependencies for chamfer3D==1.0.0 - -# then run with lib_voxelize.py to see if it works -python ../chamfer_cuda.py -``` - -## ChamferDis Speed - -The number of points: (pc0: 88132, pc1: 88101) - -| Function | Time (ms) | -| :---: | :---: | -| Faiss | 817.698 | -| CUDA([SCOOP](https://github.com/itailang/SCOOP/tree/master/auxiliary/ChamferDistancePytorch), Batch) | 83.275 | -| Pytorch3D | 68.256 | -| CUDA([SeFlow](https://github.com/KTH-RPL/SeFlow), SharedM) | **14.308** | -| ~~mmcv~~(chamfer2D) | 651.510 | - -对比命令行: - -```bash -cd assets/tests -python chamferdis_speed_test.py -``` - - -Test computer and System: -- Desktop setting: i9-12900KF, GPU 3090, CUDA 11.3 -- System setting: Ubuntu 20.04, Python 3.8 - -Output Example: -``` -Output in my desktop with a 3090 GPU: ------- START Faiss Chamfer Distance Cal ------ -loss: tensor(0.1710, device='cuda:0') -Faiss Chamfer Distance Cal time: 809.593 ms - ------- START Pytorch3d Chamfer Distance Cal ------ -Pytorch3d Chamfer Distance Cal time: 68.906 ms -loss: tensor(0.1710, device='cuda:0', grad_fn=) - ------- START CUDA Chamfer Distance Cal ------ -Chamfer Distance Cal time: 1.814 ms -loss: tensor(0.1710, device='cuda:0', grad_fn=) -``` - -## Mics - - -### Note for CUDA ChamferDis - -主要是 两个月前写的 已经看不懂了;然后问题原因是因为 
总是缺0.0003的精度(精度强迫症患者) -然后就以为是自己写错了 后面发现是因为block的这种并行化 线程大小的不同对CUDA的浮点运算会有所不同,所以导致精度差距是有一点的 如果介意的话 可以使用pytorch3d的版本(也就是速度慢4倍左右 从15ms 到 80ms) - -这里主要重申一遍 shared memory在这里的用法: -1. 首先我们每个点都会分开走到 `int tid = blockIdx.x * blockDim.x + threadIdx.x;` 也就是全局索引,注意这个每个点都分开了 因为pc0每个点和pc1的临近点 和 其他的pc0点无关 -2. 然后走到每个点内部 就是__shared__ 我们首先建立了 pc1的share,但是因为共享内存有限,所以每次只保存THREADS_PER_BLOCK -3. 保存 THREADS_PER_BLOCK 也是每个线程做的 我们在对比距离前 运行了 __syncthreads(); 确保 THREADS_PER_BLOCK 个点的 pc1 已经到了 -4. 接着 我们在 `num_elems` 这一部分的数据内进行对比,同步best -5. 最后传给 全局这个点的 `result` - -需要注意的是 这种极致的并行化 会对精度产生一定的影响,但是如果你感兴趣 `#define THREADS_PER_BLOCK 256` 可以调整这个,对每个block设置不同的threads 会对精度有影响(当然 影响是 在 gt: 0.1710 但cuda计算会是 0.1711 - 0.1713之间) - -以下为chatgpt: -精度差异的原因之一可能是由于在不同的线程块大小下,浮点运算的顺序发生了改变。由于浮点运算是不结合的(即(a + b) + c 可能不等于 a + (b + c)),因此改变运算的顺序可能会导致轻微的结果差异。 - -这种类型的精度变化在GPU计算中是非常常见的,特别是在使用较大的数据集和进行大量的浮点运算时。要完全消除这种差异是非常困难的,因为即使是非常微小的实现细节变化(例如改变线程块大小、更改循环的结构、甚至是不同的GPU硬件或不同的CUDA版本)都可能导致浮点运算顺序的微小变化。 - -如果需要确保结果的一致性,可以考虑以下方法: - -1. 固定线程块大小:选择一个固定的线程块大小,并始终使用它。 - -2. 双精度浮点数(Double Precision):使用double类型代替float,可以提高精度,但代价是更高的内存使用和可能的性能下降。 - -3. 数值稳定的算法:尽量使用数值稳定的算法,尽管这在GPU上实现起来可能比较复杂且效率较低。 - -4. 减少并行化程度:通过减少并行化程度来减少由于不同线程执行顺序引起的差异,但这通常会牺牲性能。 - - -复制代码部分如下: -```cpp - -for (int i = 0; i < pc1_n; i += THREADS_PER_BLOCK) { - // Copy a block of pc1 to shared memory - int pc1_idx = i + threadIdx.x; - if (pc1_idx < pc1_n) { - shared_pc1[threadIdx.x * 3 + 0] = pc1_xyz[pc1_idx * 3 + 0]; - shared_pc1[threadIdx.x * 3 + 1] = pc1_xyz[pc1_idx * 3 + 1]; - shared_pc1[threadIdx.x * 3 + 2] = pc1_xyz[pc1_idx * 3 + 2]; - } - - __syncthreads(); - - // Compute the distance between pc0[tid] and the points in shared_pc1 - // NOTE(Qingwen): since after two months I forgot what I did here, I write some notes for future me - // 0. One reason for the difference in precision may be due to the changing order of floating point operations at different thread block sizes. - // But I think it's fine we lose 0.0001 precision for speed up cal time 4x - // 1. 
since we use shared to store pc1, here Every BLOCK will have new shared_pc1 start from 0 - // 2. we use THREADS_PER_BLOCK to loop pc1, so we need to check if the last block is not full - // 3. Based on the CUDA document, the __syncthreads() is not necessary here, but we keep it for safety - // 4. After running once, we go for next block of pc1, and find the best in that batch - - int num_elems = min(THREADS_PER_BLOCK, pc1_n - i); - for (int j = 0; j < num_elems; j++) { - float x1 = shared_pc1[j * 3 + 0]; - float y1 = shared_pc1[j * 3 + 1]; - float z1 = shared_pc1[j * 3 + 2]; - float d = (x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0) + (z1 - z0) * (z1 - z0); - if (d < best) { - best = d; - best_i = j + i; - } - } - __syncthreads(); -} -``` - -## Other issues -In cluster when build cuda things, you may occur problem: -- `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, - Main reason is gcc and g++ version problem in cluster, you can try to install inside `conda` to solve that, with: `mamba install -c conda-forge gxx==9.5.0`. And the reason why I set 9.5.0 is because of the version for cuda 11.3 need inside specific version. - ```bash - RuntimeError: The current installed version of g++ (13.2.0) is greater than the maximum required version by CUDA 11.3. Please make sure to use an adequate version of g++ (>=5.0.0, <11.0). - ``` diff --git a/assets/cuda/chamfer3D/__init__.py b/assets/cuda/chamfer3D/__init__.py deleted file mode 100644 index 3aac3a5..0000000 --- a/assets/cuda/chamfer3D/__init__.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -# Created: 2023-08-04 11:20 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of SeFlow (https://github.com/KTH-RPL/SeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
-# -# -# Description: ChamferDis speedup using CUDA -""" -from torch import nn -from torch.autograd import Function -import torch - -import os, time -import chamfer3D -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '../..' )) - - -# GPU tensors only -class ChamferDis(Function): - @staticmethod - def forward(ctx, pc0, pc1): - # pc0: (N,3), pc1: (M,3) - dis0 = torch.zeros(pc0.shape[0]).to(pc0.device).contiguous() - dis1 = torch.zeros(pc1.shape[0]).to(pc1.device).contiguous() - - idx0 = torch.zeros(pc0.shape[0], dtype=torch.int32).to(pc0.device).contiguous() - idx1 = torch.zeros(pc1.shape[0], dtype=torch.int32).to(pc1.device).contiguous() - - - chamfer3D.forward(pc0, pc1, dis0, dis1, idx0, idx1) - ctx.save_for_backward(pc0, pc1, idx0, idx1) - return dis0, dis1, idx0, idx1 - - @staticmethod - def backward(ctx, grad_dist0, grad_dist1, grad_idx0, grad_idx1): - pc0, pc1, idx0, idx1 = ctx.saved_tensors - grad_dist0 = grad_dist0.contiguous() - grad_dist1 = grad_dist1.contiguous() - device = grad_dist1.device - - grad_pc0 = torch.zeros(pc0.size()).to(device).contiguous() - grad_pc1 = torch.zeros(pc1.size()).to(device).contiguous() - - chamfer3D.backward( - pc0, pc1, idx0, idx1, grad_dist0, grad_dist1, grad_pc0, grad_pc1 - ) - return grad_pc0, grad_pc1 - -class nnChamferDis(nn.Module): - def __init__(self, truncate_dist=True): - super(nnChamferDis, self).__init__() - self.truncate_dist = truncate_dist - - def forward(self, input0, input1, truncate_dist=-1): - input0 = input0.contiguous() - input1 = input1.contiguous() - dist0, dist1, _, _ = ChamferDis.apply(input0, input1) - - if truncate_dist<=0: - return torch.mean(dist0) + torch.mean(dist1) - - valid_mask0 = (dist0 <= truncate_dist) - valid_mask1 = (dist1 <= truncate_dist) - truncated_sum = torch.nanmean(dist0[valid_mask0]) + torch.nanmean(dist1[valid_mask1]) - return truncated_sum - - def dis_res(self, input0, input1): - input0 = input0.contiguous() - input1 = input1.contiguous() - dist0, dist1, _, 
_ = ChamferDis.apply(input0, input1) - return dist0, dist1 - - def truncated_dis(self, input0, input1): - # nsfp: truncated distance way is set >= 2 to 0 but not nanmean - cham_x, cham_y = self.dis_res(input0, input1) - cham_x[cham_x >= 2] = 0.0 - cham_y[cham_y >= 2] = 0.0 - return torch.mean(cham_x) + torch.mean(cham_y) - - def disid_res(self, input0, input1): - input0 = input0.contiguous() - input1 = input1.contiguous() - dist0, dist1, idx0, idx1 = ChamferDis.apply(input0, input1) - return dist0, dist1, idx0, idx1 -class NearestNeighborDis(nn.Module): - def __init__(self): - super(NearestNeighborDis, self).__init__() - - def forward(self, input0, input1): - input0 = input0.contiguous() - input1 = input1.contiguous() - dist0, dist1, _, _ = ChamferDis.apply(input0, input1) - - return torch.mean(dist0[dist0 <= 2]) - -if __name__ == "__main__": - import numpy as np - pc0 = np.load(f'{BASE_DIR}/assets/tests/test_pc0.npy') - pc1 = np.load(f'{BASE_DIR}/assets/tests/test_pc1.npy') - print('0: {:.3f}MB'.format(torch.cuda.memory_allocated()/1024**2)) - pc0 = torch.from_numpy(pc0[...,:3]).float().cuda().contiguous() - pc1 = torch.from_numpy(pc1[...,:3]).float().cuda().contiguous() - pc0.requires_grad = True - pc1.requires_grad = True - print(pc0.shape, "demo data: ", pc0[0]) - print(pc1.shape, "demo data: ", pc1[0]) - print('1: {:.3f}MB'.format(torch.cuda.memory_allocated()/1024**2)) - - start_time = time.time() - loss = nnChamferDis(truncate_dist=False)(pc0, pc1) - loss.backward() - print("loss: ", loss) - print(f"Chamfer Distance Cal time: {(time.time() - start_time)*1000:.3f} ms") \ No newline at end of file diff --git a/assets/cuda/chamfer3D/chamfer3D.cu b/assets/cuda/chamfer3D/chamfer3D.cu deleted file mode 100644 index 72af47d..0000000 --- a/assets/cuda/chamfer3D/chamfer3D.cu +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2022-now, RPL, KTH Royal Institute of Technology - * @Author: Qingwen Zhang (https://kin-zhang.github.io/) - * @Date: 2023-08-03 16:55 - * 
@Description: Chamfer distance calculation between two point clouds with CUDA - * This file is part of SeFlow (https://github.com/KTH-RPL/SeFlow). - * If you find this repo helpful, please cite the respective publication as - * listed on the above website. - - - * Reference: Modified from SCOOP chamfer3D [https://github.com/itailang/SCOOP] - * faster 2x than the original version -*/ - -#include -#include -#include -#include - -#include -#include - -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - -#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) -#define THREADS_PER_BLOCK 256 - - -__global__ void NmDistanceKernel(const int pc0_n, const float *pc0_xyz, const int pc1_n, const float *pc1_xyz, float *result, int *result_i){ - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= pc0_n) return; - - float x0 = pc0_xyz[tid * 3 + 0]; - float y0 = pc0_xyz[tid * 3 + 1]; - float z0 = pc0_xyz[tid * 3 + 2]; - - __shared__ float shared_pc1[THREADS_PER_BLOCK * 3]; - - int best_i = -1; - float best = 1e20; - - for (int i = 0; i < pc1_n; i += THREADS_PER_BLOCK) { - // Copy a block of pc1 to shared memory - int pc1_idx = i + threadIdx.x; - if (pc1_idx < pc1_n) { - shared_pc1[threadIdx.x * 3 + 0] = pc1_xyz[pc1_idx * 3 + 0]; - shared_pc1[threadIdx.x * 3 + 1] = pc1_xyz[pc1_idx * 3 + 1]; - shared_pc1[threadIdx.x * 3 + 2] = pc1_xyz[pc1_idx * 3 + 2]; - } - - __syncthreads(); - - // Compute the distance between pc0[tid] and the points in shared_pc1 - int num_elems = min(THREADS_PER_BLOCK, pc1_n - i); - for (int j = 0; j < num_elems; j++) { - float x1 = shared_pc1[j * 3 + 0]; - float y1 = shared_pc1[j * 3 + 1]; - float z1 = shared_pc1[j * 3 + 2]; - float d = (x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0) + (z1 - z0) * (z1 - z0); - if (d < best) { - best = d; - best_i = j + i; - } - } - - __syncthreads(); - } - - // done with this thread in tid in pc_0, save the result to global memory 
- atomicExch(&result[tid], best); - atomicExch(&result_i[tid], best_i); -} - -int chamfer_cuda_forward(const at::Tensor &pc0, const at::Tensor &pc1, at::Tensor &dist0, at::Tensor &dist1, at::Tensor &idx0, at::Tensor &idx1) -{ - at::cuda::CUDAGuard device_guard(pc0.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int pc0_n = pc0.size(0); - const int pc1_n = pc1.size(0); - - const int col_blocks_pc0 = DIVUP(pc0_n, THREADS_PER_BLOCK); - dim3 blocks_pc0(col_blocks_pc0); - const int col_blocks_pc1 = DIVUP(pc1_n, THREADS_PER_BLOCK); - dim3 blocks_pc1(col_blocks_pc1); - dim3 threads(THREADS_PER_BLOCK); - - NmDistanceKernel<<>>(pc0_n, pc0.data_ptr(), pc1_n, pc1.data_ptr(), dist0.data_ptr(), idx0.data_ptr()); - NmDistanceKernel<<>>(pc1_n, pc1.data_ptr(), pc0_n, pc0.data_ptr(), dist1.data_ptr(), idx1.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - - return 1; -} - -__global__ void NmDistanceGradKernel(const int pc0_n, const float *pc0_xyz, const int pc1_n, const float *pc1_xyz, - const float *grad_dist0, const int *idx0, float *grad_pc0, float *grad_pc1) -{ - CUDA_1D_KERNEL_LOOP(j0, pc0_n){ - float x0 = pc0_xyz[j0 * 3 + 0]; - float y0 = pc0_xyz[j0 * 3 + 1]; - float z0 = pc0_xyz[j0 * 3 + 2]; - - int j1 = idx0[j0]; - float x1 = pc1_xyz[j1 * 3 + 0]; - float y1 = pc1_xyz[j1 * 3 + 1]; - float z1 = pc1_xyz[j1 * 3 + 2]; - - float g = grad_dist0[j0] * 2; - - atomicAdd(&grad_pc0[j0 * 3 + 0], g * (x0 - x1)); - atomicAdd(&grad_pc0[j0 * 3 + 1], g * (y0 - y1)); - atomicAdd(&grad_pc0[j0 * 3 + 2], g * (z0 - z1)); - - atomicAdd(&grad_pc1[j1 * 3 + 0], - (g * (x0 - x1))); - atomicAdd(&grad_pc1[j1 * 3 + 1], - (g * (y0 - y1))); - atomicAdd(&grad_pc1[j1 * 3 + 2], - (g * (z0 - z1))); - } -} -int chamfer_cuda_backward(const at::Tensor &pc0, const at::Tensor &pc1, - const at::Tensor &idx0, const at::Tensor &idx1, - at::Tensor &grad_dist0, at::Tensor &grad_dist1, - at::Tensor &grad_pc0, at::Tensor &grad_pc1) -{ - at::cuda::CUDAGuard device_guard(pc0.device()); - 
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int pc0_n = pc0.size(0); - const int pc1_n = pc1.size(0); - - const int col_blocks_pc0 = DIVUP(pc0_n, THREADS_PER_BLOCK); - dim3 blocks_pc0(col_blocks_pc0); - const int col_blocks_pc1 = DIVUP(pc1_n, THREADS_PER_BLOCK); - dim3 blocks_pc1(col_blocks_pc1); - dim3 threads(THREADS_PER_BLOCK); - - NmDistanceGradKernel<<>>(pc0_n, pc0.data_ptr(), pc1_n, pc1.data_ptr(), grad_dist0.data_ptr(), idx0.data_ptr(), grad_pc0.data_ptr(), grad_pc1.data_ptr()); - NmDistanceGradKernel<<>>(pc1_n, pc1.data_ptr(), pc0_n, pc0.data_ptr(), grad_dist1.data_ptr(), idx1.data_ptr(), grad_pc1.data_ptr(), grad_pc0.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - - return 1; -} \ No newline at end of file diff --git a/assets/cuda/chamfer3D/chamfer3D_cuda.cpp b/assets/cuda/chamfer3D/chamfer3D_cuda.cpp deleted file mode 100644 index 7a89216..0000000 --- a/assets/cuda/chamfer3D/chamfer3D_cuda.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2022-now, RPL, KTH Royal Institute of Technology - * @Author: Qingwen Zhang (https://kin-zhang.github.io/) - * @Date: 2023-08-03 16:55 - * @Description: Chamfer distance calculation between two point clouds with CUDA - * This file is part of SeFlow (https://github.com/KTH-RPL/SeFlow). - * If you find this repo helpful, please cite the respective publication as - * listed on the above website. 
- - - * Reference: Modified from SCOOP chamfer3D [https://github.com/itailang/SCOOP] - * faster 2x than the original version -*/ - -#include -#include - -int chamfer_cuda_forward( - const at::Tensor &pc0, - const at::Tensor &pc1, - at::Tensor &dist0, - at::Tensor &dist1, - at::Tensor &idx0, - at::Tensor &idx1); - -int chamfer_cuda_backward( - const at::Tensor &pc0, const at::Tensor &pc1, - const at::Tensor &idx0, const at::Tensor &idx1, - at::Tensor &grad_dist0, at::Tensor &grad_dist1, - at::Tensor &grad_pc0, at::Tensor &grad_pc1); - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("forward", &chamfer_cuda_forward, "Chamfer Distance (CUDA)"); - m.def("backward", &chamfer_cuda_backward, "Chamfer Distance (CUDA) Backward Grad"); -} \ No newline at end of file diff --git a/assets/cuda/chamfer3D/setup.py b/assets/cuda/chamfer3D/setup.py deleted file mode 100755 index 2c16070..0000000 --- a/assets/cuda/chamfer3D/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -from setuptools import setup -from torch.utils.cpp_extension import BuildExtension, CUDAExtension - -setup( - name='chamfer3D', - ext_modules=[ - CUDAExtension('chamfer3D', [ - "/".join(__file__.split('/')[:-1] + ['chamfer3D_cuda.cpp']), # must named as xxx_cuda.cpp - "/".join(__file__.split('/')[:-1] + ['chamfer3D.cu']), - ]), - ], - cmdclass={ - 'build_ext': BuildExtension - }, - version='1.0.1') diff --git a/assets/cuda/mmcv/README.md b/assets/cuda/mmcv/README.md deleted file mode 100644 index 5fa6799..0000000 --- a/assets/cuda/mmcv/README.md +++ /dev/null @@ -1,4 +0,0 @@ -mmcv ---- - -This file extracted functions we need used in mmcv to release the dependency of mmcv-full and faster the installation process. \ No newline at end of file diff --git a/assets/cuda/mmcv/__init__.py b/assets/cuda/mmcv/__init__.py deleted file mode 100755 index dca7091..0000000 --- a/assets/cuda/mmcv/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-from .voxelize import Voxelization, voxelization -from .scatter_points import DynamicScatter, dynamic_scatter - -__all__ = [ - 'Voxelization', 'voxelization', - 'dynamic_scatter', 'DynamicScatter' -] diff --git a/assets/cuda/mmcv/common_cuda_helper.hpp b/assets/cuda/mmcv/common_cuda_helper.hpp deleted file mode 100644 index b12aa9a..0000000 --- a/assets/cuda/mmcv/common_cuda_helper.hpp +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef COMMON_CUDA_HELPER -#define COMMON_CUDA_HELPER - -#include - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - -#define CUDA_2D_KERNEL_LOOP(i, n, j, m) \ - for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) \ - for (size_t j = blockIdx.y * blockDim.y + threadIdx.y; j < (m); \ - j += blockDim.y * gridDim.y) - -#define CUDA_2D_KERNEL_BLOCK_LOOP(i, n, j, m) \ - for (size_t i = blockIdx.x; i < (n); i += gridDim.x) \ - for (size_t j = blockIdx.y; j < (m); j += gridDim.y) - -#define THREADS_PER_BLOCK 512 - -inline int GET_BLOCKS(const int N, const int num_threads = THREADS_PER_BLOCK) { - int optimal_block_num = (N + num_threads - 1) / num_threads; - int max_block_num = 4096; - return min(optimal_block_num, max_block_num); -} - -template -__device__ T bilinear_interpolate(const T* input, const int height, - const int width, T y, T x, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) return 0; - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. 
- ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = input[y_low * width + x_low]; - T v2 = input[y_low * width + x_high]; - T v3 = input[y_high * width + x_low]; - T v4 = input[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, - int& x_low, int& x_high, int& y_low, int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} -#endif // COMMON_CUDA_HELPER diff --git a/assets/cuda/mmcv/cudabind.cpp b/assets/cuda/mmcv/cudabind.cpp deleted file mode 100644 index 6dd27f1..0000000 --- a/assets/cuda/mmcv/cudabind.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; - -void DynamicVoxelizeForwardCUDAKernelLauncher( - const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, const std::vector coors_range, - const int NDim = 3); - -void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, - const std::vector coors_range, - const int NDim) { - DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size, - coors_range, NDim); -}; - -void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, - const std::vector coors_range, - const int NDim); - - -int HardVoxelizeForwardCUDAKernelLauncher( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim = 3); - -int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels, - at::Tensor &coors, - at::Tensor &num_points_per_voxel, - const std::vector voxel_size, - const std::vector coors_range, - const int max_points, const int max_voxels, - const int NDim) { - return HardVoxelizeForwardCUDAKernelLauncher( - points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, - max_points, max_voxels, 
NDim); -}; - -int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels, - at::Tensor &coors, - at::Tensor &num_points_per_voxel, - const std::vector voxel_size, - const std::vector coors_range, - const int max_points, const int max_voxels, - const int NDim); - -int nondeterministic_hard_voxelize_forward_impl( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim); - -REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA, - hard_voxelize_forward_cuda); -REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA, - dynamic_voxelize_forward_cuda); - - -std::vector DynamicPointToVoxelForwardCUDAKernelLauncher( - const at::Tensor &feats, const at::Tensor &coors, - const reduce_t reduce_type); - - -std::vector dynamic_point_to_voxel_forward_cuda( - const torch::Tensor &feats, const torch::Tensor &coors, - const reduce_t reduce_type) { - return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors, - reduce_type); -}; - -void DynamicPointToVoxelBackwardCUDAKernelLauncher( - at::Tensor &grad_feats, const at::Tensor &grad_reduced_feats, - const at::Tensor &feats, const at::Tensor &reduced_feats, - const at::Tensor &coors_map, const at::Tensor &reduce_count, - const reduce_t reduce_type); - -void dynamic_point_to_voxel_backward_cuda( - torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats, - const torch::Tensor &feats, const torch::Tensor &reduced_feats, - const torch::Tensor &coors_idx, const torch::Tensor &reduce_count, - const reduce_t reduce_type) { - DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats, - feats, reduced_feats, coors_idx, - reduce_count, reduce_type); -}; - -std::vector dynamic_point_to_voxel_forward_impl( - const torch::Tensor &feats, const torch::Tensor &coors, - const reduce_t reduce_type); - -void 
dynamic_point_to_voxel_backward_impl( - torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats, - const torch::Tensor &feats, const torch::Tensor &reduced_feats, - const torch::Tensor &coors_idx, const torch::Tensor &reduce_count, - const reduce_t reduce_type); - -REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA, - dynamic_point_to_voxel_forward_cuda); -REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA, - dynamic_point_to_voxel_backward_cuda); \ No newline at end of file diff --git a/assets/cuda/mmcv/pybind.cpp b/assets/cuda/mmcv/pybind.cpp deleted file mode 100644 index 1b0823b..0000000 --- a/assets/cuda/mmcv/pybind.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include - -#include "pytorch_cpp_helper.hpp" - - -std::vector dynamic_point_to_voxel_forward( - const torch::Tensor &feats, const torch::Tensor &coors, - const std::string &reduce_type); - -void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats, - const torch::Tensor &grad_reduced_feats, - const torch::Tensor &feats, - const torch::Tensor &reduced_feats, - const torch::Tensor &coors_idx, - const torch::Tensor &reduce_count, - const std::string &reduce_type); - -void dynamic_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &coors, - const int NDim); - -void hard_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &voxels, - at::Tensor &coors, at::Tensor &num_points_per_voxel, - at::Tensor &voxel_num, const int max_points, - const int max_voxels, const int NDim, - const bool deterministic); - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward, - "dynamic_point_to_voxel_forward", py::arg("feats"), py::arg("coors"), - py::arg("reduce_type")); - m.def("dynamic_point_to_voxel_backward", &dynamic_point_to_voxel_backward, - 
"dynamic_point_to_voxel_backward", py::arg("grad_feats"), - py::arg("grad_reduced_feats"), py::arg("feats"), - py::arg("reduced_feats"), py::arg("coors_idx"), py::arg("reduce_count"), - py::arg("reduce_type")); - m.def("hard_voxelize_forward", &hard_voxelize_forward, - "hard_voxelize_forward", py::arg("points"), py::arg("voxel_size"), - py::arg("coors_range"), py::arg("voxels"), py::arg("coors"), - py::arg("num_points_per_voxel"), py::arg("voxel_num"), - py::arg("max_points"), py::arg("max_voxels"), py::arg("NDim"), - py::arg("deterministic")); - m.def("dynamic_voxelize_forward", &dynamic_voxelize_forward, - "dynamic_voxelize_forward", py::arg("points"), py::arg("voxel_size"), - py::arg("coors_range"), py::arg("coors"), py::arg("NDim")); -} \ No newline at end of file diff --git a/assets/cuda/mmcv/pytorch_cpp_helper.hpp b/assets/cuda/mmcv/pytorch_cpp_helper.hpp deleted file mode 100644 index f68e874..0000000 --- a/assets/cuda/mmcv/pytorch_cpp_helper.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef PYTORCH_CPP_HELPER -#define PYTORCH_CPP_HELPER -#include - -#include - -using namespace at; - -#define CHECK_CUDA(x) \ - TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_MLU(x) \ - TORCH_CHECK(x.device().type() == at::kMLU, #x " must be a MLU tensor") -#define CHECK_CPU(x) \ - TORCH_CHECK(x.device().type() == at::kCPU, #x " must be a CPU tensor") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") -#define CHECK_CUDA_INPUT(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) -#define CHECK_MLU_INPUT(x) \ - CHECK_MLU(x); \ - CHECK_CONTIGUOUS(x) -#define CHECK_CPU_INPUT(x) \ - CHECK_CPU(x); \ - CHECK_CONTIGUOUS(x) - -#endif // PYTORCH_CPP_HELPER diff --git a/assets/cuda/mmcv/pytorch_cuda_helper.hpp b/assets/cuda/mmcv/pytorch_cuda_helper.hpp deleted file mode 100644 index 52e5126..0000000 --- a/assets/cuda/mmcv/pytorch_cuda_helper.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef PYTORCH_CUDA_HELPER -#define 
PYTORCH_CUDA_HELPER - -#include -#include -#include - -#include -#include - -#include "common_cuda_helper.hpp" - -using at::Half; -using at::Tensor; -using phalf = at::Half; - -#define __PHALF(x) (x) -#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) - -#endif // PYTORCH_CUDA_HELPER diff --git a/assets/cuda/mmcv/pytorch_device_registry.hpp b/assets/cuda/mmcv/pytorch_device_registry.hpp deleted file mode 100644 index 2a32b72..0000000 --- a/assets/cuda/mmcv/pytorch_device_registry.hpp +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef PYTORCH_DEVICE_REGISTRY_H -#define PYTORCH_DEVICE_REGISTRY_H - -// Using is recommended in the official documentation in -// https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op. -// However, we use for compatibility with CUDA 9.0 -// Read https://github.com/pytorch/extension-cpp/issues/35 for more details. -#include - -#include -#include -#include -#include - -inline std::string GetDeviceStr(const at::Device& device) { - std::string str = DeviceTypeName(device.type(), true); - if (device.has_index()) { - str.push_back(':'); - str.append(std::to_string(device.index())); - } - return str; -} - -// Registry -template -class DeviceRegistry; - -template -class DeviceRegistry { - public: - using FunctionType = Ret (*)(Args...); - static const int MAX_DEVICE_TYPES = - int8_t(at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES); - - void Register(at::DeviceType device, FunctionType function) { - funcs_[int8_t(device)] = function; - } - - FunctionType Find(at::DeviceType device) const { - return funcs_[int8_t(device)]; - } - - static DeviceRegistry& instance() { - static DeviceRegistry inst; - return inst; - } - - private: - DeviceRegistry() { - for (size_t i = 0; i < MAX_DEVICE_TYPES; ++i) { - funcs_[i] = nullptr; - } - }; - FunctionType funcs_[MAX_DEVICE_TYPES]; -}; - -// get device of first tensor param - -template , at::Tensor>::value, - bool> = true> -at::Device GetFirstTensorDevice(T&& t, Args&&... 
args) { - return std::forward(t).device(); -} -template , at::Tensor>::value, - bool> = true> -at::Device GetFirstTensorDevice(T&& t, Args&&... args) { - return GetFirstTensorDevice(std::forward(args)...); -} - -// check device consistency - -inline std::pair CheckDeviceConsistency( - const at::Device& device, int index) { - return {index, device}; -} - -template , at::Tensor>::value, - bool> = true> -std::pair CheckDeviceConsistency(const at::Device& device, - int index, T&& t, - Args&&... args); - -template , at::Tensor>::value, - bool> = true> -std::pair CheckDeviceConsistency(const at::Device& device, - int index, T&& t, - Args&&... args) { - auto new_device = std::forward(t).device(); - if (new_device.type() != device.type() || - new_device.index() != device.index()) { - return {index, new_device}; - } - return CheckDeviceConsistency(device, index + 1, std::forward(args)...); -} - -template < - typename T, typename... Args, - std::enable_if_t, at::Tensor>::value, bool>> -std::pair CheckDeviceConsistency(const at::Device& device, - int index, T&& t, - Args&&... args) { - return CheckDeviceConsistency(device, index + 1, std::forward(args)...); -} - -// dispatch - -template -auto Dispatch(const R& registry, const char* name, Args&&... 
args) { - auto device = GetFirstTensorDevice(std::forward(args)...); - auto inconsist = - CheckDeviceConsistency(device, 0, std::forward(args)...); - TORCH_CHECK(inconsist.first >= int(sizeof...(Args)), name, ": at param ", - inconsist.first, - ", inconsistent device: ", GetDeviceStr(inconsist.second).c_str(), - " vs ", GetDeviceStr(device).c_str(), "\n") - auto f_ptr = registry.Find(device.type()); - TORCH_CHECK(f_ptr != nullptr, name, ": implementation for device ", - GetDeviceStr(device).c_str(), " not found.\n") - return f_ptr(std::forward(args)...); -} - -// helper macro - -#define DEVICE_REGISTRY(key) DeviceRegistry::instance() - -#define REGISTER_DEVICE_IMPL(key, device, value) \ - struct key##_##device##_registerer { \ - key##_##device##_registerer() { \ - DEVICE_REGISTRY(key).Register(at::k##device, value); \ - } \ - }; \ - static key##_##device##_registerer _##key##_##device##_registerer; - -#define DISPATCH_DEVICE_IMPL(key, ...) \ - Dispatch(DEVICE_REGISTRY(key), #key, __VA_ARGS__) - -#endif // PYTORCH_DEVICE_REGISTRY diff --git a/assets/cuda/mmcv/scatter_points.cpp b/assets/cuda/mmcv/scatter_points.cpp deleted file mode 100644 index 0de8ebf..0000000 --- a/assets/cuda/mmcv/scatter_points.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; - -std::vector dynamic_point_to_voxel_forward_impl( - const torch::Tensor &feats, const torch::Tensor &coors, - const reduce_t reduce_type) { - return DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, feats, coors, - reduce_type); -} - -void dynamic_point_to_voxel_backward_impl( - torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats, - const torch::Tensor &feats, const torch::Tensor &reduced_feats, - const torch::Tensor &coors_idx, const torch::Tensor &reduce_count, - const reduce_t reduce_type) { - DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, grad_feats, - grad_reduced_feats, feats, reduced_feats, coors_idx, - reduce_count, reduce_type); -} - -inline reduce_t convert_reduce_type(const std::string &reduce_type) { - if (reduce_type == "max") - return reduce_t::MAX; - else if (reduce_type == "sum") - return reduce_t::SUM; - else if (reduce_type == "mean") - return reduce_t::MEAN; - else - TORCH_CHECK(false, "do not support reduce type " + reduce_type) - return reduce_t::SUM; -} - -std::vector dynamic_point_to_voxel_forward( - const torch::Tensor &feats, const torch::Tensor &coors, - const std::string &reduce_type) { - return dynamic_point_to_voxel_forward_impl(feats, coors, - convert_reduce_type(reduce_type)); -} - -void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats, - const torch::Tensor &grad_reduced_feats, - const torch::Tensor &feats, - const torch::Tensor &reduced_feats, - const torch::Tensor &coors_idx, - const torch::Tensor &reduce_count, - const std::string &reduce_type) { - dynamic_point_to_voxel_backward_impl(grad_feats, grad_reduced_feats, feats, - reduced_feats, coors_idx, reduce_count, - convert_reduce_type(reduce_type)); -} diff --git a/assets/cuda/mmcv/scatter_points.py b/assets/cuda/mmcv/scatter_points.py deleted file mode 100644 index 258ca97..0000000 --- 
a/assets/cuda/mmcv/scatter_points.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Any, List, Optional, Tuple - -import torch -import torch.nn.functional as F -from torch import nn -from torch.autograd import Function - -# from utils import ext_loader -import importlib -def load_ext(name, funcs): - ext = importlib.import_module('mmcv.' + name) - for fun in funcs: - assert hasattr(ext, fun), f'{fun} miss in module {name}' - return ext - -ext_module = load_ext( - '_ext', - ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) - - -class _DynamicScatter(Function): - - @staticmethod - def forward(ctx: Any, - feats: torch.Tensor, - coors: torch.Tensor, - reduce_type: str = 'max') -> Tuple[torch.Tensor, torch.Tensor]: - """convert kitti points(N, >=3) to voxels. - - Args: - feats (torch.Tensor): [N, C]. Points features to be reduced - into voxels. - coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates - (specifically multi-dim voxel index) of each points. - reduce_type (str, optional): Reduce op. support 'max', 'sum' and - 'mean'. Default: 'max'. - - Returns: - tuple[torch.Tensor]: A tuple contains two elements. The first one - is the voxel features with shape [M, C] which are respectively - reduced from input features that share the same voxel coordinates. - The second is voxel coordinates with shape [M, ndim]. 
- """ - results = ext_module.dynamic_point_to_voxel_forward( - feats, coors, reduce_type) - (voxel_feats, voxel_coors, point2voxel_map, - voxel_points_count) = results - ctx.reduce_type = reduce_type - ctx.save_for_backward(feats, voxel_feats, point2voxel_map, - voxel_points_count) - ctx.mark_non_differentiable(voxel_coors) - return voxel_feats, voxel_coors - - @staticmethod - def backward(ctx: Any, - grad_voxel_feats: torch.Tensor, - grad_voxel_coors: Optional[torch.Tensor] = None) -> tuple: - (feats, voxel_feats, point2voxel_map, - voxel_points_count) = ctx.saved_tensors - grad_feats = torch.zeros_like(feats) - # TODO: whether to use index put or use cuda_backward - # To use index put, need point to voxel index - ext_module.dynamic_point_to_voxel_backward( - grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats, - point2voxel_map, voxel_points_count, ctx.reduce_type) - return grad_feats, None, None - - -dynamic_scatter = _DynamicScatter.apply - - -class DynamicScatter(nn.Module): - """Scatters points into voxels, used in the voxel encoder with dynamic - voxelization. - - Note: - The CPU and GPU implementation get the same output, but have numerical - difference after summation and division (e.g., 5e-7). - - Args: - voxel_size (list): list [x, y, z] size of three dimension. - point_cloud_range (list): The coordinate range of points, [x_min, - y_min, z_min, x_max, y_max, z_max]. - average_points (bool): whether to use avg pooling to scatter points - into voxel. - """ - - def __init__(self, voxel_size: List, point_cloud_range: List, - average_points: bool): - super().__init__() - - self.voxel_size = voxel_size - self.point_cloud_range = point_cloud_range - self.average_points = average_points - - def forward_single( - self, points: torch.Tensor, - coors: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Scatters points into voxels. - - Args: - points (torch.Tensor): Points to be reduced into voxels. 
- coors (torch.Tensor): Corresponding voxel coordinates (specifically - multi-dim voxel index) of each points. - - Returns: - tuple[torch.Tensor]: A tuple contains two elements. The first one - is the voxel features with shape [M, C] which are respectively - reduced from input features that share the same voxel coordinates. - The second is voxel coordinates with shape [M, ndim]. - """ - reduce = 'mean' if self.average_points else 'max' - return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) - - def forward(self, points: torch.Tensor, - coors: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Scatters points/features into voxels. - - Args: - points (torch.Tensor): Points to be reduced into voxels. - coors (torch.Tensor): Corresponding voxel coordinates (specifically - multi-dim voxel index) of each points. - - Returns: - tuple[torch.Tensor]: A tuple contains two elements. The first one - is the voxel features with shape [M, C] which are respectively - reduced from input features that share the same voxel coordinates. - The second is voxel coordinates with shape [M, ndim]. 
- """ - if coors.size(-1) == 3: - return self.forward_single(points, coors) - else: - batch_size = coors[-1, 0] + 1 - voxels, voxel_coors = [], [] - for i in range(batch_size): - inds = torch.where(coors[:, 0] == i) - voxel, voxel_coor = self.forward_single( - points[inds], coors[inds][:, 1:]) - coor_pad = F.pad(voxel_coor, (1, 0), mode='constant', value=i) - voxel_coors.append(coor_pad) - voxels.append(voxel) - features = torch.cat(voxels, dim=0) - feature_coors = torch.cat(voxel_coors, dim=0) - - return features, feature_coors - - def __repr__(self): - s = self.__class__.__name__ + '(' - s += 'voxel_size=' + str(self.voxel_size) - s += ', point_cloud_range=' + str(self.point_cloud_range) - s += ', average_points=' + str(self.average_points) - s += ')' - return s diff --git a/assets/cuda/mmcv/scatter_points_cuda.cu b/assets/cuda/mmcv/scatter_points_cuda.cu deleted file mode 100644 index cbc4465..0000000 --- a/assets/cuda/mmcv/scatter_points_cuda.cu +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-#include -#include -#include - -#include "pytorch_cuda_helper.hpp" -#include "scatter_points_cuda_kernel.cuh" - -std::vector DynamicPointToVoxelForwardCUDAKernelLauncher( - const at::Tensor &feats, const at::Tensor &coors, - const reduce_t reduce_type) { - const int num_input = feats.size(0); - const int num_feats = feats.size(1); - - if (num_input == 0) - return {feats.clone().detach(), coors.clone().detach(), - coors.new_empty({0}, torch::kInt32), - coors.new_empty({0}, torch::kInt32)}; - - at::Tensor out_coors; - at::Tensor coors_map; - at::Tensor reduce_count; - - auto coors_clean = coors.masked_fill(coors.lt(0).any(-1, true), -1); - - std::tie(out_coors, coors_map, reduce_count) = - at::unique_dim(coors_clean, 0, true, true, true); - - if (out_coors[0][0].lt(0).item()) { - // the first element of out_coors (-1,-1,-1) and should be removed - out_coors = out_coors.slice(0, 1); - reduce_count = reduce_count.slice(0, 1); - coors_map = coors_map - 1; - } - - coors_map = coors_map.to(torch::kInt32); - reduce_count = reduce_count.to(torch::kInt32); - - auto reduced_feats = - at::empty({out_coors.size(0), num_feats}, feats.options()); - - at::cuda::CUDAGuard device_guard(feats.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES( - feats.scalar_type(), "feats_reduce_kernel", ([&] { - if (reduce_type == reduce_t::MAX) - reduced_feats.fill_(-std::numeric_limits::infinity()); - else - reduced_feats.fill_(static_cast(0)); - - dim3 blocks(std::min( - at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); - dim3 threads(THREADS_PER_BLOCK); - feats_reduce_kernel<<>>( - feats.data_ptr(), coors_map.data_ptr(), - reduced_feats.data_ptr(), num_input, num_feats, - reduce_type); - if (reduce_type == reduce_t::MEAN) - reduced_feats /= reduce_count.unsqueeze(-1).to(reduced_feats.dtype()); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - return {reduced_feats, out_coors, coors_map, reduce_count}; -} - -void 
DynamicPointToVoxelBackwardCUDAKernelLauncher( - at::Tensor &grad_feats, const at::Tensor &grad_reduced_feats, - const at::Tensor &feats, const at::Tensor &reduced_feats, - const at::Tensor &coors_map, const at::Tensor &reduce_count, - const reduce_t reduce_type) { - const int num_input = feats.size(0); - const int num_reduced = reduced_feats.size(0); - const int num_feats = feats.size(1); - - grad_feats.fill_(0); - // copy voxel grad to points - - if (num_input == 0 || num_reduced == 0) return; - at::cuda::CUDAGuard device_guard(feats.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - if (reduce_type == reduce_t::MEAN || reduce_type == reduce_t::SUM) { - AT_DISPATCH_FLOATING_TYPES( - grad_reduced_feats.scalar_type(), "add_reduce_traceback_grad_kernel", - ([&] { - dim3 blocks(std::min( - at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); - dim3 threads(THREADS_PER_BLOCK); - add_reduce_traceback_grad_kernel<<>>( - grad_feats.data_ptr(), - grad_reduced_feats.data_ptr(), - coors_map.data_ptr(), reduce_count.data_ptr(), - num_input, num_feats, reduce_type); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - } else { - auto reduce_from = at::full({num_reduced, num_feats}, num_input, - coors_map.options().dtype(torch::kInt32)); - AT_DISPATCH_FLOATING_TYPES( - grad_reduced_feats.scalar_type(), - "max_reduce_traceback_scatter_idx_kernel", ([&] { - dim3 blocks(std::min( - at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); - dim3 threads(THREADS_PER_BLOCK); - max_reduce_traceback_scatter_idx_kernel<<>>( - feats.data_ptr(), reduced_feats.data_ptr(), - reduce_from.data_ptr(), coors_map.data_ptr(), - num_input, num_feats); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - AT_DISPATCH_FLOATING_TYPES( - grad_reduced_feats.scalar_type(), - "max_reduce_traceback_scatter_idx_kernel", ([&] { - dim3 blocks( - std::min(at::cuda::ATenCeilDiv(num_reduced, THREADS_PER_BLOCK), - maxGridDim)); - dim3 threads(THREADS_PER_BLOCK); - 
max_reduce_scatter_grad_kernel<<>>( - grad_feats.data_ptr(), - grad_reduced_feats.data_ptr(), - reduce_from.data_ptr(), num_reduced, num_feats); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - } -} diff --git a/assets/cuda/mmcv/scatter_points_cuda_kernel.cuh b/assets/cuda/mmcv/scatter_points_cuda_kernel.cuh deleted file mode 100644 index af5b9f6..0000000 --- a/assets/cuda/mmcv/scatter_points_cuda_kernel.cuh +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef SCATTER_POINTS_CUDA_KERNEL_CUH -#define SCATTER_POINTS_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; -int const maxGridDim = 50000; - -__device__ __forceinline__ static void reduceMax(float *address, float val) { - int *address_as_i = reinterpret_cast(address); - int old = *address_as_i, assumed; - do { - assumed = old; - old = atomicCAS(address_as_i, assumed, - __float_as_int(fmaxf(val, __int_as_float(assumed)))); - } while (assumed != old || __int_as_float(old) < val); -} - -__device__ __forceinline__ static void reduceMax(double *address, double val) { - unsigned long long *address_as_ull = - reinterpret_cast(address); - unsigned long long old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS( - address_as_ull, assumed, - __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); - } while (assumed != old || __longlong_as_double(old) < val); -} - -// get rid of meaningless warnings when compiling host code -#ifdef MMCV_WITH_HIP -__device__ __forceinline__ static void reduceAdd(float *address, float val) { - atomicAdd(address, val); -} -__device__ __forceinline__ static void reduceAdd(double *address, double val) { - atomicAdd(address, val); -} -#else -#ifdef __CUDA_ARCH__ -__device__ __forceinline__ static void reduceAdd(float *address, float val) { -#if (__CUDA_ARCH__ < 200) -#ifdef _MSC_VER -#pragma 
message( \ - "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32") -#else -#warning \ - "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32" -#endif - int *address_as_i = reinterpret_cast(address); - int old = *address_as_i, assumed; - do { - assumed = old; - old = atomicCAS(address_as_i, assumed, - __float_as_int(val + __int_as_float(assumed))); - } while (assumed != old); -#else - atomicAdd(address, val); -#endif -} - -__device__ __forceinline__ static void reduceAdd(double *address, double val) { -#if (__CUDA_ARCH__ < 600) -#ifdef _MSC_VER -#pragma message( \ - "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64") -#else -#warning \ - "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64" -#endif - unsigned long long *address_as_ull = - reinterpret_cast(address); - unsigned long long old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); -#else - atomicAdd(address, val); -#endif -} -#endif // __CUDA_ARCH__ -#endif // MMCV_WITH_HIP - -template -__global__ void feats_reduce_kernel( - const T *feats, const int32_t *coors_map, - T *reduced_feats, // shall be 0 at initialization - const int num_input, const int num_feats, const reduce_t reduce_type) { - CUDA_1D_KERNEL_LOOP(x, num_input) { - int32_t reduce_to = coors_map[x]; - if (reduce_to == -1) continue; - - const T *feats_offset = feats + x * num_feats; - T *reduced_feats_offset = reduced_feats + reduce_to * num_feats; - if (reduce_type == reduce_t::MAX) { - for (int i = 0; i < num_feats; i++) { - reduceMax(&reduced_feats_offset[i], feats_offset[i]); - } - } else { - for (int i = 0; i < num_feats; i++) { - reduceAdd(&reduced_feats_offset[i], feats_offset[i]); - } - } - } -} - -template -__global__ void add_reduce_traceback_grad_kernel( 
- T *grad_feats, const T *grad_reduced_feats, const int32_t *coors_map, - const int32_t *reduce_count, const int num_input, const int num_feats, - const reduce_t reduce_type) { - CUDA_1D_KERNEL_LOOP(x, num_input) { - int32_t reduce_to = coors_map[x]; - if (reduce_to == -1) { - continue; - } - - const int input_offset = x * num_feats; - T *grad_feats_offset = grad_feats + input_offset; - const int reduced_offset = reduce_to * num_feats; - const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset; - - if (reduce_type == reduce_t::SUM) { - for (int i = 0; i < num_feats; i++) { - grad_feats_offset[i] = grad_reduced_feats_offset[i]; - } - } else if (reduce_type == reduce_t::MEAN) { - for (int i = 0; i < num_feats; i++) { - grad_feats_offset[i] = grad_reduced_feats_offset[i] / - static_cast(reduce_count[reduce_to]); - } - } - } -} - -template -__global__ void max_reduce_traceback_scatter_idx_kernel( - const T *feats, const T *reduced_feats, int32_t *reduce_from, - const int32_t *coors_map, const int num_input, const int num_feats) { - CUDA_1D_KERNEL_LOOP(x, num_input) { - int32_t reduce_to = coors_map[x]; - - const int input_offset = x * num_feats; - const T *feats_offset = feats + input_offset; - - if (reduce_to == -1) { - continue; - } - - const int reduced_offset = reduce_to * num_feats; - const T *reduced_feats_offset = reduced_feats + reduced_offset; - int32_t *reduce_from_offset = reduce_from + reduced_offset; - - for (int i = 0; i < num_feats; i++) { - if (feats_offset[i] == reduced_feats_offset[i]) { - atomicMin(&reduce_from_offset[i], static_cast(x)); - } - } - } -} - -template -__global__ void max_reduce_scatter_grad_kernel(T *grad_feats, - const T *grad_reduced_feats, - const int32_t *reduce_from, - const int num_reduced, - const int num_feats) { - CUDA_1D_KERNEL_LOOP(x, num_reduced) { - const int reduced_offset = x * num_feats; - const int32_t *scatter_to_offset = reduce_from + reduced_offset; - const T *grad_reduced_feats_offset = 
grad_reduced_feats + reduced_offset; - - for (int i = 0; i < num_feats; i++) { - grad_feats[scatter_to_offset[i] * num_feats + i] = - grad_reduced_feats_offset[i]; - } - } -} - -#endif // SCATTER_POINTS_CUDA_KERNEL_CUH diff --git a/assets/cuda/mmcv/setup.py b/assets/cuda/mmcv/setup.py deleted file mode 100644 index 53ed40b..0000000 --- a/assets/cuda/mmcv/setup.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -from pkg_resources import DistributionNotFound, get_distribution, parse_version -from setuptools import find_packages, setup -from torch.utils.cpp_extension import BuildExtension, CUDAExtension - -setup( - name='mmcv', - version='1.0.1', - ext_modules=[ - CUDAExtension( - name='mmcv._ext', - sources=[ - "/".join(__file__.split("/")[:-1] + ["scatter_points_cuda.cu"]), - "/".join(__file__.split("/")[:-1] + ["scatter_points.cpp"]), - "/".join(__file__.split("/")[:-1] + ["voxelization_cuda.cu"]), - "/".join(__file__.split("/")[:-1] + ["voxelization.cpp"]), - "/".join(__file__.split("/")[:-1] + ["cudabind.cpp"]), - "/".join(__file__.split("/")[:-1] + ["pybind.cpp"]), - - ], - # extra_compile_args={ - # 'cxx': ['-std=c++17'], - # 'nvcc': ['-std=c++17', - # '-D__CUDA_NO_HALF_OPERATORS__', - # '-D__CUDA_NO_HALF_CONVERSIONS__', - # '-D__CUDA_NO_HALF2_OPERATORS__', - # ],} - ), - ], - cmdclass={'build_ext': BuildExtension}, - - # no change below - description='OpenMMLab Computer Vision Foundation', - keywords='computer vision', - packages=find_packages(), - include_package_data=True, - classifiers=[ - 'Development Status :: 4 - Beta', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Topic :: Utilities', - ], - url='https://github.com/open-mmlab/mmcv', - author='MMCV Contributors', - 
author_email='openmmlab@gmail.com', - python_requires='>=3.7', - zip_safe=False) \ No newline at end of file diff --git a/assets/cuda/mmcv/voxelization.cpp b/assets/cuda/mmcv/voxelization.cpp deleted file mode 100644 index 7946be6..0000000 --- a/assets/cuda/mmcv/voxelization.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels, - at::Tensor &coors, - at::Tensor &num_points_per_voxel, - const std::vector voxel_size, - const std::vector coors_range, - const int max_points, const int max_voxels, - const int NDim = 3) { - return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors, - num_points_per_voxel, voxel_size, coors_range, - max_points, max_voxels, NDim); -} - -int nondeterministic_hard_voxelize_forward_impl( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim = 3) { - return DISPATCH_DEVICE_IMPL(nondeterministic_hard_voxelize_forward_impl, - points, voxels, coors, num_points_per_voxel, - voxel_size, coors_range, max_points, max_voxels, - NDim); -} - -void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, - const std::vector coors_range, - const int NDim = 3) { - DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size, - coors_range, NDim); -} - -void hard_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &voxels, - at::Tensor &coors, at::Tensor &num_points_per_voxel, - at::Tensor &voxel_num, const int max_points, - const int max_voxels, const int NDim = 3, - const bool deterministic = true) { - int64_t *voxel_num_data = voxel_num.data_ptr(); - 
std::vector voxel_size_v( - voxel_size.data_ptr(), - voxel_size.data_ptr() + voxel_size.numel()); - std::vector coors_range_v( - coors_range.data_ptr(), - coors_range.data_ptr() + coors_range.numel()); - - if (deterministic) { - *voxel_num_data = hard_voxelize_forward_impl( - points, voxels, coors, num_points_per_voxel, voxel_size_v, - coors_range_v, max_points, max_voxels, NDim); - } else { - *voxel_num_data = nondeterministic_hard_voxelize_forward_impl( - points, voxels, coors, num_points_per_voxel, voxel_size_v, - coors_range_v, max_points, max_voxels, NDim); - } -} - -void dynamic_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &coors, - const int NDim = 3) { - std::vector voxel_size_v( - voxel_size.data_ptr(), - voxel_size.data_ptr() + voxel_size.numel()); - std::vector coors_range_v( - coors_range.data_ptr(), - coors_range.data_ptr() + coors_range.numel()); - dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v, - NDim); -} diff --git a/assets/cuda/mmcv/voxelization_cuda.cu b/assets/cuda/mmcv/voxelization_cuda.cu deleted file mode 100644 index f4166b7..0000000 --- a/assets/cuda/mmcv/voxelization_cuda.cu +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-#include -#include - -#include "pytorch_cuda_helper.hpp" -#include "voxelization_cuda_kernel.cuh" - -int HardVoxelizeForwardCUDAKernelLauncher( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim = 3) { - // current version tooks about 0.04s for one frame on cpu - // check device - - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int num_points = points.size(0); - const int num_features = points.size(1); - - const float voxel_x = voxel_size[0]; - const float voxel_y = voxel_size[1]; - const float voxel_z = voxel_size[2]; - const float coors_x_min = coors_range[0]; - const float coors_y_min = coors_range[1]; - const float coors_z_min = coors_range[2]; - const float coors_x_max = coors_range[3]; - const float coors_y_max = coors_range[4]; - const float coors_z_max = coors_range[5]; - - const int grid_x = round((coors_x_max - coors_x_min) / voxel_x); - const int grid_y = round((coors_y_max - coors_y_min) / voxel_y); - const int grid_z = round((coors_z_max - coors_z_min) / voxel_z); - - // map points to voxel coors - at::Tensor temp_coors = - at::zeros({num_points, NDim}, points.options().dtype(at::kInt)); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096)); - dim3 block(512); - - // 1. link point to corresponding voxel coors - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "hard_voxelize_kernel", ([&] { - dynamic_voxelize_kernel<<>>( - points.contiguous().data_ptr(), - temp_coors.contiguous().data_ptr(), voxel_x, voxel_y, voxel_z, - coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max, - coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, - NDim); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - // 2. 
map point to the idx of the corresponding voxel, find duplicate coor - // create some temporary variables - auto point_to_pointidx = -at::ones( - { - num_points, - }, - points.options().dtype(at::kInt)); - auto point_to_voxelidx = -at::ones( - { - num_points, - }, - points.options().dtype(at::kInt)); - - dim3 map_grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096)); - dim3 map_block(512); - - AT_DISPATCH_ALL_TYPES( - temp_coors.scalar_type(), "determin_duplicate", ([&] { - point_to_voxelidx_kernel<<>>( - temp_coors.contiguous().data_ptr(), - point_to_voxelidx.contiguous().data_ptr(), - point_to_pointidx.contiguous().data_ptr(), max_points, - max_voxels, num_points, NDim); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - // 3. determine voxel num and voxel's coor index - // make the logic in the CUDA device could accelerate about 10 times - auto coor_to_voxelidx = -at::ones( - { - num_points, - }, - points.options().dtype(at::kInt)); - auto voxel_num = at::zeros( - { - 1, - }, - points.options().dtype(at::kInt)); // must be zero from the beginning - - AT_DISPATCH_ALL_TYPES(temp_coors.scalar_type(), "determin_duplicate", ([&] { - determin_voxel_num<<<1, 1, 0, stream>>>( - num_points_per_voxel.contiguous().data_ptr(), - point_to_voxelidx.contiguous().data_ptr(), - point_to_pointidx.contiguous().data_ptr(), - coor_to_voxelidx.contiguous().data_ptr(), - voxel_num.contiguous().data_ptr(), - max_points, max_voxels, num_points); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - // 4. 
copy point features to voxels - // Step 4 & 5 could be parallel - auto pts_output_size = num_points * num_features; - dim3 cp_grid(std::min(at::cuda::ATenCeilDiv(pts_output_size, 512), 4096)); - dim3 cp_block(512); - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "assign_point_to_voxel", ([&] { - assign_point_to_voxel<<>>( - pts_output_size, points.contiguous().data_ptr(), - point_to_voxelidx.contiguous().data_ptr(), - coor_to_voxelidx.contiguous().data_ptr(), - voxels.contiguous().data_ptr(), max_points, num_features, - num_points, NDim); - })); - // cudaDeviceSynchronize(); - // AT_CUDA_CHECK(cudaGetLastError()); - - // 5. copy coors of each voxels - auto coors_output_size = num_points * NDim; - dim3 coors_cp_grid( - std::min(at::cuda::ATenCeilDiv(coors_output_size, 512), 4096)); - dim3 coors_cp_block(512); - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "assign_point_to_voxel", ([&] { - assign_voxel_coors - <<>>( - coors_output_size, temp_coors.contiguous().data_ptr(), - point_to_voxelidx.contiguous().data_ptr(), - coor_to_voxelidx.contiguous().data_ptr(), - coors.contiguous().data_ptr(), num_points, NDim); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - auto voxel_num_cpu = voxel_num.to(at::kCPU); - int voxel_num_int = voxel_num_cpu.data_ptr()[0]; - - return voxel_num_int; -} - -int NondeterministicHardVoxelizeForwardCUDAKernelLauncher( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim = 3) { - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int num_points = points.size(0); - const int num_features = points.size(1); - - if (num_points == 0) return 0; - - dim3 blocks( - std::min(at::cuda::ATenCeilDiv(num_points, THREADS_PER_BLOCK), 4096)); - dim3 threads(THREADS_PER_BLOCK); - - const float voxel_x = voxel_size[0]; - 
const float voxel_y = voxel_size[1]; - const float voxel_z = voxel_size[2]; - const float coors_x_min = coors_range[0]; - const float coors_y_min = coors_range[1]; - const float coors_z_min = coors_range[2]; - const float coors_x_max = coors_range[3]; - const float coors_y_max = coors_range[4]; - const float coors_z_max = coors_range[5]; - - const int grid_x = round((coors_x_max - coors_x_min) / voxel_x); - const int grid_y = round((coors_y_max - coors_y_min) / voxel_y); - const int grid_z = round((coors_z_max - coors_z_min) / voxel_z); - - // map points to voxel coors - at::Tensor temp_coors = - at::zeros({num_points, NDim}, points.options().dtype(at::kInt)); - - // 1. link point to corresponding voxel coors - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "hard_voxelize_kernel", ([&] { - dynamic_voxelize_kernel<<>>( - points.contiguous().data_ptr(), - temp_coors.contiguous().data_ptr(), voxel_x, voxel_y, voxel_z, - coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max, - coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, - NDim); - })); - - at::Tensor coors_map; - at::Tensor reduce_count; - - auto coors_clean = temp_coors.masked_fill(temp_coors.lt(0).any(-1, true), -1); - - std::tie(temp_coors, coors_map, reduce_count) = - at::unique_dim(coors_clean, 0, true, true, false); - - if (temp_coors[0][0].lt(0).item()) { - // the first element of temp_coors is (-1,-1,-1) and should be removed - temp_coors = temp_coors.slice(0, 1); - coors_map = coors_map - 1; - } - - int num_coors = temp_coors.size(0); - temp_coors = temp_coors.to(at::kInt); - coors_map = coors_map.to(at::kInt); - - at::Tensor coors_count = at::zeros({1}, coors_map.options()); - at::Tensor coors_order = at::empty({num_coors}, coors_map.options()); - at::Tensor pts_id = at::zeros({num_points}, coors_map.options()); - reduce_count = at::zeros({num_coors}, coors_map.options()); - - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "get_assign_pos", ([&] { - 
nondeterministic_get_assign_pos<<>>( - num_points, coors_map.contiguous().data_ptr(), - pts_id.contiguous().data_ptr(), - coors_count.contiguous().data_ptr(), - reduce_count.contiguous().data_ptr(), - coors_order.contiguous().data_ptr()); - })); - - AT_DISPATCH_ALL_TYPES( - points.scalar_type(), "assign_point_to_voxel", ([&] { - nondeterministic_assign_point_voxel - <<>>( - num_points, points.contiguous().data_ptr(), - coors_map.contiguous().data_ptr(), - pts_id.contiguous().data_ptr(), - temp_coors.contiguous().data_ptr(), - reduce_count.contiguous().data_ptr(), - coors_order.contiguous().data_ptr(), - voxels.contiguous().data_ptr(), - coors.contiguous().data_ptr(), - num_points_per_voxel.contiguous().data_ptr(), - max_voxels, max_points, num_features, NDim); - })); - AT_CUDA_CHECK(cudaGetLastError()); - return max_voxels < num_coors ? max_voxels : num_coors; -} - -void DynamicVoxelizeForwardCUDAKernelLauncher( - const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, const std::vector coors_range, - const int NDim = 3) { - // current version tooks about 0.04s for one frame on cpu - // check device - - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int num_points = points.size(0); - const int num_features = points.size(1); - - const float voxel_x = voxel_size[0]; - const float voxel_y = voxel_size[1]; - const float voxel_z = voxel_size[2]; - const float coors_x_min = coors_range[0]; - const float coors_y_min = coors_range[1]; - const float coors_z_min = coors_range[2]; - const float coors_x_max = coors_range[3]; - const float coors_y_max = coors_range[4]; - const float coors_z_max = coors_range[5]; - - const int grid_x = round((coors_x_max - coors_x_min) / voxel_x); - const int grid_y = round((coors_y_max - coors_y_min) / voxel_y); - const int grid_z = round((coors_z_max - coors_z_min) / voxel_z); - - const int col_blocks = at::cuda::ATenCeilDiv(num_points, 
THREADS_PER_BLOCK); - dim3 blocks(col_blocks); - dim3 threads(THREADS_PER_BLOCK); - - AT_DISPATCH_ALL_TYPES(points.scalar_type(), "dynamic_voxelize_kernel", [&] { - dynamic_voxelize_kernel<<>>( - points.contiguous().data_ptr(), - coors.contiguous().data_ptr(), voxel_x, voxel_y, voxel_z, - coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max, - coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, NDim); - }); - - AT_CUDA_CHECK(cudaGetLastError()); -} diff --git a/assets/cuda/mmcv/voxelization_cuda_kernel.cuh b/assets/cuda/mmcv/voxelization_cuda_kernel.cuh deleted file mode 100644 index 021b488..0000000 --- a/assets/cuda/mmcv/voxelization_cuda_kernel.cuh +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -#ifndef VOXELIZATION_CUDA_KERNEL_CUH -#define VOXELIZATION_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; - -template -__global__ void dynamic_voxelize_kernel( - const T* points, T_int* coors, const float voxel_x, const float voxel_y, - const float voxel_z, const float coors_x_min, const float coors_y_min, - const float coors_z_min, const float coors_x_max, const float coors_y_max, - const float coors_z_max, const int grid_x, const int grid_y, - const int grid_z, const int num_points, const int num_features, - const int NDim) { - // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; - CUDA_1D_KERNEL_LOOP(index, num_points) { - // To save some computation - auto points_offset = points + index * num_features; - auto coors_offset = coors + index * NDim; - int c_x = floorf((points_offset[0] - coors_x_min) / voxel_x); - if (c_x < 0 || c_x >= grid_x) { - coors_offset[0] = -1; - continue; - } - - int c_y = floorf((points_offset[1] - coors_y_min) / voxel_y); - if (c_y < 0 || c_y >= grid_y) { - coors_offset[0] = -1; - coors_offset[1] = -1; - continue; - } - - int c_z = 
floorf((points_offset[2] - coors_z_min) / voxel_z); - if (c_z < 0 || c_z >= grid_z) { - coors_offset[0] = -1; - coors_offset[1] = -1; - coors_offset[2] = -1; - } else { - coors_offset[0] = c_z; - coors_offset[1] = c_y; - coors_offset[2] = c_x; - } - } -} - -template -__global__ void assign_point_to_voxel(const int nthreads, const T* points, - T_int* point_to_voxelidx, - T_int* coor_to_voxelidx, T* voxels, - const int max_points, - const int num_features, - const int num_points, const int NDim) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; - int index = thread_idx / num_features; - - int num = point_to_voxelidx[index]; - int voxelidx = coor_to_voxelidx[index]; - if (num > -1 && voxelidx > -1) { - auto voxels_offset = - voxels + voxelidx * max_points * num_features + num * num_features; - - int k = thread_idx % num_features; - voxels_offset[k] = points[thread_idx]; - } - } -} - -template -__global__ void assign_voxel_coors(const int nthreads, T_int* coor, - T_int* point_to_voxelidx, - T_int* coor_to_voxelidx, T_int* voxel_coors, - const int num_points, const int NDim) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; - // if (index >= num_points) return; - int index = thread_idx / NDim; - int num = point_to_voxelidx[index]; - int voxelidx = coor_to_voxelidx[index]; - if (num == 0 && voxelidx > -1) { - auto coors_offset = voxel_coors + voxelidx * NDim; - int k = thread_idx % NDim; - coors_offset[k] = coor[thread_idx]; - } - } -} - -template -__global__ void point_to_voxelidx_kernel(const T_int* coor, - T_int* point_to_voxelidx, - T_int* point_to_pointidx, - const int max_points, - const int max_voxels, - const int num_points, const int NDim) { - CUDA_1D_KERNEL_LOOP(index, num_points) { - auto coor_offset = coor + index * NDim; - // skip invalid points - if (coor_offset[0] == -1) continue; - - int num = 0; - int coor_x = coor_offset[0]; 
- int coor_y = coor_offset[1]; - int coor_z = coor_offset[2]; - // only calculate the coors before this coor[index] - for (int i = 0; i < index; ++i) { - auto prev_coor = coor + i * NDim; - if (prev_coor[0] == -1) continue; - - // Find all previous points that have the same coors - // if find the same coor, record it - if ((prev_coor[0] == coor_x) && (prev_coor[1] == coor_y) && - (prev_coor[2] == coor_z)) { - num++; - if (num == 1) { - // point to the same coor that first show up - point_to_pointidx[index] = i; - } else if (num >= max_points) { - // out of boundary - break; - } - } - } - if (num == 0) { - point_to_pointidx[index] = index; - } - if (num < max_points) { - point_to_voxelidx[index] = num; - } - } -} - -template -__global__ void determin_voxel_num( - // const T_int* coor, - T_int* num_points_per_voxel, T_int* point_to_voxelidx, - T_int* point_to_pointidx, T_int* coor_to_voxelidx, T_int* voxel_num, - const int max_points, const int max_voxels, const int num_points) { - // only calculate the coors before this coor[index] - for (int i = 0; i < num_points; ++i) { - int point_pos_in_voxel = point_to_voxelidx[i]; - // record voxel - if (point_pos_in_voxel == -1) { - // out of max_points or invalid point - continue; - } else if (point_pos_in_voxel == 0) { - // record new voxel - int voxelidx = voxel_num[0]; - if (voxel_num[0] >= max_voxels) continue; - voxel_num[0] += 1; - coor_to_voxelidx[i] = voxelidx; - num_points_per_voxel[voxelidx] = 1; - } else { - int point_idx = point_to_pointidx[i]; - int voxelidx = coor_to_voxelidx[point_idx]; - if (voxelidx != -1) { - coor_to_voxelidx[i] = voxelidx; - num_points_per_voxel[voxelidx] += 1; - } - } - } -} - -__global__ void nondeterministic_get_assign_pos( - const int nthreads, const int32_t* coors_map, int32_t* pts_id, - int32_t* coors_count, int32_t* reduce_count, int32_t* coors_order) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - int coors_idx = coors_map[thread_idx]; - if (coors_idx > -1) { - int32_t 
coors_pts_pos = atomicAdd(&reduce_count[coors_idx], 1); - pts_id[thread_idx] = coors_pts_pos; - if (coors_pts_pos == 0) { - coors_order[coors_idx] = atomicAdd(coors_count, 1); - } - } - } -} - -template -__global__ void nondeterministic_assign_point_voxel( - const int nthreads, const T* points, const int32_t* coors_map, - const int32_t* pts_id, const int32_t* coors_in, const int32_t* reduce_count, - const int32_t* coors_order, T* voxels, int32_t* coors, int32_t* pts_count, - const int max_voxels, const int max_points, const int num_features, - const int NDim) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - int coors_idx = coors_map[thread_idx]; - int coors_pts_pos = pts_id[thread_idx]; - if (coors_idx > -1 && coors_pts_pos < max_points) { - int coors_pos = coors_order[coors_idx]; - if (coors_pos < max_voxels) { - auto voxels_offset = - voxels + (coors_pos * max_points + coors_pts_pos) * num_features; - auto points_offset = points + thread_idx * num_features; - for (int k = 0; k < num_features; k++) { - voxels_offset[k] = points_offset[k]; - } - if (coors_pts_pos == 0) { - pts_count[coors_pos] = min(reduce_count[coors_idx], max_points); - auto coors_offset = coors + coors_pos * NDim; - auto coors_in_offset = coors_in + coors_idx * NDim; - for (int k = 0; k < NDim; k++) { - coors_offset[k] = coors_in_offset[k]; - } - } - } - } - } -} - -#endif // VOXELIZATION_CUDA_KERNEL_CUH diff --git a/assets/cuda/mmcv/voxelize.py b/assets/cuda/mmcv/voxelize.py deleted file mode 100644 index 10a6d5c..0000000 --- a/assets/cuda/mmcv/voxelize.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Any, List, Tuple, Union - -import torch -from torch import nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair - -# from utils import ext_loader -import importlib -def load_ext(name, funcs): - ext = importlib.import_module('mmcv.' 
+ name) - for fun in funcs: - assert hasattr(ext, fun), f'{fun} miss in module {name}' - return ext - -ext_module = load_ext( - '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) - - -class _Voxelization(Function): - - @staticmethod - def forward( - ctx: Any, - points: torch.Tensor, - voxel_size: Union[tuple, float], - coors_range: Union[tuple, float], - max_points: int = 35, - max_voxels: int = 20000, - deterministic: bool = True) -> Union[Tuple[torch.Tensor], Tuple]: - """Convert kitti points(N, >=3) to voxels. - - Args: - points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points - and points[:, 3:] contain other information like reflectivity. - voxel_size (tuple or float): The size of voxel with the shape of - [3]. - coors_range (tuple or float): The coordinate range of voxel with - the shape of [6]. - max_points (int, optional): maximum points contained in a voxel. if - max_points=-1, it means using dynamic_voxelize. Default: 35. - max_voxels (int, optional): maximum voxels this function create. - for second, 20000 is a good choice. Users should shuffle points - before call this function because max_voxels may drop points. - Default: 20000. - deterministic: bool. whether to invoke the non-deterministic - version of hard-voxelization implementations. non-deterministic - version is considerablly fast but is not deterministic. only - affects hard voxelization. default True. for more information - of this argument and the implementation insights, please refer - to the following links: - https://github.com/open-mmlab/mmdetection3d/issues/894 - https://github.com/open-mmlab/mmdetection3d/pull/904 - it is an experimental feature and we will appreciate it if - you could share with us the failing cases. - - Returns: - tuple[torch.Tensor]: tuple[torch.Tensor]: A tuple contains three - elements. The first one is the output voxels with the shape of - [M, max_points, n_dim], which only contain points and returned - when max_points != -1. 
The second is the voxel coordinates with - shape of [M, 3]. The last is number of point per voxel with the - shape of [M], which only returned when max_points != -1. - """ - if max_points == -1 or max_voxels == -1: - coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) - ext_module.dynamic_voxelize_forward( - points, - torch.tensor(voxel_size, dtype=torch.float), - torch.tensor(coors_range, dtype=torch.float), - coors, - NDim=3) - return coors - else: - voxels = points.new_zeros( - size=(max_voxels, max_points, points.size(1))) - coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) - num_points_per_voxel = points.new_zeros( - size=(max_voxels, ), dtype=torch.int) - voxel_num = torch.zeros(size=(), dtype=torch.long) - ext_module.hard_voxelize_forward( - points, - torch.tensor(voxel_size, dtype=torch.float), - torch.tensor(coors_range, dtype=torch.float), - voxels, - coors, - num_points_per_voxel, - voxel_num, - max_points=max_points, - max_voxels=max_voxels, - NDim=3, - deterministic=deterministic) - # select the valid voxels - voxels_out = voxels[:voxel_num] - coors_out = coors[:voxel_num] - num_points_per_voxel_out = num_points_per_voxel[:voxel_num] - return voxels_out, coors_out, num_points_per_voxel_out - - -voxelization = _Voxelization.apply - - -class Voxelization(nn.Module): - """Convert kitti points(N, >=3) to voxels. - - Please refer to `Point-Voxel CNN for Efficient 3D Deep Learning - `_ for more details. - - Args: - voxel_size (tuple or float): The size of voxel with the shape of [3]. - point_cloud_range (tuple or float): The coordinate range of voxel with - the shape of [6]. - max_num_points (int): maximum points contained in a voxel. if - max_points=-1, it means using dynamic_voxelize. - max_voxels (int, optional): maximum voxels this function create. - for second, 20000 is a good choice. Users should shuffle points - before call this function because max_voxels may drop points. - Default: 20000. 
- """ - - def __init__(self, - voxel_size: List, - point_cloud_range: List, - max_num_points: int, - max_voxels: Union[tuple, int] = 20000, - deterministic: bool = True): - """ - Args: - voxel_size (list): list [x, y, z] size of three dimension - point_cloud_range (list): - [x_min, y_min, z_min, x_max, y_max, z_max] - max_num_points (int): max number of points per voxel - max_voxels (tuple or int): max number of voxels in - (training, testing) time - deterministic: bool. whether to invoke the non-deterministic - version of hard-voxelization implementations. non-deterministic - version is considerablly fast but is not deterministic. only - affects hard voxelization. default True. for more information - of this argument and the implementation insights, please refer - to the following links: - https://github.com/open-mmlab/mmdetection3d/issues/894 - https://github.com/open-mmlab/mmdetection3d/pull/904 - it is an experimental feature and we will appreciate it if - you could share with us the failing cases. 
- """ - super().__init__() - - self.voxel_size = voxel_size - self.point_cloud_range = point_cloud_range - self.max_num_points = max_num_points - if isinstance(max_voxels, tuple): - self.max_voxels = max_voxels - else: - self.max_voxels = _pair(max_voxels) - self.deterministic = deterministic - - point_cloud_range = torch.tensor( - point_cloud_range, dtype=torch.float32) - voxel_size = torch.tensor(voxel_size, dtype=torch.float32) - grid_size = ( - point_cloud_range[3:] - # type: ignore - point_cloud_range[:3]) / voxel_size # type: ignore - grid_size = torch.round(grid_size).long() - input_feat_shape = grid_size[:2] - self.grid_size = grid_size - # the origin shape is as [x-len, y-len, z-len] - # [w, h, d] -> [d, h, w] - self.pcd_shape = [*input_feat_shape, 1][::-1] - - def forward(self, input: torch.Tensor) -> torch.Tensor: - if self.training: - max_voxels = self.max_voxels[0] - else: - max_voxels = self.max_voxels[1] - - return voxelization(input, self.voxel_size, self.point_cloud_range, - self.max_num_points, max_voxels, - self.deterministic) - - def __repr__(self): - s = self.__class__.__name__ + '(' - s += 'voxel_size=' + str(self.voxel_size) - s += ', point_cloud_range=' + str(self.point_cloud_range) - s += ', max_num_points=' + str(self.max_num_points) - s += ', max_voxels=' + str(self.max_voxels) - s += ', deterministic=' + str(self.deterministic) - s += ')' - return s diff --git a/assets/docs/index_eval_v2.pkl b/assets/docs/index_eval_v2.pkl deleted file mode 100644 index 3bf3e5a..0000000 Binary files a/assets/docs/index_eval_v2.pkl and /dev/null differ diff --git a/assets/tests/README.md b/assets/tests/README.md deleted file mode 100644 index ee80f86..0000000 --- a/assets/tests/README.md +++ /dev/null @@ -1,42 +0,0 @@ -Mics Tests (Speed,) ---- - -Some speed comparison testing in algorithms and data structures. 
- - -## Cal Chamfer Distance - -Points num: pc0: 88132, pc1: 88101 - -| Function | Time (ms) | -| :---: | :---: | -| CUDA(Old, SCOOP, Batch) | 83.275 | -| Faiss | 368.269 | -| Pytorch3D | 61.474 | -| CUDA(New, NonBatch) Ours| 33.199 | -| CUDA(New, SharedM) Ours | 17.379 | - - -## Cal HDBSCAN - -Points num: pc0: 85396, pc1: 85380 - -| Function | Time (s) | -| :---: | :---: | -| cuml | 1.604 | -| sklearn | 31.946 | -| scikit-hdbscan | 3.064 | - -dependence: [cuml](https://github.com/rapidsai/cuml) -```bash -# cuml -mamba create -n rapids -c rapidsai -c conda-forge -c nvidia rapids=23.12 python=3.9 cuda-version=11.8 pytorch -mamba activate rapids - -# sklearn -mamba install -c conda-forge scikit-learn - -# scikit-hdbscan (independent version) -mamba install -c conda-forge hdbscan - -``` \ No newline at end of file diff --git a/assets/tests/chamferdis_speed_test.py b/assets/tests/chamferdis_speed_test.py deleted file mode 100644 index e53c33a..0000000 --- a/assets/tests/chamferdis_speed_test.py +++ /dev/null @@ -1,128 +0,0 @@ -""" -# Created: 2023-08-09 23:40 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# -# Description: Test which existing chamfer distance is faster. - -Dependence for this test scripts: - * faiss-gpu - * Pytorch3d - * mmcv -""" -import os, sys -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' 
)) -BASEF_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '../..')) -sys.path.append(BASE_DIR) -sys.path.append(BASEF_DIR) - -import torch -import numpy as np -import time - -FAISS_TEST = True -PYTORCH3D_TEST = True -MMCV_TEST = False -CUDA_TEST = True - -if __name__ == "__main__": - pc0 = np.load(f'{BASEF_DIR}/assets/tests/test_pc0.npy') - pc1 = np.load(f'{BASEF_DIR}/assets/tests/test_pc1.npy') - print('Start status on GPU allocation: {:.3f}MB'.format(torch.cuda.memory_allocated()/1024**2)) - pc0 = torch.from_numpy(pc0[...,:3]).float().cuda().contiguous() - pc1 = torch.from_numpy(pc1[...,:3]).float().cuda().contiguous() - pc0.requires_grad = True - pc1.requires_grad = True - - print(pc0.shape, "demo data: ", pc0[0]) - print(pc1.shape, "demo data: ", pc1[0]) - print('Status after loading data: {:.3f}MB'.format(torch.cuda.memory_allocated()/1024**2)) - - time.sleep(1) - if FAISS_TEST: - print("------ START Faiss Chamfer Distance Cal ------") - import faiss - import faiss.contrib.torch_utils - def faiss_chamfer_distance(pc1, pc2): - def faiss_knn(pc1, pc2): - res = faiss.StandardGpuResources() - index = faiss.GpuIndexFlatL2(res, 3) - index.add(pc2) - distances, indices = index.search(pc1, 1) # [N_1, 1] - return distances - pc1_knn = faiss_knn(pc1, pc2) - pc2_knn = faiss_knn(pc2, pc1) - - # # NOTE: truncated Chamfer distance. - # dist_thd = 2 - # pc1_knn[pc1_knn >= dist_thd] = 0.0 - # pc2_knn[pc2_knn >= dist_thd] = 0.0 - - # NOTE: Chamfer distance. 
mean based on pts number - cham_pc1 = pc1_knn.mean() - cham_pc2 = pc2_knn.mean() - - return cham_pc1 + cham_pc2 - - start_time = time.time() - loss = faiss_chamfer_distance(pc0, pc1) - print("loss: ", loss) - print(f"Faiss Chamfer Distance Cal time: {(time.time() - start_time)*1000:.3f} ms]\n") - - if PYTORCH3D_TEST: - print("------ START Pytorch3d Chamfer Distance Cal ------") - from src.models.basic.nsfp_module import my_chamfer_fn - start_time = time.time() - loss0, _ = my_chamfer_fn(pc0.unsqueeze(0), pc1.unsqueeze(0), truncate_dist=False) - - print(f"Pytorch3d Chamfer Distance Cal time: {(time.time() - start_time)*1000:.3f} ms") - print("loss: ", loss0) - print() - - if MMCV_TEST: - # NOTE: mmcv chamfer distance is on x,y only.. That's why the result is not correct. - # DONE: the result is not correct.... - print("------ START mmcv 2D Chamfer Distance Cal ------") - from mmcv.ops import chamfer_distance - start_time = time.time() - dis0, dis1, idx0, idx1= chamfer_distance(pc0.unsqueeze(0), pc1.unsqueeze(0)) - loss_0t1 = torch.sum(dis0) / pc0.shape[0] - loss_1t0 = torch.sum(dis1) / pc1.shape[0] - - loss = loss_0t1 + loss_1t0 - print("loss: ", loss) - print(f"mmcv Chamfer Distance Cal time: {(time.time() - start_time)*1000:.3f} ms\n") - - if CUDA_TEST: - print("------ START CUDA Chamfer Distance Cal ------") - from assets.cuda.chamfer3D import nnChamferDis - start_time = time.time() - loss = nnChamferDis(truncate_dist=False)(pc0, pc1) - print("loss: ", loss) - print(f"Chamfer Distance Cal time: {(time.time() - start_time)*1000:.3f} ms") - print() - -""" -0: 0.000MB -torch.Size([88132, 3]) demo data: tensor([-8.2266, 8.3516, 1.4922], device='cuda:0', grad_fn=) -torch.Size([88101, 3]) demo data: tensor([-7.9961, 8.1328, 0.4839], device='cuda:0', grad_fn=) -1: 2.017MB - ------- START Faiss Chamfer Distance Cal ------ -loss: tensor(0.1710, device='cuda:0') -Faiss Chamfer Distance Cal time: 817.698 ms - ------- START Pytorch3d Chamfer Distance Cal ------ -Pytorch3d 
Chamfer Distance Cal time: 68.256 ms -loss: tensor(0.1710, device='cuda:0', grad_fn=) - ------- START mmcv 2D Chamfer Distance Cal ------ -loss: tensor(0.0591, device='cuda:0', grad_fn=) -mmcv Chamfer Distance Cal time: 651.510 ms - ------- START CUDA Chamfer Distance Cal ------ -Chamfer Distance Cal time: 14.308 ms -loss: tensor(0.1710, device='cuda:0', grad_fn=) - -""" \ No newline at end of file diff --git a/assets/tests/hdbscan_speed.py b/assets/tests/hdbscan_speed.py deleted file mode 100644 index f92580d..0000000 --- a/assets/tests/hdbscan_speed.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -# Created: 2023-12-18 17:23 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# -# -# Description: Test which existing hdbscan speed -""" -import os, sys -BASEF_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '../..')) -sys.path.append(BASEF_DIR) -import torch, time -import numpy as np - -# ------ Three different hdbscan implementations ------ -from cuml.cluster import hdbscan -from sklearn.cluster import HDBSCAN -import hdbscan as cpu_hdbscan - -from src.utils.o3d_view import MyVisualizer, color_map -import open3d as o3d - -MAX_AXIS_RANGE = 60 - -def vis(pc0, labels, title='HDBSCAN'): - # visualize - vis = MyVisualizer(view_file=f'{BASEF_DIR}/assets/view/av2.json', window_title=title) - pcd = o3d.geometry.PointCloud() - num_points = pc0.shape[0] - pcd.points = o3d.utility.Vector3dVector(pc0) - pcd.colors = o3d.utility.Vector3dVector(np.zeros((num_points, 3))) - for i in range(num_points): - if labels[i] >= 0: - # print(i, labels[i]) - pcd.colors[i] = color_map[labels[i] % len(color_map)] - vis.show([pcd]) - -if __name__ == "__main__": - pc0 = np.load(f'{BASEF_DIR}/assets/tests/test_pc0.npy') - pc1 = np.load(f'{BASEF_DIR}/assets/tests/test_pc1.npy') - print('0: {:.3f}MB'.format(torch.cuda.memory_allocated()/1024**2)) - pc0 = torch.from_numpy(pc0[...,:3]).float().cuda().contiguous() - pc1 
= torch.from_numpy(pc1[...,:3]).float().cuda().contiguous() - - # filter out MAX_AXIS_RANGE - pc0 = pc0[torch.logical_and(torch.abs(pc0[:,0]) Adam -lr: 2e-6 -loss_fn: seflowLoss # choices: [ff3dLoss, zeroflowLoss, deflowLoss, seflowLoss] -add_seloss: # {chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0} - -# log settings -seed: 42069 -log_every: 10 # steps epochs*dataset_size/batch_size -val_every: 3 # epochs -save_top_model: 5 # top_k model will be saved. - -# -----> Model and Task Parameters -voxel_size: [0.2, 0.2, 6] -point_cloud_range: [-51.2, -51.2, -3, 51.2, 51.2, 3] -num_frames: 2 \ No newline at end of file diff --git a/conf/eval.yaml b/conf/eval.yaml deleted file mode 100644 index cec0559..0000000 --- a/conf/eval.yaml +++ /dev/null @@ -1,19 +0,0 @@ - -dataset_path: /home/kin/data/av2/preprocess_v2/sensor -checkpoint: /home/kin/model_zoo/deflow.ckpt -av2_mode: val # [val, test] -save_res: False # [True, False] - -leaderboard_version: 1 # [1, 2] -supervised_flag: True # [True, False], whether you use any label from the dataset - -# no need to change -slurm_id: 00000 -output: ${model.name}-${slurm_id} -gpus: 1 -seed: 42069 -eval_only: True -wandb_mode: offline # [offline, disabled, online] -defaults: - - hydra: default - - model: deflow \ No newline at end of file diff --git a/conf/hydra/default.yaml b/conf/hydra/default.yaml deleted file mode 100644 index bddcd90..0000000 --- a/conf/hydra/default.yaml +++ /dev/null @@ -1,2 +0,0 @@ -run: - dir: logs/jobs/${output}/${now:%m-%d-%H-%M} \ No newline at end of file diff --git a/conf/model/deflow.yaml b/conf/model/deflow.yaml deleted file mode 100644 index 83afb0a..0000000 --- a/conf/model/deflow.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: deflow - -target: - _target_: src.models.DeFlow - decoder_option: gru # choices: [linear, gru] - num_iters: 4 - voxel_size: ${voxel_size} - point_cloud_range: ${point_cloud_range} - -val_monitor: val/Dynamic/Mean \ No newline at end of 
file diff --git a/conf/model/fastflow3d.yaml b/conf/model/fastflow3d.yaml deleted file mode 100644 index b0ea5f9..0000000 --- a/conf/model/fastflow3d.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: fastflow3d - -target: - _target_: src.models.FastFlow3D - voxel_size: ${voxel_size} - point_cloud_range: ${point_cloud_range} - -val_monitor: val/Dynamic/Mean \ No newline at end of file diff --git a/conf/save.yaml b/conf/save.yaml deleted file mode 100644 index 253843e..0000000 --- a/conf/save.yaml +++ /dev/null @@ -1,13 +0,0 @@ -dataset_path: /home/kin/data/av2/preprocess_v2/demo/sensor/val -checkpoint: /home/kin/model_zoo/seflow_best.ckpt -res_name: # if None will directly be the `model_name.ckpt` in checkpoint path - - -# no need to change -defaults: - - hydra: default - - model: deflow -seed: 42069 -gpus: 1 -slurm_id: 00000 -output: ${model.name}-${slurm_id} \ No newline at end of file diff --git a/dataprocess/README.md b/dataprocess/README.md deleted file mode 100644 index 5be29d4..0000000 --- a/dataprocess/README.md +++ /dev/null @@ -1,153 +0,0 @@ -Dataset ---- - -README for downloading and preprocessing the dataset. We includes waymo, argoverse 2.0 and nuscenes dataset in our project. - -- [Download](#download): includes how to download the dataset. -- [Process](#process): run script to preprocess the dataset. - -We've updated the process dataset for: - -- [x] Argoverse 2.0: check [here](#argoverse-20). The process script Involved from [DeFlow](https://github.com/KTH-RPL/DeFlow). -- [x] Waymo: check [here](#waymo-dataset). The process script was involved from [SeFlow](https://github.com/KTH-RPL/SeFlow). -- [ ] nuScenes: done coding, public after review. Will be involved later by another paper. - -If you want to use all datasets above, there is a specific process environment in [envprocess.yaml](../envprocess.yaml) to install all the necessary packages. As Waymo package have different configuration and conflict with the main environment. 
Setup through the following command: - -```bash -conda env create -f envprocess.yaml -conda activate dataprocess -# NOTE we need **manually reinstall numpy** (higher than 1.22) -# * since waymo package force numpy==1.21.5, BUT! -# * hdbscan w. numpy<1.22.0 will raise error: 'numpy.float64' object cannot be interpreted as an integer -# * av2 need numpy >=1.22.0, waymo with numpy==1.22.0 will be fine on code running. -pip install numpy==1.22 -``` - -## Download - -### Argoverse 2.0 - -Install their download tool `s5cmd`, already in our envprocess.yaml. Then download the dataset: -```bash -# train is really big (750): totally 966 GB -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/sensor/train/*" av2/sensor/train - -# val (150) and test (150): totally 168GB + 168GB -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/sensor/val/*" av2/sensor/val -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/sensor/test/*" av2/sensor/test - -# for local and online eval mask from official repo -s5cmd --no-sign-request cp "s3://argoverse/tasks/3d_scene_flow/zips/*" . -``` - -Then to quickly pre-process the data, we can [read these commands](#process) on how to generate the pre-processed data for training and evaluation. This will take around 0.5-2 hour for the whole dataset (train & val) based on how powerful your CPU is. - -Optional: More [self-supervised data in AV2 LiDAR only](https://www.argoverse.org/av2.html#lidar-link), note: It **does not** include **imagery or 3D annotations**. The dataset is designed to support research into self-supervised learning in the lidar domain, as well as point cloud forecasting. 
-```bash -# train is really big (16000): totally 4 TB -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/lidar/train/*" av2/lidar/train - -# val (2000): totally 0.5 TB -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/lidar/val/*" av2/lidar/val - -# test (2000): totally 0.5 TB -s5cmd --numworkers 12 --no-sign-request cp "s3://argoverse/datasets/av2/lidar/test/*" av2/lidar/test -``` - -#### Dataset frames - - - -| Dataset | # Total Scene | # Total Frames | -| ------------ | ------------- | -------------- | -| Sensor/train | 750 | 110071 | -| Sensor/val | 150 | 23547 | -| Sensor/test | 150 | 23574 | -| LiDAR/train | 16000 | - | -| LiDAR/val | 2000 | 597590 | -| LiDAR/test | 2000 | 597575 | - -### nuScenes - -You need sign up an account at [nuScenes](https://www.nuscenes.org/) to download the dataset from [https://www.nuscenes.org/nuscenes#download](https://www.nuscenes.org/nuscenes#download) Full dataset (v1.0), you can choose to download lidar only. Click donwload mini split and unzip the file to the `nuscenes` folder if you want to test. - -![](../assets/docs/nuscenes.png) - - -### Waymo Dataset - -To download the Waymo dataset, you need to register an account at [Waymo Open Dataset](https://waymo.com/open/). You also need to install gcloud SDK and authenticate your account. Please refer to [this page](https://cloud.google.com/sdk/docs/install) for more details. - -For cluster without root user, check [here sdk tar gz](https://cloud.google.com/sdk/docs/downloads-versioned-archives). - -Website to check their file: [https://console.cloud.google.com/storage/browser/waymo_open_dataset_scene_flow](https://console.cloud.google.com/storage/browser/waymo_open_dataset_scene_flow) - -The thing we need is all things about `lidar`, to download the data, you can use the following command: - -```bash -gsutil -m cp -r "gs://waymo_open_dataset_scene_flow/valid" . -gsutil -m cp -r "gs://waymo_open_dataset_scene_flow/train" . 
-``` - -And flowlabel data can be downloaded here with ground segmentation by HDMap follow the same style of [ZeroFlow](https://github.com/kylevedder/zeroflow/blob/master/data_prep_scripts/waymo/extract_flow_and_remove_ground.py). - -You can download the processed map folder here to free yourself downloaded another type of data again: - -```bash -wget https://huggingface.co/kin-zhang/OpenSceneFlow/resolve/main/waymo_map.tar.gz -tar -xvf waymo_map.tar.gz -C /home/kin/data/waymo/flowlabel -# you will see there is a `map` folder in the `flowlabel` folder now. -``` - -#### Dataset frames - -| Dataset | # Total Scene | # Total Frames | -| ------- | ------------- | -------------- | -| train | 799 | 155687 | -| val | 203 | 39381 | - -## Process -This directory contains the scripts to preprocess the datasets. - -- `extract_av2.py`: Process the datasets in Argoverse 2.0. -- `extract_nus.py`: Process the datasets in nuScenes. -- `extract_waymo.py`: Process the datasets in Waymo. - -Example Running command:` -```bash -# av2: -python dataprocess/extract_av2.py --av2_type sensor --data_mode train --argo_dir /home/kin/data/av2 --output_dir /home/kin/data/av2/preprocess - -# waymo: -python dataprocess/extract_waymo.py --mode train --flow_data_dir /home/kin/data/waymo/flowlabel --map_dir /home/kin/data/waymo/flowlabel/map --output_dir /home/kin/data/waymo/preprocess --nproc 48 -``` - -All these preprocess scripts will generate the same format `.h5` file. The file contains the following in codes: - -File: `[*:logid].h5` file named in logid. Every timestamp is the key of group (f[key]). 
- -```python -def process_log(data_dir: Path, log_id: str, output_dir: Path, n: Optional[int] = None) : - def create_group_data(group, pc, gm, pose, flow_0to1=None, flow_valid=None, flow_category=None, ego_motion=None): - group.create_dataset('lidar', data=pc.astype(np.float32)) - group.create_dataset('ground_mask', data=gm.astype(bool)) - group.create_dataset('pose', data=pose.astype(np.float32)) - if flow_0to1 is not None: - # ground truth flow information - group.create_dataset('flow', data=flow_0to1.astype(np.float32)) - group.create_dataset('flow_is_valid', data=flow_valid.astype(bool)) - group.create_dataset('flow_category_indices', data=flow_category.astype(np.uint8)) - group.create_dataset('ego_motion', data=ego_motion.astype(np.float32)) -``` - -After preprocessing, all data can use the same dataloader to load the data. As already in our DeFlow code. - -Or you can run testing file to visualize the data. - -```bash -# view gt flow -python tools/visualization.py --data_dir /home/kin/data/av2/preprocess/sensor/mini --res_name flow - -python tools/visualization.py --data_dir /home/kin/data/waymo/preprocess/val --res_name flow -``` \ No newline at end of file diff --git a/dataprocess/extract_av2.py b/dataprocess/extract_av2.py deleted file mode 100644 index e397bab..0000000 --- a/dataprocess/extract_av2.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -# Created: 2023-11-01 17:02 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
- -# Description: Preprocess Data, save as h5df format for faster loading -# Reference: -# * ZeroFlow data preprocessing work: https://github.com/kylevedder/argoverse2-sf -# * Argoverse API source code: https://github.com/argoverse/av2-api -""" - -import os -os.environ["OMP_NUM_THREADS"] = "1" - -from av2.datasets.sensor.av2_sensor_dataloader import convert_pose_dataframe_to_SE3 -from av2.structures.sweep import Sweep -from av2.structures.cuboid import CuboidList, Cuboid -from av2.utils.io import read_feather -from av2.map.map_api import ArgoverseStaticMap -from av2.geometry.se3 import SE3 -from av2.datasets.sensor.constants import AnnotationCategories - -import multiprocessing -from pathlib import Path -from multiprocessing import Pool, current_process -from typing import Optional, Tuple, Dict, Union, Final -from tqdm import tqdm -import numpy as np -import fire, time, h5py -from collections import defaultdict -import pickle -from zipfile import ZipFile -import pandas as pd - -import os, sys -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' 
)) -sys.path.append(BASE_DIR) -from dataprocess.misc_data import create_reading_index -from src.utils.av2_eval import read_ego_SE3_sensor - -BOUNDING_BOX_EXPANSION: Final = 0.2 -CATEGORY_TO_INDEX: Final = { - **{"NONE": 0}, - **{k.value: i + 1 for i, k in enumerate(AnnotationCategories)}, -} - -def create_eval_mask(data_mode: str, output_dir_: Path, mask_dir: str): - """ - Need download the official mask file run: `s5cmd --no-sign-request cp "s3://argoverse/tasks/3d_scene_flow/zips/*" .` - Check more in our assets/README.md - """ - mask_file_path = Path(mask_dir) / f"{data_mode}-masks.zip" - if not mask_file_path.exists(): - print(f'{mask_file_path} not found, please download the mask file for official evaluation.') - return - # extract the mask file - with ZipFile(mask_file_path, 'r') as zipObj: - zipObj.extractall(Path(mask_dir) / f"{data_mode}-masks") - - data_index = [] - # list scene ids - scene_ids = os.listdir(Path(mask_dir) / f"{data_mode}-masks") - for scene_id in tqdm(scene_ids, desc=f'Create {data_mode} eval mask', ncols=100): - timestamps = sorted([int(file.replace('.feather', '')) - for file in os.listdir(Path(mask_dir) / f"{data_mode}-masks" / scene_id) - if file.endswith('.feather')]) - if not os.path.exists(output_dir_ / f'{scene_id}.h5'): - continue - with h5py.File(output_dir_ / f'{scene_id}.h5', 'r+') as f: - for ts in timestamps: - key = str(ts) - if key not in f.keys(): - print(f'{scene_id}/{key} not found') - continue - group = f[key] - mask = pd.read_feather(Path(mask_dir) / f"{data_mode}-masks" / scene_id / f"{key}.feather").to_numpy().astype(bool) - group.create_dataset('eval_mask', data=mask) - data_index.append([scene_id, key]) - - with open(output_dir_/'index_eval.pkl', 'wb') as f: - pickle.dump(data_index, f) - print(f"Create reading index Successfully") - -def read_pose_pc_ground(data_dir: Path, log_id: str, timestamp: int, avm: ArgoverseStaticMap): - log_poses_df = read_feather(data_dir / log_id / "city_SE3_egovehicle.feather") - # 
more detail: https://argoverse.github.io/user-guide/datasets/lidar.html#sensor-suite - ego2sensor_pose = read_ego_SE3_sensor((data_dir / log_id))['up_lidar'] - filtered_log_poses_df = log_poses_df[log_poses_df["timestamp_ns"].isin([timestamp])] - pose = convert_pose_dataframe_to_SE3(filtered_log_poses_df.loc[filtered_log_poses_df["timestamp_ns"] == timestamp]) - pc = Sweep.from_feather(data_dir / log_id / "sensors" / "lidar" / f"{timestamp}.feather").xyz - # transform to city coordinate since sweeps[0].xyz is in ego coordinate to get ground mask - is_ground = avm.get_ground_points_boolean(pose.transform_point_cloud(pc)) - - # NOTE(SeFlow): transform to sensor coordinate, since some ray-casting based methods need sensor coordinate - pc = ego2sensor_pose.inverse().transform_point_cloud(pc) - return pc, pose, is_ground - -def compute_sceneflow(data_dir: Path, log_id: str, timestamps: Tuple[int, int]) -> Dict[str, Union[np.ndarray, SE3]]: - """Compute sceneflow between the sweeps at the given timestamps. - Args: - data_dir: Argoverse 2.0 directory, e.g. /home/kin/data/av2/sensor/train - log_id: unique id. 
- timestamps: the timestamps of the lidar sweeps to compute flow between - Returns: - Dictionary with fields: - pcl_0: Nx3 array containing the points at time 0 - pcl_1: Mx3 array containing the points at time 1 - flow_0_1: Nx3 array containing flow from timestamp 0 to 1 - flow_1_0: Mx3 array containing flow from timestamp 1 to 0 - valid_0: Nx1 array indicating if the returned flow from 0 to 1 is valid (1 for valid, 0 otherwise) - valid_1: Mx1 array indicating if the returned flow from 1 to 0 is valid (1 for valid, 0 otherwise) - classes_0: Nx1 array containing the class ids for each point in sweep 0 - classes_1: Nx1 array containing the class ids for each point in sweep 0 - pose_0: SE3 pose at time 0 - pose_1: SE3 pose at time 1 - ego_motion: SE3 motion from sweep 0 to sweep 1 - """ - def compute_flow(sweeps, cuboids, poses): - ego1_SE3_ego0 = poses[1].inverse().compose(poses[0]) - # Convert to float32s - ego1_SE3_ego0.rotation = ego1_SE3_ego0.rotation.astype(np.float32) - ego1_SE3_ego0.translation = ego1_SE3_ego0.translation.astype(np.float32) - - flow = ego1_SE3_ego0.transform_point_cloud(sweeps[0].xyz) - sweeps[0].xyz - # Convert to float32s - flow = flow.astype(np.float32) - - valid = np.ones(len(sweeps[0].xyz), dtype=np.bool_) - # classes = -np.ones(len(sweeps[0].xyz), dtype=np.int8) - classes = np.zeros(len(sweeps[0].xyz), dtype=np.uint8) - - for id in cuboids[0]: - c0 = cuboids[0][id] - c0.length_m += BOUNDING_BOX_EXPANSION # the bounding boxes are a little too tight and some points are missed - c0.width_m += BOUNDING_BOX_EXPANSION - obj_pts, obj_mask = c0.compute_interior_points(sweeps[0].xyz) - classes[obj_mask] = CATEGORY_TO_INDEX[str(c0.category)] - - if id in cuboids[1]: - c1 = cuboids[1][id] - c1_SE3_c0 = c1.dst_SE3_object.compose(c0.dst_SE3_object.inverse()) - obj_flow = c1_SE3_c0.transform_point_cloud(obj_pts) - obj_pts - flow[obj_mask] = obj_flow.astype(np.float32) - else: - valid[obj_mask] = 0 - return flow, classes, valid, ego1_SE3_ego0 - - 
sweeps = [Sweep.from_feather(data_dir / log_id / "sensors" / "lidar" / f"{ts}.feather") for ts in timestamps] - - # ================== Load annotations ================== - annotations_feather_path = data_dir / log_id / "annotations.feather" - - if not annotations_feather_path.exists(): - # print(f'{annotations_feather_path} not found') - timestamp_cuboid_index = {} - else: - # Load annotations from disk. - # NOTE: This file contains annotations for the ENTIRE sequence. - # The sweep annotations are selected below. - cuboid_list = CuboidList.from_feather(annotations_feather_path) - - raw_data = read_feather(annotations_feather_path) - ids = raw_data.track_uuid.to_numpy() - timestamp_cuboid_index = defaultdict(dict) - for id, cuboid in zip(ids, cuboid_list.cuboids): - timestamp_cuboid_index[cuboid.timestamp_ns][id] = cuboid - # ================== Load annotations ================== - - cuboids = [timestamp_cuboid_index.get(ts, {}) for ts in timestamps] - - log_poses_df = read_feather(data_dir / log_id / "city_SE3_egovehicle.feather") - filtered_log_poses_df = log_poses_df[log_poses_df["timestamp_ns"].isin(timestamps)] - poses = [convert_pose_dataframe_to_SE3(filtered_log_poses_df.loc[filtered_log_poses_df["timestamp_ns"] == ts]) for ts in timestamps] - - flow_0_1, classes_0, valid_0, ego_motion = compute_flow(sweeps, cuboids, poses) - - return {'pcl_0': sweeps[0].xyz, 'pcl_1' :sweeps[1].xyz, 'flow_0_1': flow_0_1, - 'valid_0': valid_0, 'classes_0': classes_0, - 'pose_0': poses[0], 'pose_1': poses[1], - 'ego_motion': ego_motion} - -def process_log(data_dir: Path, log_id: str, output_dir: Path, n: Optional[int] = None) : - - def create_group_data(group, pc, gm, pose, flow_0to1=None, flow_valid=None, flow_category=None, ego_motion=None): - group.create_dataset('lidar', data=pc.astype(np.float32)) - group.create_dataset('ground_mask', data=gm.astype(bool)) - group.create_dataset('pose', data=pose.astype(np.float32)) - if flow_0to1 is not None: - # ground truth flow 
information - group.create_dataset('flow', data=flow_0to1.astype(np.float32)) - group.create_dataset('flow_is_valid', data=flow_valid.astype(bool)) - group.create_dataset('flow_category_indices', data=flow_category.astype(np.uint8)) - group.create_dataset('ego_motion', data=ego_motion.astype(np.float32)) - - log_map_dirpath = data_dir / log_id / "map" - if(len(os.listdir(log_map_dirpath))<3): - print(f'{log_map_dirpath} needed by 3 to find the ground layer, check if you are using the correct *sensor* dataset') - print("If you are using *lidar* dataset, Please run the following command to generate the map files:") - print(f"python run_steps/0_additional_lidar_map.py --argo_dir {data_dir}") - return - avm = ArgoverseStaticMap.from_map_dir(log_map_dirpath, build_raster=True) - - timestamps = sorted([int(file.replace('.feather', '')) - for file in os.listdir(data_dir / log_id / "sensors/lidar") - if file.endswith('.feather')]) - - gt_flow_flag = False if not (data_dir / log_id / "annotations.feather").exists() else True - - # if n is not None: - # iter_bar = tqdm(zip(timestamps, timestamps[1:]), leave=False, - # total=len(timestamps) - 1, position=n, - # desc=f'Log {log_id}') - # else: - # iter_bar = zip(timestamps, timestamps[1:]) - - with h5py.File(output_dir/f'{log_id}.h5', 'a') as f: - for cnt, ts0 in enumerate(timestamps): - group = f.create_group(str(ts0)) - pc0, pose0, is_ground_0 = read_pose_pc_ground(data_dir, log_id, ts0, avm) - if pc0.shape[0] < 256: - print(f'{log_id}/{ts0} has less than 256 points, skip this scenarios. 
Please check the data if needed.') - break - if cnt == len(timestamps) - 1 or not gt_flow_flag: - create_group_data(group, pc0, is_ground_0.astype(np.bool_), pose0.transform_matrix.astype(np.float32)) - else: - ts1 = timestamps[cnt + 1] - scene_flow = compute_sceneflow(data_dir, log_id, (ts0, ts1)) - create_group_data(group, pc0, is_ground_0.astype(np.bool_), pose0.transform_matrix.astype(np.float32), - scene_flow['flow_0_1'], scene_flow['valid_0'], scene_flow['classes_0'], - scene_flow['ego_motion'].transform_matrix.astype(np.float32)) - -def proc(x, ignore_current_process=False): - if not ignore_current_process: - current=current_process() - pos = current._identity[0] - else: - pos = 1 - process_log(*x, n=pos) - -def process_logs(data_dir: Path, output_dir: Path, nproc: int): - """Compute sceneflow for all logs in the dataset. Logs are processed in parallel. - Args: - data_dir: Argoverse 2.0 directory - output_dir: Output directory. - """ - - if not data_dir.exists(): - print(f'{data_dir} not found') - return - - # NOTE(Qingwen): if you don't want to all data_dir, then change here: logs = logs[:10] only 10 scene. - logs = os.listdir(data_dir) - args = sorted([(data_dir, log, output_dir) for log in logs]) - print(f'Using {nproc} processes to process data: {data_dir} to .h5 format. 
(#scenes: {len(args)})') - # for debug - # for x in tqdm(args): - # proc(x, ignore_current_process=True) - # break - if nproc <= 1: - for x in tqdm(args, ncols=120): - proc(x, ignore_current_process=True) - else: - with Pool(processes=nproc) as p: - res = list(tqdm(p.imap_unordered(proc, args), total=len(logs), ncols=120)) - -def main( - argo_dir: str = "/home/kin/data/av2", - output_dir: str ="/home/kin/data/av2/preprocess", - av2_type: str = "sensor", - data_mode: str = "val", - mask_dir: str = "/home/kin/data/av2/3d_scene_flow", - nproc: int = (multiprocessing.cpu_count() - 1), - only_index: bool = False, -): - data_root_ = Path(argo_dir) / av2_type/ data_mode - output_dir_ = Path(output_dir) / av2_type / data_mode - if only_index: - create_reading_index(output_dir_) - return - output_dir_.mkdir(exist_ok=True, parents=True) - process_logs(data_root_, output_dir_, nproc) - create_reading_index(output_dir_) - if data_mode == "val" or data_mode == "test": - create_eval_mask(data_mode, output_dir_, mask_dir) - -if __name__ == '__main__': - start_time = time.time() - fire.Fire(main) - print(f"\nTime used: {(time.time() - start_time)/60:.2f} mins") \ No newline at end of file diff --git a/dataprocess/extract_nus.py b/dataprocess/extract_nus.py deleted file mode 100644 index e69de29..0000000 diff --git a/dataprocess/extract_waymo.py b/dataprocess/extract_waymo.py deleted file mode 100644 index 01598a0..0000000 --- a/dataprocess/extract_waymo.py +++ /dev/null @@ -1,425 +0,0 @@ -""" -# Created: 2024-02-26 12:42 -# Copyright (C) 2024-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/), Tianshuai Hu (thuaj@connect.ust.hk) -# -# This file is part of SeFlow (https://github.com/KTH-RPL/SeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
-# -# Description: Preprocess Data, save as h5df format for faster loading -# This one is for Waymo dataset, refer a lot to -# Kyle's ZeroFlow code: https://github.com/kylevedder/zeroflow/tree/master/data_prep_scripts/waymo - -# env need: pip install waymo-open-dataset-tf-2.11.0==1.5.0 -""" - -import os -os.environ["OMP_NUM_THREADS"] = "1" -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -# NOTE(2023/02/29): it's really important to set this! otherwise, the point cloud will be wrong. really wried. -os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - -import multiprocessing -from multiprocessing import Pool, current_process -from pathlib import Path -from tqdm import tqdm -import numpy as np -import fire, time, h5py - -from waymo_open_dataset import dataset_pb2 -from waymo_open_dataset.utils import frame_utils - -import tensorflow as tf - -import os, sys, json -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' )) -sys.path.append(BASE_DIR) -from dataprocess.misc_data import create_reading_index, SE2 -GROUND_HEIGHT_THRESHOLD = 0.4 # 40 centimeters -RANGE_MAX_VALID = 50 - -def is_ground_points( - raster_heightmap, - global_to_raster_se2, - global_to_raster_scale, - global_point_cloud, -) -> np.ndarray: - """Remove ground points from a point cloud. - Args: - point_cloud: Numpy array of shape (k,3) in global coordinates. - Returns: - ground_removed_point_cloud: Numpy array of shape (k,3) in global coordinates. - """ - def get_ground_heights( - raster_heightmap, - global_to_raster_se2, - global_to_raster_scale, - global_point_cloud, - ) -> np.ndarray: - """Get ground height for each of the xy locations in a point cloud. - Args: - point_cloud: Numpy array of shape (k,2) or (k,3) in global coordinates. 
- Returns: - ground_height_values: Numpy array of shape (k,) - """ - - global_points_xy = global_point_cloud[:, :2] - - raster_points_xy = ( - global_to_raster_se2.transform_point_cloud(global_points_xy) * global_to_raster_scale - ) - - raster_points_xy = np.round(raster_points_xy).astype(np.int64) - - ground_height_values = np.full((raster_points_xy.shape[0]), np.nan) - # outside max X - outside_max_x = (raster_points_xy[:, 0] >= raster_heightmap.shape[1]).astype(bool) - # outside max Y - outside_max_y = (raster_points_xy[:, 1] >= raster_heightmap.shape[0]).astype(bool) - # outside min X - outside_min_x = (raster_points_xy[:, 0] < 0).astype(bool) - # outside min Y - outside_min_y = (raster_points_xy[:, 1] < 0).astype(bool) - ind_valid_pts = ~np.logical_or( - np.logical_or(outside_max_x, outside_max_y), - np.logical_or(outside_min_x, outside_min_y), - ) - - ground_height_values[ind_valid_pts] = raster_heightmap[ - raster_points_xy[ind_valid_pts, 1], raster_points_xy[ind_valid_pts, 0] - ] - - return ground_height_values - ground_height_values = get_ground_heights( - raster_heightmap, - global_to_raster_se2, - global_to_raster_scale, - global_point_cloud, - ) - is_ground_boolean_arr = ( - np.absolute(global_point_cloud[:, 2] - ground_height_values) <= GROUND_HEIGHT_THRESHOLD - ) | (np.array(global_point_cloud[:, 2] - ground_height_values) < 0) - return is_ground_boolean_arr - -def convert_range_image_to_point_cloud( - frame, - range_images, - camera_projections, - point_flows, - range_image_top_pose, - ri_index=0, - keep_polar_features=False, -): - """Convert range images to point cloud. - - Args: - frame: open dataset frame - range_images: A dict of {laser_name, [range_image_first_return, - range_image_second_return]}. - camera_projections: A dict of {laser_name, - [camera_projection_from_first_return, - camera_projection_from_second_return]}. - range_image_top_pose: range image pixel pose for top lidar. - ri_index: 0 for the first return, 1 for the second return. 
- keep_polar_features: If true, keep the features from the polar range image - (i.e. range, intensity, and elongation) as the first features in the - output range image. - - Returns: - points: {[N, 3]} list of 3d lidar points of length 5 (number of lidars). - (NOTE: Will be {[N, 6]} if keep_polar_features is true. - cp_points: {[N, 6]} list of camera projections of length 5 - (number of lidars). - """ - calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) - points = [] - cp_points = [] - flows = [] - - cartesian_range_images = frame_utils.convert_range_image_to_cartesian( - frame, range_images, range_image_top_pose, ri_index, keep_polar_features - ) - - for c in calibrations: - range_image = range_images[c.name][ri_index] - range_image_tensor = tf.reshape( - tf.convert_to_tensor(value=range_image.data), range_image.shape.dims - ) - range_image_mask = range_image_tensor[..., 0] > 0 - - range_image_cartesian = cartesian_range_images[c.name] - points_tensor = tf.gather_nd(range_image_cartesian, tf.compat.v1.where(range_image_mask)) - - flow = point_flows[c.name][ri_index] - flow_tensor = tf.reshape(tf.convert_to_tensor(value=flow.data), flow.shape.dims) - flow_points_tensor = tf.gather_nd(flow_tensor, tf.compat.v1.where(range_image_mask)) - - cp = camera_projections[c.name][ri_index] - cp_tensor = tf.reshape(tf.convert_to_tensor(value=cp.data), cp.shape.dims) - cp_points_tensor = tf.gather_nd(cp_tensor, tf.compat.v1.where(range_image_mask)) - - points.append(points_tensor.numpy()) - cp_points.append(cp_points_tensor.numpy()) - flows.append(flow_points_tensor.numpy()) - - return points, cp_points, flows - -def parse_range_image_and_camera_projection(frame): - """ - Parse range images and camera projections given a frame. - - Args: - frame: open dataset frame proto - - Returns: - range_images: A dict of {laser_name, - [range_image_first_return, range_image_second_return]}. 
- camera_projections: A dict of {laser_name, - [camera_projection_from_first_return, - camera_projection_from_second_return]}. - range_image_top_pose: range image pixel pose for top lidar. - """ - range_images = {} - camera_projections = {} - point_flows = {} - range_image_top_pose = None - for laser in frame.lasers: - if ( - len(laser.ri_return1.range_image_compressed) > 0 - ): # pylint: disable=g-explicit-length-test - range_image_str_tensor = tf.io.decode_compressed( - laser.ri_return1.range_image_compressed, "ZLIB" - ) - ri = dataset_pb2.MatrixFloat() - ri.ParseFromString(bytearray(range_image_str_tensor.numpy())) - range_images[laser.name] = [ri] - - if len(laser.ri_return1.range_image_flow_compressed) > 0: - range_image_flow_str_tensor = tf.io.decode_compressed( - laser.ri_return1.range_image_flow_compressed, "ZLIB" - ) - ri = dataset_pb2.MatrixFloat() - ri.ParseFromString(bytearray(range_image_flow_str_tensor.numpy())) - point_flows[laser.name] = [ri] - - if laser.name == dataset_pb2.LaserName.TOP: - range_image_top_pose_str_tensor = tf.io.decode_compressed( - laser.ri_return1.range_image_pose_compressed, "ZLIB" - ) - range_image_top_pose = dataset_pb2.MatrixFloat() - range_image_top_pose.ParseFromString( - bytearray(range_image_top_pose_str_tensor.numpy()) - ) - - camera_projection_str_tensor = tf.io.decode_compressed( - laser.ri_return1.camera_projection_compressed, "ZLIB" - ) - cp = dataset_pb2.MatrixInt32() - cp.ParseFromString(bytearray(camera_projection_str_tensor.numpy())) - camera_projections[laser.name] = [cp] - if ( - len(laser.ri_return2.range_image_compressed) > 0 - ): # pylint: disable=g-explicit-length-test - range_image_str_tensor = tf.io.decode_compressed( - laser.ri_return2.range_image_compressed, "ZLIB" - ) - ri = dataset_pb2.MatrixFloat() - ri.ParseFromString(bytearray(range_image_str_tensor.numpy())) - range_images[laser.name].append(ri) - - if len(laser.ri_return2.range_image_flow_compressed) > 0: - range_image_flow_str_tensor = 
tf.io.decode_compressed( - laser.ri_return2.range_image_flow_compressed, "ZLIB" - ) - ri = dataset_pb2.MatrixFloat() - ri.ParseFromString(bytearray(range_image_flow_str_tensor.numpy())) - point_flows[laser.name].append(ri) - - camera_projection_str_tensor = tf.io.decode_compressed( - laser.ri_return2.camera_projection_compressed, "ZLIB" - ) - cp = dataset_pb2.MatrixInt32() - cp.ParseFromString(bytearray(camera_projection_str_tensor.numpy())) - camera_projections[laser.name].append(cp) - return range_images, camera_projections, point_flows, range_image_top_pose - -def get_car_pc_global_pc_flow_transform(frame: dataset_pb2.Frame): - - # Parse the frame lidar data into range images. - range_images, camera_projections, point_flows, range_image_top_poses = parse_range_image_and_camera_projection(frame) - - # Project the range images into points. - points_lst, cp_points, flows_lst = convert_range_image_to_point_cloud( - frame, - range_images, - camera_projections, - point_flows, - range_image_top_poses, - keep_polar_features=True) - - car_frame_pc = points_lst[0][:, 3:] - car_frame_flows = flows_lst[0][:, :3] - car_frame_labels = flows_lst[0][:, 3] - num_points = car_frame_pc.shape[0] - - # # Transform the points from the vehicle frame to the world frame. - world_frame_pc = np.concatenate([car_frame_pc, np.ones([num_points, 1])], axis=-1) - car_to_global_transform = np.reshape(np.array(frame.pose.transform), [4, 4]) - world_frame_pc = np.transpose(np.matmul(car_to_global_transform, np.transpose(world_frame_pc)))[:, :3] - - # # Transform the points from the world frame to the map frame. 
- offset = frame.map_pose_offset - points_offset = np.array([offset.x, offset.y, offset.z]) - world_frame_pc += points_offset - - calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) - for c in calibrations: - if calibrations[0].name == dataset_pb2.LaserName.TOP: - break - ego2sensor = np.reshape(np.array(c.extrinsic.transform), [4, 4]) - car_frame_pc[:, 2] = car_frame_pc[:, 2] - ego2sensor[2, 3] # move center to sensor height - return ( - car_frame_pc, - world_frame_pc, - car_frame_flows, - car_frame_labels, - car_to_global_transform, - ) - -def load_ground_height_raster(map_path: Path): - - raster_heightmap = np.load(map_path / "ground_height.npy") - transform = json.load(open(map_path / "se2.json")) - - transform_rotation = np.array(transform["R"]).reshape(2, 2) - transform_translation = np.array(transform["t"]) - transform_scale = np.array(transform["s"]) - - transform_se2 = SE2(rotation=transform_rotation, translation=transform_translation) - - return raster_heightmap, transform_se2, transform_scale - -def process_log(data_dir: Path, log, log_map_folder, output_dir: Path, n = None) : - - def create_group_data(group, pc, pose, gm = None, flow_0to1=None, flow_valid=None, flow_category=None, ego_motion=None): - group.create_dataset('lidar', data=pc.astype(np.float32)) - group.create_dataset('pose', data=pose.astype(np.float64)) - if ego_motion is not None: - group.create_dataset('ego_motion', data=ego_motion.astype(np.float32)) - if gm is not None: - group.create_dataset('ground_mask', data=gm.astype(bool)) - if flow_0to1 is not None: - group.create_dataset('flow', data=flow_0to1.astype(np.float32)) - flow_valid = np.ones_like(gm) - flow_valid[gm] = 0 - INSIDE_RANGE = np.logical_and(pc[:, 0] < RANGE_MAX_VALID, pc[:, 0] > -RANGE_MAX_VALID) & np.logical_and(pc[:, 1] < RANGE_MAX_VALID, pc[:, 1] > -RANGE_MAX_VALID) - flow_valid[~INSIDE_RANGE] = 0 - group.create_dataset('flow_is_valid', data=flow_valid.astype(bool)) - # From the Waymo Open 
dataset.proto: - # // If the point is not annotated with scene flow information, class is set - # // to -1. A point is not annotated if it is in a no-label zone or if its label - # // bounding box does not have a corresponding match in the previous frame, - # // making it infeasible to estimate the motion of the point. - # // Otherwise, (vx, vy, vz) are velocity along (x, y, z)-axis for this point - # // and class is set to one of the following values: - # // -1: no-flow-label, the point has no flow information. - # // 0: unlabeled or "background,", i.e., the point is not contained in a - # // bounding box. - # // 1: vehicle, i.e., the point corresponds to a vehicle label box. - # // 2: pedestrian, i.e., the point corresponds to a pedestrian label box. - # // 3: sign, i.e., the point corresponds to a sign label box. - # // 4: cyclist, i.e., the point corresponds to a cyclist label box. - # replace all -1 and 0 to 0 - flow_category[flow_category < 0] = 0 # NONE, Background - # replace 1 to 19, since av2 index 19 is for REGULAR_VEHICLE - flow_category[flow_category == 1] = 19 - # replace 2 to 17, since av2 index 16 is for PEDESTRIAN - flow_category[flow_category == 2] = 17 - # replace 3 to 21, since av2 index 21 is for SIGN - flow_category[flow_category == 3] = 21 - # no replace 4 to 4, since av2 index 4 is for BICYCLIST - group.create_dataset('flow_category_indices', data=flow_category.astype(np.int8)) - - raster_heightmap, transform_se2, transform_scale = load_ground_height_raster(log_map_folder.parent / log_map_folder.stem) - all_data = list(tf.data.TFRecordDataset(data_dir / log, compression_type='').as_numpy_iterator()) - first_frame = dataset_pb2.Frame.FromString(bytearray(all_data[0])) - scene_id = first_frame.context.name - total_lens = len(all_data) - for data_idx in range(1, total_lens): - if data_idx >= total_lens - 2: - # 0: no correct flow label, end(total_lens - 1) - 1: no correct pose flow - continue - frame = 
dataset_pb2.Frame.FromString(bytearray(all_data[data_idx])) - if scene_id != frame.context.name: - print(f"Scene ID mismatch: {scene_id} vs {frame.context.name}") - break - car_frame_pc, global_frame_pc, flow, label, pose = get_car_pc_global_pc_flow_transform(frame) - _, _, _, _, pose1 = get_car_pc_global_pc_flow_transform(dataset_pb2.Frame.FromString(bytearray(all_data[data_idx+1]))) - ego_motion = np.linalg.inv(pose1) @ pose - pose_flow = car_frame_pc[:, :3] @ ego_motion[:3, :3].T + ego_motion[:3, 3] - car_frame_pc[:, :3] - ground_mask = is_ground_points(raster_heightmap, transform_se2, transform_scale, global_frame_pc) - timestamp = frame.timestamp_micros - if car_frame_pc.shape[0] < 256: - print(f'{scene_id}/{timestamp} has less than 256 points, skip this scenarios. Please check the data if needed.') - break - with h5py.File(output_dir/f'{scene_id}.h5', 'a') as f: - group = f.create_group(str(timestamp)) - create_group_data(group, car_frame_pc, pose, ego_motion=ego_motion, gm=np.array(ground_mask), flow_0to1=(flow/10.0+pose_flow), flow_category=label) - # if data_idx > 10: - # break - -def proc(x, ignore_current_process=False): - if not ignore_current_process: - current=current_process() - pos = current._identity[0] - else: - pos = 1 - process_log(*x, n=pos) - -def process_logs(data_dir: Path, map_dir: Path, output_dir: Path, nproc: int): - """Compute sceneflow for all logs in the dataset. Logs are processed in parallel. - Args: - data_dir: Argoverse 2.0 directory - output_dir: Output directory. 
- """ - if not (data_dir).exists(): - print(f'{data_dir} not found') - return - - logs = sorted(os.listdir(data_dir)) - args = sorted([(data_dir, log, map_dir/log, output_dir) for log in logs]) - print(f'Using {nproc} processes to process {len(args)} logs.') - - # # for debug - # for x in tqdm(args): - # proc(x, ignore_current_process=True) - # break - - if nproc <= 1: - for x in tqdm(args): - proc(x, ignore_current_process=True) - else: - with Pool(processes=nproc) as p: - res = list(tqdm(p.imap_unordered(proc, args), total=len(args), ncols=100)) - -def main( - flow_data_dir: str = "/home/kin/data/waymo/flowlabel", - mode: str = "test", - map_dir: str = "/home/kin/data/waymo/flowlabel/map", - output_dir: str ="/home/kin/data/waymo/flowlabel/preprocess", - nproc: int = (multiprocessing.cpu_count() - 1), - create_index_only: bool = False, -): - output_dir_ = Path(output_dir) / mode - if create_index_only: - create_reading_index(Path(output_dir_)) - return - output_dir_.mkdir(exist_ok=True, parents=True) - process_logs(Path(flow_data_dir) / mode, Path(map_dir), output_dir_, nproc) - create_reading_index(output_dir_) - -if __name__ == '__main__': - start_time = time.time() - fire.Fire(main) - print(f"\nTime used: {(time.time() - start_time)/60:.2f} mins") \ No newline at end of file diff --git a/dataprocess/misc_data.py b/dataprocess/misc_data.py deleted file mode 100644 index 9698a77..0000000 --- a/dataprocess/misc_data.py +++ /dev/null @@ -1,91 +0,0 @@ -import numpy as np -import pickle, h5py, os, time -from pathlib import Path -from tqdm import tqdm - -def create_reading_index(data_dir: Path): - start_time = time.time() - data_index = [] - for file_name in tqdm(os.listdir(data_dir), ncols=100): - if not file_name.endswith(".h5"): - continue - scene_id = file_name.split(".")[0] - timestamps = [] - with h5py.File(data_dir/file_name, 'r') as f: - timestamps.extend(f.keys()) - timestamps.sort(key=lambda x: int(x)) # make sure the timestamps are in order - for timestamp 
in timestamps: - data_index.append([scene_id, timestamp]) - - with open(data_dir/'index_total.pkl', 'wb') as f: - pickle.dump(data_index, f) - print(f"Create reading index Successfully, cost: {time.time() - start_time:.2f} s") - -class SE2: - - def __init__(self, rotation: np.ndarray, translation: np.ndarray) -> None: - """Initialize. - Args: - rotation: np.ndarray of shape (2,2). - translation: np.ndarray of shape (2,1). - Raises: - ValueError: if rotation or translation do not have the required shapes. - """ - assert rotation.shape == (2, 2) - assert translation.shape == (2, ) - self.rotation = rotation - self.translation = translation - self.transform_matrix = np.eye(3) - self.transform_matrix[:2, :2] = self.rotation - self.transform_matrix[:2, 2] = self.translation - - def transform_point_cloud(self, point_cloud: np.ndarray) -> np.ndarray: - """Apply the SE(2) transformation to point_cloud. - Args: - point_cloud: np.ndarray of shape (N, 2). - Returns: - transformed_point_cloud: np.ndarray of shape (N, 2). - Raises: - ValueError: if point_cloud does not have the required shape. - """ - assert point_cloud.ndim == 2 - assert point_cloud.shape[1] == 2 - num_points = point_cloud.shape[0] - homogeneous_pts = np.hstack([point_cloud, np.ones((num_points, 1))]) - transformed_point_cloud = homogeneous_pts.dot(self.transform_matrix.T) - return transformed_point_cloud[:, :2] - - def inverse(self) -> "SE2": - """Return the inverse of the current SE2 transformation. - For example, if the current object represents target_SE2_src, we will return instead src_SE2_target. - Returns: - inverse of this SE2 transformation. - """ - return SE2(rotation=self.rotation.T, - translation=self.rotation.T.dot(-self.translation)) - - def inverse_transform_point_cloud(self, - point_cloud: np.ndarray) -> np.ndarray: - """Transform the point_cloud by the inverse of this SE2. - Args: - point_cloud: Numpy array of shape (N,2). - Returns: - point_cloud transformed by the inverse of this SE2. 
- """ - return self.inverse().transform_point_cloud(point_cloud) - - def compose(self, right_se2: "SE2") -> "SE2": - """Multiply this SE2 from right by right_se2 and return the composed transformation. - Args: - right_se2: SE2 object to multiply this object by from right. - Returns: - The composed transformation. - """ - chained_transform_matrix = self.transform_matrix.dot( - right_se2.transform_matrix) - chained_se2 = SE2( - rotation=chained_transform_matrix[:2, :2], - translation=chained_transform_matrix[:2, 2], - ) - return chained_se2 - \ No newline at end of file diff --git a/environment.yaml b/environment.yaml deleted file mode 100644 index 2d54225..0000000 --- a/environment.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: seflow -channels: - - conda-forge - - pytorch -dependencies: - - python=3.8 - - pytorch::pytorch=2.0.0 - - pytorch::torchvision - - pytorch::pytorch-cuda=11.7 - - nvidia/label/cuda-11.7.0::cuda - - lightning==2.0.1 - - mkl==2024.0.0 - - numba - - numpy - - pandas - - pip - - scipy - - tqdm - - h5py - - wandb - - omegaconf - - hydra-core - - fire - - tabulate - - scikit-learn==1.3.2 - - hdbscan - - setuptools==69.5.1 - - pip: - - open3d==0.18.0 - - dztimer - - av2==0.2.1 - - dufomap==1.0.0 - -# Reason about the version fixed: -# setuptools==68.5.1: https://github.com/aws-neuron/aws-neuron-sdk/issues/893 -# mkl==2024.0.0: https://github.com/pytorch/pytorch/issues/123097#issue-2218541307 -# av2==0.2.1: in case other version deleted some functions. -# lightning==2.0.1: https://stackoverflow.com/questions/76647518/how-to-fix-error-cannot-import-name-modelmetaclass-from-pydantic-main -# open3d==0.18.0: because 0.17.0 have bug on set the view json file -# dufomap==1.0.0: in case later updating may not compatible with the code. 
\ No newline at end of file diff --git a/envprocess.yaml b/envprocess.yaml deleted file mode 100644 index 2d676a3..0000000 --- a/envprocess.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: dataprocess -channels: - - conda-forge - - pytorch -dependencies: - - python=3.8 - - pytorch::pytorch=2.0.0 - - pytorch::torchvision - - mkl==2024.0.0 - - numba - - numpy - - pandas - - pip - - scipy - - tqdm - - fire - - hdbscan - - s5cmd - - pip: - - nuscenes-devkit - - av2==0.2.1 - - waymo-open-dataset-tf-2.11.0==1.5.0 - - dufomap==1.0.0 - - linefit - - dztimer - -# Reason about the version fixed: -# numpy==1.22: package conflicts, need numpy higher or same 1.22 -# mkl==2024.0.0: https://github.com/pytorch/pytorch/issues/123097 \ No newline at end of file diff --git a/eval.py b/eval.py deleted file mode 100644 index c2449a5..0000000 --- a/eval.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -# Created: 2023-08-09 10:28 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. - -# Description: Output the evaluation results, go for local evaluation or online evaluation -""" - -import torch -from torch.utils.data import DataLoader -import lightning.pytorch as pl -from lightning.pytorch.loggers import WandbLogger -from omegaconf import DictConfig -import hydra, wandb, os, sys -from hydra.core.hydra_config import HydraConfig -from src.dataset import HDF5Dataset -from src.trainer import ModelWrapper - -def precheck_cfg_valid(cfg): - if os.path.exists(cfg.dataset_path + f"/{cfg.av2_mode}") is False: - raise ValueError(f"Dataset {cfg.dataset_path}/{cfg.av2_mode} does not exist. Please check the path.") - if cfg.supervised_flag not in [True, False]: - raise ValueError(f"Supervised flag {cfg.supervised_flag} is not valid. 
Please set it to True or False.") - if cfg.leaderboard_version not in [1, 2]: - raise ValueError(f"Leaderboard version {cfg.leaderboard_version} is not valid. Please set it to 1 or 2.") - return cfg - -@hydra.main(version_base=None, config_path="conf", config_name="eval") -def main(cfg): - pl.seed_everything(cfg.seed, workers=True) - output_dir = HydraConfig.get().runtime.output_dir - - if not os.path.exists(cfg.checkpoint): - print(f"Checkpoint {cfg.checkpoint} does not exist. Need checkpoints for evaluation.") - sys.exit(1) - - torch_load_ckpt = torch.load(cfg.checkpoint) - checkpoint_params = DictConfig(torch_load_ckpt["hyper_parameters"]) - cfg.output = checkpoint_params.cfg.output + f"-e{torch_load_ckpt['epoch']}-{cfg.av2_mode}-v{cfg.leaderboard_version}" - cfg.model.update(checkpoint_params.cfg.model) - - mymodel = ModelWrapper.load_from_checkpoint(cfg.checkpoint, cfg=cfg, eval=True) - print(f"\n---LOG[eval]: Loaded model from {cfg.checkpoint}. The backbone network is {checkpoint_params.cfg.model.name}.\n") - - wandb_logger = WandbLogger(save_dir=output_dir, - entity="kth-rpl", - project=f"deflow-eval", - name=f"{cfg.output}", - offline=(cfg.wandb_mode == "offline")) - - trainer = pl.Trainer(logger=wandb_logger, devices=1) - # NOTE(Qingwen): search & check: def eval_only_step_(self, batch, res_dict) - trainer.validate(model = mymodel, \ - dataloaders = DataLoader( \ - HDF5Dataset(cfg.dataset_path + f"/{cfg.av2_mode}", \ - n_frames=checkpoint_params.cfg.num_frames if 'num_frames' in checkpoint_params.cfg else 2, \ - eval=True, leaderboard_version=cfg.leaderboard_version), \ - batch_size=1, shuffle=False)) - wandb.finish() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/logs/.gitkeep b/logs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/lossfuncs.py b/lossfunc.py similarity index 100% rename from src/lossfuncs.py rename to lossfunc.py diff --git a/process.py b/process.py deleted file mode 100644 index 
734c68a..0000000 --- a/process.py +++ /dev/null @@ -1,153 +0,0 @@ -""" -# Created: 2023-11-04 15:55 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of SeFlow (https://github.com/KTH-RPL/SeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. -# -# Description: run dufomap on the dataset we preprocessed for afterward ssl training. -# it's only needed for ssl train but not inference. -# Goal to segment dynamic and static point roughly. -""" - -from pathlib import Path -from tqdm import tqdm -import numpy as np -import fire, time, h5py, os -from hdbscan import HDBSCAN - -from src.utils.mics import HDF5Data, transform_to_array -from dufomap import dufomap - -MIN_AXIS_RANGE = 2 # HARD CODED: remove ego vehicle points -MAX_AXIS_RANGE = 50 # HARD CODED: remove far away points - -def run_cluster( - data_dir: str ="/home/kin/data/av2/preprocess/sensor/train", - scene_range: list = [0, 1], - interval: int = 1, # useless here, just for the same interface args - overwrite: bool = False, -): - data_path = Path(data_dir) - dataset = HDF5Data(data_path) - all_scene_ids = list(dataset.scene_id_bounds.keys()) - for scene_in_data_index, scene_id in enumerate(all_scene_ids): - start_time = time.time() - # NOTE (Qingwen): so the scene id range is [start, end) - if scene_range[0]!= -1 and scene_range[-1]!= -1 and (scene_in_data_index < scene_range[0] or scene_in_data_index >= scene_range[1]): - continue - bounds = dataset.scene_id_bounds[scene_id] - flag_exist_label = True - with h5py.File(os.path.join(data_path, f'{scene_id}.h5'), 'r+') as f: - for ii in range(bounds["min_index"], bounds["max_index"]+1): - key = str(dataset[ii]['timestamp']) - if 'label' not in f[key]: - flag_exist_label = False - break - if flag_exist_label and not overwrite: - print(f"==> Scene {scene_id} has plus label, skip.") - continue - - hdb = 
HDBSCAN(min_cluster_size=20, cluster_selection_epsilon=0.7) - for i in tqdm(range(bounds["min_index"], bounds["max_index"]+1), desc=f"Start Plus Cluster: {scene_in_data_index}/{len(all_scene_ids)}", ncols=80): - data = dataset[i] - pc0 = data['pc0'][:,:3] - cluster_label = np.zeros(pc0.shape[0], dtype= np.int16) - - if "dufo_label" not in data: - print(f"Warning: {scene_id} {data['timestamp']} has no dufo_label, will be skipped. Better to rerun dufomap again in this scene.") - continue - elif data["dufo_label"].sum() < 20: - print(f"Warning: {scene_id} {data['timestamp']} has no dynamic points, will be skipped. Better to check this scene.") - else: - hdb.fit(pc0[data["dufo_label"]==1]) - # NOTE(Qingwen): since -1 will be assigned if no cluster. We set it to 0. - cluster_label[data["dufo_label"]==1] = hdb.labels_ + 1 - - # save labels - timestamp = data['timestamp'] - key = str(timestamp) - with h5py.File(os.path.join(data_path, f'{scene_id}.h5'), 'r+') as f: - if 'label' in f[key]: - # print(f"Warning: {scene_id} {timestamp} has label, will be overwritten.") - del f[key]['label'] - f[key].create_dataset('label', data=np.array(cluster_label).astype(np.int16)) - print(f"==> Scene {scene_id} finished, used: {(time.time() - start_time)/60:.2f} mins") - print(f"Data inside {str(data_path)} finished. 
Check the result with vis() function if you want to visualize them.") - -def run_dufo( - data_dir: str ="/home/kin/data/av2/preprocess/sensor/train", - scene_range: list = [0, 1], - interval: int = 1, # interval frames to run dufomap - overwrite: bool = False, -): - data_path = Path(data_dir) - dataset = HDF5Data(data_path) - all_scene_ids = list(dataset.scene_id_bounds.keys()) - for scene_in_data_index, scene_id in enumerate(all_scene_ids): - start_time = time.time() - # NOTE (Qingwen): so the scene id range is [start, end) - if scene_range[0]!= -1 and scene_range[-1]!= -1 and (scene_in_data_index < scene_range[0] or scene_in_data_index >= scene_range[1]): - continue - bounds = dataset.scene_id_bounds[scene_id] - flag_has_dufo_label = True - with h5py.File(os.path.join(data_path, f'{scene_id}.h5'), 'r+') as f: - for ii in range(bounds["min_index"], bounds["max_index"]+1): - key = str(dataset[ii]['timestamp']) - if "dufo_label" not in f[key]: - flag_has_dufo_label = False - break - if flag_has_dufo_label and not overwrite: - print(f"==> Scene {scene_id} has dufo_label, skip.") - continue - - mydufo = dufomap(0.2, 0.2, 1, num_threads=12) # resolution, d_s, d_p, hit_extension - mydufo.setCluster(0, 20, 0.2) # depth=0, min_points=20, max_dist=0.2 - - print(f"==> Scene {scene_id} start, data path: {data_path}") - for i in tqdm(range(bounds["min_index"], bounds["max_index"]+1), desc=f"Dufo run: {scene_in_data_index}/{len(all_scene_ids)}", ncols=80): - if interval != 1 and i % interval != 0 and (i + interval//2 < bounds["max_index"] or i - interval//2 > bounds["min_index"]): - continue - data = dataset[i] - assert data['scene_id'] == scene_id, f"Check the data, scene_id {scene_id} is not consistent in {i}th data in {scene_in_data_index}th scene." 
- # HARD CODED: remove points outside the range - norm_pc0 = np.linalg.norm(data['pc0'][:, :3], axis=1) - range_mask = ( - (norm_pc0>MIN_AXIS_RANGE) & - (norm_pc0 Scene {scene_id} finished, used: {(time.time() - start_time)/60:.2f} mins") - print(f"Data inside {str(data_path)} finished. Check the result with vis() function if you want to visualize them.") - -if __name__ == '__main__': - start_time = time.time() - # step 1: run dufomap - fire.Fire(run_dufo) - # step 2: run cluster on dufolabel - fire.Fire(run_cluster) - - print(f"\nTime used: {(time.time() - start_time)/60:.2f} mins") \ No newline at end of file diff --git a/save.py b/save.py deleted file mode 100644 index 00bf259..0000000 --- a/save.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -# Created: 2023-12-26 12:41 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. - -# Description: produce flow based on model predict and write into the dataset, -# then use `tools/visualization.py` to visualize the flow. -""" - -import torch -from torch.utils.data import DataLoader -import lightning.pytorch as pl -from lightning.pytorch.loggers import WandbLogger -from omegaconf import DictConfig, OmegaConf -import hydra, wandb, os, sys -from hydra.core.hydra_config import HydraConfig -from src.dataset import HDF5Dataset -from src.trainer import ModelWrapper -from src.utils import bc - -@hydra.main(version_base=None, config_path="conf", config_name="save") -def main(cfg): - pl.seed_everything(cfg.seed, workers=True) - output_dir = HydraConfig.get().runtime.output_dir - - if not os.path.exists(cfg.checkpoint): - print(f"Checkpoint {cfg.checkpoint} does not exist. 
Need checkpoints for evaluation.") - sys.exit(1) - - if cfg.res_name is None: - cfg.res_name = cfg.checkpoint.split("/")[-1].split(".")[0] - print(f"{bc.BOLD}NOTE{bc.ENDC}: res_name is not specified, use {bc.OKBLUE}{cfg.res_name}{bc.ENDC} as default.") - - checkpoint_params = DictConfig(torch.load(cfg.checkpoint)["hyper_parameters"]) - cfg.output = checkpoint_params.cfg.output - cfg.model.update(checkpoint_params.cfg.model) - mymodel = ModelWrapper.load_from_checkpoint(cfg.checkpoint, cfg=cfg, eval=True) - - wandb_logger = WandbLogger(save_dir=output_dir, - entity="kth-rpl", - project=f"deflow-eval", - name=f"{cfg.output}", - offline=True) - - trainer = pl.Trainer(logger=wandb_logger, devices=1) - # NOTE(Qingwen): search & check in pl_model.py : def test_step(self, batch, res_dict) - trainer.test(model = mymodel, \ - dataloaders = DataLoader(\ - HDF5Dataset(cfg.dataset_path, n_frames=checkpoint_params.cfg.num_frames if 'num_frames' in checkpoint_params.cfg else 2), \ - batch_size=1, shuffle=False)) - wandb.finish() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/dataset.py b/src/dataset.py deleted file mode 100644 index 20b4fef..0000000 --- a/src/dataset.py +++ /dev/null @@ -1,217 +0,0 @@ -""" -# Created: 2023-11-04 15:52 -# Updated: 2024-07-12 23:16 -# -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/), Jaeyeul Kim (jykim94@dgist.ac.kr) -# -# Change Logs: -# 2024-07-12: Merged num_frame based on Flow4D model from Jaeyeul Kim. -# -# Description: Torch dataloader for the dataset we preprocessed. -# -""" - -import torch -from torch.utils.data import Dataset, DataLoader -import h5py, os, pickle, argparse, sys -from tqdm import tqdm -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' 
)) -sys.path.append(BASE_DIR) - -def collate_fn_pad(batch): - - num_frames = 2 - while f'pch{num_frames - 1}' in batch[0]: - num_frames += 1 - - # padding the data - pc0_after_mask_ground, pc1_after_mask_ground= [], [] - pch_after_mask_ground = [[] for _ in range(num_frames - 2)] - for i in range(len(batch)): - pc0_after_mask_ground.append(batch[i]['pc0'][~batch[i]['gm0']]) - pc1_after_mask_ground.append(batch[i]['pc1'][~batch[i]['gm1']]) - for j in range(1, num_frames - 1): - pch_after_mask_ground[j-1].append(batch[i][f'pch{j}'][~batch[i][f'gmh{j}']]) - - pc0_after_mask_ground = torch.nn.utils.rnn.pad_sequence(pc0_after_mask_ground, batch_first=True, padding_value=torch.nan) - pc1_after_mask_ground = torch.nn.utils.rnn.pad_sequence(pc1_after_mask_ground, batch_first=True, padding_value=torch.nan) - pch_after_mask_ground = [torch.nn.utils.rnn.pad_sequence(pch_, batch_first=True, padding_value=torch.nan) for pch_ in pch_after_mask_ground] - - - res_dict = { - 'pc0': pc0_after_mask_ground, - 'pc1': pc1_after_mask_ground, - 'pose0': [batch[i]['pose0'] for i in range(len(batch))], - 'pose1': [batch[i]['pose1'] for i in range(len(batch))] - } - - for j in range(1, num_frames - 1): - res_dict[f'pch{j}'] = pch_after_mask_ground[j-1] - res_dict[f'poseh{j}'] = [batch[i][f'poseh{j}'] for i in range(len(batch))] - - if 'flow' in batch[0]: - flow = torch.nn.utils.rnn.pad_sequence([batch[i]['flow'][~batch[i]['gm0']] for i in range(len(batch))], batch_first=True) - flow_is_valid = torch.nn.utils.rnn.pad_sequence([batch[i]['flow_is_valid'][~batch[i]['gm0']] for i in range(len(batch))], batch_first=True) - flow_category_indices = torch.nn.utils.rnn.pad_sequence([batch[i]['flow_category_indices'][~batch[i]['gm0']] for i in range(len(batch))], batch_first=True) - res_dict['flow'] = flow - res_dict['flow_is_valid'] = flow_is_valid - res_dict['flow_category_indices'] = flow_category_indices - - if 'ego_motion' in batch[0]: - res_dict['ego_motion'] = [batch[i]['ego_motion'] for i in 
range(len(batch))] - - if 'pc0_dynamic' in batch[0]: - pc0_dynamic_after_mask_ground, pc1_dynamic_after_mask_ground= [], [] - for i in range(len(batch)): - pc0_dynamic_after_mask_ground.append(batch[i]['pc0_dynamic'][~batch[i]['gm0']]) - pc1_dynamic_after_mask_ground.append(batch[i]['pc1_dynamic'][~batch[i]['gm1']]) - pc0_dynamic_after_mask_ground = torch.nn.utils.rnn.pad_sequence(pc0_dynamic_after_mask_ground, batch_first=True, padding_value=0) - pc1_dynamic_after_mask_ground = torch.nn.utils.rnn.pad_sequence(pc1_dynamic_after_mask_ground, batch_first=True, padding_value=0) - res_dict['pc0_dynamic'] = pc0_dynamic_after_mask_ground - res_dict['pc1_dynamic'] = pc1_dynamic_after_mask_ground - - return res_dict -class HDF5Dataset(Dataset): - def __init__(self, directory, n_frames=2, dufo=False, eval = False, leaderboard_version=1): - ''' - directory: the directory of the dataset - n_frames: the number of frames we use, default is 2: current, next if more then it's the history from current. - dufo: if True, we will read the dynamic cluster label - eval: if True, use the eval index - ''' - super(HDF5Dataset, self).__init__() - self.directory = directory - - with open(os.path.join(self.directory, 'index_total.pkl'), 'rb') as f: - self.data_index = pickle.load(f) - - self.eval_index = False - self.dufo = dufo - self.n_frames = n_frames - - if eval: - eval_index_file = os.path.join(self.directory, 'index_eval.pkl') - if leaderboard_version == 2: - print("Using index to leaderboard version 2!!") - eval_index_file = os.path.join(BASE_DIR, 'assets/docs/index_eval_v2.pkl') - if not os.path.exists(eval_index_file): - raise Exception(f"No eval index file found! 
Please check {self.directory}") - self.eval_index = eval - with open(os.path.join(self.directory, eval_index_file), 'rb') as f: - self.eval_data_index = pickle.load(f) - - self.scene_id_bounds = {} # 存储每个scene_id的最大最小timestamp和位置 - for idx, (scene_id, timestamp) in enumerate(self.data_index): - if scene_id not in self.scene_id_bounds: - self.scene_id_bounds[scene_id] = { - "min_timestamp": timestamp, - "max_timestamp": timestamp, - "min_index": idx, - "max_index": idx - } - else: - bounds = self.scene_id_bounds[scene_id] - # 更新最小timestamp和位置 - if timestamp < bounds["min_timestamp"]: - bounds["min_timestamp"] = timestamp - bounds["min_index"] = idx - # 更新最大timestamp和位置 - if timestamp > bounds["max_timestamp"]: - bounds["max_timestamp"] = timestamp - bounds["max_index"] = idx - - def __len__(self): - if self.eval_index: - return len(self.eval_data_index) - return len(self.data_index) - - def __getitem__(self, index_): - if self.eval_index: - scene_id, timestamp = self.eval_data_index[index_] - # find this one index in the total index - index_ = self.data_index.index([scene_id, timestamp]) - else: - scene_id, timestamp = self.data_index[index_] - # to make sure we have continuous frames - if self.scene_id_bounds[scene_id]["max_index"] == index_: - index_ = index_ - 1 - # get the data again - scene_id, timestamp = self.data_index[index_] - - key = str(timestamp) - with h5py.File(os.path.join(self.directory, f'{scene_id}.h5'), 'r') as f: - pc0 = torch.tensor(f[key]['lidar'][:][:,:3]) - gm0 = torch.tensor(f[key]['ground_mask'][:]) - pose0 = torch.tensor(f[key]['pose'][:]) - - next_timestamp = str(self.data_index[index_+1][1]) - pc1 = torch.tensor(f[next_timestamp]['lidar'][:][:,:3]) - gm1 = torch.tensor(f[next_timestamp]['ground_mask'][:]) - pose1 = torch.tensor(f[next_timestamp]['pose'][:]) - res_dict = { - 'scene_id': scene_id, - 'timestamp': key, - 'pc0': pc0, - 'gm0': gm0, - 'pose0': pose0, - 'pc1': pc1, - 'gm1': gm1, - 'pose1': pose1, - } - - if self.n_frames > 2: - 
past_frames = [] - num_past_frames = self.n_frames - 2 - - for i in range(1, num_past_frames + 1): - frame_index = index_ - i - if frame_index < self.scene_id_bounds[scene_id]["min_index"]: - frame_index = self.scene_id_bounds[scene_id]["min_index"] - - past_timestamp = str(self.data_index[frame_index][1]) - past_pc = torch.tensor(f[past_timestamp]['lidar'][:][:,:3]) - past_gm = torch.tensor(f[past_timestamp]['ground_mask'][:]) - past_pose = torch.tensor(f[past_timestamp]['pose'][:]) - - past_frames.append((past_pc, past_gm, past_pose)) - - for i, (past_pc, past_gm, past_pose) in enumerate(past_frames): - res_dict[f'pch{i+1}'] = past_pc - res_dict[f'gmh{i+1}'] = past_gm - res_dict[f'poseh{i+1}'] = past_pose - - if 'flow' in f[key]: - flow = torch.tensor(f[key]['flow'][:]) - flow_is_valid = torch.tensor(f[key]['flow_is_valid'][:]) - flow_category_indices = torch.tensor(f[key]['flow_category_indices'][:]) - res_dict['flow'] = flow - res_dict['flow_is_valid'] = flow_is_valid - res_dict['flow_category_indices'] = flow_category_indices - - if 'ego_motion' in f[key]: - ego_motion = torch.tensor(f[key]['ego_motion'][:]) - res_dict['ego_motion'] = ego_motion - - if self.dufo: - res_dict['pc0_dynamic'] = torch.tensor(f[key]['label'][:].astype('int16')) - res_dict['pc1_dynamic'] = torch.tensor(f[next_timestamp]['label'][:].astype('int16')) - - if self.eval_index: - # looks like v2 not follow the same rule as v1 with eval_mask provided - eval_mask = torch.tensor(f[key]['eval_mask'][:]) if 'eval_mask' in f[key] else torch.ones_like(pc0[:, 0], dtype=torch.bool) - res_dict['eval_mask'] = eval_mask - - return res_dict - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="DataLoader test") - parser.add_argument('--data_mode', '-m', type=str, default='val', metavar='N', help='Dataset mode.') - parser.add_argument('--data_dir', '-d', type=str, default='/home/kin/data/av2/preprocess/sensor', metavar='N', help='preprocess data path.') - options = 
parser.parse_args() - - # testing eval mode - dataset = HDF5Dataset(options.data_dir+"/"+options.data_mode, eval=True) - dataloader = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=16, collate_fn=collate_fn_pad) - for data in tqdm(dataloader, ncols=80, desc="eval mode"): - res_dict = data \ No newline at end of file diff --git a/src/models/__init__.py b/src/models/__init__.py deleted file mode 100644 index 3e7a139..0000000 --- a/src/models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .deflow import DeFlow -from .fastflow3d import FastFlow3D \ No newline at end of file diff --git a/src/models/basic/__init__.py b/src/models/basic/__init__.py deleted file mode 100644 index 7d46b47..0000000 --- a/src/models/basic/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -import torch -import torch.nn as nn - -@torch.no_grad() -def cal_pose0to1(pose0: torch.Tensor, pose1: torch.Tensor): - """ - Note(Qingwen 2023-12-05 11:09): - Don't know why but it needed set the pose to float64 to calculate the inverse - otherwise it will be not expected result.... 
- """ - pose1_inv = torch.eye(4, dtype=torch.float64, device=pose1.device) - pose1_inv[:3,:3] = pose1[:3,:3].T - pose1_inv[:3,3] = (pose1[:3,:3].T * -pose1[:3,3]).sum(axis=1) - pose_0to1 = pose1_inv @ pose0.type(torch.float64) - return pose_0to1.type(torch.float32) - -class ConvWithNorms(nn.Module): - - def __init__(self, in_num_channels: int, out_num_channels: int, - kernel_size: int, stride: int, padding: int): - super().__init__() - self.conv = nn.Conv2d(in_num_channels, out_num_channels, kernel_size, - stride, padding) - self.batchnorm = nn.BatchNorm2d(out_num_channels) - self.nonlinearity = nn.GELU() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - conv_res = self.conv(x) - if conv_res.shape[2] == 1 and conv_res.shape[3] == 1: - # This is a hack to get around the fact that batchnorm doesn't support - # 1x1 convolutions - batchnorm_res = conv_res - else: - batchnorm_res = self.batchnorm(conv_res) - return self.nonlinearity(batchnorm_res) \ No newline at end of file diff --git a/src/models/basic/decoder.py b/src/models/basic/decoder.py deleted file mode 100644 index 8206d5d..0000000 --- a/src/models/basic/decoder.py +++ /dev/null @@ -1,220 +0,0 @@ -import torch -import torch.nn as nn -from typing import List, Tuple, Dict -from . 
import ConvWithNorms - -SPLIT_BATCH_SIZE = 512 - -class MMHeadDecoder(nn.Module): - - def __init__(self, pseudoimage_channels: int = 64): - super().__init__() - - self.offset_encoder = nn.Linear(3, 128) - - # FIXME: figure out how to set nheads and num_layers properly - # ref: https://pytorch.org/docs/stable/generated/torch.nn.TransformerDecoder.html - # https://pytorch.org/docs/stable/generated/torch.nn.TransformerDecoderLayer.html - transform_decoder_layers = nn.TransformerDecoderLayer(d_model=128, nhead=4) - self.pts_off_transformer = nn.TransformerDecoder(transform_decoder_layers, num_layers=4) - - self.decoder = nn.Sequential( - nn.Linear(pseudoimage_channels*2, 32), nn.GELU(), - nn.Linear(32, 3)) - - def forward_single(self, before_pseudoimage: torch.Tensor, - after_pseudoimage: torch.Tensor, - point_offsets: torch.Tensor, - voxel_coords: torch.Tensor) -> torch.Tensor: - voxel_coords = voxel_coords.long() - # assert (voxel_coords[:, 0] == 0).all(), "Z index must be 0" - - # Voxel coords are Z, Y, X, and the pseudoimage is Channel, Y, X - # I have confirmed via visualization that these coordinates are correct. 
- after_voxel_vectors = after_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - before_voxel_vectors = before_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - - # [N, 64] [N, 64] -> [N, 128] - concatenated_vectors = torch.cat([before_voxel_vectors, after_voxel_vectors], dim=1) - - # [N, 128] [N, 128] -> [N, 1, 128] - voxel_feature = concatenated_vectors.unsqueeze(1) - point_offsets_feature = self.offset_encoder(point_offsets).unsqueeze(1) - concatenated_feature = torch.zeros_like(voxel_feature) - - for spilt_range in range(0, concatenated_feature.shape[0], SPLIT_BATCH_SIZE): - concatenated_feature[spilt_range:spilt_range+SPLIT_BATCH_SIZE] = self.pts_off_transformer( - voxel_feature[spilt_range:spilt_range+SPLIT_BATCH_SIZE], - point_offsets_feature[spilt_range:spilt_range+SPLIT_BATCH_SIZE] - ) - - flow = self.decoder(concatenated_feature.squeeze(1)) - return flow - - def forward( - self, before_pseudoimages: torch.Tensor, - after_pseudoimages: torch.Tensor, - voxelizer_infos: List[Dict[str, - torch.Tensor]]) -> List[torch.Tensor]: - - flow_results = [] - for before_pseudoimage, after_pseudoimage, voxelizer_info in zip( - before_pseudoimages, after_pseudoimages, voxelizer_infos): - point_offsets = voxelizer_info["point_offsets"] - voxel_coords = voxelizer_info["voxel_coords"] - flow = self.forward_single(before_pseudoimage, after_pseudoimage, - point_offsets, voxel_coords) - flow_results.append(flow) - return flow_results - -class LinearDecoder(nn.Module): - - def __init__(self, pseudoimage_channels: int = 64): - super().__init__() - - self.offset_encoder = nn.Linear(3, 128) - - self.decoder = nn.Sequential( - nn.Linear(pseudoimage_channels*4, 32), nn.GELU(), - nn.Linear(32, 3)) - - def forward_single(self, before_pseudoimage: torch.Tensor, - after_pseudoimage: torch.Tensor, - point_offsets: torch.Tensor, - voxel_coords: torch.Tensor) -> torch.Tensor: - voxel_coords = voxel_coords.long() - # assert (voxel_coords[:, 0] == 0).all(), "Z index must 
be 0" - - # Voxel coords are Z, Y, X, and the pseudoimage is Channel, Y, X - # I have confirmed via visualization that these coordinates are correct. - after_voxel_vectors = after_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - before_voxel_vectors = before_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - - # [N, 64] [N, 64] -> [N, 128] - concatenated_vectors = torch.cat([before_voxel_vectors, after_voxel_vectors], dim=1) - - # [N, 3] -> [N, 128] - point_offsets_feature = self.offset_encoder(point_offsets) - - flow = self.decoder(torch.cat([concatenated_vectors, point_offsets_feature], dim=1)) - return flow - - def forward( - self, before_pseudoimages: torch.Tensor, - after_pseudoimages: torch.Tensor, - voxelizer_infos: List[Dict[str, - torch.Tensor]]) -> List[torch.Tensor]: - - flow_results = [] - for before_pseudoimage, after_pseudoimage, voxelizer_info in zip( - before_pseudoimages, after_pseudoimages, voxelizer_infos): - point_offsets = voxelizer_info["point_offsets"] - voxel_coords = voxelizer_info["voxel_coords"] - flow = self.forward_single(before_pseudoimage, after_pseudoimage, - point_offsets, voxel_coords) - flow_results.append(flow) - return flow_results - -# from https://github.com/weiyithu/PV-RAFT/blob/main/model/update.py -class ConvGRU(nn.Module): - def __init__(self, input_dim=64, hidden_dim=128): - super(ConvGRU, self).__init__() - self.convz = nn.Conv1d(input_dim+hidden_dim, hidden_dim, 1) - self.convr = nn.Conv1d(input_dim+hidden_dim, hidden_dim, 1) - self.convq = nn.Conv1d(input_dim+hidden_dim, hidden_dim, 1) - - def forward(self, h, x): - hx = torch.cat([h, x], dim=1) - - z = torch.sigmoid(self.convz(hx)) - r = torch.sigmoid(self.convr(hx)) - rh_x = torch.cat([r*h, x], dim=1) - q = torch.tanh(self.convq(rh_x)) - - h = (1 - z) * h + z * q - return h - -class ConvGRUDecoder(nn.Module): - - def __init__(self, pseudoimage_channels: int = 64, num_iters: int = 4): - super().__init__() - - self.offset_encoder = nn.Linear(3, 
pseudoimage_channels) - - # NOTE: voxel feature is hidden input, point offset is input, check paper's Fig. 3 - self.gru = ConvGRU(input_dim=pseudoimage_channels, hidden_dim=pseudoimage_channels*2) - - self.decoder = nn.Sequential( - nn.Linear(pseudoimage_channels*3, pseudoimage_channels//2), nn.GELU(), - nn.Linear(pseudoimage_channels//2, 3)) - self.num_iters = num_iters - - def forward_single(self, before_pseudoimage: torch.Tensor, - after_pseudoimage: torch.Tensor, - point_offsets: torch.Tensor, - voxel_coords: torch.Tensor) -> torch.Tensor: - voxel_coords = voxel_coords.long() - # assert (voxel_coords[:, 0] == 0).all(), "Z index must be 0" - - # Voxel coords are Z, Y, X, and the pseudoimage is Channel, Y, X - # I have confirmed via visualization that these coordinates are correct. - after_voxel_vectors = after_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - before_voxel_vectors = before_pseudoimage[:, voxel_coords[:, 1], - voxel_coords[:, 2]].T - - # [N, 64] [N, 64] -> [N, 128] - concatenated_vectors = torch.cat([before_voxel_vectors, after_voxel_vectors], dim=1) - - # [N, 3] -> [N, 64] - point_offsets_feature = self.offset_encoder(point_offsets) - - # [N, 128] -> [N, 128, 1] - concatenated_vectors = concatenated_vectors.unsqueeze(2) - - for itr in range(self.num_iters): - concatenated_vectors = self.gru(concatenated_vectors, point_offsets_feature.unsqueeze(2)) - - flow = self.decoder(torch.cat([concatenated_vectors.squeeze(2), point_offsets_feature], dim=1)) - return flow - - def forward( - self, before_pseudoimages: torch.Tensor, - after_pseudoimages: torch.Tensor, - voxelizer_infos: List[Dict[str, - torch.Tensor]]) -> List[torch.Tensor]: - - flow_results = [] - for before_pseudoimage, after_pseudoimage, voxelizer_info in zip( - before_pseudoimages, after_pseudoimages, voxelizer_infos): - point_offsets = voxelizer_info["point_offsets"] - voxel_coords = voxelizer_info["voxel_coords"] - flow = self.forward_single(before_pseudoimage, 
after_pseudoimage, - point_offsets, voxel_coords) - flow_results.append(flow) - return flow_results - - -class ConvWithNorms(nn.Module): - - def __init__(self, in_num_channels: int, out_num_channels: int, - kernel_size: int, stride: int, padding: int): - super().__init__() - self.conv = nn.Conv2d(in_num_channels, out_num_channels, kernel_size, - stride, padding) - self.batchnorm = nn.BatchNorm2d(out_num_channels) - self.nonlinearity = nn.GELU() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - conv_res = self.conv(x) - if conv_res.shape[2] == 1 and conv_res.shape[3] == 1: - # This is a hack to get around the fact that batchnorm doesn't support - # 1x1 convolutions - batchnorm_res = conv_res - else: - batchnorm_res = self.batchnorm(conv_res) - return self.nonlinearity(batchnorm_res) \ No newline at end of file diff --git a/src/models/basic/encoder.py b/src/models/basic/encoder.py deleted file mode 100644 index 88e6c07..0000000 --- a/src/models/basic/encoder.py +++ /dev/null @@ -1,588 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from typing import List, Tuple - -from assets.cuda.mmcv import Voxelization -from assets.cuda.mmcv import DynamicScatter - -def get_paddings_indicator(actual_num, max_num, axis=0): - """Create boolean mask by actually number of a padded tensor. - Args: - actual_num (torch.Tensor): Actual number of points in each voxel. - max_num (int): Max number of points in each voxel - Returns: - torch.Tensor: Mask indicates which points are valid inside a voxel. 
- """ - actual_num = torch.unsqueeze(actual_num, axis + 1) - # tiled_actual_num: [N, M, 1] - max_num_shape = [1] * len(actual_num.shape) - max_num_shape[axis + 1] = -1 - max_num = torch.arange(max_num, dtype=torch.int, - device=actual_num.device).view(max_num_shape) - # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]] - # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]] - paddings_indicator = actual_num.int() > max_num - # paddings_indicator shape: [batch_size, max_num] - return paddings_indicator - -class PFNLayer(nn.Module): - """Pillar Feature Net Layer. - The Pillar Feature Net is composed of a series of these layers, but the - PointPillars paper results only used a single PFNLayer. - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - last_layer (bool, optional): If last_layer, there is no - concatenation of features. Defaults to False. - mode (str, optional): Pooling model to gather features inside voxels. - Defaults to 'max'. - """ - - def __init__(self, - in_channels, - out_channels, - last_layer=False, - mode='max'): - - super().__init__() - self.fp16_enabled = False - self.name = 'PFNLayer' - self.last_vfe = last_layer - if not self.last_vfe: - out_channels = out_channels // 2 - self.units = out_channels - - self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01) - self.linear = nn.Linear(in_channels, self.units, bias=False) - - assert mode in ['max', 'avg'] - self.mode = mode - - def forward(self, inputs, num_voxels=None, aligned_distance=None): - """Forward function. - Args: - inputs (torch.Tensor): Pillar/Voxel inputs with shape (N, M, C). - N is the number of voxels, M is the number of points in - voxels, C is the number of channels of point features. - num_voxels (torch.Tensor, optional): Number of points in each - voxel. Defaults to None. - aligned_distance (torch.Tensor, optional): The distance of - each points to the voxel center. Defaults to None. 
- Returns: - torch.Tensor: Features of Pillars. - """ - x = self.linear(inputs) - x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, - 1).contiguous() - x = F.gelu(x) - - if self.mode == 'max': - if aligned_distance is not None: - x = x.mul(aligned_distance.unsqueeze(-1)) - x_max = torch.max(x, dim=1, keepdim=True)[0] - elif self.mode == 'avg': - if aligned_distance is not None: - x = x.mul(aligned_distance.unsqueeze(-1)) - x_max = x.sum(dim=1, - keepdim=True) / num_voxels.type_as(inputs).view( - -1, 1, 1) - - if self.last_vfe: - return x_max - else: - x_repeat = x_max.repeat(1, inputs.shape[1], 1) - x_concatenated = torch.cat([x, x_repeat], dim=2) - return x_concatenated - -class PointPillarsScatter(nn.Module): - """Point Pillar's Scatter. - - Converts learned features from dense tensor to sparse pseudo image. - - Args: - in_channels (int): Channels of input features. - output_shape (list[int]): Required output shape of features. - """ - - def __init__(self, in_channels, output_shape): - super().__init__() - self.output_shape = output_shape - self.ny = output_shape[0] - self.nx = output_shape[1] - self.in_channels = in_channels - self.fp16_enabled = False - - def forward(self, voxel_features, coors, batch_size=None): - """Foraward function to scatter features.""" - # TODO: rewrite the function in a batch manner - # no need to deal with different batch cases - if batch_size is not None: - return self.forward_batch(voxel_features, coors, batch_size) - else: - return self.forward_single(voxel_features, coors) - - def forward_single(self, voxel_features, coors): - """Scatter features of single sample. - - Args: - voxel_features (torch.Tensor): Voxel features in shape (N, M, C). - coors (torch.Tensor): Coordinates of each voxel. 
- """ - # Create the canvas for this sample - canvas = torch.zeros( - self.in_channels, - self.nx * self.ny, - dtype=voxel_features.dtype, - device=voxel_features.device) - - indices = coors[:, 1] * self.nx + coors[:, 2] - indices = indices.long() - voxels = voxel_features.t() - # Now scatter the blob back to the canvas. - canvas[:, indices] = voxels - # Undo the column stacking to final 4-dim tensor - canvas = canvas.view(1, self.in_channels, self.ny, self.nx) - return canvas - - def forward_batch(self, voxel_features, coors, batch_size): - """Scatter features of single sample. - - Args: - voxel_features (torch.Tensor): Voxel features in shape (N, M, C). - coors (torch.Tensor): Coordinates of each voxel in shape (N, 4). - The first column indicates the sample ID. - batch_size (int): Number of samples in the current batch. - """ - # batch_canvas will be the final output. - batch_canvas = [] - for batch_itt in range(batch_size): - # Create the canvas for this sample - canvas = torch.zeros( - self.in_channels, - self.nx * self.ny, - dtype=voxel_features.dtype, - device=voxel_features.device) - - # Only include non-empty pillars - batch_mask = coors[:, 0] == batch_itt - this_coors = coors[batch_mask, :] - indices = this_coors[:, 2] * self.nx + this_coors[:, 3] - indices = indices.type(torch.long) - voxels = voxel_features[batch_mask, :] - voxels = voxels.t() - - # Now scatter the blob back to the canvas. - canvas[:, indices] = voxels - - # Append to a list for later stacking. - batch_canvas.append(canvas) - - # Stack to 3-dim tensor (batch-size, in_channels, nrows*ncols) - batch_canvas = torch.stack(batch_canvas, 0) - - # Undo the column stacking to final 4-dim tensor - batch_canvas = batch_canvas.view(batch_size, self.in_channels, self.ny, - self.nx) - - return batch_canvas - -class PillarFeatureNet(nn.Module): - """Pillar Feature Net. - The network prepares the pillar features and performs forward pass - through PFNLayers. 
- Args: - in_channels (int, optional): Number of input features, - either x, y, z or x, y, z, r. Defaults to 4. - feat_channels (tuple, optional): Number of features in each of the - N PFNLayers. Defaults to (64, ). - with_distance (bool, optional): Whether to include Euclidean distance - to points. Defaults to False. - with_cluster_center (bool, optional): [description]. Defaults to True. - with_voxel_center (bool, optional): [description]. Defaults to True. - voxel_size (tuple[float], optional): Size of voxels, only utilize x - and y size. Defaults to (0.2, 0.2, 4). - point_cloud_range (tuple[float], optional): Point cloud range, only - utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1). - mode (str, optional): The mode to gather point features. Options are - 'max' or 'avg'. Defaults to 'max'. - legacy (bool, optional): Whether to use the new behavior or - the original behavior. Defaults to True. - """ - - def __init__(self, - in_channels=4, - feat_channels=(64, ), - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - voxel_size=(0.2, 0.2, 4), - point_cloud_range=(0, -40, -3, 70.4, 40, 1), - mode='max'): - super(PillarFeatureNet, self).__init__() - assert len(feat_channels) > 0 - if with_cluster_center: - in_channels += 3 - if with_voxel_center: - in_channels += 3 - if with_distance: - in_channels += 1 - self._with_distance = with_distance - self._with_cluster_center = with_cluster_center - self._with_voxel_center = with_voxel_center - self.fp16_enabled = False - # Create PillarFeatureNet layers - self.in_channels = in_channels - feat_channels = [in_channels] + list(feat_channels) - pfn_layers = [] - for i in range(len(feat_channels) - 1): - in_filters = feat_channels[i] - out_filters = feat_channels[i + 1] - if i < len(feat_channels) - 2: - last_layer = False - else: - last_layer = True - pfn_layers.append( - PFNLayer(in_filters, - out_filters, - last_layer=last_layer, - mode=mode)) - self.pfn_layers = nn.ModuleList(pfn_layers) 
- - # Need pillar (voxel) size and x/y offset in order to calculate offset - self.vx = voxel_size[0] - self.vy = voxel_size[1] - self.vz = voxel_size[2] - self.x_offset = self.vx / 2 + point_cloud_range[0] - self.y_offset = self.vy / 2 + point_cloud_range[1] - self.z_offset = self.vz / 2 + point_cloud_range[2] - self.point_cloud_range = point_cloud_range - - def forward(self, features, num_points, coors): - """Forward function. - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - num_points (torch.Tensor): Number of points in each pillar. - coors (torch.Tensor): Coordinates of each voxel. - Returns: - torch.Tensor: Features of pillars. - """ - features_ls = [features] - # Find distance of x, y, and z from cluster center - if self._with_cluster_center: - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_points.type_as(features).view( - -1, 1, 1) - f_cluster = features[:, :, :3] - points_mean - features_ls.append(f_cluster) - - # Find distance of x, y, and z from pillar center - dtype = features.dtype - if self._with_voxel_center: - f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, :, 0] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vx + self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 1].to(dtype).unsqueeze(1) * self.vy + self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - ( - coors[:, 0].to(dtype).unsqueeze(1) * self.vz + self.z_offset) - features_ls.append(f_center) - - if self._with_distance: - points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - features_ls.append(points_dist) - - # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) - # The feature decorations were calculated without regard to whether - # pillar was empty. Need to ensure that - # empty pillars remain set to zeros. 
- voxel_count = features.shape[1] - mask = get_paddings_indicator(num_points, voxel_count, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(features) - features *= mask - - for pfn in self.pfn_layers: - features = pfn(features, num_points) - - return features.squeeze(1) - -class DynamicPillarFeatureNet(PillarFeatureNet): - """Pillar Feature Net using dynamic voxelization. - The network prepares the pillar features and performs forward pass - through PFNLayers. The main difference is that it is used for - dynamic voxels, which contains different number of points inside a voxel - without limits. - Args: - in_channels (int, optional): Number of input features, - either x, y, z or x, y, z, r. Defaults to 4. - feat_channels (tuple, optional): Number of features in each of the - N PFNLayers. Defaults to (64, ). - with_distance (bool, optional): Whether to include Euclidean distance - to points. Defaults to False. - with_cluster_center (bool, optional): [description]. Defaults to True. - with_voxel_center (bool, optional): [description]. Defaults to True. - voxel_size (tuple[float], optional): Size of voxels, only utilize x - and y size. Defaults to (0.2, 0.2, 4). - point_cloud_range (tuple[float], optional): Point cloud range, only - utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1). - norm_cfg ([type], optional): [description]. - Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). - mode (str, optional): The mode to gather point features. Options are - 'max' or 'avg'. Defaults to 'max'. - legacy (bool, optional): Whether to use the new behavior or - the original behavior. Defaults to True. 
- """ - - def __init__(self, - in_channels, - voxel_size, - point_cloud_range, - feat_channels=(64, ), - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - mode='max'): - super(DynamicPillarFeatureNet, - self).__init__(in_channels, - feat_channels, - with_distance, - with_cluster_center=with_cluster_center, - with_voxel_center=with_voxel_center, - voxel_size=voxel_size, - point_cloud_range=point_cloud_range, - mode=mode) - self.fp16_enabled = False - feat_channels = [self.in_channels] + list(feat_channels) - pfn_layers = [] - # TODO: currently only support one PFNLayer - - for i in range(len(feat_channels) - 1): - in_filters = feat_channels[i] - out_filters = feat_channels[i + 1] - if i > 0: - in_filters *= 2 - pfn_layers.append( - nn.Sequential( - nn.Linear(in_filters, out_filters, bias=False), - nn.BatchNorm1d(out_filters, eps=1e-3, momentum=0.01), - nn.ReLU(inplace=True))) - self.num_pfn = len(pfn_layers) - self.pfn_layers = nn.ModuleList(pfn_layers) - self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range, - (mode != 'max')) - self.cluster_scatter = DynamicScatter(voxel_size, - point_cloud_range, - average_points=True) - - def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors): - """Map the centers of voxels to its corresponding points. - Args: - pts_coors (torch.Tensor): The coordinates of each points, shape - (M, 3), where M is the number of points. - voxel_mean (torch.Tensor): The mean or aggregated features of a - voxel, shape (N, C), where N is the number of voxels. - voxel_coors (torch.Tensor): The coordinates of each voxel. - Returns: - torch.Tensor: Corresponding voxel centers of each points, shape - (M, C), where M is the number of points. 
- """ - if pts_coors.shape[0] == 0: - return torch.zeros((0, voxel_mean.shape[1]), - dtype=voxel_mean.dtype, - device=voxel_mean.device) - # Step 1: scatter voxel into canvas - # Calculate necessary things for canvas creation - assert voxel_mean.shape[0] == voxel_coors.shape[ - 0], f"voxel_mean.shape[0] {voxel_mean.shape[0]} != voxel_coors.shape[0] {voxel_coors.shape[0]}" - assert pts_coors.shape[ - 1] == 3, f"pts_coors.shape[1] {pts_coors.shape[1]} != 3" - assert voxel_coors.shape[ - 1] == 3, f"voxel_coors.shape[1] {voxel_coors.shape[1]} != 3" - - canvas_y = int( - (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy) - canvas_x = int( - (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx) - canvas_channel = voxel_mean.size(1) - batch_size = pts_coors[:, 0].max() + 1 - - canvas_len = canvas_y * canvas_x * batch_size - # Create the canvas for this sample - canvas = voxel_mean.new_zeros(canvas_channel, canvas_len) - # Only include non-empty pillars - indices = (voxel_coors[:, 0] * canvas_y * canvas_x + - voxel_coors[:, 2] * canvas_x + voxel_coors[:, 1]) - assert indices.long().max() < canvas_len, 'Index out of range' - assert indices.long().min() >= 0, 'Index out of range' - # Scatter the blob back to the canvas - canvas[:, indices.long()] = voxel_mean.t() - # Step 2: get voxel mean for each point - voxel_index = (pts_coors[:, 0] * canvas_y * canvas_x + - pts_coors[:, 2] * canvas_x + pts_coors[:, 1]) - assert voxel_index.long().max() < canvas_len, 'Index out of range' - assert voxel_index.long().min() >= 0, 'Index out of range' - center_per_point = canvas[:, voxel_index.long()].t() - return center_per_point - - def forward(self, features, coors): - """Forward function. - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - coors (torch.Tensor): Coordinates of each voxel - Returns: - torch.Tensor: Features of pillars. 
- """ - features_ls = [features] - # Find distance of x, y, and z from cluster center - if self._with_cluster_center: - voxel_mean, mean_coors = self.cluster_scatter(features, coors) - points_mean = self.map_voxel_center_to_point( - coors, voxel_mean, mean_coors) - # TODO: maybe also do cluster for reflectivity - f_cluster = features[:, :3] - points_mean[:, :3] - features_ls.append(f_cluster) - - # Find distance of x, y, and z from pillar center - if self._with_voxel_center: - f_center = features.new_zeros(size=(features.size(0), 3)) - f_center[:, 0] = features[:, 0] - ( - coors[:, 2].type_as(features) * self.vx + self.x_offset) - f_center[:, 1] = features[:, 1] - ( - coors[:, 1].type_as(features) * self.vy + self.y_offset) - f_center[:, 2] = features[:, 2] - ( - coors[:, 0].type_as(features) * self.vz + self.z_offset) - features_ls.append(f_center) - - if self._with_distance: - points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True) - features_ls.append(points_dist) - - # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) - for i, pfn in enumerate(self.pfn_layers): - point_feats = pfn(features) - voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors) - if i != len(self.pfn_layers) - 1: - # need to concat voxel feats if it is not the last pfn - feat_per_point = self.map_voxel_center_to_point( - coors, voxel_feats, voxel_coors) - features = torch.cat([point_feats, feat_per_point], dim=1) - - return voxel_feats, voxel_coors, point_feats - -class HardVoxelizer(nn.Module): - - def __init__(self, voxel_size, point_cloud_range, - max_points_per_voxel: int): - super().__init__() - assert max_points_per_voxel > 0, f"max_points_per_voxel must be > 0, got {max_points_per_voxel}" - - self.voxelizer = Voxelization(voxel_size, - point_cloud_range, - max_points_per_voxel, - deterministic=False) - - def forward(self, points: torch.Tensor): - assert isinstance( - points, - torch.Tensor), f"points must be a torch.Tensor, got 
{type(points)}" - not_nan_mask = ~torch.isnan(points).any(dim=2) - return {"voxel_coords": self.voxelizer(points[not_nan_mask])} - -class DynamicVoxelizer(nn.Module): - - def __init__(self, voxel_size, point_cloud_range): - super().__init__() - self.voxel_size = voxel_size - self.point_cloud_range = point_cloud_range - self.voxelizer = Voxelization(voxel_size, - point_cloud_range, - max_num_points=-1) - - def _get_point_offsets(self, points: torch.Tensor, - voxel_coords: torch.Tensor): - - point_cloud_range = torch.tensor(self.point_cloud_range, - dtype=points.dtype, - device=points.device) - min_point = point_cloud_range[:3] - voxel_size = torch.tensor(self.voxel_size, - dtype=points.dtype, - device=points.device) - - # Voxel coords are in the form Z, Y, X :eyeroll:, convert to X, Y, Z - voxel_coords = voxel_coords[:, [2, 1, 0]] - - # Offsets are computed relative to min point - voxel_centers = voxel_coords * voxel_size + min_point + voxel_size / 2 - - return points - voxel_centers - - def forward( - self, - points: torch.Tensor) -> List[Tuple[torch.Tensor, torch.Tensor]]: - - batch_results = [] - for batch_idx in range(len(points)): - batch_points = points[batch_idx] - valid_point_idxes = torch.arange(batch_points.shape[0], - device=batch_points.device) - not_nan_mask = ~torch.isnan(batch_points).any(dim=1) - batch_non_nan_points = batch_points[not_nan_mask] - valid_point_idxes = valid_point_idxes[not_nan_mask] - batch_voxel_coords = self.voxelizer(batch_non_nan_points) - # If any of the coords are -1, then the point is not in the voxel grid and should be discarded - batch_voxel_coords_mask = (batch_voxel_coords != -1).all(dim=1) - - valid_batch_voxel_coords = batch_voxel_coords[ - batch_voxel_coords_mask] - valid_batch_non_nan_points = batch_non_nan_points[ - batch_voxel_coords_mask] - valid_point_idxes = valid_point_idxes[batch_voxel_coords_mask] - - point_offsets = self._get_point_offsets(valid_batch_non_nan_points, - valid_batch_voxel_coords) - - result_dict 
= { - "points": valid_batch_non_nan_points, - "voxel_coords": valid_batch_voxel_coords, - "point_idxes": valid_point_idxes, - "point_offsets": point_offsets - } - - batch_results.append(result_dict) - return batch_results - -class DynamicEmbedder(nn.Module): - - def __init__(self, voxel_size, pseudo_image_dims, point_cloud_range, - feat_channels: int) -> None: - super().__init__() - self.voxelizer = DynamicVoxelizer(voxel_size=voxel_size, - point_cloud_range=point_cloud_range) - self.feature_net = DynamicPillarFeatureNet( - in_channels=3, - feat_channels=(feat_channels, ), - point_cloud_range=point_cloud_range, - voxel_size=voxel_size, - mode='avg') - self.scatter = PointPillarsScatter(in_channels=feat_channels, - output_shape=pseudo_image_dims) - - def forward(self, points: torch.Tensor) -> torch.Tensor: - - # List of points and coordinates for each batch - voxel_info_list = self.voxelizer(points) - - pseudoimage_lst = [] - for voxel_info_dict in voxel_info_list: - points = voxel_info_dict['points'] - coordinates = voxel_info_dict['voxel_coords'] - voxel_feats, voxel_coors, _ = self.feature_net(points, coordinates) - pseudoimage = self.scatter(voxel_feats, voxel_coors) - pseudoimage_lst.append(pseudoimage) - # Concatenate the pseudoimages along the batch dimension - return torch.cat(pseudoimage_lst, dim=0), voxel_info_list diff --git a/src/models/basic/nsfp_module.py b/src/models/basic/nsfp_module.py deleted file mode 100644 index 8d293f8..0000000 --- a/src/models/basic/nsfp_module.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -This file is directly copied from: -https://github.com/Lilac-Lee/Neural_Scene_Flow_Prior -""" -import torch - -class Neural_Prior(torch.nn.Module): - def __init__(self, dim_x=3, filter_size=128, act_fn='relu', layer_size=8): - super().__init__() - self.layer_size = layer_size - - self.nn_layers = torch.nn.ModuleList([]) - # input layer (default: xyz -> 128) - if layer_size >= 1: - self.nn_layers.append(torch.nn.Sequential(torch.nn.Linear(dim_x, 
filter_size))) - if act_fn == 'relu': - self.nn_layers.append(torch.nn.ReLU()) - elif act_fn == 'sigmoid': - self.nn_layers.append(torch.nn.Sigmoid()) - for _ in range(layer_size-1): - self.nn_layers.append(torch.nn.Sequential(torch.nn.Linear(filter_size, filter_size))) - if act_fn == 'relu': - self.nn_layers.append(torch.nn.ReLU()) - elif act_fn == 'sigmoid': - self.nn_layers.append(torch.nn.Sigmoid()) - self.nn_layers.append(torch.nn.Linear(filter_size, dim_x)) - else: - self.nn_layers.append(torch.nn.Sequential(torch.nn.Linear(dim_x, dim_x))) - - def forward(self, x): - """ points -> features - [B, N, 3] -> [B, K] - """ - for layer in self.nn_layers: - x = layer(x) - - return x - -# ANCHOR: early stopping strategy -class EarlyStopping(object): - def __init__(self, mode='min', min_delta=0, patience=10, percentage=False): - self.mode = mode - self.min_delta = min_delta - self.patience = patience - self.best = None - self.num_bad_epochs = 0 - self.is_better = None - self._init_is_better(mode, min_delta, percentage) - - if patience == 0: - self.is_better = lambda a, b: True - self.step = lambda a: False - - def step(self, metrics): - if self.best is None: - self.best = metrics - return False - - if torch.isnan(metrics): - return True - - if self.is_better(metrics, self.best): - self.num_bad_epochs = 0 - self.best = metrics - else: - self.num_bad_epochs += 1 - - if self.num_bad_epochs >= self.patience: - return True - - return False - - def _init_is_better(self, mode, min_delta, percentage): - if mode not in {'min', 'max'}: - raise ValueError('mode ' + mode + ' is unknown!') - if not percentage: - if mode == 'min': - self.is_better = lambda a, best: a < best - min_delta - if mode == 'max': - self.is_better = lambda a, best: a > best + min_delta - else: - if mode == 'min': - self.is_better = lambda a, best: a < best - ( - best * min_delta / 100) - if mode == 'max': - self.is_better = lambda a, best: a > best + ( - best * min_delta / 100) - - - -import 
torch.nn.functional as F -from pytorch3d.ops.knn import knn_gather, knn_points -from pytorch3d.structures.pointclouds import Pointclouds -from typing import Union - - -def _validate_chamfer_reduction_inputs(batch_reduction: Union[str, None], - point_reduction: str): - """Check the requested reductions are valid. - Args: - batch_reduction: Reduction operation to apply for the loss across the - batch, can be one of ["mean", "sum"] or None. - point_reduction: Reduction operation to apply for the loss across the - points, can be one of ["mean", "sum"]. - """ - if batch_reduction is not None and batch_reduction not in ["mean", "sum"]: - raise ValueError( - 'batch_reduction must be one of ["mean", "sum"] or None') - if point_reduction not in ["mean", "sum"]: - raise ValueError('point_reduction must be one of ["mean", "sum"]') - - -def _handle_pointcloud_input( - points: Union[torch.Tensor, Pointclouds], - lengths: Union[torch.Tensor, None], -): - """ - If points is an instance of Pointclouds, retrieve the padded points tensor - along with the number of points per batch and the padded normals. - Otherwise, return the input points (and normals) with the number of points per cloud - set to the size of the second dimension of `points`. 
- """ - if isinstance(points, Pointclouds): - X = points.points_padded() - lengths = points.num_points_per_cloud() - elif torch.is_tensor(points): - if points.ndim != 3: - raise ValueError( - f"Expected points to be of shape (N, P, D), got {points.shape}" - ) - X = points - if lengths is not None and (lengths.ndim != 1 - or lengths.shape[0] != X.shape[0]): - raise ValueError("Expected lengths to be of shape (N,)") - if lengths is None: - lengths = torch.full((X.shape[0], ), - X.shape[1], - dtype=torch.int64, - device=points.device) - else: - raise ValueError("The input pointclouds should be either " + - "Pointclouds objects or torch.Tensor of shape " + - "(minibatch, num_points, 3).") - return X, lengths - -def my_chamfer_fn( - x, - y, - x_lengths=None, - y_lengths=None, - x_normals=None, - y_normals=None, - weights=None, - batch_reduction: Union[str, None] = "mean", - point_reduction: str = "mean", - truncate_dist=True, -): - """ - Chamfer distance between two pointclouds x and y. - - Args: - x: FloatTensor of shape (N, P1, D) or a Pointclouds object representing - a batch of point clouds with at most P1 points in each batch element, - batch size N and feature dimension D. - y: FloatTensor of shape (N, P2, D) or a Pointclouds object representing - a batch of point clouds with at most P2 points in each batch element, - batch size N and feature dimension D. - x_lengths: Optional LongTensor of shape (N,) giving the number of points in each - cloud in x. - y_lengths: Optional LongTensor of shape (N,) giving the number of points in each - cloud in x. - x_normals: Optional FloatTensor of shape (N, P1, D). - y_normals: Optional FloatTensor of shape (N, P2, D). - weights: Optional FloatTensor of shape (N,) giving weights for - batch elements for reduction operation. - batch_reduction: Reduction operation to apply for the loss across the - batch, can be one of ["mean", "sum"] or None. 
- point_reduction: Reduction operation to apply for the loss across the - points, can be one of ["mean", "sum"]. - - Returns: - 2-element tuple containing - - - **loss**: Tensor giving the reduced distance between the pointclouds - in x and the pointclouds in y. - - **loss_normals**: Tensor giving the reduced cosine distance of normals - between pointclouds in x and pointclouds in y. Returns None if - x_normals and y_normals are None. - """ - _validate_chamfer_reduction_inputs(batch_reduction, point_reduction) - - x, x_lengths = _handle_pointcloud_input(x, None) - y, y_lengths = _handle_pointcloud_input(y, None) - - if x_lengths.item() == 0 or y_lengths.item() == 0: - return 0.0, 0.0 - # assert x_lengths.item() > 0, f"x_lengths is {x_lengths.item()}" - # assert y_lengths.item() > 0, f"y_lengths is {y_lengths.item()}" - - return_normals = x_normals is not None and y_normals is not None - - N, P1, D = x.shape - P2 = y.shape[1] - - # Check if inputs are heterogeneous and create a lengths mask. 
- is_x_heterogeneous = (x_lengths != P1).any() - is_y_heterogeneous = (y_lengths != P2).any() - x_mask = (torch.arange(P1, device=x.device)[None] >= x_lengths[:, None] - ) # shape [N, P1] - y_mask = (torch.arange(P2, device=y.device)[None] >= y_lengths[:, None] - ) # shape [N, P2] - - if y.shape[0] != N or y.shape[2] != D: - raise ValueError("y does not have the correct shape.") - if weights is not None: - if weights.size(0) != N: - raise ValueError("weights must be of shape (N,).") - if not (weights >= 0).all(): - raise ValueError("weights cannot be negative.") - if weights.sum() == 0.0: - weights = weights.view(N, 1) - if batch_reduction in ["mean", "sum"]: - return ( - (x.sum((1, 2)) * weights).sum() * 0.0, - (x.sum((1, 2)) * weights).sum() * 0.0, - ) - return ((x.sum((1, 2)) * weights) * 0.0, (x.sum( - (1, 2)) * weights) * 0.0) - - cham_norm_x = x.new_zeros(()) - cham_norm_y = x.new_zeros(()) - - x_nn = knn_points(x, y, lengths1=x_lengths, lengths2=y_lengths, K=1) - y_nn = knn_points(y, x, lengths1=y_lengths, lengths2=x_lengths, K=1) - - cham_x = x_nn.dists[..., 0] # (N, P1) - cham_y = y_nn.dists[..., 0] # (N, P2) - - # NOTE: truncated Chamfer distance. 
- if truncate_dist: - dist_thd = 2 - x_mask[cham_x >= dist_thd] = True - y_mask[cham_y >= dist_thd] = True - cham_x[x_mask] = 0.0 - cham_y[y_mask] = 0.0 - - if is_x_heterogeneous: - cham_x[x_mask] = 0.0 - if is_y_heterogeneous: - cham_y[y_mask] = 0.0 - - if weights is not None: - cham_x *= weights.view(N, 1) - cham_y *= weights.view(N, 1) - - if return_normals: - # Gather the normals using the indices and keep only value for k=0 - x_normals_near = knn_gather(y_normals, x_nn.idx, y_lengths)[..., 0, :] - y_normals_near = knn_gather(x_normals, y_nn.idx, x_lengths)[..., 0, :] - - cham_norm_x = 1 - torch.abs( - F.cosine_similarity(x_normals, x_normals_near, dim=2, eps=1e-6)) - cham_norm_y = 1 - torch.abs( - F.cosine_similarity(y_normals, y_normals_near, dim=2, eps=1e-6)) - - if is_x_heterogeneous: - # pyre-fixme[16]: `int` has no attribute `__setitem__`. - cham_norm_x[x_mask] = 0.0 - if is_y_heterogeneous: - cham_norm_y[y_mask] = 0.0 - - if weights is not None: - cham_norm_x *= weights.view(N, 1) - cham_norm_y *= weights.view(N, 1) - - # Apply point reduction - cham_x = cham_x.sum(1) # (N,) - cham_y = cham_y.sum(1) # (N,) - - if point_reduction == "mean": - cham_x /= x_lengths - cham_y /= y_lengths - - if batch_reduction is not None: - # batch_reduction == "sum" - cham_x = cham_x.sum() - cham_y = cham_y.sum() - if batch_reduction == "mean": - cham_x /= N - cham_y /= N - - cham_dist = cham_x + cham_y - cham_normals = cham_norm_x + cham_norm_y if return_normals else None - - return cham_dist, cham_normals \ No newline at end of file diff --git a/src/models/basic/unet.py b/src/models/basic/unet.py deleted file mode 100644 index f7f651d..0000000 --- a/src/models/basic/unet.py +++ /dev/null @@ -1,101 +0,0 @@ -import torch -import torch.nn as nn - -from typing import Tuple -from . 
import ConvWithNorms - - -class BilinearDecoder(nn.Module): - - def __init__(self, scale_factor: int): - super().__init__() - self.scale_factor = scale_factor - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return nn.functional.interpolate(x, - scale_factor=self.scale_factor, - mode="bilinear", - align_corners=False) - -class UpsampleSkip(nn.Module): - - def __init__(self, skip_channels: int, latent_channels: int, - out_channels: int): - super().__init__() - self.u1_u2 = nn.Sequential( - nn.Conv2d(skip_channels, latent_channels, 1, 1, 0), - BilinearDecoder(2)) - self.u3 = nn.Conv2d(latent_channels, latent_channels, 1, 1, 0) - self.u4_u5 = nn.Sequential( - nn.Conv2d(2 * latent_channels, out_channels, 3, 1, 1), - nn.Conv2d(out_channels, out_channels, 3, 1, 1)) - - def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: - u2_res = self.u1_u2(a) - u3_res = self.u3(b) - u5_res = self.u4_u5(torch.cat([u2_res, u3_res], dim=1)) - return u5_res - - -class FastFlow3DUNet(nn.Module): - """ - Standard UNet with a few modifications: - - Uses Bilinear interpolation instead of transposed convolutions - """ - - def __init__(self) -> None: - super().__init__() - - self.encoder_step_1 = nn.Sequential(ConvWithNorms(32, 64, 3, 2, 1), - ConvWithNorms(64, 64, 3, 1, 1), - ConvWithNorms(64, 64, 3, 1, 1), - ConvWithNorms(64, 64, 3, 1, 1)) - self.encoder_step_2 = nn.Sequential(ConvWithNorms(64, 128, 3, 2, 1), - ConvWithNorms(128, 128, 3, 1, 1), - ConvWithNorms(128, 128, 3, 1, 1), - ConvWithNorms(128, 128, 3, 1, 1), - ConvWithNorms(128, 128, 3, 1, 1), - ConvWithNorms(128, 128, 3, 1, 1)) - self.encoder_step_3 = nn.Sequential(ConvWithNorms(128, 256, 3, 2, 1), - ConvWithNorms(256, 256, 3, 1, 1), - ConvWithNorms(256, 256, 3, 1, 1), - ConvWithNorms(256, 256, 3, 1, 1), - ConvWithNorms(256, 256, 3, 1, 1), - ConvWithNorms(256, 256, 3, 1, 1)) - self.decoder_step1 = UpsampleSkip(512, 256, 256) - self.decoder_step2 = UpsampleSkip(256, 128, 128) - self.decoder_step3 = 
UpsampleSkip(128, 64, 64) - self.decoder_step4 = nn.Conv2d(64, 64, 3, 1, 1) - - def forward(self, pc0_B: torch.Tensor, - pc1_B: torch.Tensor) -> torch.Tensor: - - expected_channels = 32 - assert pc0_B.shape[ - 1] == expected_channels, f"Expected {expected_channels} channels, got {pc0_B.shape[1]}" - assert pc1_B.shape[ - 1] == expected_channels, f"Expected {expected_channels} channels, got {pc1_B.shape[1]}" - - pc0_F = self.encoder_step_1(pc0_B) - pc0_L = self.encoder_step_2(pc0_F) - pc0_R = self.encoder_step_3(pc0_L) - - pc1_F = self.encoder_step_1(pc1_B) - pc1_L = self.encoder_step_2(pc1_F) - pc1_R = self.encoder_step_3(pc1_L) - - Rstar = torch.cat([pc0_R, pc1_R], - dim=1) # torch.Size([1, 512, 64, 64]) - Lstar = torch.cat([pc0_L, pc1_L], - dim=1) # torch.Size([1, 256, 128, 128]) - Fstar = torch.cat([pc0_F, pc1_F], - dim=1) # torch.Size([1, 128, 256, 256]) - Bstar = torch.cat([pc0_B, pc1_B], - dim=1) # torch.Size([1, 64, 512, 512]) - - S = self.decoder_step1(Rstar, Lstar) - T = self.decoder_step2(S, Fstar) - U = self.decoder_step3(T, Bstar) - V = self.decoder_step4(U) - - return V \ No newline at end of file diff --git a/src/models/deflow.py b/src/models/deflow.py deleted file mode 100644 index 1a4a8c3..0000000 --- a/src/models/deflow.py +++ /dev/null @@ -1,113 +0,0 @@ - -""" -# Created: 2023-07-18 15:08 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
-""" - -import torch.nn as nn -import dztimer, torch - -from .basic.unet import FastFlow3DUNet -from .basic.encoder import DynamicEmbedder -from .basic.decoder import LinearDecoder, ConvGRUDecoder -from .basic import cal_pose0to1 - -class DeFlow(nn.Module): - def __init__(self, voxel_size = [0.2, 0.2, 6], - point_cloud_range = [-51.2, -51.2, -3, 51.2, 51.2, 3], - grid_feature_size = [512, 512], - decoder_option = "gru", - num_iters = 4): - super().__init__() - self.embedder = DynamicEmbedder(voxel_size=voxel_size, - pseudo_image_dims=grid_feature_size, - point_cloud_range=point_cloud_range, - feat_channels=32) - - self.backbone = FastFlow3DUNet() - if decoder_option == "gru": - self.head = ConvGRUDecoder(num_iters = num_iters) - elif decoder_option == "linear": - self.head = LinearDecoder() - - self.timer = dztimer.Timing() - self.timer.start("Total") - - def load_from_checkpoint(self, ckpt_path): - ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] - state_dict = { - k[len("model.") :]: v for k, v in ckpt.items() if k.startswith("model.") - } - print("\nLoading... 
model weight from: ", ckpt_path, "\n") - return self.load_state_dict(state_dict=state_dict, strict=False) - - def forward(self, batch): - """ - input: using the batch from dataloader, which is a dict - Detail: [pc0, pc1, pose0, pose1] - output: the predicted flow, pose_flow, and the valid point index of pc0 - """ - self.timer[0].start("Data Preprocess") - batch_sizes = len(batch["pose0"]) - - pose_flows = [] - transform_pc0s = [] - for batch_id in range(batch_sizes): - selected_pc0 = batch["pc0"][batch_id] - self.timer[0][0].start("pose") - with torch.no_grad(): - if 'ego_motion' in batch: - pose_0to1 = batch['ego_motion'][batch_id] - else: - pose_0to1 = cal_pose0to1(batch["pose0"][batch_id], batch["pose1"][batch_id]) - self.timer[0][0].stop() - - self.timer[0][1].start("transform") - # transform selected_pc0 to pc1 - transform_pc0 = selected_pc0 @ pose_0to1[:3, :3].T + pose_0to1[:3, 3] - self.timer[0][1].stop() - pose_flows.append(transform_pc0 - selected_pc0) - transform_pc0s.append(transform_pc0) - - pc0s = torch.stack(transform_pc0s, dim=0) - pc1s = batch["pc1"] - self.timer[0].stop() - - self.timer[1].start("Voxelization") - pc0_before_pseudoimages, pc0_voxel_infos_lst = self.embedder(pc0s) - pc1_before_pseudoimages, pc1_voxel_infos_lst = self.embedder(pc1s) - self.timer[1].stop() - - self.timer[2].start("Encoder") - grid_flow_pseudoimage = self.backbone(pc0_before_pseudoimages, - pc1_before_pseudoimages) - self.timer[2].stop() - - self.timer[3].start("Decoder") - flows = self.head( - torch.cat((pc0_before_pseudoimages, pc1_before_pseudoimages), - dim=1), grid_flow_pseudoimage, pc0_voxel_infos_lst) - self.timer[3].stop() - - pc0_points_lst = [e["points"] for e in pc0_voxel_infos_lst] - pc1_points_lst = [e["points"] for e in pc1_voxel_infos_lst] - - pc0_valid_point_idxes = [e["point_idxes"] for e in pc0_voxel_infos_lst] - pc1_valid_point_idxes = [e["point_idxes"] for e in pc1_voxel_infos_lst] - - model_res = { - "flow": flows, - 'pose_flow': pose_flows, - - 
"pc0_valid_point_idxes": pc0_valid_point_idxes, - "pc0_points_lst": pc0_points_lst, - - "pc1_valid_point_idxes": pc1_valid_point_idxes, - "pc1_points_lst": pc1_points_lst, - } - return model_res \ No newline at end of file diff --git a/src/models/fastflow3d.py b/src/models/fastflow3d.py deleted file mode 100644 index 6fe68c0..0000000 --- a/src/models/fastflow3d.py +++ /dev/null @@ -1,133 +0,0 @@ - -""" -This file is directly copied from: -https://github.com/kylevedder/zeroflow/blob/master/models/fast_flow_3d.py - -with slightly modification to have unified format with all benchmark. -""" - -import dztimer, torch -import torch.nn as nn - -from .basic.unet import FastFlow3DUNet -from .basic.encoder import DynamicEmbedder -from .basic.decoder import LinearDecoder -from .basic import cal_pose0to1 - - -class FastFlow3D(nn.Module): - def __init__(self, voxel_size = [0.2, 0.2, 6], - point_cloud_range = [-51.2, -51.2, -3, 51.2, 51.2, 3], - grid_feature_size = [512, 512]): - super().__init__() - - self.embedder = DynamicEmbedder(voxel_size=voxel_size, - pseudo_image_dims=grid_feature_size, - point_cloud_range=point_cloud_range, - feat_channels=32) - self.backbone = FastFlow3DUNet() - self.head = LinearDecoder() - - self.timer = dztimer.Timing() - self.timer.start("Total") - - def load_from_checkpoint(self, ckpt_path): - ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] - state_dict = { - k[len("model.") :]: v for k, v in ckpt.items() if k.startswith("model.") - } - print("\nLoading... 
model weight from: ", ckpt_path, "\n") - return self.load_state_dict(state_dict=state_dict, strict=False) - - def _model_forward(self, pc0s, pc1s): - - pc0_before_pseudoimages, pc0_voxel_infos_lst = self.embedder(pc0s) - pc1_before_pseudoimages, pc1_voxel_infos_lst = self.embedder(pc1s) - - grid_flow_pseudoimage = self.backbone(pc0_before_pseudoimages, - pc1_before_pseudoimages) - flows = self.head( - torch.cat((pc0_before_pseudoimages, pc1_before_pseudoimages), - dim=1), grid_flow_pseudoimage, pc0_voxel_infos_lst) - - pc0_points_lst = [e["points"] for e in pc0_voxel_infos_lst] - pc0_valid_point_idxes = [e["point_idxes"] for e in pc0_voxel_infos_lst] - pc1_points_lst = [e["points"] for e in pc1_voxel_infos_lst] - pc1_valid_point_idxes = [e["point_idxes"] for e in pc1_voxel_infos_lst] - - pc0_warped_pc1_points_lst = [ - pc0_points + flow - for pc0_points, flow in zip(pc0_points_lst, flows) - ] - - return { - "flow": flows, - "pc0_points_lst": pc0_points_lst, - "pc0_warped_pc1_points_lst": pc0_warped_pc1_points_lst, - "pc0_valid_point_idxes": pc0_valid_point_idxes, - "pc1_points_lst": pc1_points_lst, - "pc1_valid_point_idxes": pc1_valid_point_idxes - } - - def forward(self, batch, - compute_cycle=False, - compute_symmetry_x=False, - compute_symmetry_y=False): - - self.timer[0].start("Data Preprocess") - batch_sizes = len(batch["pose0"]) - - pose_flows = [] - transform_pc0s = [] - for batch_id in range(batch_sizes): - selected_pc0 = batch["pc0"][batch_id] - self.timer[0][0].start("pose") - pose_0to1 = cal_pose0to1(batch["pose0"][batch_id], batch["pose1"][batch_id]) - self.timer[0][0].stop() - - self.timer[0][1].start("transform") - # transform selected_pc0 to pc1 - transform_pc0 = selected_pc0 @ pose_0to1[:3, :3].T + pose_0to1[:3, 3] - self.timer[0][1].stop() - pose_flows.append(transform_pc0 - selected_pc0) - transform_pc0s.append(transform_pc0) - - pc0s = torch.stack(transform_pc0s, dim=0) - pc1s = batch["pc1"] - self.timer[0].stop() - - model_res = 
self._model_forward(pc0s, pc1s) - - ret_dict = model_res - ret_dict["pose_flow"] = pose_flows - if compute_cycle: - # The warped pointcloud, original pointcloud should be the input to the model - pc0_warped_pc1_points_lst = model_res["pc0_warped_pc1_points_lst"] - pc0_points_lst = model_res["pc0_points_lst"] - # Some of the warped points may be outside the pseudoimage range, causing them to be clipped. - # When we compute this reverse flow, we need to solve for the original points that were warped to the clipped points. - backward_model_res = self._model_forward(pc0_warped_pc1_points_lst, - pc0_points_lst) - # model_res["reverse_flow"] = backward_model_res["flow"] - # model_res[ - # "flow_valid_point_idxes_for_reverse_flow"] = backward_model_res[ - # "pc0_valid_point_idxes"] - ret_dict["backward"] = backward_model_res - - if compute_symmetry_x: - pc0s_sym = pc0s.clone() - pc0s_sym[:, :, 0] *= -1 - pc1s_sym = pc1s.clone() - pc1s_sym[:, :, 0] *= -1 - model_res_sym = self._model_forward(pc0s_sym, pc1s_sym) - ret_dict["symmetry_x"] = model_res_sym - - if compute_symmetry_y: - pc0s_sym = pc0s.clone() - pc0s_sym[:, :, 1] *= -1 - pc1s_sym = pc1s.clone() - pc1s_sym[:, :, 1] *= -1 - model_res_sym = self._model_forward(pc0s_sym, pc1s_sym) - ret_dict["symmetry_y"] = model_res_sym - - return ret_dict \ No newline at end of file diff --git a/src/trainer.py b/src/trainer.py deleted file mode 100644 index 97dc1e7..0000000 --- a/src/trainer.py +++ /dev/null @@ -1,314 +0,0 @@ -""" - -# Created: 2023-11-05 10:00 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
-# -# Description: Model Wrapper for Pytorch Lightning - -""" - -import numpy as np -import torch -import torch.optim as optim -from pathlib import Path - -from lightning import LightningModule -from hydra.utils import instantiate -from omegaconf import OmegaConf,open_dict - -import os, sys, time, h5py -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' )) -sys.path.append(BASE_DIR) -from src.utils.mics import import_func, weights_init, zip_res -from src.utils.av2_eval import write_output_file -from src.models.basic import cal_pose0to1 -from src.utils.eval_metric import OfficialMetrics, evaluate_leaderboard, evaluate_leaderboard_v2 - -# debugging tools -# import faulthandler -# faulthandler.enable() - -torch.set_float32_matmul_precision('medium') -class ModelWrapper(LightningModule): - def __init__(self, cfg, eval=False): - super().__init__() - - # set grid size - if ('voxel_size' in cfg.model.target) and ('point_cloud_range' in cfg.model.target) and not eval and 'point_cloud_range' in cfg: - OmegaConf.set_struct(cfg.model.target, True) - with open_dict(cfg.model.target): - cfg.model.target['grid_feature_size'] = \ - [abs(int((cfg.point_cloud_range[0] - cfg.point_cloud_range[3]) / cfg.voxel_size[0])), - abs(int((cfg.point_cloud_range[1] - cfg.point_cloud_range[4]) / cfg.voxel_size[1])), - abs(int((cfg.point_cloud_range[2] - cfg.point_cloud_range[5]) / cfg.voxel_size[2]))] - else: - with open_dict(cfg.model.target): - cfg.model.target['grid_feature_size'] = \ - [abs(int((cfg.model.target.point_cloud_range[0] - cfg.model.target.point_cloud_range[3]) / cfg.model.target.voxel_size[0])), - abs(int((cfg.model.target.point_cloud_range[1] - cfg.model.target.point_cloud_range[4]) / cfg.model.target.voxel_size[1])), - abs(int((cfg.model.target.point_cloud_range[2] - cfg.model.target.point_cloud_range[5]) / cfg.model.target.voxel_size[2]))] - - self.model = instantiate(cfg.model.target) - self.model.apply(weights_init) - - self.loss_fn = 
import_func("src.lossfuncs."+cfg.loss_fn) if 'loss_fn' in cfg else None - self.add_seloss = cfg.add_seloss if 'add_seloss' in cfg else None - self.cfg_loss_name = cfg.loss_fn if 'loss_fn' in cfg else None - - self.batch_size = int(cfg.batch_size) if 'batch_size' in cfg else 1 - self.lr = cfg.lr if 'lr' in cfg else None - self.epochs = cfg.epochs if 'epochs' in cfg else None - - self.metrics = OfficialMetrics() - - self.load_checkpoint_path = cfg.checkpoint if 'checkpoint' in cfg else None - - - self.leaderboard_version = cfg.leaderboard_version if 'leaderboard_version' in cfg else 1 - # NOTE(Qingwen): since we have seflow version which is unsupervised, we need to set the flag to false. - self.supervised_flag = cfg.supervised_flag if 'supervised_flag' in cfg else True - self.save_res = False - if 'av2_mode' in cfg: - self.av2_mode = cfg.av2_mode - self.save_res = cfg.save_res if 'save_res' in cfg else False - - if self.save_res or self.av2_mode == 'test': - self.save_res_path = Path(cfg.dataset_path).parent / "results" / cfg.output - os.makedirs(self.save_res_path, exist_ok=True) - print(f"We are in {cfg.av2_mode}, results will be saved in: {self.save_res_path} with version: {self.leaderboard_version} format for online leaderboard.") - else: - self.av2_mode = None - if 'pretrained_weights' in cfg: - if cfg.pretrained_weights is not None: - self.model.load_from_checkpoint(cfg.pretrained_weights) - - self.dataset_path = cfg.dataset_path if 'dataset_path' in cfg else None - self.vis_name = cfg.res_name if 'res_name' in cfg else 'default' - self.save_hyperparameters() - - def training_step(self, batch, batch_idx): - self.model.timer[4].start("One Scan in model") - res_dict = self.model(batch) - self.model.timer[4].stop() - - self.model.timer[5].start("Loss") - # compute loss - total_loss = 0.0 - - if self.cfg_loss_name in ['seflowLoss']: - loss_items, weights = zip(*[(key, weight) for key, weight in self.add_seloss.items()]) - loss_logger = {'chamfer_dis': 0.0, 
'dynamic_chamfer_dis': 0.0, 'static_flow_loss': 0.0, 'cluster_based_pc0pc1': 0.0} - else: - loss_items, weights = ['loss'], [1.0] - loss_logger = {'loss': 0.0} - - pc0_valid_idx = res_dict['pc0_valid_point_idxes'] # since padding - pc1_valid_idx = res_dict['pc1_valid_point_idxes'] # since padding - if 'pc0_points_lst' in res_dict and 'pc1_points_lst' in res_dict: - pc0_points_lst = res_dict['pc0_points_lst'] - pc1_points_lst = res_dict['pc1_points_lst'] - - batch_sizes = len(batch["pose0"]) - pose_flows = res_dict['pose_flow'] - est_flow = res_dict['flow'] - - for batch_id in range(batch_sizes): - pc0_valid_from_pc2res = pc0_valid_idx[batch_id] - pc1_valid_from_pc2res = pc1_valid_idx[batch_id] - pose_flow_ = pose_flows[batch_id][pc0_valid_from_pc2res] - - dict2loss = {'est_flow': est_flow[batch_id], - 'gt_flow': None if 'flow' not in batch else batch['flow'][batch_id][pc0_valid_from_pc2res] - pose_flow_, - 'gt_classes': None if 'flow_category_indices' not in batch else batch['flow_category_indices'][batch_id][pc0_valid_from_pc2res]} - - if 'pc0_dynamic' in batch: - dict2loss['pc0_labels'] = batch['pc0_dynamic'][batch_id][pc0_valid_from_pc2res] - dict2loss['pc1_labels'] = batch['pc1_dynamic'][batch_id][pc1_valid_from_pc2res] - - # different methods may don't have this in the res_dict - if 'pc0_points_lst' in res_dict and 'pc1_points_lst' in res_dict: - dict2loss['pc0'] = pc0_points_lst[batch_id] - dict2loss['pc1'] = pc1_points_lst[batch_id] - - res_loss = self.loss_fn(dict2loss) - for i, loss_name in enumerate(loss_items): - total_loss += weights[i] * res_loss[loss_name] - for key in res_loss: - loss_logger[key] += res_loss[key] - - self.log("trainer/loss", total_loss/batch_sizes, sync_dist=True, batch_size=self.batch_size, prog_bar=True) - if self.add_seloss is not None: - for key in loss_logger: - self.log(f"trainer/{key}", loss_logger[key]/batch_sizes, sync_dist=True, batch_size=self.batch_size) - self.model.timer[5].stop() - - # NOTE (Qingwen): if you want to 
view the detail breakdown of time cost - # self.model.timer.print(random_colors=False, bold=False) - return total_loss - - def train_validation_step_(self, batch, res_dict): - # means there are ground truth flow so we can evaluate the EPE-3 Way metric - if batch['flow'][0].shape[0] > 0: - pose_flows = res_dict['pose_flow'] - for batch_id, gt_flow in enumerate(batch["flow"]): - valid_from_pc2res = res_dict['pc0_valid_point_idxes'][batch_id] - pose_flow = pose_flows[batch_id][valid_from_pc2res] - - final_flow_ = pose_flow.clone() + res_dict['flow'][batch_id] - v1_dict= evaluate_leaderboard(final_flow_, pose_flow, batch['pc0'][batch_id][valid_from_pc2res], gt_flow[valid_from_pc2res], \ - batch['flow_is_valid'][batch_id][valid_from_pc2res], batch['flow_category_indices'][batch_id][valid_from_pc2res]) - v2_dict = evaluate_leaderboard_v2(final_flow_, pose_flow, batch['pc0'][batch_id][valid_from_pc2res], gt_flow[valid_from_pc2res], \ - batch['flow_is_valid'][batch_id][valid_from_pc2res], batch['flow_category_indices'][batch_id][valid_from_pc2res]) - - self.metrics.step(v1_dict, v2_dict) - else: - pass - - def configure_optimizers(self): - optimizer = optim.Adam(self.model.parameters(), lr=self.lr) - return optimizer - - def on_train_epoch_start(self): - self.time_start_train_epoch = time.time() - - def on_train_epoch_end(self): - self.log("pre_epoch_cost (mins)", (time.time()-self.time_start_train_epoch)/60.0, on_step=False, on_epoch=True, sync_dist=True) - - def on_validation_epoch_end(self): - self.model.timer.print(random_colors=False, bold=False) - - if self.av2_mode == 'test': - print(f"\nModel: {self.model.__class__.__name__}, Checkpoint from: {self.load_checkpoint_path}") - print(f"Test results saved in: {self.save_res_path}, Please run submit command and upload to online leaderboard for results.") - if self.leaderboard_version == 1: - print(f"\nevalai challenge 2010 phase 4018 submit --file {self.save_res_path}.zip --large --private\n") - elif 
self.leaderboard_version == 2: - print(f"\nevalai challenge 2210 phase 4396 submit --file {self.save_res_path}.zip --large --private\n") - else: - print(f"Please check the leaderboard version in the config file. We only support version 1 and 2.") - output_file = zip_res(self.save_res_path, leaderboard_version=self.leaderboard_version, is_supervised = self.supervised_flag, output_file=self.save_res_path.as_posix() + ".zip") - # wandb.log_artifact(output_file) - return - - if self.av2_mode == 'val': - print(f"\nModel: {self.model.__class__.__name__}, Checkpoint from: {self.load_checkpoint_path}") - print(f"More details parameters and training status are in checkpoints") - - self.metrics.normalize() - - # wandb log things: - for key in self.metrics.bucketed: - for type_ in 'Static', 'Dynamic': - self.log(f"val/{type_}/{key}", self.metrics.bucketed[key][type_], sync_dist=True) - for key in self.metrics.epe_3way: - self.log(f"val/{key}", self.metrics.epe_3way[key], sync_dist=True) - - self.metrics.print() - - self.metrics = OfficialMetrics() - - if self.save_res: - print(f"We already write the flow_est into the dataset, please run following commend to visualize the flow. Copy and paste it to your terminal:") - print(f"python tools/visualization.py --res_name '{self.vis_name}' --data_dir {self.dataset_path}") - print(f"Enjoy! 
^v^ ------ \n") - - def eval_only_step_(self, batch, res_dict): - eval_mask = batch['eval_mask'].squeeze() - pc0 = batch['origin_pc0'] - pose_0to1 = cal_pose0to1(batch["pose0"], batch["pose1"]) - transform_pc0 = pc0 @ pose_0to1[:3, :3].T + pose_0to1[:3, 3] - pose_flow = transform_pc0 - pc0 - - final_flow = pose_flow.clone() - if 'pc0_valid_point_idxes' in res_dict: - valid_from_pc2res = res_dict['pc0_valid_point_idxes'] - - # flow in the original pc0 coordinate - pred_flow = pose_flow[~batch['gm0']].clone() - pred_flow[valid_from_pc2res] = res_dict['flow'] + pose_flow[~batch['gm0']][valid_from_pc2res] - final_flow[~batch['gm0']] = pred_flow - else: - final_flow[~batch['gm0']] = res_dict['flow'] + pose_flow[~batch['gm0']] - - if self.av2_mode == 'val': # since only val we have ground truth flow to eval - gt_flow = batch["flow"] - v1_dict = evaluate_leaderboard(final_flow[eval_mask], pose_flow[eval_mask], pc0[eval_mask], \ - gt_flow[eval_mask], batch['flow_is_valid'][eval_mask], \ - batch['flow_category_indices'][eval_mask]) - v2_dict = evaluate_leaderboard_v2(final_flow[eval_mask], pose_flow[eval_mask], pc0[eval_mask], \ - gt_flow[eval_mask], batch['flow_is_valid'][eval_mask], batch['flow_category_indices'][eval_mask]) - - self.metrics.step(v1_dict, v2_dict) - - # NOTE (Qingwen): Since val and test, we will force set batch_size = 1 - if self.save_res or self.av2_mode == 'test': # test must save data to submit in the online leaderboard. - save_pred_flow = final_flow[eval_mask, :3].cpu().detach().numpy() - rigid_flow = pose_flow[eval_mask, :3].cpu().detach().numpy() - is_dynamic = np.linalg.norm(save_pred_flow - rigid_flow, axis=1, ord=2) >= 0.05 - sweep_uuid = (batch['scene_id'], batch['timestamp']) - if self.leaderboard_version == 2: - save_pred_flow = (final_flow - pose_flow).cpu().detach().numpy() # all points here... since 2rd version we need to save the relative flow. 
- write_output_file(save_pred_flow, is_dynamic, sweep_uuid, self.save_res_path, leaderboard_version=self.leaderboard_version) - - def run_model_wo_ground_data(self, batch): - # NOTE (Qingwen): only needed when val or test mode, since train we will go through collate_fn to remove. - batch['origin_pc0'] = batch['pc0'].clone() - batch['pc0'] = batch['pc0'][~batch['gm0']].unsqueeze(0) - batch['pc1'] = batch['pc1'][~batch['gm1']].unsqueeze(0) - if 'pcb0' in batch: - batch['pcb0'] = batch['pcb0'][~batch['gmb0']].unsqueeze(0) - self.model.timer[12].start("One Scan") - res_dict = self.model(batch) - self.model.timer[12].stop() - - # NOTE (Qingwen): Since val and test, we will force set batch_size = 1 - batch = {key: batch[key][0] for key in batch if len(batch[key])>0} - res_dict = {key: res_dict[key][0] for key in res_dict if len(res_dict[key])>0} - return batch, res_dict - - def validation_step(self, batch, batch_idx): - if self.av2_mode == 'val' or self.av2_mode == 'test': - batch, res_dict = self.run_model_wo_ground_data(batch) - self.eval_only_step_(batch, res_dict) - else: - res_dict = self.model(batch) - self.train_validation_step_(batch, res_dict) - - def test_step(self, batch, batch_idx): - batch, res_dict = self.run_model_wo_ground_data(batch) - pc0 = batch['origin_pc0'] - pose_0to1 = cal_pose0to1(batch["pose0"], batch["pose1"]) - transform_pc0 = pc0 @ pose_0to1[:3, :3].T + pose_0to1[:3, 3] - pose_flow = transform_pc0 - pc0 - - final_flow = pose_flow.clone() - if 'pc0_valid_point_idxes' in res_dict: - valid_from_pc2res = res_dict['pc0_valid_point_idxes'] - - # flow in the original pc0 coordinate - pred_flow = pose_flow[~batch['gm0']].clone() - pred_flow[valid_from_pc2res] = pose_flow[~batch['gm0']][valid_from_pc2res] + res_dict['flow'] - - final_flow[~batch['gm0']] = pred_flow - else: - final_flow[~batch['gm0']] = res_dict['flow'] + pose_flow[~batch['gm0']] - - # write final_flow into the dataset. 
- key = str(batch['timestamp']) - scene_id = batch['scene_id'] - with h5py.File(os.path.join(self.dataset_path, f'{scene_id}.h5'), 'r+') as f: - if self.vis_name in f[key]: - del f[key][self.vis_name] - f[key].create_dataset(self.vis_name, data=final_flow.cpu().detach().numpy().astype(np.float32)) - - def on_test_epoch_end(self): - self.model.timer.print(random_colors=False, bold=False) - print(f"\n\nModel: {self.model.__class__.__name__}, Checkpoint from: {self.load_checkpoint_path}") - print(f"We already write the flow_est into the dataset, please run following commend to visualize the flow. Copy and paste it to your terminal:") - print(f"python tools/visualization.py --res_name '{self.vis_name}' --data_dir {self.dataset_path}") - print(f"Enjoy! ^v^ ------ \n") diff --git a/src/utils/__init__.py b/src/utils/__init__.py deleted file mode 100644 index ca2f9cc..0000000 --- a/src/utils/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -class bc: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKCYAN = '\033[96m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' \ No newline at end of file diff --git a/src/utils/av2_eval.py b/src/utils/av2_eval.py deleted file mode 100644 index 3d28688..0000000 --- a/src/utils/av2_eval.py +++ /dev/null @@ -1,871 +0,0 @@ -import zipfile -from collections import defaultdict -from pathlib import Path -from typing import ( - Any, - Callable, - DefaultDict, - Dict, - Final, - List, - Optional, - Tuple, - Union, - cast, -) - -import numpy as np -import pandas as pd -from rich.progress import track - -"""Constants for scene flow evaluation.""" - -from enum import Enum, unique -from typing import Final - -from av2.datasets.sensor.constants import AnnotationCategories - -SCENE_FLOW_DYNAMIC_THRESHOLD: Final = 0.05 -SWEEP_PAIR_TIME_DELTA: Final = 0.1 -CLOSE_DISTANCE_THRESHOLD: Final = 35.0 - -CATEGORY_TO_INDEX: Final = { - **{"NONE": 0}, - **{k.value: i + 1 for i, k in 
enumerate(AnnotationCategories)}, -} -# These catagories are ignored because of labeling oddities (moving stop signs on side of school bus, etc) -ROAD_SIGNS = [ - "BOLLARD", - "CONSTRUCTION_BARREL", - "CONSTRUCTION_CONE", - "MOBILE_PEDESTRIAN_CROSSING_SIGN", - "SIGN", - "STOP_SIGN", - "MESSAGE_BOARD_TRAILER", - "TRAFFIC_LIGHT_TRAILER", -] -PEDESTRIAN_CATEGORIES = ["PEDESTRIAN", "STROLLER", "WHEELCHAIR", "OFFICIAL_SIGNALER"] -WHEELED_VRU = [ - "BICYCLE", - "BICYCLIST", - "MOTORCYCLE", - "MOTORCYCLIST", - "WHEELED_DEVICE", - "WHEELED_RIDER", -] -CAR = ["REGULAR_VEHICLE"] -OTHER_VEHICLES = [ - "BOX_TRUCK", - "LARGE_VEHICLE", - "RAILED_VEHICLE", - "TRUCK", - "TRUCK_CAB", - "VEHICULAR_TRAILER", - "ARTICULATED_BUS", - "BUS", - "SCHOOL_BUS", -] -BACKGROUND_CATEGORIES = ["NONE"] # + ROAD_SIGNS -BUCKETED_METACATAGORIES = { - "BACKGROUND": BACKGROUND_CATEGORIES, - "CAR": CAR, - "PEDESTRIAN": PEDESTRIAN_CATEGORIES, - "WHEELED_VRU": WHEELED_VRU, - "OTHER_VEHICLES": OTHER_VEHICLES, -} - -import av2.geometry.geometry as geometry_utils -from av2.geometry.se3 import SE3 -from av2.utils.typing import NDArrayFloat -from av2.utils.io import read_feather - -# Mapping from egovehicle time in nanoseconds to egovehicle pose. -TimestampedCitySE3EgoPoses = Dict[int, SE3] - -# Mapping from sensor name to sensor pose. -SensorPosesMapping = Dict[str, SE3] - -def read_ego_SE3_sensor(log_dir: Path) -> SensorPosesMapping: - """Read the sensor poses for the given log. - - The sensor pose defines an SE3 transformation from the sensor reference frame to the egovehicle reference frame. - Mathematically we define this transformation as: $$ego_SE3_sensor$$. - - In other words, when this transformation is applied to a set of points in the sensor reference frame, they - will be transformed to the egovehicle reference frame. - - Example (1). - points_ego = ego_SE3_sensor(points_sensor) apply the SE3 transformation to points in the sensor reference frame. - - Example (2). 
- sensor_SE3_ego = ego_SE3_sensor^{-1} take the inverse of the SE3 transformation. - points_sensor = sensor_SE3_ego(points_ego) apply the SE3 transformation to points in the ego reference frame. - - Extrinsics: - sensor_name: Name of the sensor. - qw: scalar component of a quaternion. - qx: X-axis coefficient of a quaternion. - qy: Y-axis coefficient of a quaternion. - qz: Z-axis coefficient of a quaternion. - tx_m: X-axis translation component. - ty_m: Y-axis translation component. - tz_m: Z-axis translation component. - - Args: - log_dir: Path to the log directory. - - Returns: - Mapping from sensor name to sensor pose. - """ - ego_SE3_sensor_path = Path(log_dir, "calibration", "egovehicle_SE3_sensor.feather") - ego_SE3_sensor = read_feather(ego_SE3_sensor_path) - rotations = geometry_utils.quat_to_mat( - ego_SE3_sensor.loc[:, ["qw", "qx", "qy", "qz"]].to_numpy() - ) - translations = ego_SE3_sensor.loc[:, ["tx_m", "ty_m", "tz_m"]].to_numpy() - sensor_names = ego_SE3_sensor.loc[:, "sensor_name"].to_numpy() - - sensor_name_to_pose: SensorPosesMapping = { - name: SE3(rotation=rotations[i], translation=translations[i]) - for i, name in enumerate(sensor_names) - } - return sensor_name_to_pose - -@unique -class SceneFlowMetricType(str, Enum): - """Scene Flow metrics.""" - - ACCURACY_RELAX = "ACCURACY_RELAX" - ACCURACY_STRICT = "ACCURACY_STRICT" - ANGLE_ERROR = "ANGLE_ERROR" - EPE = "EPE" - - -@unique -class SegmentationMetricType(str, Enum): - """Segmentation metrics.""" - - TP = "TP" - TN = "TN" - FP = "FP" - FN = "FN" - - -@unique -class InanimateCategories(str, Enum): - """Annotation categories representing inanimate objects that aren't vehicles.""" - - BOLLARD = "BOLLARD" - CONSTRUCTION_BARREL = "CONSTRUCTION_BARREL" - CONSTRUCTION_CONE = "CONSTRUCTION_CONE" - MOBILE_PEDESTRIAN_CROSSING_SIGN = "MOBILE_PEDESTRIAN_CROSSING_SIGN" - SIGN = "SIGN" - STOP_SIGN = "STOP_SIGN" - - -@unique -class LeggedCategories(str, Enum): - """Annotation categories representing objects 
that move using legs.""" - - ANIMAL = "ANIMAL" - DOG = "DOG" - OFFICIAL_SIGNALER = "OFFICIAL_SIGNALER" - PEDESTRIAN = "PEDESTRIAN" - - -@unique -class SmallVehicleCategories(str, Enum): - """Annotation categories representing small vehicles.""" - - BICYCLE = "BICYCLE" - BICYCLIST = "BICYCLIST" - MOTORCYCLE = "MOTORCYCLE" - MOTORCYCLIST = "MOTORCYCLIST" - STROLLER = "STROLLER" - WHEELCHAIR = "WHEELCHAIR" - WHEELED_DEVICE = "WHEELED_DEVICE" - WHEELED_RIDER = "WHEELED_RIDER" - - -@unique -class VehicleCategories(str, Enum): - """Annotation categories representing regular vehicles.""" - - ARTICULATED_BUS = "ARTICULATED_BUS" - BOX_TRUCK = "BOX_TRUCK" - BUS = "BUS" - LARGE_VEHICLE = "LARGE_VEHICLE" - MESSAGE_BOARD_TRAILER = "MESSAGE_BOARD_TRAILER" - RAILED_VEHICLE = "RAILED_VEHICLE" - REGULAR_VEHICLE = "REGULAR_VEHICLE" - SCHOOL_BUS = "SCHOOL_BUS" - TRAFFIC_LIGHT_TRAILER = "TRAFFIC_LIGHT_TRAILER" - TRUCK = "TRUCK" - TRUCK_CAB = "TRUCK_CAB" - VEHICULAR_TRAILER = "VEHICULAR_TRAILER" - - -@unique -class MetricBreakdownCategories(str, Enum): - """Meta-categories for the scene flow task.""" - - ALL = "All" - BACKGROUND = "Background" - FOREGROUND = "Foreground" - - -NO_CLASS_BREAKDOWN: Final = {MetricBreakdownCategories.ALL: list(range(31))} -FOREGROUND_BACKGROUND_BREAKDOWN: Final = { - MetricBreakdownCategories.BACKGROUND: [0], - MetricBreakdownCategories.FOREGROUND: [ - CATEGORY_TO_INDEX[k.value] - for k in ( - list(InanimateCategories) - + list(LeggedCategories) - + list(SmallVehicleCategories) - + list(VehicleCategories) - ) - ], -} - -FLOW_COLUMNS: Final = ("flow_tx_m", "flow_ty_m", "flow_tz_m") - - -from av2.utils.typing import NDArrayBool, NDArrayFloat, NDArrayInt - -ACCURACY_RELAX_DISTANCE_THRESHOLD: Final = 0.1 -ACCURACY_STRICT_DISTANCE_THRESHOLD: Final = 0.05 -NO_FMT_INDICES: Final = ("Background", "Dynamic") -EPS: Final = 1e-10 - -def compute_end_point_error(dts: NDArrayFloat, gts: NDArrayFloat) -> NDArrayFloat: - """Compute the end-point error between predictions 
and ground truth. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - - Returns: - The point-wise end-point error. - """ - end_point_error: NDArrayFloat = np.linalg.norm(dts - gts, axis=-1).astype( - np.float64 - ) - return end_point_error - - -def compute_accuracy( - dts: NDArrayFloat, gts: NDArrayFloat, distance_threshold: float -) -> NDArrayFloat: - """Compute the percent of inliers for a given threshold for a set of prediction and ground truth vectors. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - distance_threshold: Distance threshold for classifying inliers. - - Returns: - The pointwise inlier assignments. - """ - l2_norm = np.linalg.norm(dts - gts, axis=-1) - gts_norm = np.linalg.norm(gts, axis=-1) - relative_error = np.divide(l2_norm, gts_norm + EPS) - abs_error_inlier = np.less(l2_norm, distance_threshold).astype(bool) - relative_error_inlier = np.less(relative_error, distance_threshold).astype(bool) - accuracy: NDArrayFloat = np.logical_or( - abs_error_inlier, relative_error_inlier - ).astype(np.float64) - return accuracy - - -def compute_accuracy_strict(dts: NDArrayFloat, gts: NDArrayFloat) -> NDArrayFloat: - """Compute the accuracy with a 0.05 threshold. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - - Returns: - The pointwise inlier assignments at a 0.05 threshold - """ - return compute_accuracy(dts, gts, ACCURACY_STRICT_DISTANCE_THRESHOLD) - - -def compute_accuracy_relax(dts: NDArrayFloat, gts: NDArrayFloat) -> NDArrayFloat: - """Compute the accuracy with a 0.1 threshold. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - - Returns: - The pointwise inlier assignments at a 0.1 threshold. 
- """ - return compute_accuracy(dts, gts, ACCURACY_RELAX_DISTANCE_THRESHOLD) - - -def compute_angle_error(dts: NDArrayFloat, gts: NDArrayFloat) -> NDArrayFloat: - """Compute the angle error in space-time between the prediced and ground truth flow vectors. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - - Returns: - The pointwise angle errors in space-time. - """ - # Convert the 3D flow vectors to 4D space-time vectors. - dts_space_time = np.pad( - dts, ((0, 0), (0, 1)), constant_values=SWEEP_PAIR_TIME_DELTA - ) - gts_space_time = np.pad( - gts, ((0, 0), (0, 1)), constant_values=SWEEP_PAIR_TIME_DELTA - ) - - dts_space_time_norm = np.linalg.norm(dts_space_time, axis=-1, keepdims=True) - gts_space_time_norm = np.linalg.norm(gts_space_time, axis=-1, keepdims=True) - unit_dts = dts_space_time / dts_space_time_norm - unit_gts = gts_space_time / gts_space_time_norm - - dot_product = np.einsum("bd,bd->b", unit_dts, unit_gts) - - # Floating point errors can cause `dot_product` to be slightly greater than 1 or less than -1. - clipped_dot_product = np.clip(dot_product, -1.0, 1.0) - angle_error: NDArrayFloat = np.arccos(clipped_dot_product).astype(np.float64) - return angle_error - - -def compute_true_positives(dts: NDArrayBool, gts: NDArrayBool) -> int: - """Compute true positive count. - - Args: - dts: (N,) Array containing predicted dynamic segmentation. - gts: (N,) Array containing ground truth dynamic segmentation. - - Returns: - The number of true positive classifications. - """ - return int(np.logical_and(dts, gts).sum()) - - -def compute_true_negatives(dts: NDArrayBool, gts: NDArrayBool) -> int: - """Compute true negative count. - - Args: - dts: (N,) Array containing predicted dynamic segmentation. - gts: (N,) Array containing ground truth dynamic segmentation. - - Returns: - The number of true negative classifications. 
- """ - return int(np.logical_and(~dts, ~gts).sum()) - - -def compute_false_positives(dts: NDArrayBool, gts: NDArrayBool) -> int: - """Compute false positive count. - - Args: - dts: (N,) Array containing predicted dynamic segmentation. - gts: (N,) Array containing ground truth dynamic segmentation. - - Returns: - The number of false positive classifications. - """ - return int(np.logical_and(dts, ~gts).sum()) - - -def compute_false_negatives(dts: NDArrayBool, gts: NDArrayBool) -> int: - """Compute false negative count. - - Args: - dts: (N,) Array containing predicted dynamic segmentation. - gts: (N,) Array containing ground truth dynamic segmentation. - - Returns: - The number of false negative classifications - """ - return int(np.logical_and(~dts, gts).sum()) - - -def compute_scene_flow_metrics( - dts: NDArrayFloat, gts: NDArrayFloat, scene_flow_metric_type: SceneFlowMetricType -) -> NDArrayFloat: - """Compute scene flow metrics. - - Args: - dts: (N,3) Array containing predicted flows. - gts: (N,3) Array containing ground truth flows. - scene_flow_metric_type: Scene flow metric type. - - Returns: - Scene flow metric corresponding to `scene_flow_metric_type`. - - Raises: - NotImplementedError: If the `scene_flow_metric_type` is not implemented. - """ - if scene_flow_metric_type == SceneFlowMetricType.ACCURACY_RELAX: - return compute_accuracy_relax(dts, gts) - elif scene_flow_metric_type == SceneFlowMetricType.ACCURACY_STRICT: - return compute_accuracy_strict(dts, gts) - elif scene_flow_metric_type == SceneFlowMetricType.ANGLE_ERROR: - return compute_angle_error(dts, gts) - elif scene_flow_metric_type == SceneFlowMetricType.EPE: - return compute_end_point_error(dts, gts) - else: - raise NotImplementedError( - f"The scene flow metric type {scene_flow_metric_type} is not implemented!" - ) - - -def compute_segmentation_metrics( - dts: NDArrayBool, gts: NDArrayBool, segmentation_metric_type: SegmentationMetricType -) -> int: - """Compute segmentation metrics. 
- - Args: - dts: (N,) Array containing predicted dynamic segmentation. - gts: (N,) Array containing ground truth dynamic segmentation. - segmentation_metric_type: Segmentation metric type. - - Returns: - Segmentation metric corresponding to `segmentation_metric_type`. - - Raises: - NotImplementedError: If the `segmentation_metric_type` is not implemented. - """ - if segmentation_metric_type == SegmentationMetricType.TP: - return compute_true_positives(dts, gts) - elif segmentation_metric_type == SegmentationMetricType.TN: - return compute_true_negatives(dts, gts) - elif segmentation_metric_type == SegmentationMetricType.FP: - return compute_false_positives(dts, gts) - elif segmentation_metric_type == SegmentationMetricType.FN: - return compute_false_negatives(dts, gts) - else: - raise NotImplementedError( - f"The segmentation metric type {segmentation_metric_type} is not implemented!" - ) - -EPS: Final = 1e-6 -def compute_epe(res_dict, indices, eps=1e-8): - epe_sum = 0 - count_sum = 0 - for index in indices: - count = res_dict['Count'][index] - if count != 0: - epe_sum += res_dict['EPE'][index] * count - count_sum += count - return epe_sum / (count_sum + eps) if count_sum != 0 else 0.0 - -def compute_metrics( - pred_flow: NDArrayFloat, - pred_dynamic: NDArrayBool, - gts: NDArrayFloat, - category_indices: NDArrayInt, - is_dynamic: NDArrayBool, - is_close: NDArrayBool, - is_valid: NDArrayBool, - # metric_categories: Dict[MetricBreakdownCategories, List[int]], -) -> Dict[str, List[Any]]: - """Compute all the metrics for a given example and package them into a list to be put into a DataFrame. - - Args: - pred_flow: (N,3) Predicted flow vectors. - pred_dynamic: (N,) Predicted dynamic labels. - gts: (N,3) Ground truth flow vectors. - category_indices: (N,) Integer class labels for each point. - is_dynamic: (N,) Ground truth dynamic labels. - is_close: (N,) True for a point if it is within a 70m x 70m box around the AV. 
- is_valid: (N,) True for a point if its flow vector was successfully computed. - metric_categories: A dictionary mapping segmentation labels to groups of category indices. - - Returns: - A dictionary of columns to create a long-form DataFrame of the results from. - One row for each subset in the breakdown. - """ - metric_categories = FOREGROUND_BACKGROUND_BREAKDOWN - pred_flow = pred_flow[is_valid].astype(np.float64) - pred_dynamic = pred_dynamic[is_valid].astype(bool) - gts = gts[is_valid].astype(np.float64) - category_indices = category_indices[is_valid].astype(int) - is_dynamic = is_dynamic[is_valid].astype(bool) - is_close = is_close[is_valid].astype(bool) - - results: DefaultDict[str, List[Any]] = defaultdict(list) - - # Each metric is broken down by point labels on Object Class, Motion, and Distance from the AV. - # We iterate over all combinations of those three categories and compute average metrics on each subset. - for cls, category_idxs in metric_categories.items(): - # Compute the union of all masks within the meta-category. - category_mask = category_indices == category_idxs[0] - for i in category_idxs[1:]: - category_mask = np.logical_or(category_mask, (category_indices == i)) - - for motion, m_mask in [("Dynamic", is_dynamic), ("Static", ~is_dynamic)]: - for distance, d_mask in [("Close", is_close), ("Far", ~is_close)]: - mask = category_mask & m_mask & d_mask - subset_size = mask.sum().item() - gts_sub = gts[mask] - pred_sub = pred_flow[mask] - results["Class"] += [cls.value] - results["Motion"] += [motion] - results["Distance"] += [distance] - results["Count"] += [subset_size] - - # Check if there are any points in this subset and if so compute all the average metrics. 
- if subset_size > 0: - for flow_metric_type in SceneFlowMetricType: - results[flow_metric_type] += [ - compute_scene_flow_metrics( - pred_sub, gts_sub, flow_metric_type - ).mean() - ] - for seg_metric_type in SegmentationMetricType: - results[seg_metric_type] += [ - compute_segmentation_metrics( - pred_dynamic[mask], is_dynamic[mask], seg_metric_type - ) - ] - else: - for flow_metric_type in SceneFlowMetricType: - results[flow_metric_type] += [np.nan] - for seg_metric_type in SegmentationMetricType: - results[seg_metric_type] += [0.0] - - # reference: eval.py L503: https://github.com/argoverse/av2-api/blob/main/src/av2/evaluation/scene_flow/eval.py - # we need Dynamic IoU and EPE 3-Way Average to calculate loss! - # weighted: (x[metric_type.value] * x.Count).sum() / total - # 'Class': ['Background', 'Background', 'Background', 'Background', 'Foreground', 'Foreground', 'Foreground', 'Foreground'] - # 'Motion': ['Dynamic', 'Dynamic', 'Static', 'Static', 'Dynamic', 'Dynamic', 'Static', 'Static'] - # 'Distance': ['Close', 'Far', 'Close', 'Far', 'Close', 'Far', 'Close', 'Far'] - - EPE_Background_Static = compute_epe(results, [2, 3]) - EPE_Dynamic = compute_epe(results, [4, 5]) - EPE_Foreground_Static = compute_epe(results, [6, 7]) - Dynamic_IoU = sum(results['TP']) / (sum(results['TP']) + sum(results['FP']) + sum(results['FN'])+EPS) - - return { - 'EPE_BS': EPE_Background_Static, - 'EPE_FD': EPE_Dynamic, - 'EPE_FS': EPE_Foreground_Static, - 'IoU': Dynamic_IoU - } - - -def evaluate_predictions( - annotations_dir: Path, get_prediction: Callable[[Path], pd.DataFrame] -) -> pd.DataFrame: - """Run the evaluation on predictions and labels. - - Args: - annotations_dir: Path to the directory containing the annotation files produced by `make_annotation_files.py`. - get_prediction: Function that retrieves a predictions DataFrame for a given relative - annotation filepath, or None if no prediction exists. 
- - Returns: - DataFrame containing the average metrics on each subset of each example. - """ - results: DefaultDict[str, List[Any]] = defaultdict(list) - annotation_files = sorted(annotations_dir.rglob("*.feather")) - for anno_file in track(annotation_files, description="Evaluating..."): - gts = pd.read_feather(anno_file) - name: Path = anno_file.relative_to(annotations_dir) - pred = get_prediction(name) - if pred is None: - continue - current_example_results = compute_metrics( - pred[list(FLOW_COLUMNS)].to_numpy().astype(float), - pred["is_dynamic"].to_numpy().astype(bool), - gts[list(FLOW_COLUMNS)].to_numpy().astype(float), - gts["category_indices"].to_numpy().astype(np.uint8), - gts["is_dynamic"].to_numpy().astype(bool), - gts["is_close"].to_numpy().astype(bool), - gts["is_valid"].to_numpy().astype(bool), - FOREGROUND_BACKGROUND_BREAKDOWN, - ) - num_subsets = len(list(current_example_results.values())[0]) - results["Example"] += [str(name) for _ in range(num_subsets)] - for m in current_example_results: - results[m] += current_example_results[m] - df = pd.DataFrame( - results, - columns=["Example", "Class", "Motion", "Distance", "Count"] - + list(SceneFlowMetricType) - + list(SegmentationMetricType), - ) - return df - - -def get_prediction_from_directory( - annotation_name: Path, predictions_dir: Path -) -> Optional[pd.DataFrame]: - """Get the prediction corresponding annotation from a directory of prediction files. - - Args: - annotation_name: Relative path to the annotation file. - predictions_dir: Path to the predicition files in submission_format. - - Returns: - DataFrame containing the predictions for that annotation file or None if it does not exist. 
- """ - pred_file = predictions_dir / annotation_name - if not pred_file.exists(): - return None - pred = pd.read_feather(pred_file) - return pred - - -def get_prediction_from_zipfile( - annotation_name: Path, predictions_zip: Path -) -> Optional[pd.DataFrame]: - """Get the prediction corresponding annotation from a zip archive of prediction files. - - Args: - annotation_name: Relative path to the annotation file. - predictions_zip: Path to the prediction files in a zip archive. - - Returns: - DataFrame containing the predictions for that annotation file or None if it does not exist. - """ - with ZipFile(predictions_zip, "r") as zf: - name = annotation_name.as_posix() - path = zipfile.Path(zf, name) - if path.exists(): - return pd.read_feather(zf.open(name)) - else: - return None - -def evaluate_directories(annotations_dir: Path, predictions_dir: Path) -> pd.DataFrame: - """Run the evaluation on predictions and labels saved to disk. - - Args: - annotations_dir: Path to the directory containing the annotation files produced by `make_annotation_files.py`. - predictions_dir: Path to the prediction files in submission format. - - Returns: - DataFrame containing the average metrics on each subset of each example. - """ - return evaluate_predictions( - annotations_dir, lambda n: get_prediction_from_directory(n, predictions_dir) - ) - -def evaluate_zip(annotations_dir: Path, predictions_zip: Path) -> pd.DataFrame: - """Run the evaluation on predictions and labels saved to disk. - - Args: - annotations_dir: Path to the directory containing the annotation files produced by `make_annotation_files.py`. - predictions_zip: Path to the prediction files in a zip archive. - - Returns: - DataFrame containing the average metrics on each subset of each example. 
- """ - return evaluate_predictions( - annotations_dir, lambda n: get_prediction_from_zipfile(n, predictions_zip) - ) - -def results_to_dict(frame: pd.DataFrame) -> Dict[str, float]: - """Convert a results DataFrame to a dictionary of whole dataset metrics. - - Args: - frame: DataFrame returned by evaluate_directories. - - Returns: - Dictionary string keys "" mapped to average metrics on that subset. - """ - output = {} - grouped = frame.groupby(["Class", "Motion", "Distance"]) - - def weighted_average( - x: pd.DataFrame, metric_type: Union[SceneFlowMetricType, SegmentationMetricType] - ) -> float: - """Weighted average of metric m using the Count column. - - Args: - x: Input data-frame. - metric_type: Metric type. - - Returns: - Weighted average over the metric_type; - """ - total = cast(int, x["Count"].sum()) - if total == 0: - return np.nan - averages: float = (x[metric_type.value] * x.Count).sum() / total - return averages - - for metric_type in SceneFlowMetricType: - avg: pd.Series[float] = grouped.apply( - lambda x, m=metric_type: weighted_average(x, metric_type=m) - ) - segments: List[Tuple[str, str, str]] = avg.index.to_list() - for segment in segments: - if segment[:2] == NO_FMT_INDICES: - continue - - metric_type_str = ( - metric_type.title().replace("_", " ") - if metric_type != SceneFlowMetricType.EPE - else metric_type - ) - name = metric_type_str + "/" + "/".join([str(i) for i in segment]) - output[name] = avg.loc[segment] - - grouped = frame.groupby(["Class", "Motion"]) - for metric_type in SceneFlowMetricType: - avg_nodist: pd.Series[float] = grouped.apply( - lambda x, m=metric_type: weighted_average(x, metric_type=m) - ) - segments_nodist: List[Tuple[str, str, str]] = avg_nodist.index.to_list() - for segment in segments_nodist: - if segment[:2] == NO_FMT_INDICES: - continue - - metric_type_str = ( - metric_type.title().replace("_", " ") - if metric_type != SceneFlowMetricType.EPE - else metric_type - ) - name = metric_type_str + "/" + 
"/".join([str(i) for i in segment]) - output[name] = avg_nodist.loc[segment] - output["Dynamic IoU"] = frame.TP.sum() / ( - frame.TP.sum() + frame.FP.sum() + frame.FN.sum() - ) - output["EPE 3-Way Average"] = ( - output["EPE/Foreground/Dynamic"] - + output["EPE/Foreground/Static"] - + output["EPE/Background/Static"] - ) / 3 - return output - -def evaluate(annotations_dir: str, predictions_dir: str) -> Dict[str, float]: - """Evaluate a set of predictions and print the results. - - Args: - annotations_dir: Path to the directory containing the annotation files produced by `make_annotation_files.py`. - predictions_dir: Path to the prediction files in submission format. - - Returns: - The results as a dict of metric names and values. - """ - results_df = evaluate_directories(Path(annotations_dir), Path(predictions_dir)) - results_dict = results_to_dict(results_df) - - for metric in sorted(results_dict): - print(f"{metric}: {results_dict[metric]:.3f}") - - return results_dict - -def write_output_file( - flow: NDArrayFloat, - is_dynamic: NDArrayBool, - sweep_uuid: Tuple[str, int], - output_dir: Path, - leaderboard_version: int = 1, -) -> None: - """Write an output predictions file in the correct format for submission. - - Args: - flow: (N,3) Flow predictions. - is_dynamic: (N,) Dynamic segmentation prediction. - sweep_uuid: Identifier of the sweep being predicted (log_id, timestamp_ns). - output_dir: Top level directory containing all predictions. - leaderboard_version: Version of the leaderboard format to use. 
- version 1 for: https://eval.ai/web/challenges/challenge-page/2010/evaluation - version 2 for: https://eval.ai/web/challenges/challenge-page/2210/evaluation - """ - output_log_dir = output_dir / sweep_uuid[0] - output_log_dir.mkdir(exist_ok=True, parents=True) - fx_m = flow[:, 0].astype(np.float16) - fy_m = flow[:, 1].astype(np.float16) - fz_m = flow[:, 2].astype(np.float16) - if leaderboard_version == 1: - - output = pd.DataFrame( - { - "flow_tx_m": fx_m, - "flow_ty_m": fy_m, - "flow_tz_m": fz_m, - "is_dynamic": is_dynamic.astype(bool), - } - ) - output.to_feather(output_log_dir / f"{sweep_uuid[1]}.feather") - elif leaderboard_version == 2: - output = pd.DataFrame( - { - "is_valid": np.ones_like(fx_m, dtype=bool), - "flow_tx_m": fx_m, - "flow_ty_m": fy_m, - "flow_tz_m": fz_m, - } - ) - output.to_feather(output_log_dir / f"{sweep_uuid[1]}.feather") - -from zipfile import ZipFile -from torch import BoolTensor -import torch -def get_eval_point_mask(sweep_uuid: Tuple[str, int], mask_file: Path) -> BoolTensor: - """Retrieve for a given sweep, a boolean mask indicating which points are evaluated on. - - Args: - sweep_uuid: The uuid of the first sweep in the pair to retrieve the mask for. - mask_file: Archive of submission masks. - - Returns: - The submission mask for that pair. 
- """ - with ZipFile(mask_file) as masks: - log_id, timestamp_ns = sweep_uuid - mask = ( - pd.read_feather(masks.open(f"{log_id}/{timestamp_ns}.feather")) - .to_numpy() - .astype(bool) - ) - - return BoolTensor(torch.from_numpy(mask).squeeze()) - - -# python >= 3.7 -from dataclasses import dataclass -@dataclass(frozen=True, eq=True, repr=True) -class BaseSplitValue: - name: str - avg_epe: float - avg_speed: float - speed_thresholds: Tuple[float, float] - count: int - def __eq__(self, __value: object) -> bool: - return hash(self) == hash(__value) - -def compute_bucketed_epe( - pred_flow: NDArrayFloat, - gt_flow: NDArrayFloat, - category_indices: NDArrayInt, - is_valid: NDArrayBool, -): - storage_error_matrix = [] - # bucket_max_speed, num_buckets, distance_thresholds set is from: eval/bucketed_epe.py#L226 - bucket_edges = np.concatenate([np.linspace(0, 2.0, 51), [np.inf]]) - speed_thresholds = list(zip(bucket_edges, bucket_edges[1:])) - - gt_speeds = np.linalg.norm(gt_flow, axis=-1) - error_flow = np.linalg.norm(pred_flow - gt_flow, axis=-1) - # based on each category, compute the epe - for cats_name in BUCKETED_METACATAGORIES: - selected_classes_ids = [CATEGORY_TO_INDEX[cat] for cat in BUCKETED_METACATAGORIES[cats_name]] - cat_mask = np.isin(category_indices, np.array(selected_classes_ids)) - # since background don't have speed, we just compute the average epe - if cats_name == "BACKGROUND": - mask = cat_mask & is_valid - storage_error_matrix.append(BaseSplitValue(cats_name, error_flow[mask].mean(), gt_speeds[mask].mean(), (0.0, 0.04), mask.sum())) - continue - for min_speed_threshold, max_speed_threshold in speed_thresholds: - speed_mask = (gt_speeds >= min_speed_threshold) & (gt_speeds < max_speed_threshold) - mask = cat_mask & speed_mask & is_valid - count_pts = mask.sum() - if count_pts == 0: - continue - avg_epe = error_flow[mask].mean() - avg_speed = gt_speeds[mask].mean() - storage_error_matrix.append(BaseSplitValue(cats_name, avg_epe, avg_speed, 
(min_speed_threshold, max_speed_threshold), count_pts)) - - return storage_error_matrix diff --git a/src/utils/eval_metric.py b/src/utils/eval_metric.py deleted file mode 100644 index 246794b..0000000 --- a/src/utils/eval_metric.py +++ /dev/null @@ -1,272 +0,0 @@ -""" -# -# Created: 2024-04-14 11:57 -# Copyright (C) 2024-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# -# Reference to official evaluation scripts: -# - EPE Threeway: https://github.com/argoverse/av2-api/blob/main/src/av2/evaluation/scene_flow/eval.py -# - Bucketed EPE: https://github.com/kylevedder/BucketedSceneFlowEval/blob/master/bucketed_scene_flow_eval/eval/bucketed_epe.py -""" - -import torch -import os, sys -import numpy as np -from typing import Dict, Final, List, Tuple -from tabulate import tabulate - -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '../..' )) -sys.path.append(BASE_DIR) -from src.utils.av2_eval import compute_metrics, compute_bucketed_epe, CLOSE_DISTANCE_THRESHOLD - - -# EPE Three-way: Foreground Dynamic, Background Dynamic, Background Static -# leaderboard link: https://eval.ai/web/challenges/challenge-page/2010/evaluation -def evaluate_leaderboard(est_flow, rigid_flow, pc0, gt_flow, is_valid, pts_ids): - gt_is_dynamic = torch.linalg.vector_norm(gt_flow - rigid_flow, dim=-1) >= 0.05 - mask_ = ~est_flow.isnan().any(dim=1) & ~rigid_flow.isnan().any(dim=1) & ~pc0[:, :3].isnan().any(dim=1) & ~gt_flow.isnan().any(dim=1) - mask_no_nan = mask_ & ~gt_is_dynamic.isnan() & ~is_valid.isnan() & ~pts_ids.isnan() - est_flow = est_flow[mask_no_nan, :] - rigid_flow = rigid_flow[mask_no_nan, :] - pc0 = pc0[mask_no_nan, :] - gt_flow = gt_flow[mask_no_nan, :] - gt_is_dynamic = gt_is_dynamic[mask_no_nan] - is_valid = is_valid[mask_no_nan] - pts_ids = pts_ids[mask_no_nan] - - est_is_dynamic = torch.linalg.vector_norm(est_flow - rigid_flow, dim=-1) >= 0.05 - is_close = torch.all(torch.abs(pc0[:, :2]) <= 
CLOSE_DISTANCE_THRESHOLD, dim=1) - res_dict = compute_metrics( - est_flow.detach().cpu().numpy().astype(float), - est_is_dynamic.detach().cpu().numpy().astype(bool), - gt_flow.detach().cpu().numpy().astype(float), - pts_ids.detach().cpu().numpy().astype(np.uint8), - gt_is_dynamic.detach().cpu().numpy().astype(bool), - is_close.detach().cpu().numpy().astype(bool), - is_valid.detach().cpu().numpy().astype(bool) - ) - return res_dict - -# EPE Bucketed: BACKGROUND, CAR, PEDESTRIAN, WHEELED_VRU, OTHER_VEHICLES -def evaluate_leaderboard_v2(est_flow, rigid_flow, pc0, gt_flow, is_valid, pts_ids): - # in x,y dis, ref to official evaluation: eval/base_per_frame_sceneflow_eval.py#L118-L119 - pc_distance = torch.linalg.vector_norm(pc0[:, :2], dim=-1) - distance_mask = pc_distance <= CLOSE_DISTANCE_THRESHOLD - - mask_flow_non_nan = ~est_flow.isnan().any(dim=1) & ~rigid_flow.isnan().any(dim=1) & ~pc0[:, :3].isnan().any(dim=1) & ~gt_flow.isnan().any(dim=1) - mask_eval = mask_flow_non_nan & ~is_valid.isnan() & ~pts_ids.isnan() & distance_mask - rigid_flow = rigid_flow[mask_eval, :] - est_flow = est_flow[mask_eval, :] - rigid_flow - gt_flow = gt_flow[mask_eval, :] - rigid_flow # in v2 evaluation, we don't add rigid flow to evaluate - is_valid = is_valid[mask_eval] - pts_ids = pts_ids[mask_eval] - - res_dict = compute_bucketed_epe( - est_flow.detach().cpu().numpy().astype(float), - gt_flow.detach().cpu().numpy().astype(float), - pts_ids.detach().cpu().numpy().astype(np.uint8), - is_valid.detach().cpu().numpy().astype(bool), - ) - return res_dict - -# reference to official evaluation: bucketed_scene_flow_eval/eval/bucketed_epe.py -# python >= 3.7 -from dataclasses import dataclass -import warnings -@dataclass(frozen=True, eq=True, repr=True) -class OverallError: - static_epe: float - dynamic_error: float - - def __repr__(self) -> str: - static_epe_val_str = ( - f"{self.static_epe:0.6f}" if np.isfinite(self.static_epe) else f"{self.static_epe}" - ) - dynamic_error_val_str = ( - 
f"{self.dynamic_error:0.6f}" - if np.isfinite(self.dynamic_error) - else f"{self.dynamic_error}" - ) - return f"({static_epe_val_str}, {dynamic_error_val_str})" - - def to_tuple(self) -> Tuple[float, float]: - return (self.static_epe, self.dynamic_error) - -class BucketResultMatrix: - def __init__(self, class_names: List[str], speed_buckets: List[Tuple[float, float]]): - self.class_names = class_names - self.speed_buckets = speed_buckets - - assert ( - len(self.class_names) > 0 - ), f"class_names must have at least one entry, got {len(self.class_names)}" - assert ( - len(self.speed_buckets) > 0 - ), f"speed_buckets must have at least one entry, got {len(self.speed_buckets)}" - - # By default, NaNs are not counted in np.nanmean - self.epe_storage_matrix = np.zeros((len(class_names), len(self.speed_buckets))) * np.NaN - self.speed_storage_matrix = np.zeros((len(class_names), len(self.speed_buckets))) * np.NaN - self.count_storage_matrix = np.zeros( - (len(class_names), len(self.speed_buckets)), dtype=np.int64 - ) - - def accumulate_value( - self, - class_name: str, - speed_bucket: Tuple[float, float], - average_epe: float, - average_speed: float, - count: int, - ): - assert count > 0, f"count must be greater than 0, got {count}" - assert np.isfinite(average_epe), f"average_epe must be finite, got {average_epe}" - assert np.isfinite(average_speed), f"average_speed must be finite, got {average_speed}" - - class_idx = self.class_names.index(class_name) - speed_bucket_idx = self.speed_buckets.index(speed_bucket) - - prior_epe = self.epe_storage_matrix[class_idx, speed_bucket_idx] - prior_speed = self.speed_storage_matrix[class_idx, speed_bucket_idx] - prior_count = self.count_storage_matrix[class_idx, speed_bucket_idx] - - if np.isnan(prior_epe): - self.epe_storage_matrix[class_idx, speed_bucket_idx] = average_epe - self.speed_storage_matrix[class_idx, speed_bucket_idx] = average_speed - self.count_storage_matrix[class_idx, speed_bucket_idx] = count - return - - # 
Accumulate the average EPE and speed, weighted by the number of samples using np.mean - self.epe_storage_matrix[class_idx, speed_bucket_idx] = np.average( - [prior_epe, average_epe], weights=[prior_count, count] - ) - self.speed_storage_matrix[class_idx, speed_bucket_idx] = np.average( - [prior_speed, average_speed], weights=[prior_count, count] - ) - self.count_storage_matrix[class_idx, speed_bucket_idx] += count - - def get_normalized_error_matrix(self) -> np.ndarray: - error_matrix = self.epe_storage_matrix.copy() - # For the 1: columns, normalize EPE entries by the speed - error_matrix[:, 1:] = error_matrix[:, 1:] / self.speed_storage_matrix[:, 1:] - return error_matrix - - def get_overall_class_errors(self, normalized: bool = True): - # -> dict[str, OverallError] - if normalized: - error_matrix = self.get_normalized_error_matrix() - else: - error_matrix = self.epe_storage_matrix.copy() - static_epes = error_matrix[:, 0] - # Hide the warning about mean of empty slice - # I expect to see RuntimeWarnings in this block - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=RuntimeWarning) - dynamic_errors = np.nanmean(error_matrix[:, 1:], axis=1) - - return { - class_name: OverallError(static_epe, dynamic_error) - for class_name, static_epe, dynamic_error in zip( - self.class_names, static_epes, dynamic_errors - ) - } - - def get_class_entries(self, class_name: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - class_idx = self.class_names.index(class_name) - - epe = self.epe_storage_matrix[class_idx, :] - speed = self.speed_storage_matrix[class_idx, :] - count = self.count_storage_matrix[class_idx, :] - return epe, speed, count - - def get_mean_average_values(self, normalized: bool = True) -> OverallError: - overall_errors = self.get_overall_class_errors(normalized=normalized) - - average_static_epe = np.nanmean([v.static_epe for v in overall_errors.values()]) - average_dynamic_error = np.nanmean([v.dynamic_error for v in 
overall_errors.values()]) - - return OverallError(average_static_epe, average_dynamic_error) - -class OfficialMetrics: - def __init__(self): - # same with BUCKETED_METACATAGORIES - self.bucketed= { - 'BACKGROUND': {'Static': [], 'Dynamic': []}, - 'CAR': {'Static': [], 'Dynamic': []}, - 'OTHER_VEHICLES': {'Static': [], 'Dynamic': []}, - 'PEDESTRIAN': {'Static': [], 'Dynamic': []}, - 'WHEELED_VRU': {'Static': [], 'Dynamic': []}, - 'Mean': {'Static': [], 'Dynamic': []} - } - - self.epe_3way = { - 'EPE_FD': [], - 'EPE_BS': [], - 'EPE_FS': [], - 'IoU': [], - 'Three-way': [] - } - - self.norm_flag = False - - - # bucket_max_speed, num_buckets, distance_thresholds set is from: eval/bucketed_epe.py#L226 - bucket_edges = np.concatenate([np.linspace(0, 2.0, 51), [np.inf]]) - speed_thresholds = list(zip(bucket_edges, bucket_edges[1:])) - self.bucketedMatrix = BucketResultMatrix( - class_names=['BACKGROUND', 'CAR', 'OTHER_VEHICLES', 'PEDESTRIAN', 'WHEELED_VRU'], - speed_buckets=speed_thresholds - ) - def step(self, epe_dict, bucket_dict): - """ - This step function is used to store the results of **each frame**. - """ - for key in epe_dict: - self.epe_3way[key].append(epe_dict[key]) - - for item_ in bucket_dict: - category_name = item_.name - speed_tuple = item_.speed_thresholds - self.bucketedMatrix.accumulate_value( - category_name, - speed_tuple, - item_.avg_epe, - item_.avg_speed, - item_.count, - ) - - def normalize(self): - """ - This normalize mean average results between **frame and frame**. 
- """ - for key in self.epe_3way: - self.epe_3way[key] = np.mean(self.epe_3way[key]) - self.epe_3way['Three-way'] = np.mean([self.epe_3way['EPE_FD'], self.epe_3way['EPE_BS'], self.epe_3way['EPE_FS']]) - - mean = self.bucketedMatrix.get_mean_average_values(normalized=True).to_tuple() - class_errors = self.bucketedMatrix.get_overall_class_errors(normalized=True) - for key in self.bucketed: - if key == 'Mean': - self.bucketed[key]['Static'] = mean[0] - self.bucketed[key]['Dynamic'] = mean[1] - continue - for i, sub_key in enumerate(self.bucketed[key]): - self.bucketed[key][sub_key] = class_errors[key].to_tuple()[i] # 0: static, 1: dynamic - self.norm_flag = True - - def print(self): - if not self.norm_flag: - self.normalize() - printed_data = [] - for key in self.epe_3way: - printed_data.append([key,self.epe_3way[key]]) - print("Version 1 Metric on EPE Three-way:") - print(tabulate(printed_data), "\n") - - printed_data = [] - for key in self.bucketed: - printed_data.append([key, self.bucketed[key]['Static'], self.bucketed[key]['Dynamic']]) - print("Version 2 Metric on Category-based:") - print(tabulate(printed_data, headers=["Class", "Static", "Dynamic"], tablefmt='orgtbl'), "\n") - \ No newline at end of file diff --git a/src/utils/mics.py b/src/utils/mics.py deleted file mode 100644 index 4bff972..0000000 --- a/src/utils/mics.py +++ /dev/null @@ -1,344 +0,0 @@ -import numpy as np -from itertools import accumulate -from collections import namedtuple -from typing import Optional - -import torch.nn.init as init -import torch - -import pickle, h5py, os - -# ref: https://github.com/Lightning-AI/lightning/issues/924#issuecomment-1670917204 -def weights_update(model, checkpoint): - print("Loading pretrained weights from epoch:", checkpoint['epoch']+1) - model_dict = model.state_dict() - - pretrained_dict = {} - for k, v in checkpoint['state_dict'].items(): - k = k.replace("model.", "") - if k in model_dict: - pretrained_dict[k] = v - - model_dict.update(pretrained_dict) - 
model.load_state_dict(model_dict) - return model - -class EarlyStopping(object): - def __init__(self, mode='min', min_delta=0, patience=10, percentage=False): - self.mode = mode - self.min_delta = min_delta - self.patience = patience - self.best = None - self.num_bad_epochs = 0 - self.is_better = None - self._init_is_better(mode, min_delta, percentage) - - if patience == 0: - self.is_better = lambda a, b: True - self.step = lambda a: False - - def step(self, metrics): - if self.best is None: - self.best = metrics - return False - - if torch.isnan(metrics): - return True - - if self.is_better(metrics, self.best): - self.num_bad_epochs = 0 - self.best = metrics - else: - self.num_bad_epochs += 1 - - if self.num_bad_epochs >= self.patience: - return True - - return False - - def _init_is_better(self, mode, min_delta, percentage): - if mode not in {'min', 'max'}: - raise ValueError('mode ' + mode + ' is unknown!') - if not percentage: - if mode == 'min': - self.is_better = lambda a, best: a < best - min_delta - if mode == 'max': - self.is_better = lambda a, best: a > best + min_delta - else: - if mode == 'min': - self.is_better = lambda a, best: a < best - ( - best * min_delta / 100) - if mode == 'max': - self.is_better = lambda a, best: a > best + ( - best * min_delta / 100) - -def setup_multi_gpu(if_multi_gpu: bool): - n_gpu = torch.cuda.device_count() - if n_gpu>1 and if_multi_gpu: - torch.distributed.init_process_group(backend='nccl') - else: - if_multi_gpu = False - n_gpu = 1 - assert n_gpu, "Can't find any GPU device on this machine." - - return n_gpu - -def init_weights(m) -> None: - """ - Apply the weight initialization to a single layer. - Use this with your_module.apply(init_weights). - The single layer is a module that has the weights parameter. This does not yield for all modules. 
- :param m: the layer to apply the init to - :return: None - """ - if type(m) in [torch.nn.Linear, torch.nn.Conv2d]: - # Note: There is also xavier_normal_ but the paper does not state which one they used. - torch.nn.init.xavier_uniform_(m.weight) - -def weights_init(m): - if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear): - init.xavier_uniform_(m.weight.data) - if m.bias is not None: - init.constant_(m.bias.data, 0.0) - elif isinstance(m, torch.nn.BatchNorm2d): - init.constant_(m.weight.data, 1.0) - init.constant_(m.bias.data, 0.0) - -def ground_range_filter(av2_sweep, range_lim=50): - pointcloud = av2_sweep.lidar.as_tensor().numpy()[:,:3] - ground_mask = ~av2_sweep.is_ground.numpy() - range_mask = np.where((pointcloud[:,0] > -range_lim) & (pointcloud[:,0] < range_lim) & - (pointcloud[:,1] > -range_lim) & (pointcloud[:,1] < range_lim) & - (pointcloud[:,2] > -range_lim) & (pointcloud[:,2] < range_lim)) - return pointcloud[ground_mask&range_mask] - -def ground_range_mask(av2_sweep, range_lim=50): - pointcloud = av2_sweep.lidar.as_tensor().numpy()[:,:3] - ground_mask = ~av2_sweep.is_ground.numpy() - range_mask = ((pointcloud[:,0] > -range_lim) & (pointcloud[:,0] < range_lim) & - (pointcloud[:,1] > -range_lim) & (pointcloud[:,1] < range_lim) & - (pointcloud[:,2] > -range_lim) & (pointcloud[:,2] < range_lim)) - return ground_mask&range_mask - -def ground_range_tmask(pointcloud, ground, range_lim=50): - pointcloud = pointcloud.numpy()[:,:3] - ground_mask = ~ground.numpy() - range_mask = ((pointcloud[:,0] > -range_lim) & (pointcloud[:,0] < range_lim) & - (pointcloud[:,1] > -range_lim) & (pointcloud[:,1] < range_lim) & - (pointcloud[:,2] > -range_lim) & (pointcloud[:,2] < range_lim)) - return ground_mask&range_mask - -# ====> import func through string, ref: https://stackoverflow.com/a/19393328 -import importlib -def import_func(path: str): - function_string = path - mod_name, func_name = function_string.rsplit('.',1) - mod = 
importlib.import_module(mod_name) - func = getattr(mod, func_name) - return func - - -# plotting from FastNSF: - -# ANCHOR: visualization as in the paper -DEFAULT_TRANSITIONS = (15, 6, 4, 11, 13, 6) - -def make_colorwheel(transitions: tuple=DEFAULT_TRANSITIONS) -> np.ndarray: - """Creates a colorwheel (borrowed/modified from flowpy). - A colorwheel defines the transitions between the six primary hues: - Red(255, 0, 0), Yellow(255, 255, 0), Green(0, 255, 0), Cyan(0, 255, 255), Blue(0, 0, 255) and Magenta(255, 0, 255). - Args: - transitions: Contains the length of the six transitions, based on human color perception. - Returns: - colorwheel: The RGB values of the transitions in the color space. - Notes: - For more information, see: - https://web.archive.org/web/20051107102013/http://members.shaw.ca/quadibloc/other/colint.htm - http://vision.middlebury.edu/flow/flowEval-iccv07.pdf - """ - colorwheel_length = sum(transitions) - # The red hue is repeated to make the colorwheel cyclic - base_hues = map( - np.array, ([255, 0, 0], [255, 255, 0], [0, 255, 0], [0, 255, 255], [0, 0, 255], [255, 0, 255], [255, 0, 0]) - ) - colorwheel = np.zeros((colorwheel_length, 3), dtype="uint8") - hue_from = next(base_hues) - start_index = 0 - for hue_to, end_index in zip(base_hues, accumulate(transitions)): - transition_length = end_index - start_index - colorwheel[start_index:end_index] = np.linspace(hue_from, hue_to, transition_length, endpoint=False) - hue_from = hue_to - start_index = end_index - return colorwheel - - -def flow_to_rgb( - flow: np.ndarray, - flow_max_radius: Optional[float]=None, - background: Optional[str]="bright", -) -> np.ndarray: - """Creates a RGB representation of an optical flow (borrowed/modified from flowpy). - Args: - flow: scene flow. 
- flow[..., 0] should be the x-displacement - flow[..., 1] should be the y-displacement - flow[..., 2] should be the z-displacement - flow_max_radius: Set the radius that gives the maximum color intensity, useful for comparing different flows. - Default: The normalization is based on the input flow maximum radius. - background: States if zero-valued flow should look 'bright' or 'dark'. - Returns: An array of RGB colors. - """ - valid_backgrounds = ("bright", "dark") - if background not in valid_backgrounds: - raise ValueError(f"background should be one the following: {valid_backgrounds}, not {background}.") - wheel = make_colorwheel() - # For scene flow, it's reasonable to assume displacements in x and y directions only for visualization pursposes. - complex_flow = flow[..., 0] + 1j * flow[..., 1] - radius, angle = np.abs(complex_flow), np.angle(complex_flow) - if flow_max_radius is None: - flow_max_radius = np.max(radius) - if flow_max_radius > 0: - radius /= flow_max_radius - ncols = len(wheel) - # Map the angles from (-pi, pi] to [0, 2pi) to [0, ncols - 1) - angle[angle < 0] += 2 * np.pi - angle = angle * ((ncols - 1) / (2 * np.pi)) - # Make the wheel cyclic for interpolation - wheel = np.vstack((wheel, wheel[0])) - # Interpolate the hues - (angle_fractional, angle_floor), angle_ceil = np.modf(angle), np.ceil(angle) - angle_fractional = angle_fractional.reshape((angle_fractional.shape) + (1,)) - float_hue = ( - wheel[angle_floor.astype(np.int32)] * (1 - angle_fractional) + wheel[angle_ceil.astype(np.int32)] * angle_fractional - ) - ColorizationArgs = namedtuple( - 'ColorizationArgs', ['move_hue_valid_radius', 'move_hue_oversized_radius', 'invalid_color'] - ) - def move_hue_on_V_axis(hues, factors): - return hues * np.expand_dims(factors, -1) - def move_hue_on_S_axis(hues, factors): - return 255. - np.expand_dims(factors, -1) * (255. 
- hues) - if background == "dark": - parameters = ColorizationArgs( - move_hue_on_V_axis, move_hue_on_S_axis, np.array([255, 255, 255], dtype=np.float64) - ) - else: - parameters = ColorizationArgs(move_hue_on_S_axis, move_hue_on_V_axis, np.array([0, 0, 0], dtype=np.float64)) - colors = parameters.move_hue_valid_radius(float_hue, radius) - oversized_radius_mask = radius > 1 - colors[oversized_radius_mask] = parameters.move_hue_oversized_radius( - float_hue[oversized_radius_mask], - 1 / radius[oversized_radius_mask] - ) - return colors.astype(np.uint8) - - -class HDF5Data: - def __init__(self, directory, flow_view=False, vis_name="flow"): - ''' - directory: the directory of the dataset - t_x: how many past frames we want to extract - ''' - self.flow_view = flow_view - self.vis_name = vis_name - self.directory = directory - with open(os.path.join(self.directory, 'index_total.pkl'), 'rb') as f: - self.data_index = pickle.load(f) - - self.scene_id_bounds = {} # 存储每个scene_id的最大最小timestamp和位置 - for idx, (scene_id, timestamp) in enumerate(self.data_index): - if scene_id not in self.scene_id_bounds: - self.scene_id_bounds[scene_id] = { - "min_timestamp": timestamp, - "max_timestamp": timestamp, - "min_index": idx, - "max_index": idx - } - else: - bounds = self.scene_id_bounds[scene_id] - # 更新最小timestamp和位置 - if timestamp < bounds["min_timestamp"]: - bounds["min_timestamp"] = timestamp - bounds["min_index"] = idx - # 更新最大timestamp和位置 - if timestamp > bounds["max_timestamp"]: - bounds["max_timestamp"] = timestamp - bounds["max_index"] = idx - - def __len__(self): - return len(self.data_index) - - def __getitem__(self, index): - scene_id, timestamp = self.data_index[index] - # to make sure we have continuous frames for flow view - if self.flow_view and self.scene_id_bounds[scene_id]["max_index"] == index: - index = index - 1 - scene_id, timestamp = self.data_index[index] - - key = str(timestamp) - data_dict = { - 'scene_id': scene_id, - 'timestamp': timestamp, - } - with 
h5py.File(os.path.join(self.directory, f'{scene_id}.h5'), 'r') as f: - # original data - data_dict['pc0'] = f[key]['lidar'][:] - data_dict['gm0'] = f[key]['ground_mask'][:] - data_dict['pose0'] = f[key]['pose'][:] - for flow_key in [self.vis_name, 'dufo_label', 'label']: - if flow_key in f[key]: - data_dict[flow_key] = f[key][flow_key][:] - - if self.flow_view: - next_timestamp = str(self.data_index[index+1][1]) - data_dict['pose1'] = f[next_timestamp]['pose'][:] - data_dict['pc1'] = f[next_timestamp]['lidar'][:] - data_dict['gm1'] = f[next_timestamp]['ground_mask'][:] - elif self.flow_view: - print(f"[Warning]: No {self.vis_name} in {scene_id} at {timestamp}, check the data.") - return data_dict - -from av2.geometry.se3 import SE3 -from scipy.spatial.transform import Rotation as R -def transform_to_array(pose): - pose_se3 = SE3(rotation=pose[:3,:3], translation=pose[:3,3]) - qxyzw = R.from_matrix(pose_se3.rotation).as_quat() - pose_array = [pose_se3.translation[0], pose_se3.translation[1], pose_se3.translation[2], \ - qxyzw[3], qxyzw[0], qxyzw[1], qxyzw[2]] - return pose_array - -from zipfile import ZipFile -import time, json -def zip_res(res_folder, output_file="av2_submit.zip", leaderboard_version=2, is_supervised=False): - """ - res_folder: the folder of the output results - """ - all_scenes = os.listdir(res_folder) - start_time = time.time() - - if leaderboard_version == 1: - with ZipFile(output_file, "w") as myzip: - for scene in all_scenes: - scene_folder = os.path.join(res_folder, scene) - all_logs = os.listdir(scene_folder) - for log in all_logs: - if not log.endswith(".feather"): - continue - file_path = os.path.join(scene, log) - myzip.write(os.path.join(res_folder, file_path), arcname=file_path) - else: - output_file = output_file.replace(".zip", f"_v{leaderboard_version}.zip") if output_file=="av2_submit.zip" else output_file - metadata = {"Is Supervised?": is_supervised} - with ZipFile(output_file, "w") as myzip: - myzip.writestr("metadata.json", 
json.dumps(metadata, indent=4)) - for scene in all_scenes: - scene_folder = os.path.join(res_folder, scene) - relative_idx = 0 - for log in sorted(os.listdir(scene_folder)): - if not log.endswith(".feather"): - continue - file_path = os.path.join(scene, log) - myzip.write(os.path.join(res_folder, file_path), arcname=os.path.join(scene, f"{relative_idx:010d}.feather")) - relative_idx += 5 - print(f"Time cost: {time.time()-start_time:.2f}s, check the zip file: {output_file}") - return output_file \ No newline at end of file diff --git a/src/utils/o3d_view.py b/src/utils/o3d_view.py deleted file mode 100644 index 39ca32a..0000000 --- a/src/utils/o3d_view.py +++ /dev/null @@ -1,145 +0,0 @@ -''' -# @date: 2023-1-26 16:38 -# @author: Qingwen Zhang (https://kin-zhang.github.io/) -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# @detail: -# 1. Play the data you want in open3d, and save the view control to json file. -# 2. Use the json file to view the data again. -# 3. Save the screen shot and view file for later check and animation. -# -# code gits: https://gist.github.com/Kin-Zhang/77e8aa77a998f1a4f7495357843f24ef -# -# CHANGELOG: -# 2024-08-23 21:41(Qingwen): remove totally on view setting from scratch but use open3d>=0.18.0 version for set_view from json text func. -# 2024-04-15 12:06(Qingwen): show a example json text. 
add hex_to_rgb, color_map_hex, color_map (for color points if needed) -# 2024-01-27 0:41(Qingwen): update MyVisualizer class, reference from kiss-icp: https://github.com/PRBonn/kiss-icp/blob/main/python/kiss_icp/tools/visualizer.py -''' - -import open3d as o3d -import os, time -from typing import List, Callable -from functools import partial - -def hex_to_rgb(hex_color): - hex_color = hex_color.lstrip("#") - return tuple(int(hex_color[i:i + 2], 16) / 255.0 for i in (0, 2, 4)) - -color_map_hex = ['#a6cee3', '#de2d26', '#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00',\ - '#cab2d6','#6a3d9a','#ffff99','#b15928', '#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3',\ - '#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f'] - -color_map = [hex_to_rgb(color) for color in color_map_hex] - - -class MyVisualizer: - def __init__(self, view_file=None, window_title="Default", save_folder="logs/imgs"): - self.params = None - self.vis = o3d.visualization.VisualizerWithKeyCallback() - self.vis.create_window(window_name=window_title) - self.view_file = view_file - - self.block_vis = True - self.play_crun = False - self.reset_bounding_box = True - self.save_img_folder = save_folder - os.makedirs(self.save_img_folder, exist_ok=True) - print( - f"\n{window_title.capitalize()} initialized. 
Press:\n" - "\t[SPACE] to pause/start\n" - "\t[ESC/Q] to exit\n" - "\t [P] to save screen and viewpoint\n" - "\t [N] to step\n" - ) - self._register_key_callback(["Ā", "Q", "\x1b"], self._quit) - self._register_key_callback(["P"], self._save_screen) - self._register_key_callback([" "], self._start_stop) - self._register_key_callback(["N"], self._next_frame) - - def show(self, assets: List): - self.vis.clear_geometries() - - for asset in assets: - self.vis.add_geometry(asset) - if self.view_file is not None: - self.vis.set_view_status(open(self.view_file).read()) - - self.vis.update_renderer() - self.vis.poll_events() - self.vis.run() - self.vis.destroy_window() - - def update(self, assets: List, clear: bool = True): - if clear: - self.vis.clear_geometries() - - for asset in assets: - self.vis.add_geometry(asset, reset_bounding_box=False) - self.vis.update_geometry(asset) - - if self.reset_bounding_box: - self.vis.reset_view_point(True) - if self.view_file is not None: - self.vis.set_view_status(open(self.view_file).read()) - self.reset_bounding_box = False - - self.vis.update_renderer() - while self.block_vis: - self.vis.poll_events() - if self.play_crun: - break - self.block_vis = not self.block_vis - - def _register_key_callback(self, keys: List, callback: Callable): - for key in keys: - self.vis.register_key_callback(ord(str(key)), partial(callback)) - - def _next_frame(self, vis): - self.block_vis = not self.block_vis - - def _start_stop(self, vis): - self.play_crun = not self.play_crun - - def _quit(self, vis): - print("Destroying Visualizer. 
Thanks for using ^v^.") - vis.destroy_window() - os._exit(0) - - def _save_screen(self, vis): - timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) - png_file = f"{self.save_img_folder}/ScreenShot_{timestamp}.png" - view_json_file = f"{self.save_img_folder}/ScreenView_{timestamp}.json" - with open(view_json_file, 'w') as f: - f.write(vis.get_view_status()) - vis.capture_screen_image(png_file) - print(f"ScreenShot saved to: {png_file}, Please check it.") - -if __name__ == "__main__": - json_content = """{ - "class_name" : "ViewTrajectory", - "interval" : 29, - "is_loop" : false, - "trajectory" : - [ - { - "boundingbox_max" : [ 3.9660897254943848, 2.427476167678833, 2.55859375 ], - "boundingbox_min" : [ 0.55859375, 0.83203125, 0.56663715839385986 ], - "field_of_view" : 60.0, - "front" : [ 0.27236083595988803, -0.25567329763523589, -0.92760484038816615 ], - "lookat" : [ 2.4114965637897101, 1.8070288935660688, 1.5662280268112718 ], - "up" : [ -0.072779625398507866, -0.96676294585190281, 0.24509698622097265 ], - "zoom" : 0.47999999999999976 - } - ], - "version_major" : 1, - "version_minor" : 0 -} -""" - # write to json file - view_json_file = "view.json" - with open(view_json_file, 'w') as f: - f.write(json_content) - sample_ply_data = o3d.data.PLYPointCloud() - pcd = o3d.io.read_point_cloud(sample_ply_data.path) - - viz = MyVisualizer(view_json_file, window_title="Qingwen's View") - viz.show([pcd]) \ No newline at end of file diff --git a/tools/README.md b/tools/README.md deleted file mode 100644 index a479138..0000000 --- a/tools/README.md +++ /dev/null @@ -1,36 +0,0 @@ -Tools ---- - -Here we introduce some tools to help you: -- visualize the data and results. -- convert the pretrained model from others. -- ... More to come. - -## Visualization - -run `tools/visualization.py` to view the scene flow dataset with ground truth flow. Note the color wheel in under world coordinate. 
- -```bash -# view gt flow -python3 tools/visualization.py --data_dir /home/kin/data/av2/preprocess/sensor/mini --res_name flow - -# view est flow -python3 tools/visualization.py --data_dir /home/kin/data/av2/preprocess/sensor/mini --res_name deflow_best -python3 tools/visualization.py --data_dir /home/kin/data/av2/preprocess/sensor/mini --res_name seflow_best -``` - -Demo Effect (press `SPACE` to stop and start in the visualization window): - -https://github.com/user-attachments/assets/f031d1a2-2d2f-4947-a01f-834ed1c146e6 - -## Conversion - -run `tools/zero2ours.py` to convert the ZeroFlow pretrained model to our codebase. - -```bash -python tools/zero2ours.py --model_path /home/kin/nsfp_distilatation_3x_49_epochs.ckpt --reference_path /home/kin/fastflow3d.ckpt --output_path /home/kin/zeroflow3x.ckpt -``` - -- model_path, you can download from: [kylevedder/zeroflow_weights](https://github.com/kylevedder/zeroflow_weights/tree/master/argo) -- reference_path, you can download fastflow3d model from: [zendo](https://zenodo.org/records/12632962) -- output_path, the converted model path. You can then run any evaluation script and visualization script with the converted model. \ No newline at end of file diff --git a/tools/visualization.py b/tools/visualization.py deleted file mode 100644 index a97c34a..0000000 --- a/tools/visualization.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -# Created: 2023-11-29 21:22 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow). -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. -# -# Description: view scene flow dataset after preprocess. -""" - -import numpy as np -import fire, time -from tqdm import tqdm - -import open3d as o3d -import os, sys -BASE_DIR = os.path.abspath(os.path.join( os.path.dirname( __file__ ), '..' 
)) -sys.path.append(BASE_DIR) -from src.utils.mics import HDF5Data, flow_to_rgb -from src.utils.o3d_view import MyVisualizer, color_map - - -VIEW_FILE = f"{BASE_DIR}/assets/view/av2.json" - -def check_flow( - data_dir: str ="/home/kin/data/av2/preprocess/sensor/mini", - res_name: str = "flow", # "flow", "flow_est" - start_id: int = 0, - point_size: float = 3.0, -): - dataset = HDF5Data(data_dir, vis_name=res_name, flow_view=True) - o3d_vis = MyVisualizer(view_file=VIEW_FILE, window_title=f"view {'ground truth flow' if res_name == 'flow' else f'{res_name} flow'}, `SPACE` start/stop") - - opt = o3d_vis.vis.get_render_option() - opt.background_color = np.asarray([80/255, 90/255, 110/255]) - opt.point_size = point_size - - for data_id in (pbar := tqdm(range(start_id, len(dataset)))): - data = dataset[data_id] - now_scene_id = data['scene_id'] - pbar.set_description(f"id: {data_id}, scene_id: {now_scene_id}, timestamp: {data['timestamp']}") - gm0 = data['gm0'] - pc0 = data['pc0'][~gm0] - - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(pc0[:, :3]) - pcd.paint_uniform_color([1.0, 0.0, 0.0]) # red: pc0 - - pc1 = data['pc1'] - pcd1 = o3d.geometry.PointCloud() - pcd1.points = o3d.utility.Vector3dVector(pc1[:, :3][~data['gm1']]) - pcd1.paint_uniform_color([0.0, 1.0, 0.0]) # green: pc1 - - pcd2 = o3d.geometry.PointCloud() - # pcd2.points = o3d.utility.Vector3dVector(pc0[:, :3] + pose_flow) # if you want to check pose_flow - pcd2.points = o3d.utility.Vector3dVector(pc0[:, :3] + data[res_name][~gm0]) - pcd2.paint_uniform_color([0.0, 0.0, 1.0]) # blue: pc0 + flow - o3d_vis.update([pcd, pcd1, pcd2, o3d.geometry.TriangleMesh.create_coordinate_frame(size=2)]) - -def vis( - data_dir: str ="/home/kin/data/av2/preprocess/sensor/mini", - res_name: str = "flow", # "flow", "flow_est" - start_id: int = 0, - point_size: float = 2.0, -): - dataset = HDF5Data(data_dir, vis_name=res_name, flow_view=True) - o3d_vis = MyVisualizer(view_file=VIEW_FILE, 
window_title=f"view {'ground truth flow' if res_name == 'flow' else f'{res_name} flow'}, `SPACE` start/stop") - - opt = o3d_vis.vis.get_render_option() - # opt.background_color = np.asarray([216, 216, 216]) / 255.0 - opt.background_color = np.asarray([80/255, 90/255, 110/255]) - # opt.background_color = np.asarray([1, 1, 1]) - opt.point_size = point_size - - for data_id in (pbar := tqdm(range(start_id, len(dataset)))): - data = dataset[data_id] - now_scene_id = data['scene_id'] - pbar.set_description(f"id: {data_id}, scene_id: {now_scene_id}, timestamp: {data['timestamp']}") - - pc0 = data['pc0'] - gm0 = data['gm0'] - pose0 = data['pose0'] - pose1 = data['pose1'] - ego_pose = np.linalg.inv(pose1) @ pose0 - - pose_flow = pc0[:, :3] @ ego_pose[:3, :3].T + ego_pose[:3, 3] - pc0[:, :3] - - pcd = o3d.geometry.PointCloud() - if res_name == 'raw': # no result, only show **raw point cloud** - pcd.points = o3d.utility.Vector3dVector(pc0[:, :3]) - pcd.paint_uniform_color([1.0, 1.0, 1.0]) - elif res_name in ['dufo_label', 'label']: - labels = data[res_name] - pcd_i = o3d.geometry.PointCloud() - for label_i in np.unique(labels): - pcd_i.points = o3d.utility.Vector3dVector(pc0[labels == label_i][:, :3]) - if label_i <= 0: - pcd_i.paint_uniform_color([1.0, 1.0, 1.0]) - else: - pcd_i.paint_uniform_color(color_map[label_i % len(color_map)]) - pcd += pcd_i - elif res_name in data: - pcd.points = o3d.utility.Vector3dVector(pc0[:, :3]) - flow = data[res_name] - pose_flow # ego motion compensation here. 
- flow_color = flow_to_rgb(flow) / 255.0 - is_dynamic = np.linalg.norm(flow, axis=1) > 0.1 - flow_color[~is_dynamic] = [1, 1, 1] - flow_color[gm0] = [1, 1, 1] - pcd.colors = o3d.utility.Vector3dVector(flow_color) - o3d_vis.update([pcd, o3d.geometry.TriangleMesh.create_coordinate_frame(size=2)]) - -if __name__ == '__main__': - start_time = time.time() - # fire.Fire(check_flow) - fire.Fire(vis) - print(f"Time used: {time.time() - start_time:.2f} s") \ No newline at end of file diff --git a/tools/write4conf.py b/tools/write4conf.py deleted file mode 100644 index 60cc560..0000000 --- a/tools/write4conf.py +++ /dev/null @@ -1,23 +0,0 @@ -''' -# Usage for old model file to new version code -''' -import torch -import fire, time - -def main( - # download from: https://zenodo.org/records/12632962 - model_path: str = "/home/kin/model_zoo/v2/seflow_best.ckpt", - # new output weight file - output_path: str = "/home/kin/model_zoo/v3/seflow_best.ckpt", -): - model = torch.load(model_path) - model_name = model['hyper_parameters']['cfg']['model']['name'] - old_path = model['hyper_parameters']['cfg']['model']['target']['_target_'] - new_path = old_path.replace(f"scripts.network.models.{model_name}", "src.models") - model['hyper_parameters']['cfg']['model']['target']['_target_'] = new_path - torch.save(model, output_path) - -if __name__ == '__main__': - start_time = time.time() - fire.Fire(main) - print(f"Time used: {time.time() - start_time:.2f} s") \ No newline at end of file diff --git a/tools/zerof2ours.py b/tools/zerof2ours.py deleted file mode 100644 index d8e8084..0000000 --- a/tools/zerof2ours.py +++ /dev/null @@ -1,30 +0,0 @@ -import torch -from pathlib import Path -import numpy as np -import fire, time - -def main( - # download from: https://github.com/kylevedder/zeroflow_weights/tree/master/argo - model_path: str = "/home/kin/nsfp_distilatation_3x_49_epochs.ckpt", - # download from: https://zenodo.org/records/12632962 - reference_path: str = "/home/kin/fastflow3d.ckpt", - 
# new output weight file - output_path: str = "/home/kin/zeroflow3x.ckpt", -): - model = torch.load(model_path) - reference = torch.load(reference_path) - - ref_model_weight = reference['state_dict'] - real_model_weight = model['state_dict'] - for k in real_model_weight.keys(): - if k not in ref_model_weight.keys(): - print(f"Warning: {k} not in reference model, not same model.") - exit(0) - - reference['state_dict'] = real_model_weight - torch.save(reference, output_path) - -if __name__ == '__main__': - start_time = time.time() - fire.Fire(main) - print(f"Time used: {time.time() - start_time:.2f} s") \ No newline at end of file diff --git a/train.py b/train.py deleted file mode 100644 index 51be0b0..0000000 --- a/train.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -# Created: 2023-07-12 19:30 -# Copyright (C) 2023-now, RPL, KTH Royal Institute of Technology -# Author: Qingwen Zhang (https://kin-zhang.github.io/) -# -# This file is part of DeFlow (https://github.com/KTH-RPL/DeFlow) and -# SeFlow (https://github.com/KTH-RPL/SeFlow) projects. -# If you find this repo helpful, please cite the respective publication as -# listed on the above website. 
- -# Description: Train Model -""" - -import torch -from torch.utils.data import DataLoader -import lightning.pytorch as pl -from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger -from lightning.pytorch.callbacks import ( - LearningRateMonitor, - ModelCheckpoint -) - -from omegaconf import DictConfig, OmegaConf -import hydra, wandb, os, math -from hydra.core.hydra_config import HydraConfig -from pathlib import Path - -from src.dataset import HDF5Dataset, collate_fn_pad -from src.trainer import ModelWrapper - -def precheck_cfg_valid(cfg): - if cfg.loss_fn == 'seflowLoss' and cfg.add_seloss is None: - raise ValueError("Please specify the self-supervised loss items for seflowLoss.") - - grid_size = [(cfg.point_cloud_range[3] - cfg.point_cloud_range[0]) * (1/cfg.voxel_size[0]), - (cfg.point_cloud_range[4] - cfg.point_cloud_range[1]) * (1/cfg.voxel_size[1]), - (cfg.point_cloud_range[5] - cfg.point_cloud_range[2]) * (1/cfg.voxel_size[2])] - - for i, dim_size in enumerate(grid_size): - # NOTE(Qingwen): - # * the range is divisible to voxel, e.g. 51.2/0.2=256 good, 51.2/0.3=170.67 wrong. - # * the grid size to be divisible by 8 (2^3) for three bisections for the UNet. 
- target_divisor = 8 - if i <= 1: # Only check x and y dimensions - if dim_size % target_divisor != 0: - adjusted_dim_size = math.ceil(dim_size / target_divisor) * target_divisor - suggest_range_setting = (adjusted_dim_size * cfg.voxel_size[i]) / 2 - raise ValueError(f"Suggest x/y range setting: {suggest_range_setting:.2f} based on {cfg.voxel_size[i]}") - else: - if dim_size.is_integer() is False: - suggest_range_setting = (math.ceil(dim_size) * cfg.voxel_size[i]) / 2 - raise ValueError(f"Suggest z range setting: {suggest_range_setting:.2f} or {suggest_range_setting/2:.2f} based on {cfg.voxel_size[i]}") - return cfg - -@hydra.main(version_base=None, config_path="conf", config_name="config") -def main(cfg): - precheck_cfg_valid(cfg) - pl.seed_everything(cfg.seed, workers=True) - - train_dataset = HDF5Dataset(cfg.train_data, n_frames=cfg.num_frames, dufo=(cfg.loss_fn == 'seflowLoss')) - train_loader = DataLoader(train_dataset, - batch_size=cfg.batch_size, - shuffle=True, - num_workers=cfg.num_workers, - collate_fn=collate_fn_pad, - pin_memory=True) - val_loader = DataLoader(HDF5Dataset(cfg.val_data, n_frames=cfg.num_frames), - batch_size=cfg.batch_size, - shuffle=False, - num_workers=cfg.num_workers, - collate_fn=collate_fn_pad, - pin_memory=True) - - # count gpus, overwrite gpus - cfg.gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0 - - output_dir = HydraConfig.get().runtime.output_dir - # overwrite logging folder name for SSL. - if cfg.loss_fn == 'seflowLoss': - cfg.output = cfg.output.replace(cfg.model.name, "seflow") - output_dir = output_dir.replace(cfg.model.name, "seflow") - method_name = "seflow" - else: - method_name = cfg.model.name - - # FIXME: hydra output_dir with ddp run will mkdir in the parent folder. Looks like PL and Hydra trying to fix in lib. 
- # print(f"Output Directory: {output_dir} in gpu rank: {torch.cuda.current_device()}") - Path(os.path.join(output_dir, "checkpoints")).mkdir(parents=True, exist_ok=True) - - cfg = DictConfig(OmegaConf.to_container(cfg, resolve=True)) - model = ModelWrapper(cfg) - - callbacks = [ - ModelCheckpoint( - dirpath=os.path.join(output_dir, "checkpoints"), - filename="{epoch:02d}_"+method_name, - auto_insert_metric_name=False, - monitor=cfg.model.val_monitor, - mode="min", - save_top_k=cfg.save_top_model, - save_last=True, - ), - LearningRateMonitor(logging_interval="epoch") - ] - - if cfg.wandb_mode != "disabled": - logger = WandbLogger(save_dir=output_dir, - entity="kth-rpl", - project=f"{cfg.wandb_project_name}", - name=f"{cfg.output}", - offline=(cfg.wandb_mode == "offline"), - log_model=(True if cfg.wandb_mode == "online" else False)) - logger.watch(model, log_graph=False) - else: - # check local tensorboard logging: tensorboard --logdir logs/jobs/{log folder} - logger = TensorBoardLogger(save_dir=output_dir, name="logs") - - - trainer = pl.Trainer(logger=logger, - log_every_n_steps=50, - accelerator="gpu", - devices=cfg.gpus, - check_val_every_n_epoch=cfg.val_every, - gradient_clip_val=cfg.gradient_clip_val, - strategy="ddp_find_unused_parameters_false" if cfg.gpus > 1 else "auto", - callbacks=callbacks, - max_epochs=cfg.epochs, - sync_batchnorm=cfg.sync_bn) - - - if trainer.global_rank == 0: - print("\n"+"-"*40) - print("Initiating wandb and trainer successfully. ^V^ ") - print(f"We will use {cfg.gpus} GPUs to train the model. Check the checkpoints in {output_dir} checkpoints folder.") - print("Total Train Dataset Size: ", len(train_dataset)) - if cfg.add_seloss is not None and cfg.loss_fn == 'seflowLoss': - print(f"Note: We are in **self-supervised** training now. 
No ground truth label is used.") - print(f"We will use these loss items in {cfg.loss_fn}: {cfg.add_seloss}") - print("-"*40+"\n") - - # NOTE(Qingwen): search & check: def training_step(self, batch, batch_idx) - trainer.fit(model, train_dataloaders = train_loader, val_dataloaders = val_loader, ckpt_path = cfg.checkpoint) - wandb.finish() - -if __name__ == "__main__": - main() \ No newline at end of file