IN_DATA_DIR="/project/train/src_repo/data/video"
OUT_DATA_DIR="/project/train/src_repo/data/splitvideo"
str="_"

if [[ ! -d "${OUT_DATA_DIR}" ]]; then
  echo "${OUT_DATA_DIR} doesn't exist. Creating it."
  mkdir -p "${OUT_DATA_DIR}"
fi
for video in $(ls -A1 -U ${IN_DATA_DIR}/*)
do
  for i in {0..10}
  do
    index=$(expr $i \* 10)
    out_name="${OUT_DATA_DIR}/${i}${str}${video##*/}"
    if [ ! -f "${out_name}" ]; then
      ffmpeg -ss ${index} -t 80 -i "${video}" "${out_name}"
    fi
  done
done
if [[ ! -d "${OUT_DATA_DIR}" ]]; then
  echo "${OUT_DATA_DIR} doesn't exist. Creating it."
  mkdir -p "${OUT_DATA_DIR}"
fi
for video in $(ls -A1 -U ${IN_DATA_DIR}/*)
do
  out_name="${OUT_DATA_DIR}/${video##*/}"
  if [ ! -f "${out_name}" ]; then
    ffmpeg -ss 0 -t 100 -i "${video}" "${out_name}"
  fi
done
!python -m pip install pyyaml==5.1
import sys, os, distutils.core

# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))
2. pytorchvideo.layers.distributed error

    from pytorchvideo.layers.distributed import (  # noqa
ImportError: cannot import name 'cat_all_gather' from 'pytorchvideo.layers.distributed' (/site-packages/pytorchvideo/layers/distributed.py)
3. pytorchvideo.losses error
File "SlowFast/slowfast/models/losses.py", line 11, in from pytorchvideo.losses.soft_target_cross_entropy import ( ModuleNotFoundError: No module named 'pytorchvideo.losses'
# Excerpt from the yolov5 person-detection routine: `img` is the preprocessed
# (letterboxed) frame, `img0` the original frame; the top of the function is
# omitted in the original listing.
img = torch.from_numpy(img).to(device)
img = img.half()
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
    img = img[None]
pred = model(img, augment=False, val=True)[0]
pred = non_max_suppression(pred, conf_thres, iou_thres, agnostic=False)
result = []
for i, det in enumerate(pred):  # detections per image
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        for *xyxy, conf, cls in det:
            if cls == 0:  # keep only the person class
                result.append([float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])])
if len(result) == 0:
    return None
return torch.from_numpy(np.array(result))
2. bbox preprocessing
def scale_boxes(size, boxes, height, width):
    """
    Scale the short side of the box to size.
    Args:
        size (int): size to scale the image.
        boxes (ndarray): bounding boxes to perform scale. The dimension is
            `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): scaled bounding boxes.
    """
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return boxes

    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        boxes *= float(new_height) / height
    else:
        new_width = int(math.floor((float(width) / height) * size))
        boxes *= float(new_width) / width

    return boxes
def scale(size, image):
    """
    Scale the short side of the image to size.
    Args:
        size (int): size to scale the image.
        image (array): image to perform short side scale. Dimension is
            `height` x `width` x `channel`.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`.
    """
    height = image.shape[0]
    width = image.shape[1]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return image
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
    else:
        new_width = int(math.floor((float(width) / height) * size))
    img = cv2.resize(
        image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
    )
    return img.astype(np.float32)
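A quick sanity check of the two helpers above (dummy values; a 1920x1080 frame is assumed, matching the video used later): scaling the short side to 256 yields a 455x256 image, and scale_boxes shrinks a detection by the same factor.

import math
import numpy as np
import cv2

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # dummy 1080p frame
print(scale(256, frame).shape)                     # (256, 455, 3): short side -> 256

box = np.array([[960.0, 540.0, 1160.0, 740.0]])    # dummy xyxy detection
print(scale_boxes(256, box.copy(), 1080, 1920))    # scaled by 455/1920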
2. Normalization
def tensor_normalize(tensor, mean, std, func=None):
    """
    Normalize a given tensor by subtracting the mean and dividing the std.
    Args:
        tensor (tensor): tensor to normalize.
        mean (tensor or list): mean value to subtract.
        std (tensor or list): std to divide.
    """
    if tensor.dtype == torch.uint8:
        tensor = tensor.float()
        tensor = tensor / 255.0
    if type(mean) == list:
        mean = torch.tensor(mean)
    if type(std) == list:
        std = torch.tensor(std)
    if func is not None:
        tensor = func(tensor)
    tensor = tensor - mean
    tensor = tensor / std
    return tensor
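For example (dummy uint8 tensor, with the Kinetics mean/std used throughout this post), the tensor is first scaled to [0, 1] and then standardized:

import torch

t = torch.full((2, 2, 3), 255, dtype=torch.uint8)  # dummy H x W x C patch
out = tensor_normalize(t, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])
print(out[0, 0])  # tensor([2.4444, 2.4444, 2.4444]) = (1.0 - 0.45) / 0.225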
3. Building the slow and fast pathway inputs. The main idea: from the 64 buffered frames, sample 32 frames as the fast pathway input, then take every 4th of those (8 frames) as the slow pathway input, and permute T H W C -> C T H W. The final fast_pathway tensor therefore has shape (b, 3, 32, h, w) and slow_pathway has shape (b, 3, 8, h, w).
def process_cv2_inputs(frames):
    """
    Normalize and prepare inputs as a list of tensors. Each tensor
    corresponds to a unique pathway.
    Args:
        frames (list of array): list of input images (corresponding to one
            clip) in range [0, 255].
    """
    inputs = torch.from_numpy(np.array(frames)).float() / 255
    inputs = tensor_normalize(inputs, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])
    # T H W C -> C T H W.
    inputs = inputs.permute(3, 0, 1, 2)
    # Sample 32 frames evenly for the fast pathway.
    index = torch.linspace(0, inputs.shape[1] - 1, 32).long()
    print(index)
    inputs = torch.index_select(inputs, 1, index)
    fast_pathway = inputs
    # Take every 4th fast frame (8 frames) for the slow pathway.
    slow_pathway = torch.index_select(
        inputs,
        1,
        torch.linspace(0, inputs.shape[1] - 1, inputs.shape[1] // 4).long(),
    )
    frame_list = [slow_pathway, fast_pathway]
    print(np.shape(frame_list[0]))
    inputs = [inp.unsqueeze(0) for inp in frame_list]
    return inputs
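A quick shape check (with 64 dummy 256x256 frames) confirms the (b, 3, 8, h, w) and (b, 3, 32, h, w) layout described above:

import numpy as np
import torch

frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(64)]
slow, fast = process_cv2_inputs(frames)
print(slow.shape)  # torch.Size([1, 3, 8, 256, 256])
print(fast.shape)  # torch.Size([1, 3, 32, 256, 256])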
5. SlowFast ONNX inference
5.1 Exporting the ONNX file
import os
import sys
from collections import OrderedDict
import torch
import argparse

work_root = os.path.split(os.path.realpath(__file__))[0]
from slowfast.config.defaults import get_cfg
import slowfast.utils.checkpoint as cu
from slowfast.models import build_model
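The listing stops at the imports; the export call itself would look roughly like the sketch below. This is a reconstruction under assumptions, not the original script: the config and checkpoint paths are placeholders, the 256x455 input size matches the preprocessing above, and the input names are chosen to match the `slow_pathway`/`fast_pathway`/`bbox` names used in the onnxruntime call later.

# Hedged sketch of the export step; paths and shapes are assumptions.
cfg = get_cfg()
cfg.merge_from_file("configs/AVA/SLOWFAST_32x2_R101_50_50.yaml")  # assumed path
cfg.NUM_GPUS = 0  # export on CPU

model = build_model(cfg)
cu.load_checkpoint("SLOWFAST_32x2_R101_50_50.pkl", model, data_parallel=False)  # assumed path
model.eval()

slow = torch.randn(1, 3, 8, 256, 455)     # slow pathway
fast = torch.randn(1, 3, 32, 256, 455)    # fast pathway
bbox = torch.tensor([[0.0, 10.0, 10.0, 100.0, 100.0]])  # [frame_idx, x1, y1, x2, y2]

torch.onnx.export(
    model,
    ([slow, fast], bbox),
    "SLOWFAST_32x2_R101_50_50.onnx",
    input_names=["slow_pathway", "fast_pathway", "bbox"],
    output_names=["output"],
    opset_version=12,
)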
# Excerpt from process_image (yolov5 person detection); the top of the
# function, which letterboxes the original frame img0 into img, is omitted
# in the original listing.
img = torch.from_numpy(img).to(device)
img = img.half()
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
    img = img[None]
pred = model(img, augment=False, val=True)[0]
pred = non_max_suppression(pred, conf_thres, iou_thres, agnostic=False)
result = []
for i, det in enumerate(pred):  # detections per image
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        for *xyxy, conf, cls in det:
            if cls == 0:  # keep only the person class
                result.append([float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])])
if len(result) == 0:
    return None
# Pad the detections with zero boxes up to a fixed count of 32.
for i in range(32 - len(result)):
    result.append([float(0), float(0), float(0), float(0)])
return torch.from_numpy(np.array(result))

def scale(size, image):
    """
    Scale the short side of the image to size.
    Args:
        size (int): size to scale the image.
        image (array): image to perform short side scale. Dimension is
            `height` x `width` x `channel`.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`.
    """
    height = image.shape[0]
    width = image.shape[1]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return image
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
    else:
        new_width = int(math.floor((float(width) / height) * size))
    img = cv2.resize(
        image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
    )
    return img.astype(np.float32)

def tensor_normalize(tensor, mean, std, func=None):
    """
    Normalize a given tensor by subtracting the mean and dividing the std.
    Args:
        tensor (tensor): tensor to normalize.
        mean (tensor or list): mean value to subtract.
        std (tensor or list): std to divide.
    """
    if tensor.dtype == torch.uint8:
        tensor = tensor.float()
        tensor = tensor / 255.0
    if type(mean) == list:
        mean = torch.tensor(mean)
    if type(std) == list:
        std = torch.tensor(std)
    if func is not None:
        tensor = func(tensor)
    tensor = tensor - mean
    tensor = tensor / std
    return tensor

def scale_boxes(size, boxes, height, width):
    """
    Scale the short side of the box to size.
    Args:
        size (int): size to scale the image.
        boxes (ndarray): bounding boxes to perform scale. The dimension is
            `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): scaled bounding boxes.
    """
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return boxes
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        boxes *= float(new_height) / height
    else:
        new_width = int(math.floor((float(width) / height) * size))
        boxes *= float(new_width) / width

    return boxes

def process_cv2_inputs(frames):
    """
    Normalize and prepare inputs as a list of tensors. Each tensor
    corresponds to a unique pathway.
    Args:
        frames (list of array): list of input images (corresponding to one
            clip) in range [0, 255].
    """
    inputs = torch.from_numpy(np.array(frames)).float() / 255
    inputs = tensor_normalize(inputs, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])
    # T H W C -> C T H W.
    inputs = inputs.permute(3, 0, 1, 2)
    # Sample 32 frames evenly for the fast pathway.
    index = torch.linspace(0, inputs.shape[1] - 1, 32).long()
    inputs = torch.index_select(inputs, 1, index)
    fast_pathway = inputs
    # Take every 4th fast frame (8 frames) for the slow pathway.
    slow_pathway = torch.index_select(
        inputs,
        1,
        torch.linspace(0, inputs.shape[1] - 1, inputs.shape[1] // 4).long(),
    )
    frame_list = [slow_pathway, fast_pathway]
    inputs = [inp.unsqueeze(0) for inp in frame_list]
    return inputs

# Load the models.
yolov5 = init()
slowfast = onnxruntime.InferenceSession('/content/SLOWFAST_32x2_R101_50_50.onnx')

# Load the data and start inference.
cap = cv2.VideoCapture("/content/atm_125.mp4")
was_read = True
while was_read:
    frames = []
    seq_length = 64
    while was_read and len(frames) < seq_length:
        was_read, frame = cap.read()
        frames.append(frame)
    bboxes = process_image(yolov5, frames[64 // 2])
    if bboxes is not None:
        frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]
        frames = [scale(256, frame) for frame in frames]
        inputs = process_cv2_inputs(frames)
        if bboxes is not None:
            bboxes = scale_boxes(256, bboxes, 1080, 1920)
            index_pad = torch.full(
                size=(bboxes.shape[0], 1),
                fill_value=float(0),
                device=bboxes.device,
            )
            # Pad frame index for each box.
            bboxes = torch.cat([index_pad, bboxes], axis=1)
        for i in range(len(inputs)):
            inputs[i] = inputs[i].numpy()
        if bboxes is not None:
            # onnxruntime expects numpy inputs, so convert the box tensor.
            outputs = slowfast.run(
                None,
                {'slow_pathway': inputs[0], 'fast_pathway': inputs[1], 'bbox': bboxes.numpy()},
            )
            for i in range(80):
                if outputs[0][0][i] > 0.3:
                    print(i)
            print(np.shape(outputs[0]))
    else:
        print("No person detected.")
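Instead of thresholding each of the 80 AVA scores at 0.3, one can also sort them and inspect the top few per box (a small optional snippet, assuming `outputs[0]` has shape `num_boxes x 80`):

import numpy as np

scores = np.asarray(outputs[0])          # (num_boxes, 80) AVA class scores
top5 = np.argsort(scores[0])[::-1][:5]   # top-5 class ids for the first box
print(top5, scores[0][top5])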
import ctypes
import os
import math

import numpy as np
import cv2
import random
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import threading
import time
import torch
from torchvision.ops import roi_align  # pools per-box SlowFast features below
class TrtInference:
    _batch_size = 1

    def __init__(self, model_path=None, cuda_ctx=None):
        self._model_path = model_path
        if self._model_path is None:
            print("please set trt model path!")
            exit()
        self.cuda_ctx = cuda_ctx
        if self.cuda_ctx is None:
            self.cuda_ctx = cuda.Device(0).make_context()
        if self.cuda_ctx:
            self.cuda_ctx.push()
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()
        try:
            self.context = self.engine.create_execution_context()
            self.stream = cuda.Stream()
            for index, binding in enumerate(self.engine):
                if self.engine.binding_is_input(binding):
                    batch_shape = list(self.engine.get_binding_shape(binding)).copy()
                    batch_shape[0] = self._batch_size
                    self.context.set_binding_shape(index, batch_shape)
            self.host_inputs, self.host_outputs, self.cuda_inputs, self.cuda_outputs, self.bindings = self._allocate_buffers()
        except Exception as e:
            raise RuntimeError('fail to allocate CUDA resources') from e
        finally:
            if self.cuda_ctx:
                self.cuda_ctx.pop()
    def _load_plugins(self):
        pass
    def _load_engine(self):
        with open(self._model_path, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    def destroy(self):
        """Free CUDA memories and context."""
        del self.cuda_outputs
        del self.cuda_inputs
        del self.stream
        if self.cuda_ctx:
            self.cuda_ctx.pop()
            del self.cuda_ctx
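The listing omits the `_allocate_buffers` and `inference` methods that `TrtInference` (and `TrtInference_head` below) rely on. A minimal sketch of what they typically look like in this pycuda pattern, written as methods of the class (a reconstruction under assumptions, not the original code):

    # Hedged reconstruction of the two omitted methods (assumed, not original).
    def _allocate_buffers(self):
        host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings = [], [], [], [], []
        for index, binding in enumerate(self.engine):
            size = abs(trt.volume(self.context.get_binding_shape(index)))
            host_mem = cuda.pagelocked_empty(size, np.float32)  # page-locked host buffer
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)          # matching device buffer
            bindings.append(int(cuda_mem))
            if self.engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)
        return host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings

    def inference(self, inputs):
        if self.cuda_ctx:
            self.cuda_ctx.push()
        # Copy inputs to the device, run the engine, copy outputs back.
        for i, data in enumerate(inputs):
            np.copyto(self.host_inputs[i], np.asarray(data).ravel())
            cuda.memcpy_htod_async(self.cuda_inputs[i], self.host_inputs[i], self.stream)
        self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
        for i in range(len(self.host_outputs)):
            cuda.memcpy_dtoh_async(self.host_outputs[i], self.cuda_outputs[i], self.stream)
        self.stream.synchronize()
        if self.cuda_ctx:
            self.cuda_ctx.pop()
        outs = [out.copy() for out in self.host_outputs]
        # A single-output engine (the head) returns its array directly.
        return outs[0] if len(outs) == 1 else outs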
class TrtInference_head:
    _batch_size = 1

    def __init__(self, model_path=None, cuda_ctx=None):
        self._model_path = model_path
        if self._model_path is None:
            print("please set trt model path!")
            exit()
        self.cuda_ctx = cuda_ctx
        if self.cuda_ctx is None:
            self.cuda_ctx = cuda.Device(0).make_context()
        if self.cuda_ctx:
            self.cuda_ctx.push()
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()
        try:
            self.context = self.engine.create_execution_context()
            self.stream = cuda.Stream()
            for index, binding in enumerate(self.engine):
                if self.engine.binding_is_input(binding):
                    batch_shape = list(self.engine.get_binding_shape(binding)).copy()
                    batch_shape[0] = self._batch_size
                    self.context.set_binding_shape(index, batch_shape)
            self.host_inputs, self.host_outputs, self.cuda_inputs, self.cuda_outputs, self.bindings = self._allocate_buffers()
        except Exception as e:
            raise RuntimeError('fail to allocate CUDA resources') from e
        finally:
            if self.cuda_ctx:
                self.cuda_ctx.pop()
    def _load_plugins(self):
        pass
    def _load_engine(self):
        with open(self._model_path, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    def destroy(self):
        """Free CUDA memories and context."""
        del self.cuda_outputs
        del self.cuda_inputs
        del self.stream
        if self.cuda_ctx:
            self.cuda_ctx.pop()
            del self.cuda_ctx
# Excerpt from process_image (yolov5 person detection); the top of the
# function, which letterboxes the original frame img0 into img, is omitted
# in the original listing.
img = torch.from_numpy(img).to(device)
img = img.half()
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
    img = img[None]
pred = model(img, augment=False, val=True)[0]
pred = non_max_suppression(pred, conf_thres, iou_thres, agnostic=False)
result = []
for i, det in enumerate(pred):  # detections per image
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        for *xyxy, conf, cls in det:
            if cls == 0:  # keep only the person class
                result.append([float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])])
if len(result) == 0:
    return None
# Pad the detections with zero boxes up to a fixed count of 32.
for i in range(32 - len(result)):
    result.append([float(0), float(0), float(0), float(0)])
return torch.from_numpy(np.array(result))

def scale(size, image):
    """
    Scale the short side of the image to size.
    Args:
        size (int): size to scale the image.
        image (array): image to perform short side scale. Dimension is
            `height` x `width` x `channel`.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`.
    """
    height = image.shape[0]
    width = image.shape[1]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return image
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
    else:
        new_width = int(math.floor((float(width) / height) * size))
    img = cv2.resize(
        image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
    )
    return img.astype(np.float32)

def tensor_normalize(tensor, mean, std, func=None):
    """
    Normalize a given tensor by subtracting the mean and dividing the std.
    Args:
        tensor (tensor): tensor to normalize.
        mean (tensor or list): mean value to subtract.
        std (tensor or list): std to divide.
    """
    if tensor.dtype == torch.uint8:
        tensor = tensor.float()
        tensor = tensor / 255.0
    if type(mean) == list:
        mean = torch.tensor(mean)
    if type(std) == list:
        std = torch.tensor(std)
    if func is not None:
        tensor = func(tensor)
    tensor = tensor - mean
    tensor = tensor / std
    return tensor

def scale_boxes(size, boxes, height, width):
    """
    Scale the short side of the box to size.
    Args:
        size (int): size to scale the image.
        boxes (ndarray): bounding boxes to perform scale. The dimension is
            `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): scaled bounding boxes.
    """
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return boxes
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        boxes *= float(new_height) / height
    else:
        new_width = int(math.floor((float(width) / height) * size))
        boxes *= float(new_width) / width

    return boxes

def process_cv2_inputs(frames):
    """
    Normalize and prepare inputs as a list of tensors. Each tensor
    corresponds to a unique pathway.
    Args:
        frames (list of array): list of input images (corresponding to one
            clip) in range [0, 255].
    """
    inputs = torch.from_numpy(np.array(frames)).float() / 255
    inputs = tensor_normalize(inputs, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])
    # T H W C -> C T H W.
    inputs = inputs.permute(3, 0, 1, 2)
    # Sample 32 frames evenly for the fast pathway.
    index = torch.linspace(0, inputs.shape[1] - 1, 32).long()
    inputs = torch.index_select(inputs, 1, index)
    fast_pathway = inputs
    # Take every 4th fast frame (8 frames) for the slow pathway.
    slow_pathway = torch.index_select(
        inputs,
        1,
        torch.linspace(0, inputs.shape[1] - 1, inputs.shape[1] // 4).long(),
    )
    frame_list = [slow_pathway, fast_pathway]
    inputs = [inp.unsqueeze(0) for inp in frame_list]
    return inputs

# Load the models.
yolov5 = init()
slowfast = TrtInference('/content/SLOWFAST_32x2_R101_50_50.engine', None)
head = TrtInference_head('/content/SLOWFAST_head.engine', None)
# Load the data and start inference.
cap = cv2.VideoCapture("/content/atm_125.mp4")
was_read = True
while was_read:
    frames = []
    seq_length = 64
    while was_read and len(frames) < seq_length:
        was_read, frame = cap.read()
        frames.append(frame)
    bboxes = process_image(yolov5, frames[64 // 2])
    if bboxes is not None:
        frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]
        frames = [scale(256, frame) for frame in frames]
        inputs = process_cv2_inputs(frames)
        print(bboxes)
        if bboxes is not None:
            bboxes = scale_boxes(256, bboxes, 1080, 1920)
            index_pad = torch.full(
                size=(bboxes.shape[0], 1),
                fill_value=float(0),
                device=bboxes.device,
            )
            # Pad frame index for each box.
            bboxes = torch.cat([index_pad, bboxes], axis=1)
        for i in range(len(inputs)):
            inputs[i] = inputs[i].numpy()
        if bboxes is not None:
            outputs = slowfast.inference(inputs)
            # Restore the backbone feature maps; 16 x 29 is roughly the
            # stride-16 grid of the 256 x 455 input.
            outputs[0] = outputs[0].reshape(1, 2048, 16, 29)
            outputs[1] = outputs[1].reshape(1, 256, 16, 29)
            outputs[0] = torch.from_numpy(outputs[0])
            outputs[1] = torch.from_numpy(outputs[1])
            # Pool a 7x7 feature per box from each pathway's feature map.
            outputs[0] = roi_align(outputs[0], bboxes.to(dtype=outputs[0].dtype), 7, 1.0 / 16, 0, True)
            outputs[1] = roi_align(outputs[1], bboxes.to(dtype=outputs[1].dtype), 7, 1.0 / 16, 0, True)
            outputs[0] = outputs[0].numpy()
            outputs[1] = outputs[1].numpy()
            prd = head.inference(outputs)
            prd = prd.reshape(32, 80)  # 32 (padded) boxes x 80 AVA classes
            for i in range(80):
                if prd[0][i] > 0.3:
                    print(i)
    else:
        print("No person detected.")