# Installation:
#   pip install transformers
# Demo:
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import torch
from transformers import AutoProcessor, Owlv2ForObjectDetection
from transformers. utils. constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
# Local checkpoint shared by the processor and the detector, and the frame to
# run detection on. Kept in one place so the two loaders cannot drift apart.
MODEL_PATH = "/home/share3/mayunchuan/google/owlv2-large-patch14-ensemble"
IMAGE_PATH = "/home/mayunchuan/lavad/dataset/Thumos14_25fps/frames/video_test_0000293/004902.jpg"

# Use the GPU when one is present; otherwise fall back to the CPU so the demo
# does not crash on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained(MODEL_PATH)
model = Owlv2ForObjectDetection.from_pretrained(MODEL_PATH).to(device)

image = Image.open(IMAGE_PATH)
# Nested list: texts[i] holds the text queries that are later paired with
# results[i] when the detections are reported.
texts = [[" javelin"]]

inputs = processor(text=texts, images=image, return_tensors="pt")
# Move the three tensors the model consumes onto the same device as the model.
for key in ("input_ids", "attention_mask", "pixel_values"):
    inputs[key] = inputs[key].to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)
def get_preprocessed_image(pixel_values):
    """Rebuild a viewable PIL image from a normalized pixel tensor.

    Reverses the per-channel normalization by multiplying with
    ``OPENAI_CLIP_STD`` and adding ``OPENAI_CLIP_MEAN``, rescales the result
    to the 0-255 range, and converts the channel-first array into a
    channel-last ``PIL.Image``.

    Args:
        pixel_values: Torch tensor for a single image in channel-first
            layout; size-1 dimensions (such as a leading batch axis) are
            squeezed away. Presumably normalized with the OpenAI CLIP
            mean/std -- TODO confirm against the processor configuration.

    Returns:
        The de-normalized image as a ``PIL.Image.Image``.
    """
    # detach() keeps .numpy() valid even if the tensor still tracks gradients.
    channels_first = pixel_values.squeeze().detach().cpu().numpy()

    # [:, None, None] broadcasts the per-channel statistics over height/width.
    std = np.array(OPENAI_CLIP_STD)[:, None, None]
    mean = np.array(OPENAI_CLIP_MEAN)[:, None, None]
    unnormalized_image = channels_first * std + mean

    # Round to the nearest integer and clip before the uint8 cast: a bare
    # astype() truncates (253.9999 -> 253) and wraps around for values that
    # land slightly outside [0, 255].
    unnormalized_image = np.clip(np.rint(unnormalized_image * 255), 0, 255).astype(np.uint8)

    # Channel-first -> channel-last, the layout Image.fromarray expects.
    unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
    return Image.fromarray(unnormalized_image)
# Rebuild the image from the tensor that was fed to the model; the boxes are
# scaled to this image and drawn on it.
unnormalized_image = get_preprocessed_image(inputs.pixel_values)

# PIL reports size as (width, height); the reversed tuple is (height, width).
width_height = unnormalized_image.size
target_sizes = torch.Tensor([width_height[::-1]])
results = processor.post_process_object_detection(
    outputs=outputs,
    threshold=0.2,
    target_sizes=target_sizes,
)

# Index of the batch entry whose detections are reported and drawn.
i = 0
text = texts[i]
first_result = results[i]
boxes = first_result["boxes"]
scores = first_result["scores"]
labels = first_result["labels"]

# First pass: report every detection on stdout.
for box, score, label in zip(boxes, scores, labels):
    rounded_box = [round(coord, 2) for coord in box.tolist()]
    confidence = round(score.item(), 3)
    print(f"Detected {text[label]} with confidence {confidence} at location {rounded_box} ")

# Second pass: outline each detection in red and write its query text at the
# box's first corner (x, y).
draw = ImageDraw.Draw(unnormalized_image)
for score, label, box in zip(scores, labels, boxes):
    corners = [round(coord, 2) for coord in box.tolist()]
    x, y, x2, y2 = corners
    draw.rectangle((x, y, x2, y2), outline="red", width=1)
    draw.text((x, y), text[label.item()], font_size=20, fill="black")

unnormalized_image.save("marked_image.jpg")