QuickStart

Build NLP Search in 5 Minutes

Log into the client as shown below.

[10]:
from vectorai.client import ViClient

# Credentials for the Vector AI API -- replace the placeholders with your own.
# They are defined here so this first example runs without relying on later cells.
username = 'your_username'
api_key = 'your_api_key'
url = 'https://api.vctr.ai'

# Log in and name the collection that will hold the tweets.
vi_client = ViClient(username, api_key, url)
collection_name = 'nlp_quickstart'

Then use our Text2Vec model to convert the text to vectors!

[9]:
# Text encoder from the deployed models; its `encode` method is passed to the
# insert call and used to encode the search query further below.
from vectorai.models.deployed import ViText2Vec
text_encoder = ViText2Vec(username, api_key, url)

Then insert the data as shown below!

For the example below, we got the data from: https://www.kaggle.com/datatattle/covid-19-nlp-text-classification

[18]:
import pandas as pd
# Load the training CSV of the Kaggle dataset linked above; the file is read as latin-1.
df = pd.read_csv('data/Corona_NLP_train.csv', encoding='latin-1')
# Preview two randomly sampled rows.
df.sample(2)
[18]:
UserName ScreenName Location TweetAt OriginalTweet Sentiment
11344 15143 60095 India 20-03-2020 Crude #oil prices have plummeted by 20% since ... Negative
29408 33207 78159 Ontario, Canada 05-04-2020 Grocery prices are INSANE! Normally spend 180-... Extremely Negative
[19]:
# Insert the DataFrame into the collection, passing the text encoder as the
# model for the 'OriginalTweet' column.
vi_client.insert_df(collection_name, df, models={'OriginalTweet':text_encoder.encode})

As we insert the data, note that the vector field names automatically adapt to our schema. In other words, the field “OriginalTweet” ends up becoming “OriginalTweet_vector_”. Before we search, let us take a quick look at the collection to make sure the data has been properly inserted. Once that is confirmed, our data is ready to search!

[13]:
# Fetch summary statistics for the collection (size, document count, search counters).
vi_client.collection_stats(collection_name)
[13]:
{'size_mb': 316.401376,
 'number_of_documents': 41157,
 'number_of_searches': 2,
 'number_of_id_lookups': 0}
[87]:
import pandas as pd

# Encode the query text, then search the tweet vectors for the five closest matches.
query_vector = text_encoder.encode('No more tissue paper')
result = vi_client.search(
    collection_name,
    query_vector,
    field='OriginalTweet_vector_',
    page_size=5,
)
# Display the 'OriginalTweet' text of each hit.
vi_client.results_pretty(result, 'OriginalTweet')
[87]:
OriginalTweet
0 Tissue Issue
1 No toilet paper Do this
2 Toilet paper anyone
3 Dont touch my toiletpaper
4 Toilet paper

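We can also provide recommendations (also known as search by ID), for example with `vi_client.search_by_id(collection_name, '<document_id>', 'OriginalTweet_vector_', page_size=5)`.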

Build Image Search In 5 Minutes

[38]:
# Credentials and endpoint for the Vector AI API -- replace the placeholders with your own.
username = 'your_username'
api_key = 'your_api_key'
url = 'https://api.vctr.ai'
# Collection that will hold the Pokemon image documents.
collection_name = 'pokemon_images'
# When True, documents are inserted first and encoded afterwards through an
# encoding job; when False, they are encoded while being inserted.
use_jobs = False
[39]:
# Build one document per Pokemon image for ids 1-19. The image file name is the
# id zero-padded to three digits (e.g. 001.png).
documents = [
    {
        'image': f'https://assets.pokemon.com/assets/cms2/img/pokedex/full/{i:03}.png',
        'pokemon_id': str(i),
        '_id': i,
    }
    for i in range(1, 20)
]
[41]:
#1. specify the vdb client (uses the credentials and url defined above)
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, url)

#2. specify an image encoder; its `encode` method is used for insertion and search below
from vectorai.models.deployed import ViImage2Vec
image_encoder = ViImage2Vec(username, api_key, url)
[42]:
#3. insert the documents and encode the images
if use_jobs:
    # Job route: insert the raw documents, start an encoding job on the 'image'
    # field, then wait on it through wait_till_jobs_complete.
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_image_job(collection_name, 'image')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
else:
    # Direct route: pass the image encoder as the model for the 'image' field
    # so insertion and encoding happen in one call.
    vi_client.insert_documents(collection_name, documents, models={'image':image_encoder.encode})
Failed: []
Failed: []

[42]:
{'inserted_successfully': 19, 'failed': 0, 'failed_document_ids': []}
[47]:
#4. search: encode the image of Pokemon 003 and query the 'image_vector_' field
search_results = vi_client.search(collection_name,
    image_encoder.encode('https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png'),
    'image_vector_', page_size=5)

#4.2 first result is the query image itself
vi_client.show_json(search_results, image_fields=['image'], image_width=150)

[47]:
_id image pokemon_id insert_date_ _search_score
0 3 3 2020-09-29T09:54:11.334926 1.000000
1 2 2 2020-09-29T09:54:11.332865 0.920337
2 1 1 2020-09-29T09:54:11.330522 0.838996
3 17 17 2020-09-29T09:54:22.218190 0.835111
4 16 16 2020-09-29T09:54:22.215866 0.813012
[50]:
#5 recommendation by id: find the images closest to the document whose _id is '2'
search_by_id_results = vi_client.search_by_id(collection_name, '2', 'image_vector_', page_size=5)

#5.2 first result is the id's image itself
vi_client.show_json(search_by_id_results, image_fields=['image'], image_width=150)
[50]:
_id image pokemon_id insert_date_ _search_score
0 2 2 2020-09-29T09:54:11.332865 1.000000
1 3 3 2020-09-29T09:54:11.334926 0.920337
2 1 1 2020-09-29T09:54:11.330522 0.895991
3 7 7 2020-09-29T09:54:11.345444 0.839945
4 17 17 2020-09-29T09:54:22.218190 0.833277

Build Audio Search in 5 Minutes

Building Audio search is easy with Vi!

[22]:
# Build the audio documents: one entry per Common Voice clip, numbered 1 to 1000.
documents = [
    {
        'audio': 'https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_{}.wav'.format(clip_no),
        'name': 'common_voice_en_{}.wav'.format(clip_no),
        '_id': clip_no,
    }
    for clip_no in range(1, 1001)
]
[9]:
# Settings for the audio example.
# Use a dedicated collection so the audio documents are not inserted into the
# collection left over from the image example.
collection_name = 'audio_quickstart'
# When True, documents are inserted first and encoded afterwards through an encoding job.
use_jobs = True
[ ]:
#1. specify the vdb client (username, api_key and url must already be defined)
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, url)
[20]:
# Audio encoder from the deployed models; its `encode` method is used for insertion and search.
from vectorai.models.deployed import ViAudio2Vec
audio_encoder = ViAudio2Vec(username, api_key, url)
[12]:
#2. specify an audio encoder
from vectorai.models.deployed import ViAudio2Vec
audio_encoder = ViAudio2Vec(username, api_key, url)

#3. insert the documents and encode the audio
if use_jobs:
    # Job route: insert the raw documents, start an encoding job on the 'audio'
    # field, then wait on it through wait_till_jobs_complete.
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_audio_job(collection_name, 'audio')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
else:
    # Direct route: pass the audio encoder as the model for the 'audio' field
    # so insertion and encoding happen in one call.
    vi_client.insert_documents(collection_name, documents, models={'audio':audio_encoder.encode})

{'status': 'Finished'}
[28]:
# NOTE(review): `ipd` is not referenced in this cell -- confirm whether the import is still needed.
import IPython.display as ipd
#4. search: encode the first document's audio and query the 'audio_vector_' field
search_results = vi_client.search(collection_name, audio_encoder.encode(documents[0]['audio']),
    'audio_vector_', page_size=5)

# The query clip is one of the inserted documents, so it comes back as the first result.
vi_client.show_json(search_results, audio_fields=['audio'])
[28]:
_id name audio _search_score
0 1 common_voice_en_1.wav 1.000000
1 12 common_voice_en_12.wav 0.893219
2 32 common_voice_en_32.wav 0.891373
3 20 common_voice_en_20.wav 0.882336
4 15 common_voice_en_15.wav 0.877323
[29]:
#5 recommendation by id: find the clips closest to the document whose _id is '2'
search_by_id_results = vi_client.search_by_id(collection_name, '2', 'audio_vector_', page_size=5)

#5.2 first result is the document with the queried id itself
vi_client.show_json(search_by_id_results, audio_fields=['audio'])
[29]:
_id name audio _search_score
0 2 common_voice_en_2.wav 1.000000
1 40 common_voice_en_40.wav 0.884632
2 3 common_voice_en_3.wav 0.879187
3 14 common_voice_en_14.wav 0.874556
4 21 common_voice_en_21.wav 0.865409

Build Text QA Search in 5 minutes

[ ]:
%pip install nlp
[75]:
import nlp
# Load the SQuAD dataset through the `nlp` package installed in the previous cell.
squad_dataset = nlp.load_dataset('squad')
# Turn every validation example into a document, using its position in the
# split (as a string) for the `_id` field.
documents = [{'_id':str(n), **d} for n, d in enumerate(squad_dataset['validation'])]
[76]:
#your credentials -- replace the placeholders with your own
vecdb_url = 'https://api.vctr.ai'
username = 'your_username'
api_key = 'your_api_key'
# Collection that will hold the SQuAD documents.
collection_name = 'squad'
# When True, documents are inserted first and encoded afterwards through an encoding job.
use_jobs = True
[77]:
#1. specify the vdb client
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, vecdb_url)

#2. specify a text encoder
from vectorai.models.deployed import ViText2Vec
# Reuse vecdb_url so the encoder always targets the same endpoint as the client.
text_encoder = ViText2Vec(username, api_key, vecdb_url)
Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.
[78]:
#3. insert the documents and encode the 'question' text
if use_jobs:
    # Job route: insert the raw documents, start an encoding job on the
    # 'question' field, then wait on it through wait_till_jobs_complete.
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_text_job(collection_name, 'question')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
else:
    # Direct route: pass the text encoder as the model for the 'question' field
    # so insertion and encoding happen in one call.
    vi_client.insert_documents(collection_name, documents, models={'question':text_encoder.encode})

[78]:
{'inserted_successfully': 10570, 'failed': 0, 'failed_document_ids': []}
{'status': 'Finished'}
[78]:
'Done'
[79]:
#4. search: encode a free-text query and search the 'question_vector_' field
search_results = vi_client.search(collection_name,
                                   text_encoder.encode('who was the winner for nfl fifty'),
                                   'question_vector_', page_size=5)

#4.2 the query is free text rather than a stored document, so the top hit is
# the closest stored question, not the query itself
vi_client.results_to_df(search_results)
[79]:
_id question answers context insert_date_ id title _search_score
0 11 Who won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-09-20T14:17:12.051093 56beace93aeaaa14008c91df Super_Bowl_50 0.798744
1 24 Who won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-09-20T14:17:12.352544 56d20362e7d4791d009025eb Super_Bowl_50 0.798744
2 3 Which NFL team won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-09-20T14:17:12.051093 56be4db0acb8001400a502ef Super_Bowl_50 0.763209
3 55 Who was the Super Bowl 50 MVP? {'answer_start': [248, 248, 252], 'text': ['Vo... The Broncos took an early lead in Super Bowl 5... 2020-09-20T14:17:12.741881 56be4eafacb8001400a50302 Super_Bowl_50 0.754090
4 26 Which team won Super Bowl 50. {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-09-20T14:17:12.352544 56d600e31c85041400946eb0 Super_Bowl_50 0.742759
[80]:
#5 recommendation by id: find the questions closest to the document at index 50
search_by_id_results = vi_client.search_by_id(collection_name, documents[50]['_id'], 'question_vector_', page_size=5)

#5.2 first result is the id's text itself
vi_client.results_to_df(search_by_id_results)
[80]:
_id question answers context insert_date_ id title _search_score
0 50 Who did Denver beat in the 2015 AFC Championsh... {'answer_start': [372, 368, 372], 'text': ['Ne... The Panthers finished the regular season with ... 2020-09-20T14:17:12.741881 56d6017d1c85041400946ec1 Super_Bowl_50 1.000000
1 48 Who did Denver beat in the AFC championship? {'answer_start': [372, 368, 372], 'text': ['Ne... The Panthers finished the regular season with ... 2020-09-20T14:17:12.741881 56d2045de7d4791d009025f6 Super_Bowl_50 0.960072
2 331 Who did the Broncos beat to win their division... {'answer_start': [25, 25, 36], 'text': ['Pitts... The Broncos defeated the Pittsburgh Steelers i... 2020-09-20T14:17:16.565142 56d99f99dc89441400fdb628 Super_Bowl_50 0.923735
3 330 Who did the Broncos defeat in the AFC Champion... {'answer_start': [192, 192, 204], 'text': ['Ne... The Broncos defeated the Pittsburgh Steelers i... 2020-09-20T14:17:16.565142 56d7018a0d65d214001982c5 Super_Bowl_50 0.915792
4 328 Who did the Broncos beat in the divisional game? {'answer_start': [25, 21, 36], 'text': ['Pitts... The Broncos defeated the Pittsburgh Steelers i... 2020-09-20T14:17:16.370009 56d7018a0d65d214001982c2 Super_Bowl_50 0.906187
[81]:
#6 hybrid search combining traditional and nlp vector search
# NOTE(review): the text query ('Peyton Men passing') and the encoded query
# ('Peyton Men') differ -- confirm this is intentional.
# Presumably the first list names the vector fields and the second the text
# fields to search, and traditional_weight scales the traditional-search
# score -- verify against the hybrid_search documentation.
search_results = vi_client.hybrid_search(collection_name, 'Peyton Men passing',
                                          text_encoder.encode('Peyton Men'),
                                          ['question_vector_'], ['question'],
                                          traditional_weight=0.015,
                                          page_size=5)
vi_client.results_to_df(search_results)
[81]:
_id question answers context insert_date_ id title _search_score
0 258 How old was Peyton Manning in 2015? {'answer_start': [817, 817, 817], 'text': ['39... Following their loss in the divisional round o... 2020-09-20T14:17:15.618332 56bf301c3aeaaa14008c9550 Super_Bowl_50 0.630390
1 276 How may yards did Peyton Manning throw? {'answer_start': [77, 77, 77], 'text': ['2,249... Manning finished the year with a career-low 67... 2020-09-20T14:17:15.813571 56bf38383aeaaa14008c956c Super_Bowl_50 0.623954
2 270 What was Peyton Manning's passer rating for th... {'answer_start': [44, 44, 44], 'text': ['67.9'... Manning finished the year with a career-low 67... 2020-09-20T14:17:15.813571 56beb57b3aeaaa14008c9279 Super_Bowl_50 0.608059
3 252 Who did Peyton Manning play for as a rookie? {'answer_start': [641, 637, 654], 'text': ['In... Following their loss in the divisional round o... 2020-09-20T14:17:15.435333 56beb4e43aeaaa14008c9267 Super_Bowl_50 0.603111
4 356 Peyton Manning took how many different teams t... {'answer_start': [57, 57, 57, 57], 'text': ['t... Peyton Manning became the first quarterback ev... 2020-09-20T14:17:16.763268 56d704430d65d214001982de Super_Bowl_50 0.602743
[ ]: