from pykeen import datasets
# Demo: load the Wikidata5M knowledge-graph dataset via pykeen.
dataset = datasets.Wikidata5M()
# NOTE(review): `verbalizer` is not defined in this chunk — presumably it is
# built elsewhere from `dataset` plus the id->text mappings loaded below.
# The call's return value is discarded here; the expected output is shown
# in the comment underneath (a "[SEP]"-joined verbalized neighbourhood).
verbalizer.verbalize("Q1053299", "P159")
# Output: 'predict [SEP] CRH headquarters location [SEP] inverse of owned by Ancon 0 [SEP] instance of business [SEP] legal form joint-stock company [SEP] stock exchange London Stock Exchange [SEP] stock exchange New York Stock Exchange [SEP]'
# Relation id -> human-readable label mapping, loaded from a TSV file
# (one "<id>\t<label>" pair per line; any extra columns are ignored).
# Fixes: the `with`/`for` bodies were not indented (SyntaxError as pasted),
# and the loop variable shadowed the builtin `id`.
relations_path = 'demo_mappings/relation_mapping.txt'
relation_mapping = {}
with open(relations_path, 'r') as f:
    for line in tqdm(f):
        fields = line.strip().split('\t')
        rel_id, rel_name = fields[0], fields[1]
        relation_mapping[rel_id] = rel_name
# Entity id -> human-readable label mapping, loaded from a TSV file
# (one "<id>\t<label>" pair per line; any extra columns are ignored).
# Fixes: the `with`/`for` bodies were not indented (SyntaxError as pasted),
# and the loop variable shadowed the builtin `id`.
entities_path = "demo_mappings/entity_mapping.txt"
entity_mapping = {}
with open(entities_path, "r") as f:
    for line in tqdm(f):
        fields = line.strip().split("\t")
        ent_id, ent_name = fields[0], fields[1]
        entity_mapping[ent_id] = ent_name
# Collect the 1-hop neighbourhood of `node_id`: triples where it appears as
# head (outgoing edges) and triples where it appears as tail (incoming edges).
direct_neighbors = self.base_dataset[self.base_dataset["head"] == node_id]
inverse_neighbors = self.base_dataset[self.base_dataset["tail"] == node_id]
# Exclude the query triple itself from both views. A row is the query triple
# only when BOTH its endpoint and its relation match, so we drop exactly
# those rows (De Morgan-equivalent of the original OR of inequalities).
query_edge_mask = (direct_neighbors["tail"] == tail_id) & (
    direct_neighbors["relation"] == relation_id
)
direct_neighbors = direct_neighbors[~query_edge_mask]
query_inverse_mask = (inverse_neighbors["head"] == tail_id) & (
    inverse_neighbors["relation"] == relation_id
)
inverse_neighbors = inverse_neighbors[~query_inverse_mask]
# Build the relation vocabulary and a relation-to-relation similarity matrix.
# Bug fix: the original embedded `relations` (L31) two lines BEFORE defining
# it (L33), which raises NameError — the vocabulary must be built first.
# NOTE(review): `set(...)` iteration order varies between runs, so index
# assignments (and the matrix layout) are only stable within one process.
relations = list(set(relation_mapping.values()))
# Sentence-level fastText embeddings, one vector per relation label.
fasttext_emb = list(map(lambda x: model_en.get_sentence_vector(x), relations))
# Pairwise cosine similarity between all relation embeddings.
similarity_matrix = cosine_similarity(fasttext_emb)
# Direct and "inverse of <rel>" forms share the same row/column index, so
# both directions of an edge map onto the same similarity entry.
direct_relation2ind = {rel: i for i, rel in enumerate(relations)}
inverse_relation2ind = {"inverse of " + rel: i for i, rel in enumerate(relations)}
relation2ind = {**direct_relation2ind, **inverse_relation2ind}
# Rank the neighbourhood so that triples whose relation is most similar to
# the query relation come first, then keep only the top 512 entries.
target_idx = self.relation2index[relation]
neighborhood.sort(
    key=lambda entry: self.similarity_matrix[
        self.relation2index[entry["relation"]]
    ][target_idx],
    reverse=True,
)
neighborhood = neighborhood[:512]
# Encode the verbalized neighbourhood for the model as PyTorch tensors.
# NOTE(review): the encoding is not assigned to anything here — presumably a
# demo snippet; confirm the result is captured in the real pipeline.
# NOTE(review): `max_length=512` only truncates if `truncation=True` is also
# passed (possibly arriving via `encode_plus_kwargs`) — verify.
tokenizer(verbalization, max_length=512, return_tensors='pt',
**encode_plus_kwargs)
# Zero-shot prompt: ask the chat model to predict the tail entity of an
# incomplete (head, relation, ?) triple.
# Bug fix: in a knowledge-graph triple the head is the *subject* and the
# tail is the *object*; the original prompt had the two labels swapped.
system_prompt = (
    "You will be provided with an incomplete triplet from the Wikidata knowledge graph. "
    "Your task is to complete the triplet with a tail (object) based on the given triplet head (subject) and relation. "
    "Your answer should only include the tail of the triplet."
)
# Input strings look like "predict [SEP] <head> <relation> [SEP] <context>...".
# Slicing [1:2] keeps only the "<head> <relation>" segment (and yields an
# empty string rather than raising if the segment is missing); any stray
# "[SEP]" markers are then removed.
query = (
    " ".join(wikidata_test["input_strings"][i].split(" [SEP] ")[1:2])
    .replace("[SEP]", "")
    .strip()
)
user_text = f"{query}. Tail: "
# Neighbourhood-augmented prompt: same tail-prediction task, but the model
# also receives the head entity's adjacent (relation, entity) pairs.
# Bug fix: head = subject, tail = object — the original labels were swapped,
# and the final sentence called the requested tail a "subject".
system_prompt = (
    "You will be provided with an incomplete triplet from the Wikidata knowledge graph. "
    "Your task is to complete the triplet with a tail (object) based on the given triplet head (subject) and relation. "
    "Your answer should only include the tail of the triplet. "
    "To help you with the task you will be provided with adjacent relations and entities. The requested tail may not be among the provided adjacent entities."
)
# "<head> <relation>" segment of the verbalization (see the zero-shot variant
# above for the input-string layout).
query = (
    " ".join(wikidata_test["input_strings"][i].split(" [SEP] ")[1:2])
    .replace("[SEP]", "")
    .strip()
)
# Everything after the second "[SEP]" is neighbourhood context; re-join with
# "; " and turn any remaining trailing " [SEP]" marker into ";".
neigh = "; ".join(wikidata_test["input_strings"][i].split(" [SEP] ")[2:]).replace(
    " [SEP]", ";"
)
user_text = f"Adjacent relations: {neigh}\nTriplet to complete: {query}. Tail: "
# Sampling parameters for the chat-completion request.
params = {
    "max_tokens": 10,  # tail entity labels are short; cap the generation
    "temperature": 1.0,
    "top_p": 1,
    "n": 3,  # draw 3 candidate completions per query
    "stream": False,
    "stop": None,
}
# NOTE(review): `openai.ChatCompletion.create` is the legacy (<1.0) OpenAI
# SDK interface; in openai>=1.0 this becomes
# `client.chat.completions.create(...)` — confirm which SDK version is pinned.
# NOTE(review): the response object is not assigned here — presumably it is
# captured in the surrounding (unseen) code; verify.
openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_text},
    ],
    **params,
)