# List of documents.
# Each inner list represents one document, and each document contains
# one or more sentences.
documents = [
    ["Hello world", "Hello Python"],
    ["Python is great", "Hello again"],
    ["Enjoy coding in Python"],
]


def build_inverted_index(docs):
    """Build an inverted index for a collection of documents.

    Args:
        docs: An iterable of documents, where each document is an iterable
            of sentence strings. A document's ID is its position in ``docs``.

    Returns:
        A dict mapping each lowercased word to the list of document IDs in
        which it appears. IDs are in ascending order with no duplicates, and
        keys are in order of first appearance.
    """
    inverted = {}
    for doc_id, sentences in enumerate(docs):
        # Join all sentences of the document into one string, lowercase it so
        # that "Hello" and "hello" share an entry, then split on whitespace.
        words = " ".join(sentences).lower().split()
        # dict.fromkeys drops repeated words while keeping first-seen order,
        # so each word is recorded once per document without rescanning the
        # list of document IDs already stored for it.
        for word in dict.fromkeys(words):
            inverted.setdefault(word, []).append(doc_id)
    return inverted


# Inverted index for the sample documents:
# key -> word, value -> list of document IDs where the word appears.
index = build_inverted_index(documents)

# Print the final inverted index
print(index)
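# Output (print() emits the dict on a single line; it is wrapped here
# for readability):
# {
#     'hello': [0, 1],
#     'world': [0],
#     'python': [0, 1, 2],
#     'is': [1],
#     'great': [1],
#     'again': [1],
#     'enjoy': [2],
#     'coding': [2],
#     'in': [2]
# }
# =========================================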
# Input data: a list of (name, score) tuples
data = [('Alice', 88), ('Bob', 72), ('Alice', 91), ('Bob', 85)]


def group_scores(data):
    """Group scores by name.

    Args:
        data: An iterable of (name, score) pairs.

    Returns:
        A dict mapping each name to the list of its scores, in the order the
        scores appear in ``data``. Names are keyed in order of first
        appearance. Empty input yields an empty dict.
    """
    result = {}
    for name, score in data:
        # setdefault creates the empty list the first time a name is seen and
        # returns the existing list afterwards, so a single lookup suffices.
        result.setdefault(name, []).append(score)
    return result


# Call the function and print the result
print(group_scores(data))
# Output (print() emits the dict on a single line; it is wrapped here
# for readability):
# {
#     'Alice': [88, 91],
#     'Bob': [72, 85]
# }
# ============================================
No comments:
Post a Comment