import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Toy labelled corpus: each entry pairs an email body with its category label.
# Keeping text and label side by side makes it hard to mis-align the columns.
_LABELLED_EMAILS = [
    ('This is a sample email about category1.', 'category1'),
    ('Another example related to category2.', 'category2'),
    ('Email with no specific category.', 'others'),
    ('A category1 email with more details.', 'category1'),
    ('Sample message for category3.', 'category3'),
    ('Uncategorized email.', 'others'),
    ('Email for category2.', 'category2'),
    ('Category1 message.', 'category1'),
]

# Column-oriented view of the corpus: one list of texts, one list of labels.
data = {
    'email': [text for text, _ in _LABELLED_EMAILS],
    'category': [label for _, label in _LABELLED_EMAILS],
}

# Two-column frame ('email', 'category') with one row per labelled email.
df = pd.DataFrame(data)

# Hold out 20% of the rows as a test set; random_state pins the shuffle.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

# TF-IDF features with English stop words removed. The vectorizer is fitted
# on the training emails only; the test emails are transformed with that
# same fitted vectorizer.
vectorizer = TfidfVectorizer(stop_words='english')
train_emails = train_data['email']
test_emails = test_data['email']
X_train = vectorizer.fit_transform(train_emails)
X_test = vectorizer.transform(test_emails)

# Fit a multinomial Naive Bayes model on the training features and labels
# (fit() returns the estimator itself, so construction and fitting chain).
classifier = MultinomialNB().fit(X_train, train_data['category'])

# Predict a category for each held-out email (no metric is computed here).
predictions = classifier.predict(X_test)

# Side-by-side comparison for the held-out rows: the email text, the label it
# actually carries, and the label the classifier predicted for it.
comparison_columns = {
    'email': test_data['email'],
    'true_category': test_data['category'],
    'predicted_category': predictions,
}
result_df = pd.DataFrame(comparison_columns)

# Show the comparison table on stdout.
print(result_df)