c85c0068a2
- Replace all datetime.utcnow() with datetime.now(tz=timezone.utc) across 8 files - Fix 12 failing tests to match current implementation behavior - Fix pytest_plugins in non-top-level conftest (moved to root conftest.py) - Auto-fix 189 lint issues (import sorting, unused imports) - Add CI/CD pipeline infrastructure (ARC, ArgoCD, Kargo manifests) - Add values-beta.yaml and values-paper.yaml for staged deployments - Update GitHub Actions workflow to use self-hosted-gremlin runners - Add integration-test job to CI pipeline Result: 1596 passed, 0 failed, 0 warnings
163 lines
8.1 KiB
Python
163 lines
8.1 KiB
Python
"""Seed MinIO buckets with sample normalized text files for integration tests.
|
||
|
||
Uploads synthetic normalized text corresponding to documents seeded by
|
||
seed_sandbox.py. Each file is keyed by content_hash so the query API and
|
||
other services can locate them in the stonks-normalized bucket.
|
||
|
||
Usage:
|
||
python -m tests.integration.seed_minio
|
||
|
||
Environment variables:
|
||
MINIO_ENDPOINT (default: minio:9000)
|
||
MINIO_ACCESS_KEY (default: minioadmin)
|
||
MINIO_SECRET_KEY (default: minioadmin)
|
||
MINIO_SECURE (default: false)
|
||
"""
|
||
|
||
import os
|
||
from io import BytesIO
|
||
|
||
from minio import Minio
|
||
|
||
# Name of the MinIO bucket that seed_minio() checks for, creates if missing,
# and uploads the sample normalized text objects into.
BUCKET = "stonks-normalized"
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Sample normalized text content keyed by content_hash.
|
||
# These hashes match the documents inserted by seed_sandbox.py (DOC_01–DOC_10).
|
||
# We seed at least 5 files covering a mix of news, filings, and macro events.
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Maps content_hash -> normalized document text. seed_minio() uploads each
# entry as the object "<content_hash>.txt". Every text is three paragraphs
# separated by a blank line ("\n\n"), assembled by implicit string
# concatenation.
# NOTE(review): hash_doc_04, hash_doc_06 and hash_doc_07 have no entry here;
# presumably intentional, since only a subset of documents is seeded -- confirm.
NORMALIZED_TEXTS: dict[str, str] = {
    # Apple fourth-quarter earnings.
    "hash_doc_01": (
        "Apple Inc reported fourth-quarter earnings that exceeded Wall Street "
        "expectations, driven by stronger-than-anticipated iPhone sales across "
        "all major markets. Revenue for the quarter came in at $89.5 billion, "
        "up 6 percent year over year, while earnings per share reached $1.46.\n\n"
        "Services revenue continued its upward trajectory, hitting a new record "
        "of $22.3 billion. Management highlighted growth in Apple TV+ subscribers "
        "and the expanding installed base of over 2.2 billion active devices.\n\n"
        "Greater China revenue declined 2 percent amid a competitive smartphone "
        "landscape, though management expressed confidence in the region's "
        "long-term trajectory. Gross margin expanded to 46.2 percent, reflecting "
        "favorable product mix and supply chain efficiencies."
    ),
    # Microsoft Azure cloud revenue growth.
    "hash_doc_02": (
        "Microsoft Corporation reported a 29 percent year-over-year increase in "
        "Azure cloud revenue, surpassing analyst estimates and reinforcing the "
        "company's position as a leading cloud infrastructure provider.\n\n"
        "Total Intelligent Cloud segment revenue reached $25.9 billion for the "
        "quarter. CEO Satya Nadella attributed the acceleration to enterprise "
        "adoption of AI workloads running on Azure, including OpenAI-powered "
        "services integrated into Microsoft 365 Copilot.\n\n"
        "Operating income for the segment grew 23 percent, with margins "
        "expanding despite increased capital expenditure on data center "
        "capacity. The company guided for continued double-digit Azure growth "
        "in the coming quarter."
    ),
    # JPMorgan Chase annual 10-K filing.
    "hash_doc_03": (
        "JPMorgan Chase & Co filed its annual 10-K report with the Securities "
        "and Exchange Commission, disclosing record full-year net income of "
        "$49.6 billion. The filing detailed strong performance across all major "
        "business lines.\n\n"
        "Investment banking fees rose 18 percent for the year, driven by a "
        "rebound in equity and debt underwriting activity. The consumer banking "
        "division reported net interest income of $89.3 billion, benefiting "
        "from the elevated rate environment.\n\n"
        "The filing noted credit provisions of $9.8 billion, reflecting a "
        "cautious outlook on consumer credit quality. Total assets stood at "
        "$3.9 trillion, with a Common Equity Tier 1 ratio of 15.0 percent, "
        "well above regulatory minimums."
    ),
    # Exxon Mobil Permian Basin production increase.
    "hash_doc_05": (
        "Exxon Mobil Corporation announced a 15 percent increase in Permian "
        "Basin production output, reaching 620,000 barrels of oil equivalent "
        "per day. The expansion was attributed to improved drilling efficiency "
        "and the integration of Pioneer Natural Resources assets.\n\n"
        "Total upstream production for the quarter averaged 3.7 million barrels "
        "of oil equivalent per day. Management reiterated its target of "
        "achieving 4.0 million barrels per day by year-end through organic "
        "growth and operational optimization.\n\n"
        "Downstream margins remained under pressure due to elevated refining "
        "costs and softer demand in European markets. The company maintained "
        "its quarterly dividend at $0.95 per share."
    ),
    # Federal Reserve rate decision (January 2025 policy meeting).
    "hash_doc_08": (
        "The Federal Reserve held its benchmark interest rate steady at the "
        "5.25 to 5.50 percent range following its January 2025 policy meeting, "
        "in line with market expectations. The decision was unanimous among "
        "voting members of the Federal Open Market Committee.\n\n"
        "In the accompanying statement, the committee acknowledged continued "
        "progress on inflation, with the core Personal Consumption Expenditures "
        "index declining to 2.6 percent. Chair Jerome Powell signaled that rate "
        "cuts could begin as early as the second quarter if disinflation trends "
        "persist.\n\n"
        "Treasury yields fell modestly following the announcement, with the "
        "10-year note declining 5 basis points to 4.12 percent. Equity markets "
        "rallied on the dovish forward guidance, with the S&P 500 gaining "
        "0.8 percent in after-hours trading."
    ),
    # US-China trade tensions over proposed semiconductor tariffs.
    "hash_doc_09": (
        "Trade tensions between the United States and China escalated after "
        "the White House proposed a new round of tariffs targeting advanced "
        "semiconductor equipment and AI-related technology exports. The "
        "proposed measures would expand existing restrictions on chip "
        "manufacturing tools.\n\n"
        "Beijing responded with a statement warning of retaliatory measures "
        "on US agricultural and energy exports. Analysts noted that the "
        "escalation could disrupt supply chains for major technology companies "
        "with significant manufacturing operations in China.\n\n"
        "Shares of semiconductor equipment makers declined 3 to 5 percent in "
        "pre-market trading. Apple, which assembles the majority of its "
        "iPhones in China, saw its stock dip 1.2 percent on concerns about "
        "potential supply chain disruptions."
    ),
    # JPMorgan Chase fourth-quarter investment banking fees.
    "hash_doc_10": (
        "JPMorgan Chase reported a 20 percent increase in investment banking "
        "fees for the fourth quarter, driven by a surge in mergers and "
        "acquisitions advisory revenue and a recovery in initial public "
        "offering activity.\n\n"
        "The bank advised on several high-profile transactions during the "
        "quarter, including three deals valued at over $10 billion each. "
        "Equity capital markets revenue doubled compared to the prior year "
        "period as IPO volumes returned to pre-pandemic levels.\n\n"
        "Fixed income trading revenue rose 8 percent, supported by elevated "
        "volatility in interest rate and credit markets. The bank's total "
        "markets revenue reached $7.1 billion for the quarter, exceeding "
        "consensus estimates by approximately 5 percent."
    ),
}
|
||
|
||
|
||
def seed_minio() -> None:
    """Push every NORMALIZED_TEXTS entry into the BUCKET bucket on MinIO.

    Connection settings are read from the environment, falling back to the
    defaults shown:

    * MINIO_ENDPOINT   -- "minio:9000"
    * MINIO_ACCESS_KEY -- "minioadmin"
    * MINIO_SECRET_KEY -- "minioadmin"
    * MINIO_SECURE     -- "false"; TLS is enabled only when the value equals
      "true", compared case-insensitively.

    The bucket is created when it does not exist yet. Each text is then
    UTF-8 encoded and stored as ``<content_hash>.txt`` with content type
    ``text/plain``. One progress line is printed per object, followed by a
    summary line. Errors raised by the MinIO client are not caught here.
    """
    env = os.environ
    use_tls = env.get("MINIO_SECURE", "false").lower() == "true"
    client = Minio(
        env.get("MINIO_ENDPOINT", "minio:9000"),
        access_key=env.get("MINIO_ACCESS_KEY", "minioadmin"),
        secret_key=env.get("MINIO_SECRET_KEY", "minioadmin"),
        secure=use_tls,
    )

    # The minio-bucket-init Job should already have created the bucket;
    # create it here as a fallback so the script also works standalone.
    bucket_present = client.bucket_exists(BUCKET)
    if not bucket_present:
        client.make_bucket(BUCKET)

    # Pre-set so the summary line reports 0 when there is nothing to upload.
    count = 0
    for count, (content_hash, text) in enumerate(NORMALIZED_TEXTS.items(), start=1):
        object_name = f"{content_hash}.txt"
        payload = text.encode("utf-8")
        stream = BytesIO(payload)
        client.put_object(
            BUCKET,
            object_name,
            stream,
            length=len(payload),
            content_type="text/plain",
        )
        print(f" uploaded {BUCKET}/{object_name} ({len(payload)} bytes)")

    print(f"Seeded {count} normalized text files into {BUCKET}.")
|
||
|
||
|
||
# Script entry point: seed the bucket when the module is executed directly,
# e.g. via `python -m tests.integration.seed_minio`.
if __name__ == "__main__":
    seed_minio()
|